From d13621e63112ddf6417dac05f2c96ada1e9feb25 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 25 Mar 2024 16:50:48 +0100 Subject: [PATCH 001/292] Skip `CloseFileHandles` if file is not in use --- Source/Engine/Content/Storage/FlaxStorage.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Content/Storage/FlaxStorage.cpp b/Source/Engine/Content/Storage/FlaxStorage.cpp index 74b114f52..674330f70 100644 --- a/Source/Engine/Content/Storage/FlaxStorage.cpp +++ b/Source/Engine/Content/Storage/FlaxStorage.cpp @@ -1336,6 +1336,11 @@ FileReadStream* FlaxStorage::OpenFile() bool FlaxStorage::CloseFileHandles() { + // Early out if no handles are opened + Array streams; + _file.GetValues(streams); + if (streams.IsEmpty() && Platform::AtomicRead(&_chunksLock) == 0) + return false; PROFILE_CPU(); // Note: this is usually called by the content manager when this file is not used or on exit @@ -1367,7 +1372,7 @@ bool FlaxStorage::CloseFileHandles() return true; // Failed, someone is still accessing the file // Close file handles (from all threads) - Array streams; + streams.Clear(); _file.GetValues(streams); for (FileReadStream* stream : streams) { From 0e00f1e0eb986e663f0f877588afeb51af2b8aed Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 25 Mar 2024 17:13:40 +0100 Subject: [PATCH 002/292] Refactor lights data in renderer storage --- .../Engine/Level/Actors/DirectionalLight.cpp | 5 +- Source/Engine/Level/Actors/PointLight.cpp | 2 +- Source/Engine/Level/Actors/SkyLight.cpp | 2 +- Source/Engine/Level/Actors/SpotLight.cpp | 2 +- .../Graph/CPU/ParticleEmitterGraph.CPU.cpp | 2 +- Source/Engine/Renderer/RenderList.cpp | 8 +- Source/Engine/Renderer/RenderList.h | 101 ++++-------------- Source/Engine/Renderer/ShadowsPass.cpp | 18 ++-- Source/Engine/Renderer/ShadowsPass.h | 18 ++-- Source/Engine/Renderer/VolumetricFogPass.cpp | 8 +- Source/Engine/Renderer/VolumetricFogPass.h | 8 +- 11 files changed, 58 insertions(+), 116 deletions(-) 
diff --git a/Source/Engine/Level/Actors/DirectionalLight.cpp b/Source/Engine/Level/Actors/DirectionalLight.cpp index 2059fa08e..f44375074 100644 --- a/Source/Engine/Level/Actors/DirectionalLight.cpp +++ b/Source/Engine/Level/Actors/DirectionalLight.cpp @@ -24,7 +24,7 @@ void DirectionalLight::Draw(RenderContext& renderContext) && EnumHasAnyFlags(renderContext.View.Pass, DrawPass::GBuffer) && (ViewDistance < ZeroTolerance || Float3::DistanceSquared(renderContext.View.Position, position) < ViewDistance * ViewDistance)) { - RendererDirectionalLightData data; + RenderDirectionalLightData data; data.Position = position; data.MinRoughness = MinRoughness; data.ShadowsDistance = ShadowsDistance; @@ -45,7 +45,6 @@ void DirectionalLight::Draw(RenderContext& renderContext) data.Cascade2Spacing = Cascade2Spacing; data.Cascade3Spacing = Cascade3Spacing; data.Cascade4Spacing = Cascade4Spacing; - data.PartitionMode = PartitionMode; data.ContactShadowsLength = ContactShadowsLength; data.StaticFlags = GetStaticFlags(); @@ -66,7 +65,6 @@ void DirectionalLight::Serialize(SerializeStream& stream, const void* otherObj) SERIALIZE(Cascade2Spacing); SERIALIZE(Cascade3Spacing); SERIALIZE(Cascade4Spacing); - SERIALIZE(PartitionMode); } @@ -80,7 +78,6 @@ void DirectionalLight::Deserialize(DeserializeStream& stream, ISerializeModifier DESERIALIZE(Cascade2Spacing); DESERIALIZE(Cascade3Spacing); DESERIALIZE(Cascade4Spacing); - DESERIALIZE(PartitionMode); } diff --git a/Source/Engine/Level/Actors/PointLight.cpp b/Source/Engine/Level/Actors/PointLight.cpp index ceb214922..0a4f607ae 100644 --- a/Source/Engine/Level/Actors/PointLight.cpp +++ b/Source/Engine/Level/Actors/PointLight.cpp @@ -89,7 +89,7 @@ void PointLight::Draw(RenderContext& renderContext) && radius > ZeroTolerance && (ViewDistance < ZeroTolerance || Vector3::DistanceSquared(renderContext.View.Position, position) < ViewDistance * ViewDistance)) { - RendererPointLightData data; + RenderPointLightData data; data.Position = position; 
data.MinRoughness = MinRoughness; data.ShadowsDistance = ShadowsDistance; diff --git a/Source/Engine/Level/Actors/SkyLight.cpp b/Source/Engine/Level/Actors/SkyLight.cpp index 3bde95f98..67116fa13 100644 --- a/Source/Engine/Level/Actors/SkyLight.cpp +++ b/Source/Engine/Level/Actors/SkyLight.cpp @@ -113,7 +113,7 @@ void SkyLight::Draw(RenderContext& renderContext) && brightness > ZeroTolerance && (ViewDistance < ZeroTolerance || Vector3::DistanceSquared(renderContext.View.Position, position) < ViewDistance * ViewDistance)) { - RendererSkyLightData data; + RenderSkyLightData data; data.Position = position; data.Color = Color.ToFloat3() * (Color.A * brightness); data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index d51df584b..a80c012a5 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -139,7 +139,7 @@ void SpotLight::Draw(RenderContext& renderContext) && outerConeAngle > ZeroTolerance && (ViewDistance < ZeroTolerance || Vector3::DistanceSquared(renderContext.View.Position, position) < ViewDistance * ViewDistance)) { - RendererSpotLightData data; + RenderSpotLightData data; data.Position = position; data.MinRoughness = MinRoughness; data.ShadowsDistance = ShadowsDistance; diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp index d6ba5cea3..9c847fd91 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp @@ -388,7 +388,7 @@ void ParticleEmitterGraphCPUExecutor::Draw(ParticleEmitter* emitter, ParticleEff const auto module = emitter->Graph.LightModules[moduleIndex]; ASSERT(module->TypeID == 401); - RendererPointLightData lightData; + RenderPointLightData lightData; lightData.MinRoughness = 0.04f; lightData.ShadowsDistance = 
2000.0f; lightData.ShadowsStrength = 1.0f; diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 577d58255..32474a143 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -39,7 +39,7 @@ namespace CriticalSection MemPoolLocker; } -void RendererDirectionalLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderDirectionalLightData::SetupLightData(LightData* data, bool useShadow) const { data->SpotAngles.X = -2.0f; data->SpotAngles.Y = 1.0f; @@ -56,7 +56,7 @@ void RendererDirectionalLightData::SetupLightData(LightData* data, bool useShado data->RadiusInv = 0; } -void RendererSpotLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderSpotLightData::SetupLightData(LightData* data, bool useShadow) const { data->SpotAngles.X = CosOuterCone; data->SpotAngles.Y = InvCosConeDifference; @@ -73,7 +73,7 @@ void RendererSpotLightData::SetupLightData(LightData* data, bool useShadow) cons data->RadiusInv = 1.0f / Radius; } -void RendererPointLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderPointLightData::SetupLightData(LightData* data, bool useShadow) const { data->SpotAngles.X = -2.0f; data->SpotAngles.Y = 1.0f; @@ -90,7 +90,7 @@ void RendererPointLightData::SetupLightData(LightData* data, bool useShadow) con data->RadiusInv = 1.0f / Radius; } -void RendererSkyLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderSkyLightData::SetupLightData(LightData* data, bool useShadow) const { data->SpotAngles.X = AdditiveColor.X; data->SpotAngles.Y = AdditiveColor.Y; diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 8d90be2e4..74b4bdf3d 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -22,8 +22,10 @@ class CubeTexture; struct RenderContext; struct RenderContextBatch; -struct RendererDirectionalLightData +struct 
RenderLightData { + Guid ID; + Float3 Position; float MinRoughness; @@ -36,49 +38,35 @@ struct RendererDirectionalLightData float ShadowsNormalOffsetScale; float ShadowsDepthBias; float ShadowsSharpness; - float VolumetricScatteringIntensity; + float ShadowsDistance; StaticFlags StaticFlags; + ShadowsCastingMode ShadowsMode; float IndirectLightingIntensity; int16 ShadowDataIndex = -1; uint8 CastVolumetricShadow : 1; uint8 RenderedVolumetricFog : 1; - float ShadowsDistance; + float VolumetricScatteringIntensity; + float ContactShadowsLength; +}; + +struct RenderDirectionalLightData : RenderLightData +{ + PartitionMode PartitionMode; int32 CascadeCount; + float Cascade1Spacing; float Cascade2Spacing; float Cascade3Spacing; float Cascade4Spacing; - PartitionMode PartitionMode; - float ContactShadowsLength; - ShadowsCastingMode ShadowsMode; - - Guid ID; - void SetupLightData(LightData* data, bool useShadow) const; }; -struct RendererSpotLightData +struct RenderSpotLightData : RenderLightData { - Float3 Position; - float MinRoughness; - - Float3 Color; - float ShadowsStrength; - - Float3 Direction; - float ShadowsFadeDistance; - - float ShadowsNormalOffsetScale; - float ShadowsDepthBias; - float ShadowsSharpness; - float VolumetricScatteringIntensity; - - float ShadowsDistance; float Radius; - float FallOffExponent; float SourceRadius; Float3 UpVector; @@ -86,77 +74,34 @@ struct RendererSpotLightData float CosOuterCone; float InvCosConeDifference; - float ContactShadowsLength; - float IndirectLightingIntensity; - ShadowsCastingMode ShadowsMode; - - StaticFlags StaticFlags; - int16 ShadowDataIndex = -1; - uint8 CastVolumetricShadow : 1; - uint8 RenderedVolumetricFog : 1; + float FallOffExponent; uint8 UseInverseSquaredFalloff : 1; GPUTexture* IESTexture; - Guid ID; void SetupLightData(LightData* data, bool useShadow) const; }; -struct RendererPointLightData +struct RenderPointLightData : RenderLightData { - Float3 Position; - float MinRoughness; - - Float3 Color; - float 
ShadowsStrength; - - Float3 Direction; - float ShadowsFadeDistance; - - float ShadowsNormalOffsetScale; - float ShadowsDepthBias; - float ShadowsSharpness; - float VolumetricScatteringIntensity; - - float ShadowsDistance; float Radius; - float FallOffExponent; float SourceRadius; + float FallOffExponent; float SourceLength; - float ContactShadowsLength; - float IndirectLightingIntensity; - ShadowsCastingMode ShadowsMode; - - StaticFlags StaticFlags; - int16 ShadowDataIndex = -1; - uint8 CastVolumetricShadow : 1; - uint8 RenderedVolumetricFog : 1; uint8 UseInverseSquaredFalloff : 1; GPUTexture* IESTexture; - Guid ID; void SetupLightData(LightData* data, bool useShadow) const; }; -struct RendererSkyLightData +struct RenderSkyLightData : RenderLightData { - Float3 Position; - float VolumetricScatteringIntensity; - - Float3 Color; + Float3 AdditiveColor; float Radius; - Float3 AdditiveColor; - float IndirectLightingIntensity; - - StaticFlags StaticFlags; - uint8 CastVolumetricShadow : 1; - uint8 RenderedVolumetricFog : 1; - CubeTexture* Image; - Guid ID; void SetupLightData(LightData* data, bool useShadow) const; }; @@ -318,22 +263,22 @@ public: /// /// Light pass members - directional lights /// - Array DirectionalLights; + Array DirectionalLights; /// /// Light pass members - point lights /// - Array PointLights; + Array PointLights; /// /// Light pass members - spot lights /// - Array SpotLights; + Array SpotLights; /// /// Light pass members - sky lights /// - Array SkyLights; + Array SkyLights; /// /// Environment probes to use for rendering reflections diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index ebbdfed31..99246bcce 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -219,7 +219,7 @@ void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext shadowContext.Task = renderContext.Task; } -void ShadowsPass::SetupLight(RenderContext& 
renderContext, RenderContextBatch& renderContextBatch, RendererDirectionalLightData& light) +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light) { const RenderView& view = renderContext.View; auto mainCache = renderContext.List; @@ -453,7 +453,7 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r shadowData.Constants.CascadeSplits = view.Near + Float4(cascadeSplits) * cameraRange; } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RendererPointLightData& light) +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light) { // Init shadow data light.ShadowDataIndex = _shadowData.Count(); @@ -493,7 +493,7 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r shadowData.Constants.CascadeSplits = Float4::Zero; } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RendererSpotLightData& light) +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light) { // Init shadow data light.ShadowDataIndex = _shadowData.Count(); @@ -585,7 +585,7 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& } } -bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RendererPointLightData& light) +bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RenderPointLightData& light) { const Float3 lightPosition = light.Position; const float dstLightToView = Float3::Distance(lightPosition, renderContext.View.Position); @@ -597,7 +597,7 @@ bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const Rend return fade > ZeroTolerance && _shadowMapFormat != PixelFormat::Unknown; } -bool ShadowsPass::CanRenderShadow(const RenderContext& 
renderContext, const RendererSpotLightData& light) +bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RenderSpotLightData& light) { const Float3 lightPosition = light.Position; const float dstLightToView = Float3::Distance(lightPosition, renderContext.View.Position); @@ -609,12 +609,12 @@ bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const Rend return fade > ZeroTolerance && _shadowMapFormat != PixelFormat::Unknown; } -bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RendererDirectionalLightData& light) +bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RenderDirectionalLightData& light) { return _shadowMapFormat != PixelFormat::Unknown; } -void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererPointLightData& light, GPUTextureView* shadowMask) +void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderPointLightData& light, GPUTextureView* shadowMask) { if (light.ShadowDataIndex == -1) return; @@ -692,7 +692,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererP VolumetricFogPass::Instance()->RenderLight(renderContext, context, light, _shadowMapCube->ViewArray(), sperLight.LightShadow); } -void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererSpotLightData& light, GPUTextureView* shadowMask) +void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderSpotLightData& light, GPUTextureView* shadowMask) { if (light.ShadowDataIndex == -1) return; @@ -770,7 +770,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererS VolumetricFogPass::Instance()->RenderLight(renderContext, context, light, _shadowMapCube->View(faceIndex), sperLight.LightShadow); } -void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererDirectionalLightData& light, int32 index, GPUTextureView* shadowMask) +void 
ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, int32 index, GPUTextureView* shadowMask) { if (light.ShadowDataIndex == -1) return; diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index a811e44b1..e67abd184 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -107,7 +107,7 @@ public: /// The rendering context. /// The light. /// true if can render shadow for the specified light; otherwise, false. - bool CanRenderShadow(const RenderContext& renderContext, const RendererPointLightData& light); + bool CanRenderShadow(const RenderContext& renderContext, const RenderPointLightData& light); /// /// Determines whether can render shadow for the specified light. @@ -115,7 +115,7 @@ public: /// The rendering context. /// The light. /// true if can render shadow for the specified light; otherwise, false. - bool CanRenderShadow(const RenderContext& renderContext, const RendererSpotLightData& light); + bool CanRenderShadow(const RenderContext& renderContext, const RenderSpotLightData& light); /// /// Determines whether can render shadow for the specified light. @@ -123,7 +123,7 @@ public: /// The rendering context. /// The light. /// true if can render shadow for the specified light; otherwise, false. - bool CanRenderShadow(const RenderContext& renderContext, const RendererDirectionalLightData& light); + bool CanRenderShadow(const RenderContext& renderContext, const RenderDirectionalLightData& light); /// /// Renders the shadow mask for the given light. @@ -131,7 +131,7 @@ public: /// The rendering context batch. /// The light. /// The shadow mask (output). - void RenderShadow(RenderContextBatch& renderContextBatch, RendererPointLightData& light, GPUTextureView* shadowMask); + void RenderShadow(RenderContextBatch& renderContextBatch, RenderPointLightData& light, GPUTextureView* shadowMask); /// /// Renders the shadow mask for the given light. 
@@ -139,7 +139,7 @@ public: /// The rendering context batch. /// The light. /// The shadow mask (output). - void RenderShadow(RenderContextBatch& renderContextBatch, RendererSpotLightData& light, GPUTextureView* shadowMask); + void RenderShadow(RenderContextBatch& renderContextBatch, RenderSpotLightData& light, GPUTextureView* shadowMask); /// /// Renders the shadow mask for the given light. @@ -148,15 +148,15 @@ public: /// The light. /// The light index. /// The shadow mask (output). - void RenderShadow(RenderContextBatch& renderContextBatch, RendererDirectionalLightData& light, int32 index, GPUTextureView* shadowMask); + void RenderShadow(RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, int32 index, GPUTextureView* shadowMask); private: void updateShadowMapSize(); void SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RendererDirectionalLightData& light); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RendererPointLightData& light); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RendererSpotLightData& light); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light); #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index 130c64746..d94bd503b 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -387,7 +387,7 @@ void VolumetricFogPass::RenderRadialLight(RenderContext& 
renderContext, GPUConte } } -void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RendererPointLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow) +void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow) { // Skip lights with no volumetric light influence or not casting volumetric shadow if (light.VolumetricScatteringIntensity <= ZeroTolerance || !light.CastVolumetricShadow) @@ -401,7 +401,7 @@ void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* co context->UnBindSR(5); } -void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RendererSpotLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow) +void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow) { // Skip lights with no volumetric light influence or not casting volumetric shadow if (light.VolumetricScatteringIntensity <= ZeroTolerance || !light.CastVolumetricShadow) @@ -594,8 +594,8 @@ void VolumetricFogPass::Render(RenderContext& renderContext) GPUTextureView* localShadowedLightScattering = nullptr; { // Get lights to render - Array> pointLights; - Array> spotLights; + Array> pointLights; + Array> spotLights; for (int32 i = 0; i < renderContext.List->PointLights.Count(); i++) { const auto& light = renderContext.List->PointLights[i]; diff --git a/Source/Engine/Renderer/VolumetricFogPass.h b/Source/Engine/Renderer/VolumetricFogPass.h index 3f3384fd4..97eb89440 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.h +++ b/Source/Engine/Renderer/VolumetricFogPass.h @@ -8,8 +8,8 @@ #include "GI/DynamicDiffuseGlobalIllumination.h" struct VolumetricFogOptions; -struct RendererSpotLightData; -struct RendererPointLightData; +struct RenderSpotLightData; +struct 
RenderPointLightData; /// /// Volumetric fog rendering service. @@ -156,7 +156,7 @@ public: /// The light. /// The shadow map. /// The light shadow data. - void RenderLight(RenderContext& renderContext, GPUContext* context, RendererPointLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow); + void RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow); /// /// Renders the light to the volumetric fog light scattering volume texture. Called by the light pass after shadow map rendering. Used by the shadows casting lights. @@ -166,7 +166,7 @@ public: /// The light. /// The shadow map. /// The light shadow data. - void RenderLight(RenderContext& renderContext, GPUContext* context, RendererSpotLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow); + void RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow); /// /// Renders the volumetric fog (generates integrated light scattering 3D texture). Does nothing if feature is disabled or not supported. 
From cdbb2cc813a7d0760bbb69f1c60861f0610be579 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 25 Mar 2024 17:52:48 +0100 Subject: [PATCH 003/292] Refactor shader structures naming with a prefix --- .../Materials/MaterialShaderFeatures.h | 12 +++++----- .../Engine/Level/Actors/EnvironmentProbe.cpp | 2 +- Source/Engine/Level/Actors/EnvironmentProbe.h | 2 +- .../Level/Actors/ExponentialHeightFog.cpp | 6 ++--- .../Level/Actors/ExponentialHeightFog.h | 2 +- Source/Engine/Level/Actors/Sky.cpp | 6 ++--- Source/Engine/Level/Actors/Sky.h | 2 +- Source/Engine/Renderer/AmbientOcclusionPass.h | 2 +- Source/Engine/Renderer/AntiAliasing/TAA.cpp | 2 +- Source/Engine/Renderer/Config.h | 24 +++++++++---------- Source/Engine/Renderer/DrawCall.h | 2 +- Source/Engine/Renderer/GBufferPass.cpp | 4 ++-- Source/Engine/Renderer/GBufferPass.h | 2 +- .../GI/DynamicDiffuseGlobalIllumination.cpp | 2 +- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 2 +- Source/Engine/Renderer/LightPass.cpp | 4 ++-- Source/Engine/Renderer/MotionBlurPass.cpp | 2 +- Source/Engine/Renderer/ReflectionsPass.h | 4 ++-- Source/Engine/Renderer/RenderList.cpp | 8 +++---- Source/Engine/Renderer/RenderList.h | 8 +++---- .../Renderer/ScreenSpaceReflectionsPass.cpp | 2 +- Source/Engine/Renderer/ShadowsPass.cpp | 6 ++--- Source/Engine/Renderer/ShadowsPass.h | 4 ++-- Source/Engine/Renderer/VolumetricFogPass.cpp | 6 ++--- Source/Engine/Renderer/VolumetricFogPass.h | 16 ++++++------- 25 files changed, 66 insertions(+), 66 deletions(-) diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h index 0df2e9a2e..f48c6821c 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h @@ -27,14 +27,14 @@ struct ForwardShadingFeature : MaterialShaderFeature PACK_STRUCT(struct Data { - LightData DirectionalLight; - LightShadowData DirectionalLightShadow; - LightData SkyLight; - ProbeData 
EnvironmentProbe; - ExponentialHeightFogData ExponentialHeightFog; + ShaderLightData DirectionalLight; + ShaderLightShadowData DirectionalLightShadow; + ShaderLightData SkyLight; + ShaderEnvProbeData EnvironmentProbe; + ShaderExponentialHeightFogData ExponentialHeightFog; Float3 Dummy2; uint32 LocalLightsCount; - LightData LocalLights[MaxLocalLights]; + ShaderLightData LocalLights[MaxLocalLights]; }); static void Bind(MaterialShader::BindParameters& params, Span& cb, int32& srv); diff --git a/Source/Engine/Level/Actors/EnvironmentProbe.cpp b/Source/Engine/Level/Actors/EnvironmentProbe.cpp index d95d957a3..4584cfe14 100644 --- a/Source/Engine/Level/Actors/EnvironmentProbe.cpp +++ b/Source/Engine/Level/Actors/EnvironmentProbe.cpp @@ -61,7 +61,7 @@ bool EnvironmentProbe::IsUsingCustomProbe() const return _isUsingCustomProbe; } -void EnvironmentProbe::SetupProbeData(const RenderContext& renderContext, ProbeData* data) const +void EnvironmentProbe::SetupProbeData(const RenderContext& renderContext, ShaderEnvProbeData* data) const { const float radius = GetScaledRadius(); data->Data0 = Float4(GetPosition() - renderContext.View.Origin, 0); diff --git a/Source/Engine/Level/Actors/EnvironmentProbe.h b/Source/Engine/Level/Actors/EnvironmentProbe.h index ae64e9ce0..dd90a446c 100644 --- a/Source/Engine/Level/Actors/EnvironmentProbe.h +++ b/Source/Engine/Level/Actors/EnvironmentProbe.h @@ -95,7 +95,7 @@ public: /// /// Rendering context /// Packed probe data to set - void SetupProbeData(const RenderContext& renderContext, struct ProbeData* data) const; + void SetupProbeData(const RenderContext& renderContext, struct ShaderEnvProbeData* data) const; /// /// Gets the custom probe (null if using baked one or none). 
diff --git a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp index 39febfaf8..d2ebb69b2 100644 --- a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp +++ b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp @@ -144,7 +144,7 @@ void ExponentialHeightFog::GetVolumetricFogOptions(VolumetricFogOptions& result) result.FogParameters = Float4(density, height, heightFalloff, 0.0f); } -void ExponentialHeightFog::GetExponentialHeightFogData(const RenderView& view, ExponentialHeightFogData& result) const +void ExponentialHeightFog::GetExponentialHeightFogData(const RenderView& view, ShaderExponentialHeightFogData& result) const { const float height = (float)GetPosition().Y; const float density = FogDensity / 1000.0f; @@ -180,8 +180,8 @@ void ExponentialHeightFog::GetExponentialHeightFogData(const RenderView& view, E } PACK_STRUCT(struct Data { - GBufferData GBuffer; - ExponentialHeightFogData ExponentialHeightFog; + ShaderGBufferData GBuffer; + ShaderExponentialHeightFogData ExponentialHeightFog; }); void ExponentialHeightFog::DrawFog(GPUContext* context, RenderContext& renderContext, GPUTextureView* output) diff --git a/Source/Engine/Level/Actors/ExponentialHeightFog.h b/Source/Engine/Level/Actors/ExponentialHeightFog.h index ee39c58e0..ea6d2adeb 100644 --- a/Source/Engine/Level/Actors/ExponentialHeightFog.h +++ b/Source/Engine/Level/Actors/ExponentialHeightFog.h @@ -155,7 +155,7 @@ public: // [IFogRenderer] void GetVolumetricFogOptions(VolumetricFogOptions& result) const override; - void GetExponentialHeightFogData(const RenderView& view, ExponentialHeightFogData& result) const override; + void GetExponentialHeightFogData(const RenderView& view, ShaderExponentialHeightFogData& result) const override; void DrawFog(GPUContext* context, RenderContext& renderContext, GPUTextureView* output) override; protected: diff --git a/Source/Engine/Level/Actors/Sky.cpp b/Source/Engine/Level/Actors/Sky.cpp index 
674b5d1a9..103d37316 100644 --- a/Source/Engine/Level/Actors/Sky.cpp +++ b/Source/Engine/Level/Actors/Sky.cpp @@ -21,8 +21,8 @@ PACK_STRUCT(struct Data { Matrix WVP; Float3 ViewOffset; float Padding; - GBufferData GBuffer; - AtmosphericFogData Fog; + ShaderGBufferData GBuffer; + ShaderAtmosphericFogData Fog; }); Sky::Sky(const SpawnParams& params) @@ -52,7 +52,7 @@ Sky::~Sky() SAFE_DELETE_GPU_RESOURCE(_psFog); } -void Sky::InitConfig(AtmosphericFogData& config) const +void Sky::InitConfig(ShaderAtmosphericFogData& config) const { config.AtmosphericFogDensityScale = 1.0f; config.AtmosphericFogSunDiscScale = SunDiscScale; diff --git a/Source/Engine/Level/Actors/Sky.h b/Source/Engine/Level/Actors/Sky.h index a4ad40b7b..313843748 100644 --- a/Source/Engine/Level/Actors/Sky.h +++ b/Source/Engine/Level/Actors/Sky.h @@ -54,7 +54,7 @@ private: _psFog = nullptr; } #endif - void InitConfig(AtmosphericFogData& config) const; + void InitConfig(ShaderAtmosphericFogData& config) const; public: // [Actor] diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.h b/Source/Engine/Renderer/AmbientOcclusionPass.h index 3e44f0d28..9dacae18b 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.h +++ b/Source/Engine/Renderer/AmbientOcclusionPass.h @@ -20,7 +20,7 @@ private: // Packed shader constant buffer structure (this MUST match shader code) PACK_STRUCT(struct ASSAOConstants { - GBufferData GBuffer; + ShaderGBufferData GBuffer; Float2 ViewportPixelSize; Float2 HalfViewportPixelSize; diff --git a/Source/Engine/Renderer/AntiAliasing/TAA.cpp b/Source/Engine/Renderer/AntiAliasing/TAA.cpp index 38e38636a..4c6c899eb 100644 --- a/Source/Engine/Renderer/AntiAliasing/TAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/TAA.cpp @@ -19,7 +19,7 @@ PACK_STRUCT(struct Data float StationaryBlending; float MotionBlending; float Dummy0; - GBufferData GBuffer; + ShaderGBufferData GBuffer; }); bool TAA::Init() diff --git a/Source/Engine/Renderer/Config.h b/Source/Engine/Renderer/Config.h index 
fb6b210dd..b74cb20da 100644 --- a/Source/Engine/Renderer/Config.h +++ b/Source/Engine/Renderer/Config.h @@ -10,21 +10,21 @@ /// /// Structure that contains information about GBuffer for shaders. /// -struct GBufferData -{ +PACK_STRUCT(struct ShaderGBufferData + { Float4 ViewInfo; Float4 ScreenSize; Float3 ViewPos; float ViewFar; Matrix InvViewMatrix; Matrix InvProjectionMatrix; -}; + }); /// /// Structure that contains information about exponential height fog for shaders. /// -struct ExponentialHeightFogData -{ +PACK_STRUCT(struct ShaderExponentialHeightFogData + { Float3 FogInscatteringColor; float FogMinOpacity; @@ -43,13 +43,13 @@ struct ExponentialHeightFogData float VolumetricFogMaxDistance; float DirectionalInscatteringStartDistance; float StartDistance; -}; + }); /// /// Structure that contains information about atmosphere fog for shaders. /// -struct AtmosphericFogData -{ +PACK_STRUCT(struct ShaderAtmosphericFogData + { float AtmosphericFogDensityScale; float AtmosphericFogSunDiscScale; float AtmosphericFogDistanceScale; @@ -65,12 +65,12 @@ struct AtmosphericFogData Float3 AtmosphericFogSunColor; float AtmosphericFogDensityOffset; -}; + }); /// /// Structure that contains information about light for shaders. /// -PACK_STRUCT(struct LightData { +PACK_STRUCT(struct ShaderLightData { Float2 SpotAngles; float SourceRadius; float SourceLength; @@ -89,7 +89,7 @@ PACK_STRUCT(struct LightData { /// /// Structure that contains information about light for shaders. 
/// -PACK_STRUCT(struct LightShadowData { +PACK_STRUCT(struct ShaderLightShadowData { Float2 ShadowMapSize; float Sharpness; float Fade; @@ -104,7 +104,7 @@ PACK_STRUCT(struct LightShadowData { /// /// Packed env probe data /// -PACK_STRUCT(struct ProbeData { +PACK_STRUCT(struct ShaderEnvProbeData { Float4 Data0; // x - Position.x, y - Position.y, z - Position.z, w - unused Float4 Data1; // x - Radius , y - 1 / Radius, z - Brightness, w - unused }); diff --git a/Source/Engine/Renderer/DrawCall.h b/Source/Engine/Renderer/DrawCall.h index b77541fca..448d1361e 100644 --- a/Source/Engine/Renderer/DrawCall.h +++ b/Source/Engine/Renderer/DrawCall.h @@ -86,7 +86,7 @@ public: /// /// The rendering view. /// The result. - virtual void GetExponentialHeightFogData(const RenderView& view, ExponentialHeightFogData& result) const = 0; + virtual void GetExponentialHeightFogData(const RenderView& view, ShaderExponentialHeightFogData& result) const = 0; /// /// Draw fog using GBuffer inputs diff --git a/Source/Engine/Renderer/GBufferPass.cpp b/Source/Engine/Renderer/GBufferPass.cpp index c962160db..f196a4ee9 100644 --- a/Source/Engine/Renderer/GBufferPass.cpp +++ b/Source/Engine/Renderer/GBufferPass.cpp @@ -22,7 +22,7 @@ #include "Engine/Engine/Engine.h" PACK_STRUCT(struct GBufferPassData{ - GBufferData GBuffer; + ShaderGBufferData GBuffer; Float3 Dummy0; int32 ViewMode; }); @@ -393,7 +393,7 @@ bool GBufferPass::IsDebugView(ViewMode mode) } } -void GBufferPass::SetInputs(const RenderView& view, GBufferData& gBuffer) +void GBufferPass::SetInputs(const RenderView& view, ShaderGBufferData& gBuffer) { // GBuffer params: // ViewInfo : x-1/Projection[0,0] y-1/Projection[1,1] z-(Far / (Far - Near) w-(-Far * Near) / (Far - Near) / Far) diff --git a/Source/Engine/Renderer/GBufferPass.h b/Source/Engine/Renderer/GBufferPass.h index 660ff22db..935fa91d5 100644 --- a/Source/Engine/Renderer/GBufferPass.h +++ b/Source/Engine/Renderer/GBufferPass.h @@ -73,7 +73,7 @@ public: /// /// The rendering 
view. /// GBuffer input to setup - static void SetInputs(const RenderView& view, GBufferData& gBuffer); + static void SetInputs(const RenderView& view, ShaderGBufferData& gBuffer); private: diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index c810c90aa..56d1194d5 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -46,7 +46,7 @@ PACK_STRUCT(struct Data0 DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; - GBufferData GBuffer; + ShaderGBufferData GBuffer; float Padding0; float ProbesDistanceLimit; float ResetBlend; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index fb0faa27a..49eca6604 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -54,7 +54,7 @@ PACK_STRUCT(struct Data0 GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; - LightData Light; + ShaderLightData Light; }); PACK_STRUCT(struct AtlasTileVertex diff --git a/Source/Engine/Renderer/LightPass.cpp b/Source/Engine/Renderer/LightPass.cpp index d3927b2cf..f9e7038d7 100644 --- a/Source/Engine/Renderer/LightPass.cpp +++ b/Source/Engine/Renderer/LightPass.cpp @@ -13,12 +13,12 @@ #include "Engine/Graphics/RenderTask.h" PACK_STRUCT(struct PerLight{ - LightData Light; + ShaderLightData Light; Matrix WVP; }); PACK_STRUCT(struct PerFrame{ - GBufferData GBuffer; + ShaderGBufferData GBuffer; }); String LightPass::ToString() const diff --git a/Source/Engine/Renderer/MotionBlurPass.cpp b/Source/Engine/Renderer/MotionBlurPass.cpp index 
64f0aa80f..a195bb14c 100644 --- a/Source/Engine/Renderer/MotionBlurPass.cpp +++ b/Source/Engine/Renderer/MotionBlurPass.cpp @@ -18,7 +18,7 @@ #include "Engine/Engine/Time.h" PACK_STRUCT(struct Data { - GBufferData GBuffer; + ShaderGBufferData GBuffer; Matrix CurrentVP; Matrix PreviousVP; Float4 TemporalAAJitter; diff --git a/Source/Engine/Renderer/ReflectionsPass.h b/Source/Engine/Renderer/ReflectionsPass.h index 2b4276f07..a88a4eec4 100644 --- a/Source/Engine/Renderer/ReflectionsPass.h +++ b/Source/Engine/Renderer/ReflectionsPass.h @@ -18,9 +18,9 @@ class ReflectionsPass : public RendererPass private: PACK_STRUCT(struct Data { - ProbeData PData; + ShaderEnvProbeData PData; Matrix WVP; - GBufferData GBuffer; + ShaderGBufferData GBuffer; }); AssetReference _shader; diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 32474a143..1d9ca639a 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -39,7 +39,7 @@ namespace CriticalSection MemPoolLocker; } -void RenderDirectionalLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderDirectionalLightData::SetupLightData(ShaderLightData* data, bool useShadow) const { data->SpotAngles.X = -2.0f; data->SpotAngles.Y = 1.0f; @@ -56,7 +56,7 @@ void RenderDirectionalLightData::SetupLightData(LightData* data, bool useShadow) data->RadiusInv = 0; } -void RenderSpotLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderSpotLightData::SetupLightData(ShaderLightData* data, bool useShadow) const { data->SpotAngles.X = CosOuterCone; data->SpotAngles.Y = InvCosConeDifference; @@ -73,7 +73,7 @@ void RenderSpotLightData::SetupLightData(LightData* data, bool useShadow) const data->RadiusInv = 1.0f / Radius; } -void RenderPointLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderPointLightData::SetupLightData(ShaderLightData* data, bool useShadow) const { data->SpotAngles.X = -2.0f; 
data->SpotAngles.Y = 1.0f; @@ -90,7 +90,7 @@ void RenderPointLightData::SetupLightData(LightData* data, bool useShadow) const data->RadiusInv = 1.0f / Radius; } -void RenderSkyLightData::SetupLightData(LightData* data, bool useShadow) const +void RenderSkyLightData::SetupLightData(ShaderLightData* data, bool useShadow) const { data->SpotAngles.X = AdditiveColor.X; data->SpotAngles.Y = AdditiveColor.Y; diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 74b4bdf3d..4dabe669d 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -61,7 +61,7 @@ struct RenderDirectionalLightData : RenderLightData float Cascade3Spacing; float Cascade4Spacing; - void SetupLightData(LightData* data, bool useShadow) const; + void SetupLightData(ShaderLightData* data, bool useShadow) const; }; struct RenderSpotLightData : RenderLightData @@ -79,7 +79,7 @@ struct RenderSpotLightData : RenderLightData GPUTexture* IESTexture; - void SetupLightData(LightData* data, bool useShadow) const; + void SetupLightData(ShaderLightData* data, bool useShadow) const; }; struct RenderPointLightData : RenderLightData @@ -93,7 +93,7 @@ struct RenderPointLightData : RenderLightData GPUTexture* IESTexture; - void SetupLightData(LightData* data, bool useShadow) const; + void SetupLightData(ShaderLightData* data, bool useShadow) const; }; struct RenderSkyLightData : RenderLightData @@ -103,7 +103,7 @@ struct RenderSkyLightData : RenderLightData CubeTexture* Image; - void SetupLightData(LightData* data, bool useShadow) const; + void SetupLightData(ShaderLightData* data, bool useShadow) const; }; /// diff --git a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp index 39efc70dd..b3d0ea6ba 100644 --- a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp +++ b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp @@ -27,7 +27,7 @@ PACK_STRUCT(struct Data { - GBufferData GBuffer; 
+ ShaderGBufferData GBuffer; float MaxColorMiplevel; float TraceSizeMax; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 99246bcce..e7da47363 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -19,9 +19,9 @@ #define PointLight_NearPlane 10.0f PACK_STRUCT(struct Data{ - GBufferData GBuffer; - LightData Light; - LightShadowData LightShadow; + ShaderGBufferData GBuffer; + ShaderLightData Light; + ShaderLightShadowData LightShadow; Matrix WVP; Matrix ViewProjectionMatrix; Float2 Dummy0; diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index e67abd184..fd96900ca 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -47,7 +47,7 @@ private: int32 ContextIndex; int32 ContextCount; bool BlendCSM; - LightShadowData Constants; + ShaderLightShadowData Constants; }; // Shader stuff @@ -91,7 +91,7 @@ public: // TODO: use full scene shadow map atlas with dynamic slots allocation int32 LastDirLightIndex = -1; GPUTextureView* LastDirLightShadowMap = nullptr; - LightShadowData LastDirLight; + ShaderLightShadowData LastDirLight; public: void Prepare(); diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index d94bd503b..0e1b10747 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -263,7 +263,7 @@ GPUTextureView* VolumetricFogPass::GetLocalShadowedLightScattering(RenderContext } template -void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUContext* context, T& light, LightShadowData& shadow) +void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUContext* context, T& light, ShaderLightShadowData& shadow) { // Prepare VolumetricFogOptions options; @@ -387,7 +387,7 @@ void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUConte } } 
-void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow) +void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow) { // Skip lights with no volumetric light influence or not casting volumetric shadow if (light.VolumetricScatteringIntensity <= ZeroTolerance || !light.CastVolumetricShadow) @@ -401,7 +401,7 @@ void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* co context->UnBindSR(5); } -void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow) +void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow) { // Skip lights with no volumetric light influence or not casting volumetric shadow if (light.VolumetricScatteringIntensity <= ZeroTolerance || !light.CastVolumetricShadow) diff --git a/Source/Engine/Renderer/VolumetricFogPass.h b/Source/Engine/Renderer/VolumetricFogPass.h index 97eb89440..cbec860fc 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.h +++ b/Source/Engine/Renderer/VolumetricFogPass.h @@ -36,7 +36,7 @@ private: }); PACK_STRUCT(struct Data { - GBufferData GBuffer; + ShaderGBufferData GBuffer; Float3 GlobalAlbedo; float GlobalExtinctionScale; @@ -62,8 +62,8 @@ private: Float4 FrameJitterOffsets[8]; - LightData DirectionalLight; - LightShadowData DirectionalLightShadow; + ShaderLightData DirectionalLight; + ShaderLightShadowData DirectionalLightShadow; SkyLightData SkyLight; DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; }); @@ -76,8 +76,8 @@ private: Float4 ViewSpaceBoundingSphere; Matrix ViewToVolumeClip; - LightData LocalLight; - LightShadowData LocalLightShadow; + 
ShaderLightData LocalLight; + ShaderLightShadowData LocalLightShadow; }); // Shader stuff @@ -156,7 +156,7 @@ public: /// The light. /// The shadow map. /// The light shadow data. - void RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow); + void RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow); /// /// Renders the light to the volumetric fog light scattering volume texture. Called by the light pass after shadow map rendering. Used by the shadows casting lights. @@ -166,7 +166,7 @@ public: /// The light. /// The shadow map. /// The light shadow data. - void RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, LightShadowData& shadow); + void RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow); /// /// Renders the volumetric fog (generates integrated light scattering 3D texture). Does nothing if feature is disabled or not supported. 
@@ -180,7 +180,7 @@ private: GPUTextureView* GetLocalShadowedLightScattering(RenderContext& renderContext, GPUContext* context, VolumetricFogOptions& options) const; void InitCircleBuffer(); template - void RenderRadialLight(RenderContext& renderContext, GPUContext* context, T& light, LightShadowData& shadow); + void RenderRadialLight(RenderContext& renderContext, GPUContext* context, T& light, ShaderLightShadowData& shadow); template void RenderRadialLight(RenderContext& renderContext, GPUContext* context, RenderView& view, VolumetricFogOptions& options, T& light, PerLight& perLight, GPUConstantBuffer* cb1); #if COMPILE_WITH_DEV_ENV From 2dfb1058b2d041d8639ba01ad9f3967a2e990ab9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 26 Mar 2024 11:29:01 +0100 Subject: [PATCH 004/292] Optimize world matrix storage for drawable objects to use `Matrix3x4` instead of full matrix --- .../Editor/MaterialTemplates/Particle.shader | 26 ++++++------ .../Editor/MaterialTemplates/Surface.shader | 14 +++---- .../Editor/MaterialTemplates/Terrain.shader | 13 +++--- .../MaterialTemplates/VolumeParticle.shader | 12 +++--- Source/Engine/Core/Math/Matrix.cpp | 37 +++++++++++++++++ Source/Engine/Core/Math/Matrix3x4.h | 41 +++---------------- .../Materials/DeferredMaterialShader.cpp | 9 ++-- .../Materials/ForwardMaterialShader.cpp | 9 ++-- .../Graphics/Materials/MaterialShader.h | 2 +- .../Materials/ParticleMaterialShader.cpp | 11 +++-- .../Materials/TerrainMaterialShader.cpp | 5 ++- .../VolumeParticleMaterialShader.cpp | 11 +++-- .../Renderer/GlobalSignDistanceFieldPass.cpp | 25 +++++------ Source/Shaders/Common.hlsl | 6 +++ Source/Shaders/GlobalSignDistanceField.shader | 18 ++++---- 15 files changed, 134 insertions(+), 105 deletions(-) diff --git a/Content/Editor/MaterialTemplates/Particle.shader b/Content/Editor/MaterialTemplates/Particle.shader index 3f6b03a58..5cdbc40a5 100644 --- a/Content/Editor/MaterialTemplates/Particle.shader +++ 
b/Content/Editor/MaterialTemplates/Particle.shader @@ -26,7 +26,7 @@ struct RibbonInput // Primary constant buffer (with additional material parameters) META_CB_BEGIN(0, Data) -float4x4 WorldMatrix; +float4x3 WorldMatrix; uint SortedIndicesOffset; float PerInstanceRandom; int ParticleStride; @@ -45,7 +45,7 @@ int RibbonWidthOffset; int RibbonTwistOffset; int RibbonFacingVectorOffset; uint RibbonSegmentCount; -float4x4 WorldMatrixInverseTransposed; +float4x3 WorldMatrixInverseTransposed; @1META_CB_END // Particles attributes buffer @@ -138,7 +138,7 @@ MaterialInput GetMaterialInput(PixelInput input) #if USE_INSTANCING #define GetInstanceTransform(input) float4x4(float4(input.InstanceTransform1.xyz, 0.0f), float4(input.InstanceTransform2.xyz, 0.0f), float4(input.InstanceTransform3.xyz, 0.0f), float4(input.InstanceOrigin.xyz, 1.0f)) #else -#define GetInstanceTransform(input) WorldMatrix; +#define GetInstanceTransform(input) ToMatrix4x4(WorldMatrix); #endif // Removes the scale vector from the local to world transformation matrix (supports instancing) @@ -264,12 +264,12 @@ float4 GetParticleVec4(uint particleIndex, int offset) float3 TransformParticlePosition(float3 input) { - return mul(float4(input, 1.0f), WorldMatrix).xyz; + return mul(float4(input, 1.0f), ToMatrix4x4(WorldMatrix)).xyz; } float3 TransformParticleVector(float3 input) { - return mul(float4(input, 0.0f), WorldMatrixInverseTransposed).xyz; + return mul(float4(input, 0.0f), ToMatrix4x4(WorldMatrixInverseTransposed)).xyz; } @8 @@ -333,7 +333,7 @@ VertexOutput VS_Sprite(SpriteInput input, uint particleIndex : SV_InstanceID) float2 spriteSize = GetParticleVec2(particleIndex, SpriteSizeOffset); int spriteFacingMode = SpriteFacingModeOffset != -1 ? 
GetParticleInt(particleIndex, SpriteFacingModeOffset) : -1; - float4x4 world = WorldMatrix; + float4x4 world = ToMatrix4x4(WorldMatrix); float3x3 eulerMatrix = EulerMatrix(radians(particleRotation)); float3x3 viewRot = transpose((float3x3)ViewMatrix); float3 position = mul(float4(particlePosition, 1), world).xyz; @@ -463,11 +463,12 @@ VertexOutput VS_Model(ModelInput input, uint particleIndex : SV_InstanceID) } // Read particle data + float4x4 worldMatrix = ToMatrix4x4(WorldMatrix); float3 particlePosition = GetParticleVec3(particleIndex, PositionOffset); float3 particleScale = GetParticleVec3(particleIndex, ScaleOffset); float3 particleRotation = GetParticleVec3(particleIndex, RotationOffset); int modelFacingMode = ModelFacingModeOffset != -1 ? GetParticleInt(particleIndex, ModelFacingModeOffset) : -1; - float3 position = mul(float4(particlePosition, 1), WorldMatrix).xyz; + float3 position = mul(float4(particlePosition, 1), worldMatrix).xyz; // Compute final vertex position in the world float3x3 eulerMatrix = EulerMatrix(radians(particleRotation)); @@ -506,7 +507,7 @@ VertexOutput VS_Model(ModelInput input, uint particleIndex : SV_InstanceID) world = mul(world, scaleMatrix); } world = transpose(world); - world = mul(world, WorldMatrix); + world = mul(world, worldMatrix); // Calculate the vertex position in world space output.WorldPosition = mul(float4(input.Position, 1), world).xyz; @@ -520,12 +521,12 @@ VertexOutput VS_Model(ModelInput input, uint particleIndex : SV_InstanceID) #if USE_VERTEX_COLOR output.VertexColor = input.Color; #endif - output.InstanceOrigin = WorldMatrix[3].xyz; + output.InstanceOrigin = worldMatrix[3].xyz; output.InstanceParams = PerInstanceRandom; // Calculate tanget space to world space transformation matrix for unit vectors half3x3 tangentToLocal = CalcTangentToLocal(input); - half3x3 tangentToWorld = CalcTangentToWorld(WorldMatrix, tangentToLocal); + half3x3 tangentToWorld = CalcTangentToWorld(worldMatrix, tangentToLocal); output.TBN = 
tangentToWorld; // Get material input params if need to evaluate any material property @@ -625,12 +626,13 @@ VertexOutput VS_Ribbon(RibbonInput input, uint vertexIndex : SV_VertexID) #if USE_VERTEX_COLOR output.VertexColor = 1; #endif - output.InstanceOrigin = WorldMatrix[3].xyz; + float4x4 world = ToMatrix4x4(WorldMatrix); + output.InstanceOrigin = world[3].xyz; output.InstanceParams = PerInstanceRandom; // Calculate tanget space to world space transformation matrix for unit vectors half3x3 tangentToLocal = float3x3(tangentRight, tangentUp, cross(tangentRight, tangentUp)); - half3x3 tangentToWorld = CalcTangentToWorld(WorldMatrix, tangentToLocal); + half3x3 tangentToWorld = CalcTangentToWorld(world, tangentToLocal); output.TBN = tangentToWorld; // Get material input params if need to evaluate any material property diff --git a/Content/Editor/MaterialTemplates/Surface.shader b/Content/Editor/MaterialTemplates/Surface.shader index 1e8589ff2..f206d58fb 100644 --- a/Content/Editor/MaterialTemplates/Surface.shader +++ b/Content/Editor/MaterialTemplates/Surface.shader @@ -10,8 +10,8 @@ @7 // Primary constant buffer (with additional material parameters) META_CB_BEGIN(0, Data) -float4x4 WorldMatrix; -float4x4 PrevWorldMatrix; +float4x3 WorldMatrix; +float4x3 PrevWorldMatrix; float2 Dummy0; float LODDitherFactor; float PerInstanceRandom; @@ -171,7 +171,7 @@ MaterialInput GetMaterialInput(PixelInput input) #if USE_INSTANCING #define CalculateInstanceTransform(input) float4x4 world = GetInstanceTransform(input); output.Geometry.InstanceTransform1 = input.InstanceTransform1.xyz; output.Geometry.InstanceTransform2 = input.InstanceTransform2.xyz; output.Geometry.InstanceTransform3 = input.InstanceTransform3.xyz; #else -#define CalculateInstanceTransform(input) float4x4 world = WorldMatrix; output.Geometry.InstanceTransform1 = world[0].xyz; output.Geometry.InstanceTransform2 = world[1].xyz; output.Geometry.InstanceTransform3 = world[2].xyz; +#define 
CalculateInstanceTransform(input) float4x4 world = ToMatrix4x4(WorldMatrix); output.Geometry.InstanceTransform1 = world[0].xyz; output.Geometry.InstanceTransform2 = world[1].xyz; output.Geometry.InstanceTransform3 = world[2].xyz; #endif // Removes the scale vector from the local to world transformation matrix (supports instancing) @@ -328,7 +328,7 @@ VertexOutput VS(ModelInput input) // Compute world space vertex position CalculateInstanceTransform(input); output.Geometry.WorldPosition = mul(float4(input.Position.xyz, 1), world).xyz; - output.Geometry.PrevWorldPosition = mul(float4(input.Position.xyz, 1), PrevWorldMatrix).xyz; + output.Geometry.PrevWorldPosition = mul(float4(input.Position.xyz, 1), ToMatrix4x4(PrevWorldMatrix)).xyz; // Compute clip space position output.Position = mul(float4(output.Geometry.WorldPosition, 1), ViewProjectionMatrix); @@ -402,7 +402,7 @@ float4 VS_Depth(ModelInput_PosOnly input) : SV_Position #if USE_INSTANCING float4x4 world = GetInstanceTransform(input); #else - float4x4 world = WorldMatrix; + float4x4 world = ToMatrix4x4(WorldMatrix); #endif float3 worldPosition = mul(float4(input.Position.xyz, 1), world).xyz; float4 position = mul(float4(worldPosition, 1), ViewProjectionMatrix); @@ -511,9 +511,9 @@ VertexOutput VS_Skinned(ModelInput_Skinned input) output.Geometry.WorldPosition = mul(float4(position, 1), world).xyz; #if PER_BONE_MOTION_BLUR float3 prevPosition = SkinPrevPosition(input); - output.Geometry.PrevWorldPosition = mul(float4(prevPosition, 1), PrevWorldMatrix).xyz; + output.Geometry.PrevWorldPosition = mul(float4(prevPosition, 1), ToMatrix4x4(PrevWorldMatrix)).xyz; #else - output.Geometry.PrevWorldPosition = mul(float4(position, 1), PrevWorldMatrix).xyz; + output.Geometry.PrevWorldPosition = mul(float4(position, 1), ToMatrix4x4(PrevWorldMatrix)).xyz; #endif // Compute clip space position diff --git a/Content/Editor/MaterialTemplates/Terrain.shader b/Content/Editor/MaterialTemplates/Terrain.shader index 32395d583..71504f6ed 
100644 --- a/Content/Editor/MaterialTemplates/Terrain.shader +++ b/Content/Editor/MaterialTemplates/Terrain.shader @@ -17,7 +17,7 @@ @7 // Primary constant buffer (with additional material parameters) META_CB_BEGIN(0, Data) -float4x4 WorldMatrix; +float4x3 WorldMatrix; float3 WorldInvScale; float WorldDeterminantSign; float PerInstanceRandom; @@ -194,7 +194,7 @@ float3 TransformViewVectorToWorld(MaterialInput input, float3 viewVector) // Transforms a vector from local space to world space float3 TransformLocalVectorToWorld(MaterialInput input, float3 localVector) { - float3x3 localToWorld = (float3x3)WorldMatrix; + float3x3 localToWorld = (float3x3)ToMatrix4x4(WorldMatrix); //localToWorld = RemoveScaleFromLocalToWorld(localToWorld); return mul(localVector, localToWorld); } @@ -202,7 +202,7 @@ float3 TransformLocalVectorToWorld(MaterialInput input, float3 localVector) // Transforms a vector from local space to world space float3 TransformWorldVectorToLocal(MaterialInput input, float3 worldVector) { - float3x3 localToWorld = (float3x3)WorldMatrix; + float3x3 localToWorld = (float3x3)ToMatrix4x4(WorldMatrix); //localToWorld = RemoveScaleFromLocalToWorld(localToWorld); return mul(localToWorld, worldVector); } @@ -210,7 +210,7 @@ float3 TransformWorldVectorToLocal(MaterialInput input, float3 worldVector) // Gets the current object position float3 GetObjectPosition(MaterialInput input) { - return WorldMatrix[3].xyz; + return ToMatrix4x4(WorldMatrix)[3].xyz; } // Gets the current object size @@ -365,7 +365,8 @@ VertexOutput VS(TerrainVertexInput input) float3 position = float3(positionXZ.x, height, positionXZ.y); // Compute world space vertex position - output.Geometry.WorldPosition = mul(float4(position, 1), WorldMatrix).xyz; + float4x4 worldMatrix = ToMatrix4x4(WorldMatrix); + output.Geometry.WorldPosition = mul(float4(position, 1), worldMatrix).xyz; // Compute clip space position output.Position = mul(float4(output.Geometry.WorldPosition, 1), ViewProjectionMatrix); @@ 
-389,7 +390,7 @@ VertexOutput VS(TerrainVertexInput input) // Compute world space normal vector float3x3 tangentToLocal = CalcTangentBasisFromWorldNormal(normal); - float3x3 tangentToWorld = CalcTangentToWorld(WorldMatrix, tangentToLocal); + float3x3 tangentToWorld = CalcTangentToWorld(worldMatrix, tangentToLocal); output.Geometry.WorldNormal = tangentToWorld[2]; // Get material input params if need to evaluate any material property diff --git a/Content/Editor/MaterialTemplates/VolumeParticle.shader b/Content/Editor/MaterialTemplates/VolumeParticle.shader index c21e7c3bb..3d182e0fd 100644 --- a/Content/Editor/MaterialTemplates/VolumeParticle.shader +++ b/Content/Editor/MaterialTemplates/VolumeParticle.shader @@ -13,8 +13,8 @@ // Primary constant buffer (with additional material parameters) META_CB_BEGIN(0, Data) float4x4 InverseViewProjectionMatrix; -float4x4 WorldMatrix; -float4x4 WorldMatrixInverseTransposed; +float4x3 WorldMatrix; +float4x3 WorldMatrixInverseTransposed; float3 GridSize; float PerInstanceRandom; float Dummy0; @@ -49,7 +49,7 @@ struct MaterialInput #endif }; -#define GetInstanceTransform(input) WorldMatrix; +#define GetInstanceTransform(input) ToMatrix4x4(WorldMatrix); // Removes the scale vector from the local to world transformation matrix (supports instancing) float3x3 RemoveScaleFromLocalToWorld(float3x3 localToWorld) @@ -170,12 +170,12 @@ float4 GetParticleVec4(uint particleIndex, int offset) float3 TransformParticlePosition(float3 input) { - return mul(float4(input, 1.0f), WorldMatrix).xyz; + return mul(float4(input, 1.0f), ToMatrix4x4(WorldMatrix)).xyz; } float3 TransformParticleVector(float3 input) { - return mul(float4(input, 0.0f), WorldMatrixInverseTransposed).xyz; + return mul(float4(input, 0.0f), ToMatrix4x4(WorldMatrixInverseTransposed)).xyz; } @8 @@ -219,7 +219,7 @@ void PS_VolumetricFog(Quad_GS2PS input, out float4 VBufferA : SV_Target0, out fl materialInput.ParticleIndex = ParticleIndex; materialInput.TBN = float3x3(float3(1, 0, 
0), float3(0, 1, 0), float3(0, 0, 1)); materialInput.TwoSidedSign = 1.0f; - materialInput.InstanceOrigin = WorldMatrix[3].xyz; + materialInput.InstanceOrigin = ToMatrix4x4(WorldMatrix)[3].xyz; materialInput.InstanceParams = PerInstanceRandom; materialInput.SvPosition = clipPos; Material material = GetMaterialPS(materialInput); diff --git a/Source/Engine/Core/Math/Matrix.cpp b/Source/Engine/Core/Math/Matrix.cpp index 8445bd84a..5fc2321e1 100644 --- a/Source/Engine/Core/Math/Matrix.cpp +++ b/Source/Engine/Core/Math/Matrix.cpp @@ -2,6 +2,7 @@ #include "Matrix.h" #include "Matrix3x3.h" +#include "Matrix3x4.h" #include "Vector2.h" #include "Quaternion.h" #include "Transform.h" @@ -887,3 +888,39 @@ Float4 Matrix::TransformPosition(const Matrix& m, const Float4& v) m.Values[0][3] * v.Raw[0] + m.Values[1][3] * v.Raw[1] + m.Values[2][3] * v.Raw[2] + m.Values[3][3] * v.Raw[3] ); } + +void Matrix3x4::SetMatrix(const Matrix& m) +{ + const float* src = m.Raw; + float* dst = Raw; + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + dst[4] = src[4]; + dst[5] = src[5]; + dst[6] = src[6]; + dst[7] = src[7]; + dst[8] = src[8]; + dst[9] = src[9]; + dst[10] = src[10]; + dst[11] = src[11]; +} + +void Matrix3x4::SetMatrixTranspose(const Matrix& m) +{ + const float* src = m.Raw; + float* dst = Raw; + dst[0] = src[0]; + dst[1] = src[4]; + dst[2] = src[8]; + dst[3] = src[12]; + dst[4] = src[1]; + dst[5] = src[5]; + dst[6] = src[9]; + dst[7] = src[13]; + dst[8] = src[2]; + dst[9] = src[6]; + dst[10] = src[10]; + dst[11] = src[14]; +} diff --git a/Source/Engine/Core/Math/Matrix3x4.h b/Source/Engine/Core/Math/Matrix3x4.h index aee3570a1..91133f816 100644 --- a/Source/Engine/Core/Math/Matrix3x4.h +++ b/Source/Engine/Core/Math/Matrix3x4.h @@ -9,43 +9,14 @@ /// struct FLAXENGINE_API Matrix3x4 { - float M[3][4]; - - void SetMatrix(const Matrix& m) + union { - const float* src = m.Raw; - float* dst = &M[0][0]; - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - 
dst[3] = src[3]; - dst[4] = src[4]; - dst[5] = src[5]; - dst[6] = src[6]; - dst[7] = src[7]; - dst[8] = src[8]; - dst[9] = src[9]; - dst[10] = src[10]; - dst[11] = src[11]; - } + float Values[3][4]; + float Raw[12]; + }; - void SetMatrixTranspose(const Matrix& m) - { - const float* src = m.Raw; - float* dst = &M[0][0]; - dst[0] = src[0]; - dst[1] = src[4]; - dst[2] = src[8]; - dst[3] = src[12]; - dst[4] = src[1]; - dst[5] = src[5]; - dst[6] = src[9]; - dst[7] = src[13]; - dst[8] = src[2]; - dst[9] = src[6]; - dst[10] = src[10]; - dst[11] = src[14]; - } + void SetMatrix(const Matrix& m); + void SetMatrixTranspose(const Matrix& m); }; template<> diff --git a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp index eeffb67d3..88dd34b35 100644 --- a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp @@ -3,6 +3,7 @@ #include "DeferredMaterialShader.h" #include "MaterialShaderFeatures.h" #include "MaterialParams.h" +#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderView.h" #include "Engine/Renderer/DrawCall.h" @@ -17,8 +18,8 @@ #include "Engine/Graphics/RenderTask.h" PACK_STRUCT(struct DeferredMaterialShaderData { - Matrix WorldMatrix; - Matrix PrevWorldMatrix; + Matrix3x4 WorldMatrix; + Matrix3x4 PrevWorldMatrix; Float2 Dummy0; float LODDitherFactor; float PerInstanceRandom; @@ -70,8 +71,8 @@ void DeferredMaterialShader::Bind(BindParameters& params) // Setup material constants { - Matrix::Transpose(drawCall.World, materialData->WorldMatrix); - Matrix::Transpose(drawCall.Surface.PrevWorld, materialData->PrevWorldMatrix); + materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); + materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld); materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign; materialData->LODDitherFactor = 
drawCall.Surface.LODDitherFactor; materialData->PerInstanceRandom = drawCall.PerInstanceRandom; diff --git a/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp b/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp index af2ad7957..6707fb802 100644 --- a/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp @@ -3,6 +3,7 @@ #include "ForwardMaterialShader.h" #include "MaterialShaderFeatures.h" #include "MaterialParams.h" +#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPULimits.h" @@ -18,8 +19,8 @@ #endif PACK_STRUCT(struct ForwardMaterialShaderData { - Matrix WorldMatrix; - Matrix PrevWorldMatrix; + Matrix3x4 WorldMatrix; + Matrix3x4 PrevWorldMatrix; Float2 Dummy0; float LODDitherFactor; float PerInstanceRandom; @@ -76,8 +77,8 @@ void ForwardMaterialShader::Bind(BindParameters& params) // Setup material constants { - Matrix::Transpose(drawCall.World, materialData->WorldMatrix); - Matrix::Transpose(drawCall.Surface.PrevWorld, materialData->PrevWorldMatrix); + materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); + materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld); materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign; materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor; materialData->PerInstanceRandom = drawCall.PerInstanceRandom; diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index d79ef49b0..7a5b842e2 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials shader version. 
/// -#define MATERIAL_GRAPH_VERSION 161 +#define MATERIAL_GRAPH_VERSION 162 class Material; class GPUShader; diff --git a/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp b/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp index cd0fb31f4..998a77843 100644 --- a/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp @@ -3,6 +3,7 @@ #include "ParticleMaterialShader.h" #include "MaterialShaderFeatures.h" #include "MaterialParams.h" +#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Renderer/DrawCall.h" #include "Engine/Renderer/RenderList.h" #include "Engine/Graphics/RenderView.h" @@ -15,7 +16,7 @@ #include "Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.h" PACK_STRUCT(struct ParticleMaterialShaderData { - Matrix WorldMatrix; + Matrix3x4 WorldMatrix; uint32 SortedIndicesOffset; float PerInstanceRandom; int32 ParticleStride; @@ -34,7 +35,7 @@ PACK_STRUCT(struct ParticleMaterialShaderData { int32 RibbonTwistOffset; int32 RibbonFacingVectorOffset; uint32 RibbonSegmentCount; - Matrix WorldMatrixInverseTransposed; + Matrix3x4 WorldMatrixInverseTransposed; }); DrawPass ParticleMaterialShader::GetDrawModes() const @@ -101,7 +102,7 @@ void ParticleMaterialShader::Bind(BindParameters& params) static StringView ParticleScaleOffset(TEXT("Scale")); static StringView ParticleModelFacingModeOffset(TEXT("ModelFacingMode")); - Matrix::Transpose(drawCall.World, materialData->WorldMatrix); + materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); materialData->SortedIndicesOffset = drawCall.Particle.Particles->GPU.SortedIndices && params.RenderContext.View.Pass != DrawPass::Depth ? 
sortedIndicesOffset : 0xFFFFFFFF; materialData->PerInstanceRandom = drawCall.PerInstanceRandom; materialData->ParticleStride = drawCall.Particle.Particles->Stride; @@ -113,7 +114,9 @@ void ParticleMaterialShader::Bind(BindParameters& params) materialData->RotationOffset = drawCall.Particle.Particles->Layout->FindAttributeOffset(ParticleRotationOffset, ParticleAttribute::ValueTypes::Float3, -1); materialData->ScaleOffset = drawCall.Particle.Particles->Layout->FindAttributeOffset(ParticleScaleOffset, ParticleAttribute::ValueTypes::Float3, -1); materialData->ModelFacingModeOffset = drawCall.Particle.Particles->Layout->FindAttributeOffset(ParticleModelFacingModeOffset, ParticleAttribute::ValueTypes::Int, -1); - Matrix::Invert(drawCall.World, materialData->WorldMatrixInverseTransposed); + Matrix worldMatrixInverseTransposed; + Matrix::Invert(drawCall.World, worldMatrixInverseTransposed); + materialData->WorldMatrixInverseTransposed.SetMatrix(worldMatrixInverseTransposed); } // Select pipeline state based on current pass and render mode diff --git a/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp b/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp index bcf194907..19415c891 100644 --- a/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp @@ -3,6 +3,7 @@ #include "TerrainMaterialShader.h" #include "MaterialShaderFeatures.h" #include "MaterialParams.h" +#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPULimits.h" #include "Engine/Graphics/GPUDevice.h" @@ -16,7 +17,7 @@ #include "Engine/Terrain/TerrainPatch.h" PACK_STRUCT(struct TerrainMaterialShaderData { - Matrix WorldMatrix; + Matrix3x4 WorldMatrix; Float3 WorldInvScale; float WorldDeterminantSign; float PerInstanceRandom; @@ -66,7 +67,7 @@ void TerrainMaterialShader::Bind(BindParameters& params) // Setup material constants { - Matrix::Transpose(drawCall.World, 
materialData->WorldMatrix); + materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); const float scaleX = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13).Length(); const float scaleY = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23).Length(); const float scaleZ = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33).Length(); diff --git a/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp b/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp index 106682148..585b4bf29 100644 --- a/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp @@ -3,6 +3,7 @@ #include "VolumeParticleMaterialShader.h" #include "MaterialShaderFeatures.h" #include "MaterialParams.h" +#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Renderer/DrawCall.h" #include "Engine/Renderer/VolumetricFogPass.h" #include "Engine/Renderer/RenderList.h" @@ -16,8 +17,8 @@ PACK_STRUCT(struct VolumeParticleMaterialShaderData { Matrix InverseViewProjectionMatrix; - Matrix WorldMatrix; - Matrix WorldMatrixInverseTransposed; + Matrix3x4 WorldMatrix; + Matrix3x4 WorldMatrixInverseTransposed; Float3 GridSize; float PerInstanceRandom; float Dummy0; @@ -76,8 +77,10 @@ void VolumeParticleMaterialShader::Bind(BindParameters& params) // Setup material constants { Matrix::Transpose(view.IVP, materialData->InverseViewProjectionMatrix); - Matrix::Transpose(drawCall.World, materialData->WorldMatrix); - Matrix::Invert(drawCall.World, materialData->WorldMatrixInverseTransposed); + materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); + Matrix worldMatrixInverseTransposed; + Matrix::Invert(drawCall.World, worldMatrixInverseTransposed); + materialData->WorldMatrixInverseTransposed.SetMatrix(worldMatrixInverseTransposed); materialData->GridSize = customData->GridSize; materialData->PerInstanceRandom = drawCall.PerInstanceRandom; 
materialData->VolumetricFogMaxDistance = customData->VolumetricFogMaxDistance; diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 278802995..ca4733496 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -3,6 +3,7 @@ #include "GlobalSignDistanceFieldPass.h" #include "RenderList.h" #include "Engine/Core/Math/Vector3.h" +#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Core/Collections/HashSet.h" #include "Engine/Engine/Engine.h" #include "Engine/Content/Content.h" @@ -39,8 +40,8 @@ static_assert(GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT % 4 == 0, "Must be multiple o PACK_STRUCT(struct ObjectRasterizeData { - Matrix WorldToVolume; // TODO: use 3x4 matrix - Matrix VolumeToWorld; // TODO: use 3x4 matrix + Matrix3x4 WorldToVolume; + Matrix3x4 VolumeToWorld; Float3 VolumeToUVWMul; float MipOffset; Float3 VolumeToUVWAdd; @@ -670,15 +671,15 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex // Add object data for the GPU buffer uint16 dataIndex = _objectsBufferCount++; ObjectRasterizeData objectData; - Matrix localToWorldM, worldToLocal, volumeToWorld; - Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorldM); - Matrix::Invert(localToWorldM, worldToLocal); + Matrix localToWorld, worldToLocal, volumeToWorld; + Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorld); + Matrix::Invert(localToWorld, worldToLocal); BoundingBox localVolumeBounds(object.SDF->LocalBoundsMin, object.SDF->LocalBoundsMax); Float3 volumeLocalBoundsExtent = localVolumeBounds.GetSize() * 0.5f; Matrix worldToVolume = worldToLocal * Matrix::Translation(-(localVolumeBounds.Minimum + volumeLocalBoundsExtent)); 
Matrix::Invert(worldToVolume, volumeToWorld); - Matrix::Transpose(worldToVolume, objectData.WorldToVolume); - Matrix::Transpose(volumeToWorld, objectData.VolumeToWorld); + objectData.WorldToVolume.SetMatrixTranspose(worldToVolume); + objectData.VolumeToWorld.SetMatrixTranspose(volumeToWorld); objectData.VolumeLocalBoundsExtent = volumeLocalBoundsExtent; objectData.VolumeToUVWMul = object.SDF->LocalToUVWMul; objectData.VolumeToUVWAdd = object.SDF->LocalToUVWAdd + (localVolumeBounds.Minimum + volumeLocalBoundsExtent) * object.SDF->LocalToUVWMul; @@ -702,11 +703,11 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex // Add object data for the GPU buffer uint16 dataIndex = _objectsBufferCount++; ObjectRasterizeData objectData; - Matrix localToWorldM, worldToLocal; - Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorldM); - Matrix::Invert(localToWorldM, worldToLocal); - Matrix::Transpose(worldToLocal, objectData.WorldToVolume); - Matrix::Transpose(localToWorldM, objectData.VolumeToWorld); + Matrix localToWorld, worldToLocal; + Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorld); + Matrix::Invert(localToWorld, worldToLocal); + objectData.WorldToVolume.SetMatrixTranspose(worldToLocal); + objectData.VolumeToWorld.SetMatrixTranspose(localToWorld); objectData.VolumeToUVWMul = Float3(object.LocalToUV.X, 1.0f, object.LocalToUV.Y); objectData.VolumeToUVWAdd = Float3(object.LocalToUV.Z, 0.0f, object.LocalToUV.W); objectData.MipOffset = (float)_cascadeIndex * 0.5f; // Use lower-quality mip for far cascades diff --git a/Source/Shaders/Common.hlsl b/Source/Shaders/Common.hlsl index 4fa3b5bb2..74335d87a 100644 --- a/Source/Shaders/Common.hlsl +++ b/Source/Shaders/Common.hlsl @@ -230,4 +230,10 @@ float4 SampleUnwrappedTexture3D(Texture2D tex, SamplerState s, 
float3 uvw, float return lerp(rg0, rg1, fracW); } +// Converts compact 4x3 object transformation matrix into a full 4x4 matrix. +float4x4 ToMatrix4x4(float4x3 m) +{ + return float4x4(float4(m[0].xyz, 0.0f), float4(m[1].xyz, 0.0f), float4(m[2].xyz, 0.0f), float4(m._m30, m._m31, m._m32, 1.0f)); +} + #endif diff --git a/Source/Shaders/GlobalSignDistanceField.shader b/Source/Shaders/GlobalSignDistanceField.shader index 882e96306..9810c53f8 100644 --- a/Source/Shaders/GlobalSignDistanceField.shader +++ b/Source/Shaders/GlobalSignDistanceField.shader @@ -11,8 +11,8 @@ struct ObjectRasterizeData { - float4x4 WorldToVolume; // TODO: use 3x4 matrix - float4x4 VolumeToWorld; // TODO: use 3x4 matrix + float4x3 WorldToVolume; + float4x3 VolumeToWorld; float3 VolumeToUVWMul; float MipOffset; float3 VolumeToUVWAdd; @@ -74,14 +74,15 @@ Texture3D ObjectsTextures[GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT] : registe float DistanceToModelSDF(float minDistance, ObjectRasterizeData modelData, Texture3D modelSDFTex, float3 worldPos) { // Object scaling is the length of the rows - float3 volumeToWorldScale = float3(length(modelData.VolumeToWorld[0]), length(modelData.VolumeToWorld[1]), length(modelData.VolumeToWorld[2])); + float4x4 volumeToWorld = ToMatrix4x4(modelData.VolumeToWorld); + float3 volumeToWorldScale = float3(length(volumeToWorld[0]), length(volumeToWorld[1]), length(volumeToWorld[2])); float volumeScale = min(volumeToWorldScale.x, min(volumeToWorldScale.y, volumeToWorldScale.z)); // Compute SDF volume UVs and distance in world-space to the volume bounds - float3 volumePos = mul(float4(worldPos, 1), modelData.WorldToVolume).xyz; + float3 volumePos = mul(float4(worldPos, 1), ToMatrix4x4(modelData.WorldToVolume)).xyz; float3 volumeUV = volumePos * modelData.VolumeToUVWMul + modelData.VolumeToUVWAdd; float3 volumePosClamped = clamp(volumePos, -modelData.VolumeLocalBoundsExtent, modelData.VolumeLocalBoundsExtent); - float3 worldPosClamped = mul(float4(volumePosClamped, 1), 
modelData.VolumeToWorld).xyz; + float3 worldPosClamped = mul(float4(volumePosClamped, 1), volumeToWorld).xyz; float distanceToVolume = distance(worldPos, worldPosClamped); if (distanceToVolume < 0.01f) distanceToVolume = length((volumePos - volumePosClamped) * volumeToWorldScale); @@ -152,7 +153,7 @@ void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) ObjectRasterizeData objectData = ObjectsBuffer[Objects[i / 4][i % 4]]; // Convert voxel world-space position into heightfield local-space position and get heightfield UV - float3 volumePos = mul(float4(voxelWorldPos, 1), objectData.WorldToVolume).xyz; + float3 volumePos = mul(float4(voxelWorldPos, 1), ToMatrix4x4(objectData.WorldToVolume)).xyz; float3 volumeUV = volumePos * objectData.VolumeToUVWMul + objectData.VolumeToUVWAdd; float2 heightfieldUV = float2(volumeUV.x, volumeUV.z); @@ -168,8 +169,9 @@ void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) float height = (float)((int)(heightmapValue.x * 255.0) + ((int)(heightmapValue.y * 255) << 8)) / 65535.0; float2 positionXZ = volumePos.xz; float3 position = float3(positionXZ.x, height, positionXZ.y); - float3 heightfieldPosition = mul(float4(position, 1), objectData.VolumeToWorld).xyz; - float3 heightfieldNormal = normalize(float3(objectData.VolumeToWorld[0].y, objectData.VolumeToWorld[1].y, objectData.VolumeToWorld[2].y)); + float4x4 volumeToWorld = ToMatrix4x4(objectData.VolumeToWorld); + float3 heightfieldPosition = mul(float4(position, 1), volumeToWorld).xyz; + float3 heightfieldNormal = normalize(float3(volumeToWorld[0].y, volumeToWorld[1].y, volumeToWorld[2].y)); // Calculate distance from voxel center to the heightfield float objectDistance = dot(heightfieldNormal, voxelWorldPos - heightfieldPosition); From 01d91bf10254dbb8fff21e17bb8e65db80534e2f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 26 Mar 2024 14:05:24 +0100 Subject: [PATCH 005/292] Optimize decals rendering --- 
.../Materials/DecalMaterialShader.cpp | 3 +- Source/Engine/Level/Actors/Decal.cpp | 18 ++- .../Renderer/Editor/MaterialComplexity.cpp | 11 +- .../Renderer/Editor/QuadOverdrawPass.cpp | 12 +- Source/Engine/Renderer/GBufferPass.cpp | 134 ++++++++---------- Source/Engine/Renderer/RenderList.h | 9 +- 6 files changed, 92 insertions(+), 95 deletions(-) diff --git a/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp b/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp index 980fc0282..9b2654f21 100644 --- a/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp @@ -34,8 +34,7 @@ void DecalMaterialShader::Bind(BindParameters& params) ASSERT_LOW_LAYER(cb.Length() >= sizeof(DecalMaterialShaderData)); auto materialData = reinterpret_cast(cb.Get()); cb = Span(cb.Get() + sizeof(DecalMaterialShaderData), cb.Length() - sizeof(DecalMaterialShaderData)); - int32 srv = 0; - const bool isCameraInside = OrientedBoundingBox(Vector3::Half, params.FirstDrawCall->World).Contains(view.Position) == ContainmentType::Contains; + const bool isCameraInside = OrientedBoundingBox(Vector3::Half, drawCall.World).Contains(view.Position) == ContainmentType::Contains; // Setup parameters MaterialParameter::BindMeta bindMeta; diff --git a/Source/Engine/Level/Actors/Decal.cpp b/Source/Engine/Level/Actors/Decal.cpp index 0d08bc933..dbec91b57 100644 --- a/Source/Engine/Level/Actors/Decal.cpp +++ b/Source/Engine/Level/Actors/Decal.cpp @@ -69,20 +69,26 @@ void Decal::OnLayerChanged() void Decal::Draw(RenderContext& renderContext) { + MaterialBase* material = Material; if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::Decals) && EnumHasAnyFlags(renderContext.View.Pass, DrawPass::GBuffer) && - Material && - Material->IsLoaded() && - Material->IsDecal()) + material && + material->IsLoaded() && + material->IsDecal()) { + // Check if decal is being culled const auto lodView = (renderContext.LodProxyView ? 
renderContext.LodProxyView : &renderContext.View); const float screenRadiusSquared = RenderTools::ComputeBoundsScreenRadiusSquared(_sphere.Center - renderContext.View.Origin, (float)_sphere.Radius, *lodView) * renderContext.View.ModelLODDistanceFactorSqrt; - - // Check if decal is being culled if (Math::Square(DrawMinScreenSize * 0.5f) > screenRadiusSquared) return; - renderContext.List->Decals.Add(this); + RenderDecalData data; + Transform transform = GetTransform(); + transform.Scale *= _size; + renderContext.View.GetWorldMatrix(transform, data.World); + data.SortOrder = SortOrder; + data.Material = material; + renderContext.List->Decals.Add(data); } } diff --git a/Source/Engine/Renderer/Editor/MaterialComplexity.cpp b/Source/Engine/Renderer/Editor/MaterialComplexity.cpp index 2df397df5..1d7534485 100644 --- a/Source/Engine/Renderer/Editor/MaterialComplexity.cpp +++ b/Source/Engine/Renderer/Editor/MaterialComplexity.cpp @@ -130,17 +130,14 @@ void MaterialComplexityMaterialShader::Draw(RenderContext& renderContext, GPUCon MaterialBase::BindParameters bindParams(context, renderContext, drawCall); bindParams.BindViewData(); drawCall.WorldDeterminantSign = 1.0f; + drawCall.PerInstanceRandom = 0.0f; context->SetRenderTarget(lightBuffer); for (int32 i = 0; i < decals.Count(); i++) { - const auto decal = decals[i]; - ASSERT(decal && decal->Material); - Transform transform = decal->GetTransform(); - transform.Scale *= decal->GetSize(); - renderContext.View.GetWorldMatrix(transform, drawCall.World); + const RenderDecalData& decal = decals.Get()[i]; + drawCall.World = decal.World; drawCall.ObjectPosition = drawCall.World.GetTranslation(); - drawCall.Material = decal->Material; - drawCall.PerInstanceRandom = decal->GetPerInstanceRandom(); + drawCall.Material = decal.Material; decalsWrapper.Bind(bindParams); boxModel->Render(context); } diff --git a/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp b/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp index 
20e06550a..2674c66bd 100644 --- a/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp +++ b/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp @@ -49,7 +49,8 @@ void QuadOverdrawPass::Render(RenderContext& renderContext, GPUContext* context, context->BindUA(1, overdrawTexture->View()); context->BindUA(2, liveCountTexture->View()); DrawCall drawCall; - drawCall.PerInstanceRandom = 1.0f; + drawCall.WorldDeterminantSign = 1.0f; + drawCall.PerInstanceRandom = 0.0f; MaterialBase::BindParameters bindParams(context, renderContext, drawCall); bindParams.BindViewData(); renderContext.View.Pass = DrawPass::QuadOverdraw; @@ -62,13 +63,8 @@ void QuadOverdrawPass::Render(RenderContext& renderContext, GPUContext* context, // Draw decals for (int32 i = 0; i < renderContext.List->Decals.Count(); i++) { - const auto decal = renderContext.List->Decals[i]; - ASSERT(decal && decal->Material); - Transform transform = decal->GetTransform(); - transform.Scale *= decal->GetSize(); - renderContext.View.GetWorldMatrix(transform, drawCall.World); - drawCall.ObjectPosition = drawCall.World.GetTranslation(); - drawCall.PerInstanceRandom = decal->GetPerInstanceRandom(); + const RenderDecalData& decal = renderContext.List->Decals.Get()[i]; + drawCall.World = decal.World; defaultMaterial->Bind(bindParams); boxModel->Render(context); } diff --git a/Source/Engine/Renderer/GBufferPass.cpp b/Source/Engine/Renderer/GBufferPass.cpp index f196a4ee9..589401892 100644 --- a/Source/Engine/Renderer/GBufferPass.cpp +++ b/Source/Engine/Renderer/GBufferPass.cpp @@ -59,13 +59,8 @@ bool GBufferPass::Init() bool GBufferPass::setupResources() { - ASSERT(_gBufferShader); - - // Check if shader has not been loaded - if (!_gBufferShader->IsLoaded()) - { + if (!_gBufferShader || !_gBufferShader->IsLoaded()) return true; - } auto gbuffer = _gBufferShader->GetShader(); // Validate shader constant buffers sizes @@ -228,9 +223,13 @@ void GBufferPass::Fill(RenderContext& renderContext, GPUTexture* lightBuffer) 
context->ResetRenderTarget(); } -bool SortDecal(Decal* const& a, Decal* const& b) +bool SortDecal(RenderDecalData const& a, RenderDecalData const& b) { - return a->SortOrder < b->SortOrder; + if (a.SortOrder == b.SortOrder) + { + return (uintptr)a.Material < (uintptr)b.Material; + } + return a.SortOrder < b.SortOrder; } void GBufferPass::RenderDebug(RenderContext& renderContext) @@ -428,96 +427,89 @@ void GBufferPass::DrawSky(RenderContext& renderContext, GPUContext* context) void GBufferPass::DrawDecals(RenderContext& renderContext, GPUTextureView* lightBuffer) { - // Skip if no decals to render auto& decals = renderContext.List->Decals; - if (decals.IsEmpty() || _boxModel == nullptr || !_boxModel->CanBeRendered() || EnumHasNoneFlags(renderContext.View.Flags, ViewFlags::Decals)) + const auto boxModel = _boxModel.Get(); + if (decals.IsEmpty() || boxModel == nullptr || !boxModel->CanBeRendered() || EnumHasNoneFlags(renderContext.View.Flags, ViewFlags::Decals)) return; - PROFILE_GPU_CPU("Decals"); - - // Cache data - auto device = GPUDevice::Instance; - auto context = device->GetMainContext(); - auto model = _boxModel.Get(); + auto context = GPUDevice::Instance->GetMainContext(); auto buffers = renderContext.Buffers; // Sort decals from the lowest order to the highest order - Sorting::QuickSort(decals.Get(), (int32)decals.Count(), &SortDecal); - - // TODO: batch decals using the same material - - // TODO: sort decals by the blending mode within the same order + Sorting::QuickSort(decals.Get(), decals.Count(), &SortDecal); // Prepare DrawCall drawCall; MaterialBase::BindParameters bindParams(context, renderContext, drawCall); bindParams.BindViewData(); - drawCall.Material = nullptr; - drawCall.WorldDeterminantSign = 1.0f; + MaterialDecalBlendingMode decalBlendingMode = (MaterialDecalBlendingMode)-1; + MaterialUsageFlags usageFlags = (MaterialUsageFlags)-1; + boxModel->LODs.Get()->Meshes.Get()->GetDrawCallGeometry(drawCall); + 
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3)); + context->BindIB(drawCall.Geometry.IndexBuffer); + context->ResetRenderTarget(); // Draw all decals for (int32 i = 0; i < decals.Count(); i++) { - const auto decal = decals[i]; - ASSERT(decal && decal->Material); - Transform transform = decal->GetTransform(); - transform.Scale *= decal->GetSize(); - renderContext.View.GetWorldMatrix(transform, drawCall.World); - drawCall.ObjectPosition = drawCall.World.GetTranslation(); - drawCall.ObjectRadius = decal->GetSphere().Radius; + const RenderDecalData& decal = decals.Get()[i]; - context->ResetRenderTarget(); - - // Bind output - const MaterialInfo& info = decal->Material->GetInfo(); - switch (info.DecalBlendingMode) + // Bind output (skip if won't change in-between decals) + const MaterialInfo& info = decal.Material->GetInfo(); + const MaterialUsageFlags infoUsageFlags = info.UsageFlags & (MaterialUsageFlags::UseEmissive | MaterialUsageFlags::UseNormal); + if (decalBlendingMode != info.DecalBlendingMode || usageFlags != infoUsageFlags) { - case MaterialDecalBlendingMode::Translucent: - { - GPUTextureView* targetBuffers[4]; - int32 count = 2; - targetBuffers[0] = buffers->GBuffer0->View(); - targetBuffers[1] = buffers->GBuffer2->View(); - if (EnumHasAnyFlags(info.UsageFlags, MaterialUsageFlags::UseEmissive)) + decalBlendingMode = info.DecalBlendingMode; + usageFlags = infoUsageFlags; + switch (decalBlendingMode) { - count++; - targetBuffers[2] = lightBuffer; - - if (EnumHasAnyFlags(info.UsageFlags, MaterialUsageFlags::UseNormal)) + case MaterialDecalBlendingMode::Translucent: + { + GPUTextureView* targetBuffers[4]; + int32 count = 2; + targetBuffers[0] = buffers->GBuffer0->View(); + targetBuffers[1] = buffers->GBuffer2->View(); + if (EnumHasAnyFlags(usageFlags, MaterialUsageFlags::UseEmissive)) { count++; - targetBuffers[3] = buffers->GBuffer1->View(); + targetBuffers[2] = lightBuffer; + if (EnumHasAnyFlags(usageFlags, MaterialUsageFlags::UseNormal)) + { + 
count++; + targetBuffers[3] = buffers->GBuffer1->View(); + } } + else if (EnumHasAnyFlags(usageFlags, MaterialUsageFlags::UseNormal)) + { + count++; + targetBuffers[2] = buffers->GBuffer1->View(); + } + context->SetRenderTarget(nullptr, ToSpan(targetBuffers, count)); + break; } - else if (EnumHasAnyFlags(info.UsageFlags, MaterialUsageFlags::UseNormal)) + case MaterialDecalBlendingMode::Stain: { - count++; - targetBuffers[2] = buffers->GBuffer1->View(); + context->SetRenderTarget(buffers->GBuffer0->View()); + break; + } + case MaterialDecalBlendingMode::Normal: + { + context->SetRenderTarget(buffers->GBuffer1->View()); + break; + } + case MaterialDecalBlendingMode::Emissive: + { + context->SetRenderTarget(lightBuffer); + break; + } } - context->SetRenderTarget(nullptr, ToSpan(targetBuffers, count)); - break; - } - case MaterialDecalBlendingMode::Stain: - { - context->SetRenderTarget(buffers->GBuffer0->View()); - break; - } - case MaterialDecalBlendingMode::Normal: - { - context->SetRenderTarget(buffers->GBuffer1->View()); - break; - } - case MaterialDecalBlendingMode::Emissive: - { - context->SetRenderTarget(lightBuffer); - break; - } } // Draw decal - drawCall.PerInstanceRandom = decal->GetPerInstanceRandom(); - decal->Material->Bind(bindParams); - model->Render(context); + drawCall.World = decal.World; + decal.Material->Bind(bindParams); + // TODO: use hardware instancing + context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, 1, 0, 0, 0); } context->ResetSR(); diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 4dabe669d..5c0c8e39e 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -106,6 +106,13 @@ struct RenderSkyLightData : RenderLightData void SetupLightData(ShaderLightData* data, bool useShadow) const; }; +struct RenderDecalData +{ + Matrix World; + MaterialBase* Material; + int32 SortOrder; +}; + /// /// The draw calls list types. 
/// @@ -288,7 +295,7 @@ public: /// /// Decals registered for the rendering. /// - Array Decals; + Array Decals; /// /// Local volumetric fog particles registered for the rendering. From 4ab572426da7718cebea5b68d361f1625cb92547 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 26 Mar 2024 14:27:10 +0100 Subject: [PATCH 006/292] Various renamings --- .../Materials/MaterialShaderFeatures.cpp | 8 +- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 6 +- Source/Engine/Renderer/LightPass.cpp | 8 +- Source/Engine/Renderer/RenderList.cpp | 113 +++++++++--------- Source/Engine/Renderer/RenderList.h | 8 +- Source/Engine/Renderer/ShadowsPass.cpp | 6 +- Source/Engine/Renderer/VolumetricFogPass.cpp | 6 +- 7 files changed, 78 insertions(+), 77 deletions(-) diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp index 43646cc0e..c89688a9d 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp @@ -50,7 +50,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanUnBindSR(dirLightShaderRegisterIndex); } - dirLight.SetupLightData(&data.DirectionalLight, useShadow); + dirLight.SetShaderData(data.DirectionalLight, useShadow); } else { @@ -63,7 +63,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanSkyLights.HasItems()) { auto& skyLight = cache->SkyLights.First(); - skyLight.SetupLightData(&data.SkyLight, false); + skyLight.SetShaderData(data.SkyLight, false); const auto texture = skyLight.Image ? 
skyLight.Image->GetTexture() : nullptr; params.GPUContext->BindSR(skyLightShaderRegisterIndex, GET_TEXTURE_VIEW_SAFE(texture)); } @@ -106,7 +106,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanPointLights[i]; if (CollisionsHelper::SphereIntersectsSphere(objectBounds, BoundingSphere(light.Position, light.Radius))) { - light.SetupLightData(&data.LocalLights[data.LocalLightsCount], false); + light.SetShaderData(data.LocalLights[data.LocalLightsCount], false); data.LocalLightsCount++; } } @@ -115,7 +115,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanSpotLights[i]; if (CollisionsHelper::SphereIntersectsSphere(objectBounds, BoundingSphere(light.Position, light.Radius))) { - light.SetupLightData(&data.LocalLights[data.LocalLightsCount], false); + light.SetShaderData(data.LocalLights[data.LocalLightsCount], false); data.LocalLightsCount++; } } diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 49eca6604..1aa172fa1 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -961,7 +961,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co PROFILE_GPU_CPU_NAMED("Directional Light"); const bool useShadow = CanRenderShadow(renderContext.View, light); // TODO: test perf/quality when using Shadow Map for directional light (ShadowsPass::Instance()->LastDirLightShadowMap) instead of Global SDF trace - light.SetupLightData(&data.Light, useShadow); + light.SetShaderData(data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; context->UpdateCB(_cb0, &data); @@ -994,7 +994,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Draw draw light PROFILE_GPU_CPU_NAMED("Point Light"); const bool useShadow = 
CanRenderShadow(renderContext.View, light); - light.SetupLightData(&data.Light, useShadow); + light.SetShaderData(data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; context->UpdateCB(_cb0, &data); @@ -1027,7 +1027,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Draw draw light PROFILE_GPU_CPU_NAMED("Spot Light"); const bool useShadow = CanRenderShadow(renderContext.View, light); - light.SetupLightData(&data.Light, useShadow); + light.SetShaderData(data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; context->UpdateCB(_cb0, &data); diff --git a/Source/Engine/Renderer/LightPass.cpp b/Source/Engine/Renderer/LightPass.cpp index f9e7038d7..bdbe22be8 100644 --- a/Source/Engine/Renderer/LightPass.cpp +++ b/Source/Engine/Renderer/LightPass.cpp @@ -277,7 +277,7 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi context->UnBindSR(5); // Pack light properties buffer - light.SetupLightData(&perLight.Light, renderShadow); + light.SetShaderData(perLight.Light, renderShadow); Matrix::Transpose(wvp, perLight.WVP); if (useIES) { @@ -334,7 +334,7 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi context->UnBindSR(5); // Pack light properties buffer - light.SetupLightData(&perLight.Light, renderShadow); + light.SetShaderData(perLight.Light, renderShadow); Matrix::Transpose(wvp, perLight.WVP); if (useIES) { @@ -377,7 +377,7 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi context->UnBindSR(5); // Pack light properties buffer - light.SetupLightData(&perLight.Light, renderShadow); + light.SetShaderData(perLight.Light, renderShadow); // Calculate lighting context->UpdateCB(cb0, &perLight); @@ -411,7 +411,7 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, 
GPUTextureVi Matrix::Multiply(world, view.ViewProjection(), wvp); // Pack light properties buffer - light.SetupLightData(&perLight.Light, false); + light.SetShaderData(perLight.Light, false); Matrix::Transpose(wvp, perLight.WVP); // Bind source image diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 1d9ca639a..67f6bab03 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -39,72 +39,73 @@ namespace CriticalSection MemPoolLocker; } -void RenderDirectionalLightData::SetupLightData(ShaderLightData* data, bool useShadow) const +void RenderDirectionalLightData::SetShaderData(ShaderLightData& data, bool useShadow) const { - data->SpotAngles.X = -2.0f; - data->SpotAngles.Y = 1.0f; - data->SourceRadius = 0; - data->SourceLength = 0; - data->Color = Color; - data->MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); - data->Position = Float3::Zero; - data->CastShadows = useShadow ? 1.0f : 0.0f; - data->Direction = -Direction; - data->Radius = 0; - data->FalloffExponent = 0; - data->InverseSquared = 0; - data->RadiusInv = 0; + data.SpotAngles.X = -2.0f; + data.SpotAngles.Y = 1.0f; + data.SourceRadius = 0; + data.SourceLength = 0; + data.Color = Color; + data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); + data.Position = Float3::Zero; + data.CastShadows = useShadow ? 1.0f : 0.0f; + data.Direction = -Direction; + data.Radius = 0; + data.FalloffExponent = 0; + data.InverseSquared = 0; + data.RadiusInv = 0; } -void RenderSpotLightData::SetupLightData(ShaderLightData* data, bool useShadow) const +void RenderSpotLightData::SetShaderData(ShaderLightData& data, bool useShadow) const { - data->SpotAngles.X = CosOuterCone; - data->SpotAngles.Y = InvCosConeDifference; - data->SourceRadius = SourceRadius; - data->SourceLength = 0.0f; - data->Color = Color; - data->MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); - data->Position = Position; - data->CastShadows = useShadow ? 
1.0f : 0.0f; - data->Direction = Direction; - data->Radius = Radius; - data->FalloffExponent = FallOffExponent; - data->InverseSquared = UseInverseSquaredFalloff ? 1.0f : 0.0f; - data->RadiusInv = 1.0f / Radius; + data.SpotAngles.X = CosOuterCone; + data.SpotAngles.Y = InvCosConeDifference; + data.SourceRadius = SourceRadius; + data.SourceLength = 0.0f; + data.Color = Color; + data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); + data.Position = Position; + data.CastShadows = useShadow ? 1.0f : 0.0f; + data.Direction = Direction; + data.Radius = Radius; + data.FalloffExponent = FallOffExponent; + data.InverseSquared = UseInverseSquaredFalloff ? 1.0f : 0.0f; + data.RadiusInv = 1.0f / Radius; } -void RenderPointLightData::SetupLightData(ShaderLightData* data, bool useShadow) const +void RenderPointLightData::SetShaderData(ShaderLightData& data, bool useShadow) const { - data->SpotAngles.X = -2.0f; - data->SpotAngles.Y = 1.0f; - data->SourceRadius = SourceRadius; - data->SourceLength = SourceLength; - data->Color = Color; - data->MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); - data->Position = Position; - data->CastShadows = useShadow ? 1.0f : 0.0f; - data->Direction = Direction; - data->Radius = Radius; - data->FalloffExponent = FallOffExponent; - data->InverseSquared = UseInverseSquaredFalloff ? 1.0f : 0.0f; - data->RadiusInv = 1.0f / Radius; + data.SpotAngles.X = -2.0f; + data.SpotAngles.Y = 1.0f; + data.SourceRadius = SourceRadius; + data.SourceLength = SourceLength; + data.Color = Color; + data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); + data.Position = Position; + data.CastShadows = useShadow ? 1.0f : 0.0f; + data.Direction = Direction; + data.Radius = Radius; + data.FalloffExponent = FallOffExponent; + data.InverseSquared = UseInverseSquaredFalloff ? 
1.0f : 0.0f; + data.RadiusInv = 1.0f / Radius; } -void RenderSkyLightData::SetupLightData(ShaderLightData* data, bool useShadow) const +void RenderSkyLightData::SetShaderData(ShaderLightData& data, bool useShadow) const { - data->SpotAngles.X = AdditiveColor.X; - data->SpotAngles.Y = AdditiveColor.Y; - data->SourceRadius = AdditiveColor.Z; - data->SourceLength = Image ? Image->StreamingTexture()->TotalMipLevels() - 2.0f : 0.0f; - data->Color = Color; - data->MinRoughness = MIN_ROUGHNESS; - data->Position = Position; - data->CastShadows = useShadow ? 1.0f : 0.0f; - data->Direction = Float3::Forward; - data->Radius = Radius; - data->FalloffExponent = 0; - data->InverseSquared = 0; - data->RadiusInv = 1.0f / Radius; + data.SpotAngles.X = AdditiveColor.X; + data.SpotAngles.Y = AdditiveColor.Y; + data.SourceRadius = AdditiveColor.Z; + data.SourceLength = Image ? Image->StreamingTexture()->TotalMipLevels() - 2.0f : 0.0f; + data.Color = Color; + data.MinRoughness = MIN_ROUGHNESS; + data.Position = Position; + data.CastShadows = useShadow ? 
1.0f : 0.0f; + data.Direction = Float3::Forward; + data.Radius = Radius; + data.FalloffExponent = 0; + data.InverseSquared = 0; + data.RadiusInv = 1.0f / Radius; +} } void* RendererAllocation::Allocate(uintptr size) diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 5c0c8e39e..ebfdaf03d 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -61,7 +61,7 @@ struct RenderDirectionalLightData : RenderLightData float Cascade3Spacing; float Cascade4Spacing; - void SetupLightData(ShaderLightData* data, bool useShadow) const; + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; struct RenderSpotLightData : RenderLightData @@ -79,7 +79,7 @@ struct RenderSpotLightData : RenderLightData GPUTexture* IESTexture; - void SetupLightData(ShaderLightData* data, bool useShadow) const; + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; struct RenderPointLightData : RenderLightData @@ -93,7 +93,7 @@ struct RenderPointLightData : RenderLightData GPUTexture* IESTexture; - void SetupLightData(ShaderLightData* data, bool useShadow) const; + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; struct RenderSkyLightData : RenderLightData @@ -103,7 +103,7 @@ struct RenderSkyLightData : RenderLightData CubeTexture* Image; - void SetupLightData(ShaderLightData* data, bool useShadow) const; + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; struct RenderDecalData diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index e7da47363..de1d0c878 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -661,7 +661,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderPoi // Setup shader data Data sperLight; GBufferPass::SetInputs(view, sperLight.GBuffer); - light.SetupLightData(&sperLight.Light, true); + light.SetShaderData(sperLight.Light, 
true); sperLight.LightShadow = shadowData.Constants; Matrix::Transpose(view.ViewProjection(), sperLight.ViewProjectionMatrix); sperLight.ContactShadowsDistance = light.ShadowsDistance; @@ -739,7 +739,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderSpo // Setup shader data Data sperLight; GBufferPass::SetInputs(view, sperLight.GBuffer); - light.SetupLightData(&sperLight.Light, true); + light.SetShaderData(sperLight.Light, true); sperLight.LightShadow = shadowData.Constants; Matrix::Transpose(view.ViewProjection(), sperLight.ViewProjectionMatrix); sperLight.ContactShadowsDistance = light.ShadowsDistance; @@ -809,7 +809,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderDir Data sperLight; auto& view = renderContext.View; GBufferPass::SetInputs(view, sperLight.GBuffer); - light.SetupLightData(&sperLight.Light, true); + light.SetShaderData(sperLight.Light, true); sperLight.LightShadow = shadowData.Constants; Matrix::Transpose(view.ViewProjection(), sperLight.ViewProjectionMatrix); sperLight.ContactShadowsDistance = light.ShadowsDistance; diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index 0e1b10747..adbe4c837 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -305,7 +305,7 @@ void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUConte perLight.LocalLightScatteringIntensity = light.VolumetricScatteringIntensity; perLight.ViewSpaceBoundingSphere = Float4(viewSpaceLightBoundsOrigin, radius); Matrix::Transpose(view.Projection, perLight.ViewToVolumeClip); - light.SetupLightData(&perLight.LocalLight, true); + light.SetShaderData(perLight.LocalLight, true); perLight.LocalLightShadow = shadow; // Upload data @@ -366,7 +366,7 @@ void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUConte perLight.LocalLightScatteringIntensity = 
light.VolumetricScatteringIntensity; perLight.ViewSpaceBoundingSphere = Float4(viewSpaceLightBoundsOrigin, radius); Matrix::Transpose(renderContext.View.Projection, perLight.ViewToVolumeClip); - light.SetupLightData(&perLight.LocalLight, withShadow); + light.SetShaderData(perLight.LocalLight, withShadow); // Upload data context->UpdateCB(cb1, &perLight); @@ -442,7 +442,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) { const auto shadowPass = ShadowsPass::Instance(); const bool useShadow = dirLight.CastVolumetricShadow && shadowPass->LastDirLightIndex == dirLightIndex; - dirLight.SetupLightData(&_cache.Data.DirectionalLight, useShadow); + dirLight.SetShaderData(_cache.Data.DirectionalLight, useShadow); _cache.Data.DirectionalLight.Color *= brightness; if (useShadow) { From 55af307c43dccdb6ab69dfdd48f789c235d1e8ad Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 26 Mar 2024 15:01:12 +0100 Subject: [PATCH 007/292] Optimize env probes data storage in renderer --- .../Materials/MaterialShaderFeatures.cpp | 20 +++++------- .../Engine/Level/Actors/EnvironmentProbe.cpp | 32 +++++++++++++------ Source/Engine/Level/Actors/EnvironmentProbe.h | 7 ---- Source/Engine/Renderer/ReflectionsPass.cpp | 30 +++++++---------- Source/Engine/Renderer/RenderList.cpp | 5 +++ Source/Engine/Renderer/RenderList.h | 13 +++++++- Source/Engine/Renderer/Renderer.cpp | 5 +++ 7 files changed, 63 insertions(+), 49 deletions(-) diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp index c89688a9d..e2e07b397 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp @@ -74,24 +74,21 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanEnvironmentProbes.Count(); i++) { - const auto p = cache->EnvironmentProbes[i]; - if (CollisionsHelper::SphereIntersectsSphere(objectBoundsWorld, p->GetSphere())) 
+ const RenderEnvironmentProbeData& probe = cache->EnvironmentProbes.Get()[i]; + if (CollisionsHelper::SphereIntersectsSphere(objectBounds, BoundingSphere(probe.Position, probe.Radius))) { - probe = p; + noEnvProbe = false; + probe.SetShaderData(data.EnvironmentProbe); + params.GPUContext->BindSR(envProbeShaderRegisterIndex, probe.Texture); break; } } - if (probe && probe->GetProbe()) - { - probe->SetupProbeData(params.RenderContext, &data.EnvironmentProbe); - params.GPUContext->BindSR(envProbeShaderRegisterIndex, probe->GetProbe()); - } - else + if (noEnvProbe) { data.EnvironmentProbe.Data1 = Float4::Zero; params.GPUContext->UnBindSR(envProbeShaderRegisterIndex); @@ -99,7 +96,6 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanPointLights.Count() && data.LocalLightsCount < MaxLocalLights; i++) { diff --git a/Source/Engine/Level/Actors/EnvironmentProbe.cpp b/Source/Engine/Level/Actors/EnvironmentProbe.cpp index 4584cfe14..ada74d410 100644 --- a/Source/Engine/Level/Actors/EnvironmentProbe.cpp +++ b/Source/Engine/Level/Actors/EnvironmentProbe.cpp @@ -4,6 +4,7 @@ #include "Engine/Platform/FileSystem.h" #include "Engine/Graphics/RenderView.h" #include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/Textures/GPUTexture.h" #include "Engine/Graphics/Textures/TextureData.h" #include "Engine/Renderer/RenderList.h" @@ -12,7 +13,7 @@ #include "Engine/Content/Content.h" #include "Engine/ContentExporters/AssetExporters.h" #include "Engine/ContentImporters/AssetsImportingManager.h" -#include "Engine/Graphics/GPUContext.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Level/Scene/Scene.h" @@ -61,13 +62,6 @@ bool EnvironmentProbe::IsUsingCustomProbe() const return _isUsingCustomProbe; } -void EnvironmentProbe::SetupProbeData(const RenderContext& renderContext, ShaderEnvProbeData* data) const -{ - const float radius = GetScaledRadius(); - 
data->Data0 = Float4(GetPosition() - renderContext.View.Origin, 0); - data->Data1 = Float4(radius, 1.0f / radius, Brightness, 0); -} - CubeTexture* EnvironmentProbe::GetCustomProbe() const { return _isUsingCustomProbe ? _probe : nullptr; @@ -181,11 +175,29 @@ void EnvironmentProbe::Draw(RenderContext& renderContext) EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::Reflections) && EnumHasAnyFlags(renderContext.View.Pass, DrawPass::GBuffer)) { + // Size culling + const Float3 position = _sphere.Center - renderContext.View.Origin; + const float radius = GetScaledRadius(); + const float drawMinScreenSize = 0.02f; + const auto lodView = (renderContext.LodProxyView ? renderContext.LodProxyView : &renderContext.View); + const float screenRadiusSquared = RenderTools::ComputeBoundsScreenRadiusSquared(position, radius, *lodView) * renderContext.View.ModelLODDistanceFactorSqrt; + if (Math::Square(drawMinScreenSize * 0.5f) > screenRadiusSquared) + return; + + // Realtime probe update if (UpdateMode == ProbeUpdateMode::Realtime) ProbesRenderer::Bake(this, 0.0f); - if ((_probe != nullptr && _probe->IsLoaded()) || _probeTexture != nullptr) + + GPUTexture* texture = GetProbe(); + if (texture) { - renderContext.List->EnvironmentProbes.Add(this); + RenderEnvironmentProbeData data; + data.Texture = texture; + data.Position = position; + data.Radius = radius; + data.Brightness = Brightness; + data.HashID = GetHash(_id); + renderContext.List->EnvironmentProbes.Add(data); } } } diff --git a/Source/Engine/Level/Actors/EnvironmentProbe.h b/Source/Engine/Level/Actors/EnvironmentProbe.h index dd90a446c..bde48d309 100644 --- a/Source/Engine/Level/Actors/EnvironmentProbe.h +++ b/Source/Engine/Level/Actors/EnvironmentProbe.h @@ -90,13 +90,6 @@ public: /// API_PROPERTY() bool IsUsingCustomProbe() const; - /// - /// Setup probe data structure - /// - /// Rendering context - /// Packed probe data to set - void SetupProbeData(const RenderContext& renderContext, struct ShaderEnvProbeData* data) 
const; - /// <summary> /// Gets the custom probe (null if using baked one or none). /// </summary> diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index f7e825e14..9a136d354 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -336,19 +336,15 @@ void ReflectionsPass::Dispose() _preIntegratedGF = nullptr; } -bool sortProbes(EnvironmentProbe* const& p1, EnvironmentProbe* const& p2) +bool SortProbes(RenderEnvironmentProbeData const& p1, RenderEnvironmentProbeData const& p2) { // Compare by radius - int32 res = static_cast<int32>(p2->GetScaledRadius() - p1->GetScaledRadius()); - - // Check if are the same + int32 res = static_cast<int32>(p2.Radius - p1.Radius); if (res == 0) { // Compare by ID to prevent flickering - res = GetHash(p2->GetID()) - GetHash(p1->GetID()); + res = p2.HashID - p1.HashID; } - - // Return result return res < 0; } @@ -400,38 +396,34 @@ void ReflectionsPass::Render(RenderContext& renderContext, GPUTextureView* light context->SetRenderTarget(*reflectionsBuffer); // Sort probes by the radius - Sorting::QuickSort(renderContext.List->EnvironmentProbes.Get(), renderContext.List->EnvironmentProbes.Count(), &sortProbes); - - // TODO: don't render too far probes, check area of the screen and apply culling! 
+ Sorting::QuickSort(renderContext.List->EnvironmentProbes.Get(), renderContext.List->EnvironmentProbes.Count(), &SortProbes); // Render all env probes - for (int32 probeIndex = 0; probeIndex < probesCount; probeIndex++) + for (int32 i = 0; i < probesCount; i++) { // Cache data - auto probe = renderContext.List->EnvironmentProbes[probeIndex]; - float probeRadius = probe->GetScaledRadius(); - Float3 probePosition = probe->GetPosition() - renderContext.View.Origin; + const RenderEnvironmentProbeData& probe = renderContext.List->EnvironmentProbes.Get()[i]; // Get distance from view center to light center less radius (check if view is inside a sphere) const float sphereModelScale = 2.0f; - float distance = ViewToCenterLessRadius(view, probePosition, probeRadius); + float distance = ViewToCenterLessRadius(view, probe.Position, probe.Radius); bool isViewInside = distance < 0; // Calculate world view projection matrix for the light sphere Matrix world, wvp, matrix; - Matrix::Scaling(probeRadius * sphereModelScale, wvp); - Matrix::Translation(probePosition, matrix); + Matrix::Scaling(probe.Radius * sphereModelScale, wvp); + Matrix::Translation(probe.Position, matrix); Matrix::Multiply(wvp, matrix, world); Matrix::Multiply(world, view.ViewProjection(), wvp); // Pack probe properties buffer - probe->SetupProbeData(renderContext, &data.PData); + probe.SetShaderData(data.PData); Matrix::Transpose(wvp, data.WVP); // Render reflections context->UpdateCB(cb, &data); context->BindCB(0, cb); - context->BindSR(4, probe->GetProbe()); + context->BindSR(4, probe.Texture); context->SetState(isViewInside ? 
_psProbeInverted : _psProbeNormal); _sphereModel->Render(context); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 67f6bab03..29b9bd622 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -106,6 +106,11 @@ void RenderSkyLightData::SetShaderData(ShaderLightData& data, bool useShadow) co data.InverseSquared = 0; data.RadiusInv = 1.0f / Radius; } + +void RenderEnvironmentProbeData::SetShaderData(ShaderEnvProbeData& data) const +{ + data.Data0 = Float4(Position, 0); + data.Data1 = Float4(Radius, 1.0f / Radius, Brightness, 0); } void* RendererAllocation::Allocate(uintptr size) diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index ebfdaf03d..5f2aecbd9 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -106,6 +106,17 @@ struct RenderSkyLightData : RenderLightData void SetShaderData(ShaderLightData& data, bool useShadow) const; }; +struct RenderEnvironmentProbeData +{ + GPUTexture* Texture; + Float3 Position; + float Radius; + float Brightness; + uint32 HashID; + + void SetShaderData(ShaderEnvProbeData& data) const; +}; + struct RenderDecalData { Matrix World; @@ -290,7 +301,7 @@ public: /// <summary> /// Environment probes to use for rendering reflections /// </summary> - Array<EnvironmentProbe*> EnvironmentProbes; + Array<RenderEnvironmentProbeData> EnvironmentProbes; /// <summary> /// Decals registered for the rendering. 
diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index f3c4ac6d2..0ef39bb30 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -180,6 +180,11 @@ void Renderer::Render(SceneRenderTask* task) RenderContextBatch renderContextBatch(task); renderContextBatch.Contexts.Add(renderContext); + // Pre-init render view cache early in case it's used in PreRender drawing + Float4 jitter = renderContext.View.TemporalAAJitter; // Preserve temporal jitter value (PrepareCache modifies it) + renderContext.View.PrepareCache(renderContext, viewport.Width, viewport.Height, Float2::Zero); + renderContext.View.TemporalAAJitter = jitter; + #if USE_EDITOR // Turn on low quality rendering during baking lightmaps (leave more GPU power for baking) const auto flags = renderContext.View.Flags; From 5c356ec22a534b19d4e888e18b92a8523b7a1af6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 26 Mar 2024 16:50:58 +0100 Subject: [PATCH 008/292] Fix Global Surface Atlas defragmentation flicker when atlas is nearly full --- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 1aa172fa1..58b5ffde0 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -79,10 +79,7 @@ struct GlobalSurfaceAtlasTile : RectPack } void OnInsert(class GlobalSurfaceAtlasCustomBuffer* buffer, void* actorObject, int32 tileIndex); - - void OnFree() - { - } + void OnFree(GlobalSurfaceAtlasCustomBuffer* buffer); }; struct GlobalSurfaceAtlasObject @@ -135,6 +132,7 @@ public: int32 Resolution = 0; uint64 LastFrameAtlasInsertFail = 0; uint64 LastFrameAtlasDefragmentation = 0; + int32 AtlasPixelsUsed = 0; GPUTexture* AtlasDepth = nullptr; GPUTexture* AtlasEmissive = nullptr; GPUTexture* 
AtlasGBuffer0 = nullptr; @@ -170,6 +168,7 @@ public: CulledObjectsCounterIndex = -1; CulledObjectsUsageHistory.Clear(); LastFrameAtlasDefragmentation = Engine::FrameCount; + AtlasPixelsUsed = 0; SAFE_DELETE(AtlasTiles); Objects.Clear(); Lights.Clear(); @@ -230,6 +229,12 @@ public: void GlobalSurfaceAtlasTile::OnInsert(GlobalSurfaceAtlasCustomBuffer* buffer, void* actorObject, int32 tileIndex) { buffer->Objects[actorObject].Tiles[tileIndex] = this; + buffer->AtlasPixelsUsed += (int32)Width * (int32)Height; +} + +void GlobalSurfaceAtlasTile::OnFree(GlobalSurfaceAtlasCustomBuffer* buffer) +{ + buffer->AtlasPixelsUsed -= (int32)Width * (int32)Height; } String GlobalSurfaceAtlasPass::ToString() const @@ -422,9 +427,10 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co else { // Perform atlas defragmentation if needed - // TODO: track atlas used vs free ratio to skip defragmentation if it's nearly full (then maybe auto resize up?) + constexpr float maxUsageToDefrag = 0.8f; if (currentFrame - surfaceAtlasData.LastFrameAtlasInsertFail < 10 && - currentFrame - surfaceAtlasData.LastFrameAtlasDefragmentation > 60) + currentFrame - surfaceAtlasData.LastFrameAtlasDefragmentation > 60 && + (float)surfaceAtlasData.AtlasPixelsUsed / (resolution * resolution) < maxUsageToDefrag) { surfaceAtlasData.ClearObjects(); } @@ -516,7 +522,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co for (auto& tile : it->Value.Tiles) { if (tile) - tile->Free(); + tile->Free(&surfaceAtlasData); } surfaceAtlasData.Objects.Remove(it); } @@ -1217,7 +1223,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con // Skip too small surfaces if (object && object->Tiles[tileIndex]) { - object->Tiles[tileIndex]->Free(); + object->Tiles[tileIndex]->Free(&surfaceAtlasData); object->Tiles[tileIndex] = nullptr; } continue; @@ -1240,7 +1246,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, 
con anyTile = true; continue; } - object->Tiles[tileIndex]->Free(); + object->Tiles[tileIndex]->Free(&surfaceAtlasData); } // Insert tile into atlas From f7470af42dad4eb141692c8237567803bc6bc89d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 26 Mar 2024 18:04:08 +0100 Subject: [PATCH 009/292] Optimize depth pass rendering to batch simple materials together --- Source/Engine/Graphics/Materials/IMaterial.h | 2 +- Source/Engine/Renderer/RenderList.cpp | 47 ++++++++++++-------- Source/Engine/Renderer/RenderList.h | 10 +++-- Source/Engine/Renderer/Renderer.cpp | 6 +-- 4 files changed, 39 insertions(+), 26 deletions(-) diff --git a/Source/Engine/Graphics/Materials/IMaterial.h b/Source/Engine/Graphics/Materials/IMaterial.h index 83b6abf52..5235dd59e 100644 --- a/Source/Engine/Graphics/Materials/IMaterial.h +++ b/Source/Engine/Graphics/Materials/IMaterial.h @@ -119,7 +119,7 @@ public: struct InstancingHandler { void (*GetHash)(const DrawCall& drawCall, uint32& batchKey); - bool (*CanBatch)(const DrawCall& a, const DrawCall& b); + bool (*CanBatch)(const DrawCall& a, const DrawCall& b, DrawPass pass); void (*WriteDrawCall)(struct InstanceData* instanceData, const DrawCall& drawCall); }; diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 29b9bd622..efe3edd2a 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -551,26 +551,21 @@ void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawP namespace { - /// - /// Checks if this draw call be batched together with the other one. - /// - /// The first draw call. - /// The second draw call. - /// True if can merge them, otherwise false. 
- FORCE_INLINE bool CanBatchWith(const DrawCall& a, const DrawCall& b) + FORCE_INLINE bool CanBatchWith(const DrawCall& a, const DrawCall& b, DrawPass pass) { - IMaterial::InstancingHandler handler; - return a.Material == b.Material && - a.Material->CanUseInstancing(handler) && + IMaterial::InstancingHandler handlerA, handlerB; + return a.Material->CanUseInstancing(handlerA) && + b.Material->CanUseInstancing(handlerB) && Platform::MemoryCompare(&a.Geometry, &b.Geometry, sizeof(a.Geometry)) == 0 && a.InstanceCount != 0 && b.InstanceCount != 0 && - handler.CanBatch(a, b) && + handlerA.CanBatch == handlerB.CanBatch && + handlerA.CanBatch(a, b, pass) && a.WorldDeterminantSign * b.WorldDeterminantSign > 0; } } -void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls) +void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawPass pass) { PROFILE_CPU(); const auto* drawCallsData = drawCalls.Get(); @@ -625,7 +620,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD for (int32 j = i + 1; j < listSize; j++) { const DrawCall& other = drawCallsData[listData[j]]; - if (!CanBatchWith(drawCall, other)) + if (!CanBatchWith(drawCall, other, pass)) break; batchSize++; instanceCount += other.InstanceCount; @@ -917,13 +912,29 @@ void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey) batchKey = (batchKey * 397) ^ ::GetHash(drawCall.Surface.Lightmap); } -bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b) +bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b, DrawPass pass) { // TODO: find reason why batching static meshes with lightmap causes problems with sampling in shader (flickering when meshes in batch order gets changes due to async draw calls collection) - return a.Surface.Lightmap == nullptr && 
b.Surface.Lightmap == nullptr && - //return a.Surface.Lightmap == b.Surface.Lightmap && - a.Surface.Skinning == nullptr && - b.Surface.Skinning == nullptr; + if (a.Surface.Lightmap == nullptr && b.Surface.Lightmap == nullptr && + //return a.Surface.Lightmap == b.Surface.Lightmap && + a.Surface.Skinning == nullptr && + b.Surface.Skinning == nullptr) + { + if (a.Material != b.Material) + { + // Batch simple materials during depth-only drawing (when using default vertex shader and no pixel shader) + if (pass == DrawPass::Depth) + { + constexpr MaterialUsageFlags complexUsageFlags = MaterialUsageFlags::UseMask | MaterialUsageFlags::UsePositionOffset | MaterialUsageFlags::UseDisplacement; + const bool aIsSimple = EnumHasNoneFlags(a.Material->GetInfo().UsageFlags, complexUsageFlags); + const bool bIsSimple = EnumHasNoneFlags(b.Material->GetInfo().UsageFlags, complexUsageFlags); + return aIsSimple && bIsSimple; + } + return false; + } + return true; + } + return false; } void SurfaceDrawCallHandler::WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall) diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 5f2aecbd9..15b5a9842 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -480,9 +480,10 @@ public: /// The rendering context. /// If set to true reverse draw call distance to the view. Results in back to front sorting. /// The collected draw calls list type. - API_FUNCTION() FORCE_INLINE void SortDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, bool reverseDistance, DrawCallsListType listType) + /// The draw pass (optional). 
+ API_FUNCTION() FORCE_INLINE void SortDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, bool reverseDistance, DrawCallsListType listType, DrawPass pass = DrawPass::All) { - SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls); + SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls, pass); } /// @@ -492,7 +493,8 @@ public: /// If set to true reverse draw call distance to the view. Results in back to front sorting. /// The collected draw calls indices list. /// The collected draw calls list. - void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls); + /// The draw pass (optional). + void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawPass pass = DrawPass::All); /// /// Executes the collected draw calls. @@ -543,6 +545,6 @@ PACK_STRUCT(struct FLAXENGINE_API InstanceData struct SurfaceDrawCallHandler { static void GetHash(const DrawCall& drawCall, uint32& batchKey); - static bool CanBatch(const DrawCall& a, const DrawCall& b); + static bool CanBatch(const DrawCall& a, const DrawCall& b, DrawPass pass); static void WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall); }; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 0ef39bb30..7500b2f1e 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -244,7 +244,7 @@ void Renderer::DrawSceneDepth(GPUContext* context, SceneRenderTask* task, GPUTex DrawActors(renderContext, customActors); // Sort draw calls - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::Depth); + renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::Depth, DrawPass::Depth); // Execute draw calls const float width = (float)output->Width(); @@ -405,8 +405,8 @@ void 
RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont for (int32 i = 1; i < renderContextBatch.Contexts.Count(); i++) { auto& shadowContext = renderContextBatch.Contexts[i]; - shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth); - shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls); + shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); + shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth); } } From bc9cdf5cdb9bb8eb3d6c3c71572e439b76fb893d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Apr 2024 14:34:43 +0200 Subject: [PATCH 010/292] Update version --- Flax.flaxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index f7806caac..383d9e14e 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -2,9 +2,9 @@ "Name": "Flax", "Version": { "Major": 1, - "Minor": 8, + "Minor": 9, "Revision": 0, - "Build": 6510 + "Build": 6600 }, "Company": "Flax", "Copyright": "Copyright (c) 2012-2024 Wojciech Figat. 
All rights reserved.", From 13a04c2941e0684b21cdb440c2633ea79300e311 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Apr 2024 14:56:26 +0200 Subject: [PATCH 011/292] Add `stencilValue` for stencil buffer clearing --- Source/Engine/Graphics/GPUContext.h | 3 ++- .../Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp | 6 ++---- Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h | 2 +- .../Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp | 6 ++---- Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h | 2 +- Source/Engine/GraphicsDevice/Null/GPUContextNull.h | 2 +- Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp | 5 ++--- Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h | 2 +- 8 files changed, 12 insertions(+), 16 deletions(-) diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index a222686db..5542e63ec 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -186,7 +186,8 @@ public: /// /// The depth buffer to clear. /// The clear depth value. - API_FUNCTION() virtual void ClearDepth(GPUTextureView* depthBuffer, float depthValue = 1.0f) = 0; + /// The clear stencil value. + API_FUNCTION() virtual void ClearDepth(GPUTextureView* depthBuffer, float depthValue = 1.0f, uint8 stencilValue = 0) = 0; /// /// Clears an unordered access buffer with a float value. 
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 1a65167dc..3dc90afd1 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -151,21 +151,19 @@ bool GPUContextDX11::IsDepthBufferBinded() void GPUContextDX11::Clear(GPUTextureView* rt, const Color& color) { auto rtDX11 = static_cast(rt); - if (rtDX11) { _context->ClearRenderTargetView(rtDX11->RTV(), color.Raw); } } -void GPUContextDX11::ClearDepth(GPUTextureView* depthBuffer, float depthValue) +void GPUContextDX11::ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue) { auto depthBufferDX11 = static_cast(depthBuffer); - if (depthBufferDX11) { ASSERT(depthBufferDX11->DSV()); - _context->ClearDepthStencilView(depthBufferDX11->DSV(), D3D11_CLEAR_DEPTH, depthValue, 0xff); + _context->ClearDepthStencilView(depthBufferDX11->DSV(), D3D11_CLEAR_DEPTH, depthValue, stencilValue); } } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index 8db265065..4540bb652 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -108,7 +108,7 @@ public: void* GetNativePtr() const override; bool IsDepthBufferBinded() override; void Clear(GPUTextureView* rt, const Color& color) override; - void ClearDepth(GPUTextureView* depthBuffer, float depthValue) override; + void ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue) override; void ClearUA(GPUBuffer* buf, const Float4& value) override; void ClearUA(GPUBuffer* buf, const uint32 value[4]) override; void ClearUA(GPUTexture* texture, const uint32 value[4]) override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp 
index b64c2236e..2eb143857 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -703,7 +703,6 @@ bool GPUContextDX12::IsDepthBufferBinded() void GPUContextDX12::Clear(GPUTextureView* rt, const Color& color) { auto rtDX12 = static_cast(rt); - if (rtDX12) { SetResourceState(rtDX12->GetResourceOwner(), D3D12_RESOURCE_STATE_RENDER_TARGET, rtDX12->SubresourceIndex); @@ -713,16 +712,15 @@ void GPUContextDX12::Clear(GPUTextureView* rt, const Color& color) } } -void GPUContextDX12::ClearDepth(GPUTextureView* depthBuffer, float depthValue) +void GPUContextDX12::ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue) { auto depthBufferDX12 = static_cast(depthBuffer); - if (depthBufferDX12) { SetResourceState(depthBufferDX12->GetResourceOwner(), D3D12_RESOURCE_STATE_DEPTH_WRITE, depthBufferDX12->SubresourceIndex); flushRBs(); - _commandList->ClearDepthStencilView(depthBufferDX12->DSV(), D3D12_CLEAR_FLAG_DEPTH, depthValue, 0xff, 0, nullptr); + _commandList->ClearDepthStencilView(depthBufferDX12->DSV(), D3D12_CLEAR_FLAG_DEPTH, depthValue, stencilValue, 0, nullptr); } } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h index 70ca799eb..72b6e4547 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h @@ -159,7 +159,7 @@ public: void* GetNativePtr() const override; bool IsDepthBufferBinded() override; void Clear(GPUTextureView* rt, const Color& color) override; - void ClearDepth(GPUTextureView* depthBuffer, float depthValue) override; + void ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue) override; void ClearUA(GPUBuffer* buf, const Float4& value) override; void ClearUA(GPUBuffer* buf, const uint32 value[4]) override; void ClearUA(GPUTexture* texture, const uint32 value[4]) 
override; diff --git a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h index abc52473c..fc185a587 100644 --- a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h +++ b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h @@ -48,7 +48,7 @@ public: { } - void ClearDepth(GPUTextureView* depthBuffer, float depthValue) override + void ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue) override { } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index cb223124f..38ef0eb68 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -797,10 +797,9 @@ void GPUContextVulkan::Clear(GPUTextureView* rt, const Color& color) } } -void GPUContextVulkan::ClearDepth(GPUTextureView* depthBuffer, float depthValue) +void GPUContextVulkan::ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue) { const auto rtVulkan = static_cast(depthBuffer); - if (rtVulkan) { // TODO: detect if inside render pass and use ClearAttachments @@ -815,7 +814,7 @@ void GPUContextVulkan::ClearDepth(GPUTextureView* depthBuffer, float depthValue) VkClearDepthStencilValue clear; clear.depth = depthValue; - clear.stencil = 0; + clear.stencil = stencilValue; vkCmdClearDepthStencilImage(cmdBuffer->GetHandle(), rtVulkan->Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &rtVulkan->Info.subresourceRange); } } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h index e28bbc1a5..07ffa330b 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h @@ -151,7 +151,7 @@ public: void* GetNativePtr() const override; bool IsDepthBufferBinded() override; void Clear(GPUTextureView* rt, const Color& color) override; - void 
ClearDepth(GPUTextureView* depthBuffer, float depthValue) override; + void ClearDepth(GPUTextureView* depthBuffer, float depthValue, uint8 stencilValue) override; void ClearUA(GPUBuffer* buf, const Float4& value) override; void ClearUA(GPUBuffer* buf, const uint32 value[4]) override; void ClearUA(GPUTexture* texture, const uint32 value[4]) override; From 017def29d4fc33ccf79d355e973911328c8bde15 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Apr 2024 13:29:45 +0200 Subject: [PATCH 012/292] Rename `ShadowSamplerPCF` to `ShadowSamplerLinear` --- .../DirectX/DX11/GPUContextDX11.cpp | 2 +- .../DirectX/DX11/GPUDeviceDX11.cpp | 39 ++++--------------- .../DirectX/DX11/GPUDeviceDX11.h | 18 ++++----- .../DirectX/DX12/GPUDeviceDX12.cpp | 3 -- Source/Shaders/Common.hlsl | 13 +------ 5 files changed, 20 insertions(+), 55 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 3dc90afd1..316df4643 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -114,7 +114,7 @@ void GPUContextDX11::FrameBegin() _device->_samplerLinearWrap, _device->_samplerPointWrap, _device->_samplerShadow, - _device->_samplerShadowPCF + _device->_samplerShadowLinear }; _context->VSSetSamplers(0, ARRAY_COUNT(samplers), samplers); _context->DSSetSamplers(0, ARRAY_COUNT(samplers), samplers); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index b15163bfe..81a56b43a 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -227,16 +227,7 @@ GPUDevice* GPUDeviceDX11::Create() GPUDeviceDX11::GPUDeviceDX11(IDXGIFactory* dxgiFactory, GPUAdapterDX* adapter) : GPUDeviceDX(getRendererType(adapter), getShaderProfile(adapter), adapter) - , 
_device(nullptr) - , _imContext(nullptr) , _factoryDXGI(dxgiFactory) - , _mainContext(nullptr) - , _samplerLinearClamp(nullptr) - , _samplerPointClamp(nullptr) - , _samplerLinearWrap(nullptr) - , _samplerPointWrap(nullptr) - , _samplerShadow(nullptr) - , _samplerShadowPCF(nullptr) { Platform::MemoryClear(RasterizerStates, sizeof(RasterizerStates)); Platform::MemoryClear(DepthStencilStates, sizeof(DepthStencilStates)); @@ -450,14 +441,17 @@ bool GPUDeviceDX11::Init() { D3D11_SAMPLER_DESC samplerDesc; Platform::MemoryClear(&samplerDesc, sizeof(samplerDesc)); + samplerDesc.MinLOD = 0; + samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; + samplerDesc.MipLODBias = 0.0f; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.ComparisonFunc = D3D11_COMPARISON_LESS_EQUAL; // Linear Clamp samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; result = _device->CreateSamplerState(&samplerDesc, &_samplerLinearClamp); LOG_DIRECTX_RESULT_WITH_RETURN(result, true); @@ -466,8 +460,6 @@ bool GPUDeviceDX11::Init() samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; result = _device->CreateSamplerState(&samplerDesc, &_samplerPointClamp); LOG_DIRECTX_RESULT_WITH_RETURN(result, true); @@ -476,8 +468,6 @@ bool GPUDeviceDX11::Init() samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; result = _device->CreateSamplerState(&samplerDesc, &_samplerLinearWrap); LOG_DIRECTX_RESULT_WITH_RETURN(result, true); @@ -486,8 +476,6 @@ bool GPUDeviceDX11::Init() 
samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; result = _device->CreateSamplerState(&samplerDesc, &_samplerPointWrap); LOG_DIRECTX_RESULT_WITH_RETURN(result, true); @@ -496,26 +484,15 @@ bool GPUDeviceDX11::Init() samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.MipLODBias = 0.0f; - samplerDesc.MaxAnisotropy = 1; - samplerDesc.ComparisonFunc = D3D11_COMPARISON_LESS_EQUAL; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; result = _device->CreateSamplerState(&samplerDesc, &_samplerShadow); LOG_DIRECTX_RESULT_WITH_RETURN(result, true); - // Shadow PCF + // Shadow Linear samplerDesc.Filter = D3D11_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR; samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.MipLODBias = 0.0f; - samplerDesc.MaxAnisotropy = 1; - samplerDesc.ComparisonFunc = D3D11_COMPARISON_LESS_EQUAL; - samplerDesc.BorderColor[0] = samplerDesc.BorderColor[1] = samplerDesc.BorderColor[2] = samplerDesc.BorderColor[3] = 0; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; - result = _device->CreateSamplerState(&samplerDesc, &_samplerShadowPCF); + result = _device->CreateSamplerState(&samplerDesc, &_samplerShadowLinear); LOG_DIRECTX_RESULT_WITH_RETURN(result, true); } @@ -616,7 +593,7 @@ void GPUDeviceDX11::Dispose() SAFE_RELEASE(_samplerLinearWrap); SAFE_RELEASE(_samplerPointWrap); SAFE_RELEASE(_samplerShadow); - SAFE_RELEASE(_samplerShadowPCF); + SAFE_RELEASE(_samplerShadowLinear); // for (auto i = BlendStates.Begin(); i.IsNotEnd(); ++i) { diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h 
b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h index 4e35d61ae..42bb6122e 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h @@ -24,20 +24,20 @@ class GPUDeviceDX11 : public GPUDeviceDX private: // Private Stuff - ID3D11Device* _device; - ID3D11DeviceContext* _imContext; + ID3D11Device* _device = nullptr; + ID3D11DeviceContext* _imContext = nullptr; IDXGIFactory* _factoryDXGI; - GPUContextDX11* _mainContext; + GPUContextDX11* _mainContext = nullptr; bool _allowTearing = false; // Static Samplers - ID3D11SamplerState* _samplerLinearClamp; - ID3D11SamplerState* _samplerPointClamp; - ID3D11SamplerState* _samplerLinearWrap; - ID3D11SamplerState* _samplerPointWrap; - ID3D11SamplerState* _samplerShadow; - ID3D11SamplerState* _samplerShadowPCF; + ID3D11SamplerState* _samplerLinearClamp = nullptr; + ID3D11SamplerState* _samplerPointClamp = nullptr; + ID3D11SamplerState* _samplerLinearWrap = nullptr; + ID3D11SamplerState* _samplerPointWrap = nullptr; + ID3D11SamplerState* _samplerShadow = nullptr; + ID3D11SamplerState* _samplerShadowLinear = nullptr; // Shared data for pipeline states CriticalSection BlendStatesWriteLocker; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index d564bed55..859c8b503 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -568,7 +568,6 @@ bool GPUDeviceDX12::Init() // Static samplers D3D12_STATIC_SAMPLER_DESC staticSamplers[6]; static_assert(GPU_STATIC_SAMPLERS_COUNT == ARRAY_COUNT(staticSamplers), "Update static samplers setup."); - // TODO: describe visibilities for the static samples, maybe use all pixel? or again pixel + all combo? 
// Linear Clamp staticSamplers[0].Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; staticSamplers[0].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; @@ -650,8 +649,6 @@ bool GPUDeviceDX12::Init() staticSamplers[5].RegisterSpace = 0; staticSamplers[5].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - // TODO: static samplers for the shadow pass change into bindable samplers or sth? - // Init D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc; rootSignatureDesc.NumParameters = ARRAY_COUNT(rootParameters); diff --git a/Source/Shaders/Common.hlsl b/Source/Shaders/Common.hlsl index 74335d87a..11da02c41 100644 --- a/Source/Shaders/Common.hlsl +++ b/Source/Shaders/Common.hlsl @@ -88,42 +88,33 @@ // Performs branching by using control flow instructions like jmp and label. #define BRANCH [branch] -/// Performs branching by using the cnd instructions. +// Performs branching by using the cnd instructions. #define FLATTEN [flatten] #endif // Compiler attribute fallback - #ifndef UNROLL #define UNROLL #endif - #ifndef LOOP #define LOOP #endif - #ifndef BRANCH #define BRANCH #endif - #ifndef FLATTEN #define FLATTEN #endif -// TODO: cleanup global samplers with per-platform customization support #ifndef SamplerLinearClamp - // Static samplers sampler SamplerLinearClamp : register(s0); sampler SamplerPointClamp : register(s1); sampler SamplerLinearWrap : register(s2); sampler SamplerPointWrap : register(s3); - -// TODO: use custom pipeline layouts and bind different sampler during shadows rendering SamplerComparisonState ShadowSampler : register(s4); -SamplerComparisonState ShadowSamplerPCF : register(s5); - +SamplerComparisonState ShadowSamplerLinear : register(s5); #endif // General purpose macros From 61323f85264a40becad17f19f533846803cdd1ed Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Apr 2024 12:54:07 +0200 Subject: [PATCH 013/292] Refactor shadows rendering to use Shadow Map Atlas --- .../Features/ForwardShading.hlsl | 12 +- Source/Engine/Core/Core.h | 5 + 
.../Graphics/Materials/MaterialShader.h | 2 +- .../Materials/MaterialShaderFeatures.cpp | 26 +- .../Materials/MaterialShaderFeatures.h | 3 +- .../Engine/Level/Actors/DirectionalLight.cpp | 2 +- Source/Engine/Level/Actors/PointLight.cpp | 3 +- Source/Engine/Level/Actors/SkyLight.cpp | 3 +- Source/Engine/Level/Actors/SpotLight.cpp | 3 +- .../Graph/CPU/ParticleEmitterGraph.CPU.cpp | 6 - Source/Engine/Renderer/Config.h | 19 +- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 46 +- Source/Engine/Renderer/LightPass.cpp | 92 +- Source/Engine/Renderer/LightPass.h | 10 +- Source/Engine/Renderer/RenderList.cpp | 35 +- Source/Engine/Renderer/RenderList.h | 60 +- Source/Engine/Renderer/Renderer.cpp | 7 +- Source/Engine/Renderer/ShadowsPass.cpp | 989 ++++++++++-------- Source/Engine/Renderer/ShadowsPass.h | 137 +-- Source/Engine/Renderer/VolumetricFogPass.cpp | 235 +---- Source/Engine/Renderer/VolumetricFogPass.h | 27 +- Source/Shaders/GI/GlobalSurfaceAtlas.shader | 2 +- Source/Shaders/Lighting.hlsl | 24 +- Source/Shaders/LightingCommon.hlsl | 10 +- Source/Shaders/Lights.shader | 6 +- Source/Shaders/PCFKernels.hlsl | 135 --- Source/Shaders/Quad.shader | 9 +- Source/Shaders/Shadows.shader | 77 +- Source/Shaders/ShadowsCommon.hlsl | 69 +- Source/Shaders/ShadowsSampling.hlsl | 818 ++++----------- Source/Shaders/VolumetricFog.shader | 69 +- 31 files changed, 1115 insertions(+), 1826 deletions(-) delete mode 100644 Source/Shaders/PCFKernels.hlsl diff --git a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl index 9dbc19369..7f7d16545 100644 --- a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl +++ b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl @@ -16,7 +16,6 @@ #include "./Flax/ExponentialHeightFog.hlsl" @2// Forward Shading: Constants LightData DirectionalLight; -LightShadowData DirectionalLightShadow; LightData SkyLight; ProbeData EnvironmentProbe; ExponentialHeightFogData 
ExponentialHeightFog; @@ -26,9 +25,9 @@ LightData LocalLights[MAX_LOCAL_LIGHTS]; @3// Forward Shading: Resources TextureCube EnvProbe : register(t__SRV__); TextureCube SkyLightTexture : register(t__SRV__); -Texture2DArray DirectionalLightShadowMap : register(t__SRV__); +Buffer ShadowsBuffer : register(t__SRV__); +Texture2D ShadowMap : register(t__SRV__); @4// Forward Shading: Utilities -DECLARE_LIGHTSHADOWDATA_ACCESS(DirectionalLightShadow); @5// Forward Shading: Shaders // Pixel Shader function for Forward Pass @@ -80,11 +79,8 @@ void PS_Forward( // Calculate lighting from a single directional light float4 shadowMask = 1.0f; - if (DirectionalLight.CastShadows > 0) - { - LightShadowData directionalLightShadowData = GetDirectionalLightShadowData(); - shadowMask.r = SampleShadow(DirectionalLight, directionalLightShadowData, DirectionalLightShadowMap, gBuffer, shadowMask.g); - } + ShadowSample shadow = SampleDirectionalLightShadow(DirectionalLight, ShadowsBuffer, ShadowMap, gBuffer); + shadowMask = GetShadowMask(shadow); float4 light = GetLighting(ViewPos, DirectionalLight, gBuffer, shadowMask, false, false); // Calculate lighting from sky light diff --git a/Source/Engine/Core/Core.h b/Source/Engine/Core/Core.h index 01f80b7b2..cfc8471d1 100644 --- a/Source/Engine/Core/Core.h +++ b/Source/Engine/Core/Core.h @@ -23,3 +23,8 @@ #define OUT_OF_MEMORY Platform::OutOfMemory(__LINE__, __FILE__) #define MISSING_CODE(info) Platform::MissingCode(__LINE__, __FILE__, info) #define NON_COPYABLE(type) type(type&&) = delete; type(const type&) = delete; type& operator=(const type&) = delete; type& operator=(type&&) = delete; +#define POD_COPYABLE(type) \ + type(const type& other) { Platform::MemoryCopy(this, &other, sizeof(type)); } \ + type(type&& other) noexcept { Platform::MemoryCopy(this, &other, sizeof(type)); } \ + type& operator=(const type& other) { Platform::MemoryCopy(this, &other, sizeof(type)); return *this; } \ + type& operator=(type&& other) noexcept { 
Platform::MemoryCopy(this, &other, sizeof(type)); return *this; } diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 7a5b842e2..5a4cec20c 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials shader version. /// -#define MATERIAL_GRAPH_VERSION 162 +#define MATERIAL_GRAPH_VERSION 163 class Material; class GPUShader; diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp index e2e07b397..b6d7ef23a 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp @@ -21,7 +21,8 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, Span= sizeof(Data)); const int32 envProbeShaderRegisterIndex = srv + 0; const int32 skyLightShaderRegisterIndex = srv + 1; - const int32 dirLightShaderRegisterIndex = srv + 2; + const int32 shadowsBufferRegisterIndex = srv + 2; + const int32 shadowMapShaderRegisterIndex = srv + 3; const bool canUseShadow = view.Pass != DrawPass::Depth; // Set fog input @@ -39,24 +40,19 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanDirectionalLights.HasItems()) { const auto& dirLight = cache->DirectionalLights.First(); - const auto shadowPass = ShadowsPass::Instance(); - const bool useShadow = shadowPass->LastDirLightIndex == 0 && canUseShadow; - if (useShadow) - { - data.DirectionalLightShadow = shadowPass->LastDirLight; - params.GPUContext->BindSR(dirLightShaderRegisterIndex, shadowPass->LastDirLightShadowMap); - } - else - { - params.GPUContext->UnBindSR(dirLightShaderRegisterIndex); - } + GPUTexture* shadowMapAtlas; + GPUBufferView* shadowsBuffer; + ShadowsPass::GetShadowAtlas(params.RenderContext.Buffers, shadowMapAtlas, shadowsBuffer); + const bool useShadow = 
shadowMapAtlas && canUseShadow && dirLight.HasShadow; dirLight.SetShaderData(data.DirectionalLight, useShadow); + params.GPUContext->BindSR(shadowsBufferRegisterIndex, shadowsBuffer); + params.GPUContext->BindSR(shadowMapShaderRegisterIndex, shadowMapAtlas); } else { - data.DirectionalLight.Color = Float3::Zero; - data.DirectionalLight.CastShadows = 0.0f; - params.GPUContext->UnBindSR(dirLightShaderRegisterIndex); + Platform::MemoryClear(&data.DirectionalLight, sizeof(data.DirectionalLight)); + params.GPUContext->UnBindSR(shadowsBufferRegisterIndex); + params.GPUContext->UnBindSR(shadowMapShaderRegisterIndex); } // Set sky light diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h index f48c6821c..81a6e260d 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h @@ -23,12 +23,11 @@ struct ForwardShadingFeature : MaterialShaderFeature { enum { MaxLocalLights = 4 }; - enum { SRVs = 3 }; + enum { SRVs = 4 }; PACK_STRUCT(struct Data { ShaderLightData DirectionalLight; - ShaderLightShadowData DirectionalLightShadow; ShaderLightData SkyLight; ShaderEnvProbeData EnvironmentProbe; ShaderExponentialHeightFogData ExponentialHeightFog; diff --git a/Source/Engine/Level/Actors/DirectionalLight.cpp b/Source/Engine/Level/Actors/DirectionalLight.cpp index f44375074..3a2a44e45 100644 --- a/Source/Engine/Level/Actors/DirectionalLight.cpp +++ b/Source/Engine/Level/Actors/DirectionalLight.cpp @@ -38,7 +38,6 @@ void DirectionalLight::Draw(RenderContext& renderContext) data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; data.IndirectLightingIntensity = IndirectLightingIntensity; data.CastVolumetricShadow = CastVolumetricShadow; - data.RenderedVolumetricFog = 0; data.ShadowsMode = ShadowsMode; data.CascadeCount = CascadeCount; data.Cascade1Spacing = Cascade1Spacing; @@ -49,6 +48,7 @@ void 
DirectionalLight::Draw(RenderContext& renderContext) data.ContactShadowsLength = ContactShadowsLength; data.StaticFlags = GetStaticFlags(); data.ID = GetID(); + data.ScreenSize = 1.0f; renderContext.List->DirectionalLights.Add(data); } } diff --git a/Source/Engine/Level/Actors/PointLight.cpp b/Source/Engine/Level/Actors/PointLight.cpp index 0a4f607ae..ff12f8dfd 100644 --- a/Source/Engine/Level/Actors/PointLight.cpp +++ b/Source/Engine/Level/Actors/PointLight.cpp @@ -2,6 +2,7 @@ #include "PointLight.h" #include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/RenderView.h" #include "Engine/Renderer/RenderList.h" #include "Engine/Serialization/Serialization.h" @@ -102,7 +103,6 @@ void PointLight::Draw(RenderContext& renderContext) data.ShadowsSharpness = ShadowsSharpness; data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; data.CastVolumetricShadow = CastVolumetricShadow; - data.RenderedVolumetricFog = 0; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; @@ -114,6 +114,7 @@ void PointLight::Draw(RenderContext& renderContext) data.IESTexture = IESTexture ? 
IESTexture->GetTexture() : nullptr; data.StaticFlags = GetStaticFlags(); data.ID = GetID(); + data.ScreenSize = Math::Min(1.0f, Math::Sqrt(RenderTools::ComputeBoundsScreenRadiusSquared(position, (float)_sphere.Radius, renderContext.View))); renderContext.List->PointLights.Add(data); } } diff --git a/Source/Engine/Level/Actors/SkyLight.cpp b/Source/Engine/Level/Actors/SkyLight.cpp index 67116fa13..19152a557 100644 --- a/Source/Engine/Level/Actors/SkyLight.cpp +++ b/Source/Engine/Level/Actors/SkyLight.cpp @@ -11,6 +11,7 @@ #include "Engine/Content/Content.h" #include "Engine/Serialization/Serialization.h" #include "Engine/ContentImporters/AssetsImportingManager.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Level/Scene/Scene.h" SkyLight::SkyLight(const SpawnParams& params) @@ -118,13 +119,13 @@ void SkyLight::Draw(RenderContext& renderContext) data.Color = Color.ToFloat3() * (Color.A * brightness); data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; data.CastVolumetricShadow = CastVolumetricShadow; - data.RenderedVolumetricFog = 0; data.AdditiveColor = AdditiveColor.ToFloat3() * (AdditiveColor.A * brightness); data.IndirectLightingIntensity = IndirectLightingIntensity; data.Radius = GetScaledRadius(); data.Image = GetSource(); data.StaticFlags = GetStaticFlags(); data.ID = GetID(); + data.ScreenSize = Math::Min(1.0f, Math::Sqrt(RenderTools::ComputeBoundsScreenRadiusSquared(position, (float)_sphere.Radius, renderContext.View))); renderContext.List->SkyLights.Add(data); } } diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index a80c012a5..ab9cde364 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -5,6 +5,7 @@ #include "Engine/Renderer/RenderList.h" #include "Engine/Content/Assets/IESProfile.h" #include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Serialization/Serialization.h" #include 
"Engine/Level/Scene/SceneRendering.h" @@ -152,7 +153,6 @@ void SpotLight::Draw(RenderContext& renderContext) data.ShadowsSharpness = ShadowsSharpness; data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; data.CastVolumetricShadow = CastVolumetricShadow; - data.RenderedVolumetricFog = 0; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; @@ -167,6 +167,7 @@ void SpotLight::Draw(RenderContext& renderContext) data.OuterConeAngle = outerConeAngle; data.StaticFlags = GetStaticFlags(); data.ID = GetID(); + data.ScreenSize = Math::Min(1.0f, Math::Sqrt(RenderTools::ComputeBoundsScreenRadiusSquared(position, (float)_sphere.Radius, renderContext.View))); renderContext.List->SpotLights.Add(data); } } diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp index 9c847fd91..d7441c67d 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp @@ -399,12 +399,6 @@ void ParticleEmitterGraphCPUExecutor::Draw(ParticleEmitter* emitter, ParticleEff lightData.ShadowsSharpness = 1.0f; lightData.UseInverseSquaredFalloff = false; lightData.VolumetricScatteringIntensity = 1.0f; - lightData.CastVolumetricShadow = false; - lightData.RenderedVolumetricFog = 0; - lightData.ShadowsMode = ShadowsCastingMode::None; - lightData.SourceRadius = 0.0f; - lightData.SourceLength = 0.0f; - lightData.IESTexture = nullptr; for (int32 particleIndex = 0; particleIndex < count; particleIndex++) { diff --git a/Source/Engine/Renderer/Config.h b/Source/Engine/Renderer/Config.h index b74cb20da..c62941b67 100644 --- a/Source/Engine/Renderer/Config.h +++ b/Source/Engine/Renderer/Config.h @@ -77,28 +77,13 @@ PACK_STRUCT(struct ShaderLightData { Float3 Color; float MinRoughness; Float3 Position; - float CastShadows; + uint32 ShadowsBufferAddress; Float3 Direction; float Radius; float 
FalloffExponent; float InverseSquared; - float Dummy0; float RadiusInv; - }); - -/// -/// Structure that contains information about light for shaders. -/// -PACK_STRUCT(struct ShaderLightShadowData { - Float2 ShadowMapSize; - float Sharpness; - float Fade; - float NormalOffsetScale; - float Bias; - float FadeDistance; - uint32 NumCascades; - Float4 CascadeSplits; - Matrix ShadowVP[6]; + float Dummy0; }); /// diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 58b5ffde0..d4e212222 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -97,27 +97,7 @@ struct GlobalSurfaceAtlasObject Platform::MemoryClear(this, sizeof(GlobalSurfaceAtlasObject)); } - GlobalSurfaceAtlasObject(const GlobalSurfaceAtlasObject& other) - { - Platform::MemoryCopy(this, &other, sizeof(GlobalSurfaceAtlasObject)); - } - - GlobalSurfaceAtlasObject(GlobalSurfaceAtlasObject&& other) noexcept - { - Platform::MemoryCopy(this, &other, sizeof(GlobalSurfaceAtlasObject)); - } - - GlobalSurfaceAtlasObject& operator=(const GlobalSurfaceAtlasObject& other) - { - Platform::MemoryCopy(this, &other, sizeof(GlobalSurfaceAtlasObject)); - return *this; - } - - GlobalSurfaceAtlasObject& operator=(GlobalSurfaceAtlasObject&& other) noexcept - { - Platform::MemoryCopy(this, &other, sizeof(GlobalSurfaceAtlasObject)); - return *this; - } + POD_COPYABLE(GlobalSurfaceAtlasObject); }; struct GlobalSurfaceAtlasLight @@ -130,9 +110,9 @@ class GlobalSurfaceAtlasCustomBuffer : public RenderBuffers::CustomBuffer, publi { public: int32 Resolution = 0; + int32 AtlasPixelsUsed = 0; uint64 LastFrameAtlasInsertFail = 0; uint64 LastFrameAtlasDefragmentation = 0; - int32 AtlasPixelsUsed = 0; GPUTexture* AtlasDepth = nullptr; GPUTexture* AtlasEmissive = nullptr; GPUTexture* AtlasGBuffer0 = nullptr; @@ -163,7 +143,7 @@ public: { } - FORCE_INLINE void ClearObjects() + void ClearObjects() { 
CulledObjectsCounterIndex = -1; CulledObjectsUsageHistory.Clear(); @@ -174,7 +154,7 @@ public: Lights.Clear(); } - FORCE_INLINE void Clear() + void Reset() { RenderTargetPool::Release(AtlasDepth); RenderTargetPool::Release(AtlasEmissive); @@ -189,7 +169,7 @@ public: { SAFE_DELETE_GPU_RESOURCE(ChunksBuffer); SAFE_DELETE_GPU_RESOURCE(CulledObjectsBuffer); - Clear(); + Reset(); } // [ISceneRenderingListener] @@ -400,7 +380,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co bool noCache = surfaceAtlasData.Resolution != resolution; if (noCache) { - surfaceAtlasData.Clear(); + surfaceAtlasData.Reset(); auto desc = GPUTextureDescription::New2D(resolution, resolution, PixelFormat::Unknown); uint64 memUsage = 0; @@ -963,9 +943,9 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co if (_vertexBuffer->Data.Count() == 0) continue; - // Draw draw light + // Draw light PROFILE_GPU_CPU_NAMED("Directional Light"); - const bool useShadow = CanRenderShadow(renderContext.View, light); + const bool useShadow = light.CanRenderShadow(renderContext.View); // TODO: test perf/quality when using Shadow Map for directional light (ShadowsPass::Instance()->LastDirLightShadowMap) instead of Global SDF trace light.SetShaderData(data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; @@ -997,9 +977,9 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co if (_vertexBuffer->Data.Count() == 0) continue; - // Draw draw light + // Draw light PROFILE_GPU_CPU_NAMED("Point Light"); - const bool useShadow = CanRenderShadow(renderContext.View, light); + const bool useShadow = light.CanRenderShadow(renderContext.View); light.SetShaderData(data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; @@ -1030,9 +1010,9 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co if 
(_vertexBuffer->Data.Count() == 0) continue; - // Draw draw light + // Draw light PROFILE_GPU_CPU_NAMED("Spot Light"); - const bool useShadow = CanRenderShadow(renderContext.View, light); + const bool useShadow = light.CanRenderShadow(renderContext.View); light.SetShaderData(data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; @@ -1048,7 +1028,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co surfaceAtlasData.Lights.Remove(it); } - // Draw draw indirect light from Global Illumination + // Draw indirect light from Global Illumination if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::GI)) { switch (giSettings.Mode) diff --git a/Source/Engine/Renderer/LightPass.cpp b/Source/Engine/Renderer/LightPass.cpp index bdbe22be8..e1330ece5 100644 --- a/Source/Engine/Renderer/LightPass.cpp +++ b/Source/Engine/Renderer/LightPass.cpp @@ -3,14 +3,15 @@ #include "LightPass.h" #include "ShadowsPass.h" #include "GBufferPass.h" +#include "Engine/Core/Collections/Sorting.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderTools.h" +#include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/GPULimits.h" +#include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Content/Assets/CubeTexture.h" #include "Engine/Content/Content.h" -#include "Engine/Graphics/GPUContext.h" -#include "Engine/Graphics/RenderTask.h" PACK_STRUCT(struct PerLight{ ShaderLightData Light; @@ -49,8 +50,9 @@ bool LightPass::Init() _shader.Get()->OnReloading.Bind(this); #endif + // Pick the format for shadow mask (rendered shadow projection into screen-space) auto format = PixelFormat::R8G8_UNorm; - if (EnumHasNoneFlags(GPUDevice::Instance->GetFormatFeatures(format).Support, (FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D))) + if 
(EnumHasNoneFlags(GPUDevice::Instance->GetFormatFeatures(format).Support, FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D)) { format = PixelFormat::B8G8R8A8_UNorm; } @@ -151,27 +153,48 @@ void LightPass::Dispose() _sphereModel = nullptr; } -void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureView* lightBuffer) +template +bool SortLights(T const& p1, T const& p2) +{ + // Compare by screen size + int32 res = static_cast(p2.ScreenSize * 100 - p1.ScreenSize * 100); + if (res == 0) + { + // Compare by brightness + res = static_cast(p2.Color.SumValues() * 100 - p1.Color.SumValues() * 100); + if (res == 0) + { + // Compare by ID to stabilize order + res = GetHash(p2.ID) - GetHash(p1.ID); + } + } + return res < 0; +} + +void LightPass::SetupLights(RenderContext& renderContext, RenderContextBatch& renderContextBatch) +{ + PROFILE_CPU(); + + // Sort lights + Sorting::QuickSort(renderContext.List->DirectionalLights.Get(), renderContext.List->DirectionalLights.Count(), &SortLights); + Sorting::QuickSort(renderContext.List->PointLights.Get(), renderContext.List->PointLights.Count(), &SortLights); + Sorting::QuickSort(renderContext.List->SpotLights.Get(), renderContext.List->SpotLights.Count(), &SortLights); +} + +void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureView* lightBuffer) { const float sphereModelScale = 3.0f; - - // Ensure to have valid data if (checkIfSkipPass()) - { - // Resources are missing. Do not perform rendering. 
return; - } - PROFILE_GPU_CPU("Lights"); // Cache data auto device = GPUDevice::Instance; auto context = device->GetMainContext(); - auto& renderContext = renderContextBatch.Contexts[0]; + auto& renderContext = renderContextBatch.GetMainContext(); auto& view = renderContext.View; auto mainCache = renderContext.List; const auto lightShader = _shader->GetShader(); - const bool useShadows = ShadowsPass::Instance()->IsReady() && EnumHasAnyFlags(view.Flags, ViewFlags::Shadows); const bool disableSpecular = (view.Flags & ViewFlags::SpecularLight) == ViewFlags::None; // Check if debug lights @@ -242,12 +265,9 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi for (int32 lightIndex = 0; lightIndex < mainCache->PointLights.Count(); lightIndex++) { PROFILE_GPU_CPU_NAMED("Point Light"); - - // Cache data auto& light = mainCache->PointLights[lightIndex]; float lightRadius = light.Radius; Float3 lightPosition = light.Position; - const bool renderShadow = useShadows && light.ShadowDataIndex != -1; bool useIES = light.IESTexture != nullptr; // Get distance from view center to light center less radius (check if view is inside a sphere) @@ -261,23 +281,19 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi Matrix::Multiply(wvp, matrix, world); Matrix::Multiply(world, view.ViewProjection(), wvp); - // Check if render shadow - if (renderShadow) + // Fullscreen shadow mask rendering + if (light.HasShadow) { GET_SHADOW_MASK(); - ShadowsPass::Instance()->RenderShadow(renderContextBatch, light, shadowMaskView); - - // Bind output + ShadowsPass::Instance()->RenderShadowMask(renderContextBatch, light, shadowMaskView); context->SetRenderTarget(depthBufferRTV, lightBuffer); - - // Set shadow mask context->BindSR(5, shadowMaskView); } else context->UnBindSR(5); // Pack light properties buffer - light.SetShaderData(perLight.Light, renderShadow); + light.SetShaderData(perLight.Light, light.HasShadow); Matrix::Transpose(wvp, 
perLight.WVP); if (useIES) { @@ -299,12 +315,9 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi for (int32 lightIndex = 0; lightIndex < mainCache->SpotLights.Count(); lightIndex++) { PROFILE_GPU_CPU_NAMED("Spot Light"); - - // Cache data auto& light = mainCache->SpotLights[lightIndex]; float lightRadius = light.Radius; Float3 lightPosition = light.Position; - const bool renderShadow = useShadows && light.ShadowDataIndex != -1; bool useIES = light.IESTexture != nullptr; // Get distance from view center to light center less radius (check if view is inside a sphere) @@ -318,23 +331,19 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi Matrix::Multiply(wvp, matrix, world); Matrix::Multiply(world, view.ViewProjection(), wvp); - // Check if render shadow - if (renderShadow) + // Fullscreen shadow mask rendering + if (light.HasShadow) { GET_SHADOW_MASK(); - ShadowsPass::Instance()->RenderShadow(renderContextBatch, light, shadowMaskView); - - // Bind output + ShadowsPass::Instance()->RenderShadowMask(renderContextBatch, light, shadowMaskView); context->SetRenderTarget(depthBufferRTV, lightBuffer); - - // Set shadow mask context->BindSR(5, shadowMaskView); } else context->UnBindSR(5); // Pack light properties buffer - light.SetShaderData(perLight.Light, renderShadow); + light.SetShaderData(perLight.Light, light.HasShadow); Matrix::Transpose(wvp, perLight.WVP); if (useIES) { @@ -356,28 +365,21 @@ void LightPass::RenderLight(RenderContextBatch& renderContextBatch, GPUTextureVi for (int32 lightIndex = 0; lightIndex < mainCache->DirectionalLights.Count(); lightIndex++) { PROFILE_GPU_CPU_NAMED("Directional Light"); - - // Cache data auto& light = mainCache->DirectionalLights[lightIndex]; - const bool renderShadow = useShadows && light.ShadowDataIndex != -1; - // Check if render shadow - if (renderShadow) + // Fullscreen shadow mask rendering + if (light.HasShadow) { GET_SHADOW_MASK(); - 
ShadowsPass::Instance()->RenderShadow(renderContextBatch, light, lightIndex, shadowMaskView); - - // Bind output + ShadowsPass::Instance()->RenderShadowMask(renderContextBatch, light, shadowMaskView); context->SetRenderTarget(depthBufferRTV, lightBuffer); - - // Set shadow mask context->BindSR(5, shadowMaskView); } else context->UnBindSR(5); // Pack light properties buffer - light.SetShaderData(perLight.Light, renderShadow); + light.SetShaderData(perLight.Light, light.HasShadow); // Calculate lighting context->UpdateCB(cb0, &perLight); diff --git a/Source/Engine/Renderer/LightPass.h b/Source/Engine/Renderer/LightPass.h index 118f22363..a399c7bfd 100644 --- a/Source/Engine/Renderer/LightPass.h +++ b/Source/Engine/Renderer/LightPass.h @@ -27,15 +27,19 @@ private: PixelFormat _shadowMaskFormat; public: + /// + /// Setups the lights rendering for batched scene drawing. + /// + void SetupLights(RenderContext& renderContext, RenderContextBatch& renderContextBatch); + /// /// Performs the lighting rendering for the input task. /// /// The rendering context batch. /// The light accumulation buffer (input and output). 
- void RenderLight(RenderContextBatch& renderContextBatch, GPUTextureView* lightBuffer); + void RenderLights(RenderContextBatch& renderContextBatch, GPUTextureView* lightBuffer); private: - #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) { @@ -51,14 +55,12 @@ private: #endif public: - // [RendererPass] String ToString() const override; bool Init() override; void Dispose() override; protected: - // [RendererPass] bool setupResources() override; }; diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index efe3edd2a..8f07180f3 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -39,6 +39,24 @@ namespace CriticalSection MemPoolLocker; } +bool RenderLightData::CanRenderShadow(const RenderView& view) const +{ + bool result = false; + switch (ShadowsMode) + { + case ShadowsCastingMode::StaticOnly: + result = view.IsOfflinePass; + break; + case ShadowsCastingMode::DynamicOnly: + result = !view.IsOfflinePass; + break; + case ShadowsCastingMode::All: + result = true; + break; + } + return result && ShadowsStrength > ZeroTolerance; +} + void RenderDirectionalLightData::SetShaderData(ShaderLightData& data, bool useShadow) const { data.SpotAngles.X = -2.0f; @@ -48,7 +66,7 @@ void RenderDirectionalLightData::SetShaderData(ShaderLightData& data, bool useSh data.Color = Color; data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); data.Position = Float3::Zero; - data.CastShadows = useShadow ? 1.0f : 0.0f; + data.ShadowsBufferAddress = useShadow ? 
ShadowsBufferAddress : 0; data.Direction = -Direction; data.Radius = 0; data.FalloffExponent = 0; @@ -56,6 +74,15 @@ void RenderDirectionalLightData::SetShaderData(ShaderLightData& data, bool useSh data.RadiusInv = 0; } +bool RenderLocalLightData::CanRenderShadow(const RenderView& view) const +{ + // Fade shadow on distance + const float fadeDistance = Math::Max(ShadowsFadeDistance, 0.1f); + const float dstLightToView = Float3::Distance(Position, view.Position); + const float fade = 1 - Math::Saturate((dstLightToView - Radius - ShadowsDistance + fadeDistance) / fadeDistance); + return fade > ZeroTolerance && RenderLightData::CanRenderShadow(view); +} + void RenderSpotLightData::SetShaderData(ShaderLightData& data, bool useShadow) const { data.SpotAngles.X = CosOuterCone; @@ -65,7 +92,7 @@ void RenderSpotLightData::SetShaderData(ShaderLightData& data, bool useShadow) c data.Color = Color; data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); data.Position = Position; - data.CastShadows = useShadow ? 1.0f : 0.0f; + data.ShadowsBufferAddress = useShadow ? ShadowsBufferAddress : 0; data.Direction = Direction; data.Radius = Radius; data.FalloffExponent = FallOffExponent; @@ -82,7 +109,7 @@ void RenderPointLightData::SetShaderData(ShaderLightData& data, bool useShadow) data.Color = Color; data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS); data.Position = Position; - data.CastShadows = useShadow ? 1.0f : 0.0f; + data.ShadowsBufferAddress = useShadow ? ShadowsBufferAddress : 0; data.Direction = Direction; data.Radius = Radius; data.FalloffExponent = FallOffExponent; @@ -99,7 +126,7 @@ void RenderSkyLightData::SetShaderData(ShaderLightData& data, bool useShadow) co data.Color = Color; data.MinRoughness = MIN_ROUGHNESS; data.Position = Position; - data.CastShadows = useShadow ? 1.0f : 0.0f; + data.ShadowsBufferAddress = useShadow ? 
ShadowsBufferAddress : 0; data.Direction = Float3::Forward; data.Radius = Radius; data.FalloffExponent = 0; diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 15b5a9842..3a7ab373e 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -43,55 +43,82 @@ struct RenderLightData StaticFlags StaticFlags; ShadowsCastingMode ShadowsMode; float IndirectLightingIntensity; - int16 ShadowDataIndex = -1; + uint8 HasShadow : 1; uint8 CastVolumetricShadow : 1; - uint8 RenderedVolumetricFog : 1; + uint8 UseInverseSquaredFalloff : 1; + uint8 IsDirectionalLight : 1; + uint8 IsPointLight : 1; + uint8 IsSpotLight : 1; + uint8 IsSkyLight : 1; float VolumetricScatteringIntensity; float ContactShadowsLength; + float ScreenSize; + uint32 ShadowsBufferAddress; + + RenderLightData() + { + Platform::MemoryClear(this, sizeof(RenderLightData)); + } + + POD_COPYABLE(RenderLightData); + bool CanRenderShadow(const RenderView& view) const; }; struct RenderDirectionalLightData : RenderLightData { - PartitionMode PartitionMode; - int32 CascadeCount; - float Cascade1Spacing; float Cascade2Spacing; float Cascade3Spacing; float Cascade4Spacing; + PartitionMode PartitionMode; + int32 CascadeCount; + + RenderDirectionalLightData() + { + IsDirectionalLight = 1; + } + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; -struct RenderSpotLightData : RenderLightData +struct RenderLocalLightData : RenderLightData { + GPUTexture* IESTexture; + float Radius; float SourceRadius; + bool CanRenderShadow(const RenderView& view) const; +}; + +struct RenderSpotLightData : RenderLocalLightData +{ Float3 UpVector; float OuterConeAngle; float CosOuterCone; float InvCosConeDifference; float FallOffExponent; - uint8 UseInverseSquaredFalloff : 1; - GPUTexture* IESTexture; + RenderSpotLightData() + { + IsSpotLight = 1; + } void SetShaderData(ShaderLightData& data, bool useShadow) const; }; -struct RenderPointLightData : 
RenderLightData +struct RenderPointLightData : RenderLocalLightData { - float Radius; - float SourceRadius; - float FallOffExponent; float SourceLength; - uint8 UseInverseSquaredFalloff : 1; - GPUTexture* IESTexture; + RenderPointLightData() + { + IsPointLight = 1; + } void SetShaderData(ShaderLightData& data, bool useShadow) const; }; @@ -103,6 +130,11 @@ struct RenderSkyLightData : RenderLightData CubeTexture* Image; + RenderSkyLightData() + { + IsSkyLight = 1; + } + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 7500b2f1e..8b97bc1d5 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -348,7 +348,6 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Prepare renderContext.View.Prepare(renderContext); renderContext.Buffers->Prepare(); - ShadowsPass::Instance()->Prepare(); // Build batch of render contexts (main view and shadow projections) { @@ -371,6 +370,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont drawShadows = false; break; } + LightPass::Instance()->SetupLights(renderContext, renderContextBatch); if (drawShadows) ShadowsPass::Instance()->SetupShadows(renderContext, renderContextBatch); #if USE_EDITOR @@ -404,7 +404,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::MotionVectors); for (int32 i = 1; i < renderContextBatch.Contexts.Count(); i++) { - auto& shadowContext = renderContextBatch.Contexts[i]; + auto& shadowContext = renderContextBatch.Contexts.Get()[i]; shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth); } @@ -487,7 
+487,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Render lighting renderContextBatch.GetMainContext() = renderContext; // Sync render context in batch with the current value - LightPass::Instance()->RenderLight(renderContextBatch, *lightBuffer); + ShadowsPass::Instance()->RenderShadowMaps(renderContextBatch); + LightPass::Instance()->RenderLights(renderContextBatch, *lightBuffer); if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::GI)) { switch (renderContext.List->Settings.GlobalIllumination.Mode) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index de1d0c878..bbd036592 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -9,19 +9,22 @@ #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Content/Content.h" +#include "Engine/Engine/Engine.h" +#include "Engine/Graphics/RenderTools.h" +#include "Engine/Level/Scene/SceneRendering.h" #include "Engine/Scripting/Enums.h" +#include "Engine/Utilities/RectPack.h" #if USE_EDITOR #include "Engine/Renderer/Lightmaps.h" #endif +#define MaxTiles 6 #define NormalOffsetScaleTweak 100.0f -#define SpotLight_NearPlane 10.0f -#define PointLight_NearPlane 10.0f +#define LocalLightNearPlane 10.0f PACK_STRUCT(struct Data{ ShaderGBufferData GBuffer; ShaderLightData Light; - ShaderLightShadowData LightShadow; Matrix WVP; Matrix ViewProjectionMatrix; Float2 Dummy0; @@ -29,28 +32,116 @@ PACK_STRUCT(struct Data{ float ContactShadowsLength; }); -ShadowsPass::ShadowsPass() - : _shader(nullptr) - , _shadowMapsSizeCSM(0) - , _shadowMapsSizeCube(0) - , _shadowMapCSM(nullptr) - , _shadowMapCube(nullptr) - , _currentShadowMapsQuality((Quality)((int32)Quality::Ultra + 1)) - , _sphereModel(nullptr) - , maxShadowsQuality(0) +struct ShadowsAtlasTile : RectPack { + ShadowsAtlasTile(uint16 x, uint16 y, uint16 width, uint16 height) + : RectPack(x, y, width, 
height) + { + } + + void OnInsert(class ShadowsCustomBuffer* buffer); + void OnFree(ShadowsCustomBuffer* buffer); +}; + +uint16 QuantizeResolution(float input) +{ + uint16 output = Math::FloorToInt(input); + uint16 alignment = 16; + if (output >= 512) + alignment = 64; + else if (output >= 256) + alignment = 32; + output = Math::AlignDown(output, alignment); + return output; } -uint64 ShadowsPass::GetShadowMapsMemoryUsage() const +struct ShadowAtlasLight { - uint64 result = 0; + uint64 LastFrameUsed; + int32 ContextIndex; + int32 ContextCount; + uint16 Resolution; + uint16 TilesNeeded; + bool BlendCSM; + float Sharpness, Fade, NormalOffsetScale, Bias, FadeDistance; + Float4 CascadeSplits; + ShadowsAtlasTile* Tiles[MaxTiles]; + Matrix WorldToShadow[MaxTiles]; - if (_shadowMapCSM) - result += _shadowMapCSM->GetMemoryUsage(); - if (_shadowMapCube) - result += _shadowMapCube->GetMemoryUsage(); + ShadowAtlasLight() + { + Platform::MemoryClear(this, sizeof(ShadowAtlasLight)); + } - return result; + POD_COPYABLE(ShadowAtlasLight); + + void SetWorldToShadow(int32 index, const Matrix& shadowViewProjection) + { + // Transform Clip Space [-1,+1]^2 to UV Space [0,1]^2 (saves MAD instruction in shader) + const Matrix ClipToUV( + 0.5f, 0.0f, 0.0f, 0.0f, + 0.0f, -0.5f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.5f, 0.5f, 0.0f, 1.0f); + Matrix m; + Matrix::Multiply(shadowViewProjection, ClipToUV, m); + Matrix::Transpose(m, WorldToShadow[index]); + } +}; + +class ShadowsCustomBuffer : public RenderBuffers::CustomBuffer +{ +public: + int32 Resolution = 0; + int32 AtlasPixelsUsed = 0; + mutable bool ClearShadowMapAtlas = true; + Vector3 ViewOrigin; + GPUTexture* ShadowMapAtlas = nullptr; + DynamicTypedBuffer ShadowsBuffer; + GPUBufferView* ShadowsBufferView = nullptr; + ShadowsAtlasTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles + Dictionary Lights; + + ShadowsCustomBuffer() + : ShadowsBuffer(1024, PixelFormat::R32G32B32A32_Float, false, 
TEXT("ShadowsBuffer")) + { + ShadowMapAtlas = GPUDevice::Instance->CreateTexture(TEXT("Shadow Map Atlas")); + } + + void ClearTiles() + { + ClearShadowMapAtlas = true; + AtlasPixelsUsed = 0; + SAFE_DELETE(AtlasTiles); + for (auto it = Lights.Begin(); it.IsNotEnd(); ++it) + { + auto& atlasLight = it->Value; + Platform::MemoryClear(atlasLight.Tiles, sizeof(atlasLight.Tiles)); + } + } + + void Reset() + { + Lights.Clear(); + ClearTiles(); + ViewOrigin = Vector3::Zero; + } + + ~ShadowsCustomBuffer() + { + Reset(); + SAFE_DELETE_GPU_RESOURCE(ShadowMapAtlas); + } +}; + +void ShadowsAtlasTile::OnInsert(ShadowsCustomBuffer* buffer) +{ + buffer->AtlasPixelsUsed += (int32)Width * (int32)Height; +} + +void ShadowsAtlasTile::OnFree(ShadowsCustomBuffer* buffer) +{ + buffer->AtlasPixelsUsed -= (int32)Width * (int32)Height; } String ShadowsPass::ToString() const @@ -69,13 +160,7 @@ bool ShadowsPass::Init() _shader = Content::LoadAsyncInternal(TEXT("Shaders/Shadows")); _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/SphereLowPoly")); if (_shader == nullptr || _sphereModel == nullptr) - { return true; - } - - // Create shadow maps - _shadowMapCSM = GPUDevice::Instance->CreateTexture(TEXT("Shadow Map CSM")); - _shadowMapCube = GPUDevice::Instance->CreateTexture(TEXT("Shadow Map Cube")); #if COMPILE_WITH_DEV_ENV _shader.Get()->OnReloading.Bind(this); @@ -141,62 +226,22 @@ bool ShadowsPass::setupResources() if (_psShadowSpot.Create(psDesc, shader, "PS_SpotLight")) return true; } + if (_psDepthClear == nullptr) + { + psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.PS = GPUDevice::Instance->QuadShader->GetPS("PS_DepthClear"); + psDesc.DepthEnable = true; + psDesc.DepthWriteEnable = true; + psDesc.DepthFunc = ComparisonFunc::Always; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::None; + _psDepthClear = GPUDevice::Instance->CreatePipelineState(); + if (_psDepthClear->Init(psDesc)) + return true; + } return false; } -void 
ShadowsPass::updateShadowMapSize() -{ - // Temporary data - int32 newSizeCSM = 0; - int32 newSizeCube = 0; - - // Select new size - _currentShadowMapsQuality = Graphics::ShadowMapsQuality; - if (_shadowMapFormat != PixelFormat::Unknown) - { - switch (_currentShadowMapsQuality) - { - case Quality::Ultra: - newSizeCSM = 2048; - newSizeCube = 1024; - break; - case Quality::High: - newSizeCSM = 1024; - newSizeCube = 1024; - break; - case Quality::Medium: - newSizeCSM = 1024; - newSizeCube = 512; - break; - case Quality::Low: - newSizeCSM = 512; - newSizeCube = 256; - break; - } - } - - // Check if size will change - if (newSizeCSM > 0 && newSizeCSM != _shadowMapsSizeCSM) - { - if (_shadowMapCSM->Init(GPUTextureDescription::New2D(newSizeCSM, newSizeCSM, _shadowMapFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::DepthStencil, 1, MAX_CSM_CASCADES))) - { - LOG(Fatal, "Cannot setup shadow map '{0}' Size: {1}, format: {2}.", TEXT("CSM"), newSizeCSM, ScriptingEnum::ToString(_shadowMapFormat)); - return; - } - _shadowMapsSizeCSM = newSizeCSM; - } - if (newSizeCube > 0 && newSizeCube != _shadowMapsSizeCube) - { - if (_shadowMapCube->Init(GPUTextureDescription::NewCube(newSizeCube, _shadowMapFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::DepthStencil))) - { - LOG(Fatal, "Cannot setup shadow map '{0}' Size: {1}, format: {2}.", TEXT("Cube"), newSizeCube, ScriptingEnum::ToString(_shadowMapFormat)); - return; - } - _shadowMapsSizeCube = newSizeCube; - } -} - void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext) { const auto& view = renderContext.View; @@ -219,15 +264,38 @@ void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext shadowContext.Task = renderContext.Task; } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light) +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& 
renderContextBatch, RenderLightData& light, ShadowAtlasLight& atlasLight) { + // Copy light properties + atlasLight.Sharpness = light.ShadowsSharpness; + atlasLight.Fade = light.ShadowsStrength; + atlasLight.NormalOffsetScale = light.ShadowsNormalOffsetScale * NormalOffsetScaleTweak * (1.0f / (float)atlasLight.Resolution); + atlasLight.Bias = light.ShadowsDepthBias; + atlasLight.FadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); +} + +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight) +{ + SetupLight(renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); + + // Fade shadow on distance + const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); + const float dstLightToView = Float3::Distance(light.Position, renderContext.View.Position); + const float fade = 1 - Math::Saturate((dstLightToView - light.Radius - light.ShadowsDistance + fadeDistance) / fadeDistance); + atlasLight.Fade *= fade; +} + +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight) +{ + SetupLight(renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); + const RenderView& view = renderContext.View; auto mainCache = renderContext.List; Float3 lightDirection = light.Direction; float shadowsDistance = Math::Min(view.Far, light.ShadowsDistance); int32 csmCount = Math::Clamp(light.CascadeCount, 0, MAX_CSM_CASCADES); bool blendCSM = Graphics::AllowCSMBlending; - const auto shadowMapsSizeCSM = (float)_shadowMapsSizeCSM; + const auto shadowMapsSize = (float)atlasLight.Resolution; #if USE_EDITOR if (IsRunningRadiancePass) blendCSM = false; @@ -248,7 +316,6 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r float maxDistance; float cascadeSplits[MAX_CSM_CASCADES]; { - // TODO: use HiZ and get view min/max 
range to fit cascades better minDistance = cameraNear; maxDistance = cameraNear + shadowsDistance; @@ -306,6 +373,7 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r for (int32 i = 0; i < MAX_CSM_CASCADES; i++) cascadeSplits[i] = (cascadeSplits[i] - cameraNear) / cameraRange; } + atlasLight.CascadeSplits = view.Near + Float4(cascadeSplits) * cameraRange; // Select best Up vector Float3 side = Float3::UnitX; @@ -327,12 +395,10 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r Matrix shadowView, shadowProjection, shadowVP; // Init shadow data - light.ShadowDataIndex = _shadowData.Count(); - auto& shadowData = _shadowData.AddOne(); - shadowData.ContextIndex = renderContextBatch.Contexts.Count(); - shadowData.ContextCount = csmCount; - shadowData.BlendCSM = blendCSM; - renderContextBatch.Contexts.AddDefault(shadowData.ContextCount); + atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); + atlasLight.ContextCount = csmCount; + atlasLight.BlendCSM = blendCSM; + renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); // Create the different view and projection matrices for each split float splitMinRatio = 0; @@ -380,7 +446,7 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r if (stabilization == ViewSnapping) { // Snap the target to the texel units (reference: ShaderX7 - Practical Cascaded Shadows Maps) - float shadowMapHalfSize = shadowMapsSizeCSM * 0.5f; + float shadowMapHalfSize = shadowMapsSize * 0.5f; float x = Math::Ceil(Float3::Dot(target, upDirection) * shadowMapHalfSize / boundingVSRadius) * boundingVSRadius / shadowMapHalfSize; float y = Math::Ceil(Float3::Dot(target, side) * shadowMapHalfSize / boundingVSRadius) * boundingVSRadius / shadowMapHalfSize; float z = Float3::Dot(target, lightDirection); @@ -411,125 +477,78 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r // Stabilize the shadow matrix on the 
projection if (stabilization == ProjectionSnapping) { - Float3 shadowPixelPosition = shadowVP.GetTranslation() * (shadowMapsSizeCSM * 0.5f); + Float3 shadowPixelPosition = shadowVP.GetTranslation() * (shadowMapsSize * 0.5f); shadowPixelPosition.Z = 0; const Float3 shadowPixelPositionRounded(Math::Round(shadowPixelPosition.X), Math::Round(shadowPixelPosition.Y), 0.0f); - const Float4 shadowPixelOffset((shadowPixelPositionRounded - shadowPixelPosition) * (2.0f / shadowMapsSizeCSM), 0.0f); + const Float4 shadowPixelOffset((shadowPixelPositionRounded - shadowPixelPosition) * (2.0f / shadowMapsSize), 0.0f); shadowProjection.SetRow4(shadowProjection.GetRow4() + shadowPixelOffset); Matrix::Multiply(shadowView, shadowProjection, shadowVP); } - // Transform NDC space [-1,+1]^2 to texture space [0,1]^2 - { - const Matrix T( - 0.5f, 0.0f, 0.0f, 0.0f, - 0.0f, -0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.5f, 0.5f, 0.0f, 1.0f); - Matrix m; - Matrix::Multiply(shadowVP, T, m); - Matrix::Transpose(m, shadowData.Constants.ShadowVP[cascadeIndex]); - } + atlasLight.SetWorldToShadow(cascadeIndex, shadowVP); // Setup context for cascade - auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + cascadeIndex]; + auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + cascadeIndex]; SetupRenderContext(renderContext, shadowContext); shadowContext.List->Clear(); shadowContext.View.Position = -lightDirection * shadowsDistance + view.Position; shadowContext.View.Direction = lightDirection; shadowContext.View.SetUp(shadowView, shadowProjection); shadowContext.View.CullingFrustum.SetMatrix(cullingVP); - shadowContext.View.PrepareCache(shadowContext, shadowMapsSizeCSM, shadowMapsSizeCSM, Float2::Zero, &view); + shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &view); } - - // Setup constant buffer data - shadowData.Constants.ShadowMapSize = shadowMapsSizeCSM; - shadowData.Constants.Sharpness = 
light.ShadowsSharpness; - shadowData.Constants.Fade = Math::Saturate(light.ShadowsStrength); - shadowData.Constants.NormalOffsetScale = light.ShadowsNormalOffsetScale * NormalOffsetScaleTweak * (1.0f / shadowMapsSizeCSM); - shadowData.Constants.Bias = light.ShadowsDepthBias; - shadowData.Constants.FadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - shadowData.Constants.NumCascades = csmCount; - shadowData.Constants.CascadeSplits = view.Near + Float4(cascadeSplits) * cameraRange; } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light) +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight) { + SetupLight(renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight); + // Init shadow data - light.ShadowDataIndex = _shadowData.Count(); - auto& shadowData = _shadowData.AddOne(); - shadowData.ContextIndex = renderContextBatch.Contexts.Count(); - shadowData.ContextCount = 6; - renderContextBatch.Contexts.AddDefault(shadowData.ContextCount); + atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); + atlasLight.ContextCount = 6; + renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); const auto& view = renderContext.View; - const auto shadowMapsSizeCube = (float)_shadowMapsSizeCube; + const auto shadowMapsSize = (float)atlasLight.Resolution; // Fade shadow on distance const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); const float dstLightToView = Float3::Distance(light.Position, view.Position); const float fade = 1 - Math::Saturate((dstLightToView - light.Radius - light.ShadowsDistance + fadeDistance) / fadeDistance); + atlasLight.Fade *= fade; // Render depth to all 6 faces of the cube map for (int32 faceIndex = 0; faceIndex < 6; faceIndex++) { - auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex]; + 
auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + faceIndex]; SetupRenderContext(renderContext, shadowContext); shadowContext.List->Clear(); - shadowContext.View.SetUpCube(PointLight_NearPlane, light.Radius, light.Position); + shadowContext.View.SetUpCube(LocalLightNearPlane, light.Radius, light.Position); shadowContext.View.SetFace(faceIndex); - shadowContext.View.PrepareCache(shadowContext, shadowMapsSizeCube, shadowMapsSizeCube, Float2::Zero, &view); - Matrix::Transpose(shadowContext.View.ViewProjection(), shadowData.Constants.ShadowVP[faceIndex]); + shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &view); + atlasLight.SetWorldToShadow(faceIndex, shadowContext.View.ViewProjection()); } - - // Setup constant buffer data - shadowData.Constants.ShadowMapSize = shadowMapsSizeCube; - shadowData.Constants.Sharpness = light.ShadowsSharpness; - shadowData.Constants.Fade = Math::Saturate(light.ShadowsStrength * fade); - shadowData.Constants.NormalOffsetScale = light.ShadowsNormalOffsetScale * NormalOffsetScaleTweak * (1.0f / shadowMapsSizeCube); - shadowData.Constants.Bias = light.ShadowsDepthBias; - shadowData.Constants.FadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - shadowData.Constants.NumCascades = 1; - shadowData.Constants.CascadeSplits = Float4::Zero; } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light) +void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight) { + SetupLight(renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight); + // Init shadow data - light.ShadowDataIndex = _shadowData.Count(); - auto& shadowData = _shadowData.AddOne(); - shadowData.ContextIndex = renderContextBatch.Contexts.Count(); - shadowData.ContextCount = 1; - 
renderContextBatch.Contexts.AddDefault(shadowData.ContextCount); + atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); + atlasLight.ContextCount = 1; + renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); const auto& view = renderContext.View; - const auto shadowMapsSizeCube = (float)_shadowMapsSizeCube; + const auto shadowMapsSize = (float)atlasLight.Resolution; - // Fade shadow on distance - const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - const float dstLightToView = Float3::Distance(light.Position, view.Position); - const float fade = 1 - Math::Saturate((dstLightToView - light.Radius - light.ShadowsDistance + fadeDistance) / fadeDistance); - - // Render depth to all 1 face of the cube map - constexpr int32 faceIndex = 0; - { - auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex]; - SetupRenderContext(renderContext, shadowContext); - shadowContext.List->Clear(); - shadowContext.View.SetProjector(SpotLight_NearPlane, light.Radius, light.Position, light.Direction, light.UpVector, light.OuterConeAngle * 2.0f); - shadowContext.View.PrepareCache(shadowContext, shadowMapsSizeCube, shadowMapsSizeCube, Float2::Zero, &view); - Matrix::Transpose(shadowContext.View.ViewProjection(), shadowData.Constants.ShadowVP[faceIndex]); - } - - // Setup constant buffer data - shadowData.Constants.ShadowMapSize = shadowMapsSizeCube; - shadowData.Constants.Sharpness = light.ShadowsSharpness; - shadowData.Constants.Fade = Math::Saturate(light.ShadowsStrength * fade); - shadowData.Constants.NormalOffsetScale = light.ShadowsNormalOffsetScale * NormalOffsetScaleTweak * (1.0f / shadowMapsSizeCube); - shadowData.Constants.Bias = light.ShadowsDepthBias; - shadowData.Constants.FadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - shadowData.Constants.NumCascades = 1; - shadowData.Constants.CascadeSplits = Float4::Zero; + // Render depth to a single projection + auto& shadowContext = 
renderContextBatch.Contexts[atlasLight.ContextIndex]; + SetupRenderContext(renderContext, shadowContext); + shadowContext.List->Clear(); + shadowContext.View.SetProjector(LocalLightNearPlane, light.Radius, light.Position, light.Direction, light.UpVector, light.OuterConeAngle * 2.0f); + shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &view); + atlasLight.SetWorldToShadow(0, shadowContext.View.ViewProjection()); } void ShadowsPass::Dispose() @@ -543,294 +562,420 @@ void ShadowsPass::Dispose() _psShadowSpot.Delete(); _shader = nullptr; _sphereModel = nullptr; - SAFE_DELETE_GPU_RESOURCE(_shadowMapCSM); - SAFE_DELETE_GPU_RESOURCE(_shadowMapCube); -} - -void ShadowsPass::Prepare() -{ - // Clear cached data - _shadowData.Clear(); - LastDirLightIndex = -1; - LastDirLightShadowMap = nullptr; + SAFE_DELETE_GPU_RESOURCE(_psDepthClear); } void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& renderContextBatch) { PROFILE_CPU(); - auto& view = renderContext.View; + maxShadowsQuality = Math::Clamp(Math::Min((int32)Graphics::ShadowsQuality, (int32)renderContext.View.MaxShadowsQuality), 0, (int32)Quality::MAX - 1); - // Update shadow map - const auto shadowMapsQuality = Graphics::ShadowMapsQuality; - if (shadowMapsQuality != _currentShadowMapsQuality) - updateShadowMapSize(); - auto shadowsQuality = Graphics::ShadowsQuality; - maxShadowsQuality = Math::Clamp(Math::Min(static_cast(shadowsQuality), static_cast(view.MaxShadowsQuality)), 0, static_cast(Quality::MAX) - 1); - - // Create shadow projections for lights + // Early out and skip shadows setup if no lights is actively casting shadows + // RenderBuffers will automatically free any old ShadowsCustomBuffer after a few frames if we don't update LastFrameUsed + if (_shadowMapFormat == PixelFormat::Unknown || checkIfSkipPass() || EnumHasNoneFlags(renderContext.View.Flags, ViewFlags::Shadows)) + return; + Array shadowedLights; for (auto& light : 
renderContext.List->DirectionalLights) { - if (::CanRenderShadow(view, light) && CanRenderShadow(renderContext, light)) - SetupLight(renderContext, renderContextBatch, light); - } - for (auto& light : renderContext.List->PointLights) - { - if (::CanRenderShadow(view, light) && CanRenderShadow(renderContext, light)) - SetupLight(renderContext, renderContextBatch, light); + if (light.CanRenderShadow(renderContext.View)) + shadowedLights.Add(&light); } for (auto& light : renderContext.List->SpotLights) { - if (::CanRenderShadow(view, light) && CanRenderShadow(renderContext, light)) - SetupLight(renderContext, renderContextBatch, light); + if (light.CanRenderShadow(renderContext.View)) + shadowedLights.Add(&light); } -} - -bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RenderPointLightData& light) -{ - const Float3 lightPosition = light.Position; - const float dstLightToView = Float3::Distance(lightPosition, renderContext.View.Position); - - // Fade shadow on distance - const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - const float fade = 1 - Math::Saturate((dstLightToView - light.Radius - light.ShadowsDistance + fadeDistance) / fadeDistance); - - return fade > ZeroTolerance && _shadowMapFormat != PixelFormat::Unknown; -} - -bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RenderSpotLightData& light) -{ - const Float3 lightPosition = light.Position; - const float dstLightToView = Float3::Distance(lightPosition, renderContext.View.Position); - - // Fade shadow on distance - const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - const float fade = 1 - Math::Saturate((dstLightToView - light.Radius - light.ShadowsDistance + fadeDistance) / fadeDistance); - - return fade > ZeroTolerance && _shadowMapFormat != PixelFormat::Unknown; -} - -bool ShadowsPass::CanRenderShadow(const RenderContext& renderContext, const RenderDirectionalLightData& light) -{ - return _shadowMapFormat != 
PixelFormat::Unknown; -} - -void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderPointLightData& light, GPUTextureView* shadowMask) -{ - if (light.ShadowDataIndex == -1) - return; - PROFILE_GPU_CPU("Shadow"); - GPUContext* context = GPUDevice::Instance->GetMainContext(); - RenderContext& renderContext = renderContextBatch.GetMainContext(); - ShadowData& shadowData = _shadowData[light.ShadowDataIndex]; - const float sphereModelScale = 3.0f; - auto& view = renderContext.View; - auto shader = _shader->GetShader(); - - // TODO: here we can use lower shadows quality based on light distance to view (LOD switching) and per light setting for max quality - int32 shadowQuality = maxShadowsQuality; - - // Set up GPU context and render view - const auto shadowMapsSizeCube = (float)_shadowMapsSizeCube; - context->SetViewportAndScissors(shadowMapsSizeCube, shadowMapsSizeCube); - - // Render depth to all 6 faces of the cube map - for (int32 faceIndex = 0; faceIndex < 6; faceIndex++) + for (auto& light : renderContext.List->PointLights) { - auto rt = _shadowMapCube->View(faceIndex); - context->ResetSR(); - context->SetRenderTarget(rt, static_cast(nullptr)); - context->ClearDepth(rt); - auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex]; - shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth); - shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + if (light.CanRenderShadow(renderContext.View)) + shadowedLights.Add(&light); + } + if (shadowedLights.IsEmpty()) + return; + + // Initialize shadow atlas + auto& shadows = *renderContext.Buffers->GetCustomBuffer(TEXT("Shadows")); + const auto currentFrame = Engine::FrameCount; + shadows.LastFrameUsed = currentFrame; + int32 atlasResolution; + switch (Graphics::ShadowMapsQuality) + { + case Quality::Low: + atlasResolution = 1024; + break; + case Quality::Medium: + 
atlasResolution = 2048; + break; + case Quality::High: + atlasResolution = 4096; + break; + case Quality::Ultra: + atlasResolution = 8192; + break; + default: + return; + } + const int32 baseLightResolution = atlasResolution / MAX_CSM_CASCADES; // Allow to store 4 CSM cascades in a single row in all cases + if (shadows.Resolution != atlasResolution) + { + shadows.Reset(); + auto desc = GPUTextureDescription::New2D(atlasResolution, atlasResolution, _shadowMapFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::DepthStencil); + if (shadows.ShadowMapAtlas->Init(desc)) + { + LOG(Fatal, "Failed to setup shadow map of size {0}x{1} and format {2}", desc.Width, desc.Height, ScriptingEnum::ToString(desc.Format)); + return; + } + shadows.ClearShadowMapAtlas = true; + shadows.Resolution = atlasResolution; + } + if (renderContext.View.Origin != shadows.ViewOrigin) + { + // Large Worlds chunk movement so invalidate cached shadows + shadows.Reset(); + shadows.ViewOrigin = renderContext.View.Origin; + } + if (!shadows.AtlasTiles) + shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); + + // Update/add lights + for (const RenderLightData* light : shadowedLights) + { + auto& atlasLight = shadows.Lights[light->ID]; + + // Calculate resolution for this light + // TODO: add support for fixed shadow map resolution assigned per-light + float lightResolutionFloat = baseLightResolution * light->ScreenSize; + atlasLight.Resolution = QuantizeResolution(lightResolutionFloat); + + // Cull too small lights + constexpr uint16 MinResolution = 16; + if (atlasLight.Resolution < MinResolution) + continue; + + if (light->IsDirectionalLight) + atlasLight.TilesNeeded = Math::Clamp(((const RenderDirectionalLightData*)light)->CascadeCount, 0, MAX_CSM_CASCADES); + else if (light->IsPointLight) + atlasLight.TilesNeeded = 6; + else + atlasLight.TilesNeeded = 1; + atlasLight.LastFrameUsed = currentFrame; + } + + // Remove unused lights (before inserting any new ones to make space in the 
atlas) + for (auto it = shadows.Lights.Begin(); it.IsNotEnd(); ++it) + { + if (it->Value.LastFrameUsed != currentFrame) + { + for (auto& tile : it->Value.Tiles) + { + if (tile) + tile->Free(&shadows); + } + shadows.Lights.Remove(it); + } + } + + // Calculate size requirements for atlas + int32 atlasPixelsNeeded = 0; + for (auto it = shadows.Lights.Begin(); it.IsNotEnd(); ++it) + { + const auto& atlasLight = it->Value; + atlasPixelsNeeded += atlasLight.Resolution * atlasLight.Resolution * atlasLight.TilesNeeded; + } + const int32 atlasPixelsAllowed = atlasResolution * atlasResolution; + const float atlasPixelsCoverage = (float)atlasPixelsNeeded / atlasPixelsAllowed; + + // If atlas is overflown then scale down the shadows resolution + float resolutionScale = 1.0f; + if (atlasPixelsCoverage > 1.0f) + resolutionScale /= atlasPixelsCoverage; + float finalScale = 1.0f; + bool defragDone = false; +RETRY_ATLAS_SETUP: + + // Apply additional scale to the shadows resolution + if (!Math::IsOne(resolutionScale)) + { + finalScale *= resolutionScale; + for (const RenderLightData* light : shadowedLights) + { + auto& atlasLight = shadows.Lights[light->ID]; + if (light->IsDirectionalLight && !defragDone) + continue; // Reduce scaling on directional light shadows (before defrag) + atlasLight.Resolution = QuantizeResolution(atlasLight.Resolution * resolutionScale); + } + } + + // Macro checks if light has proper amount of tiles already assigned and the resolution is matching +#define IS_LIGHT_TILE_REUSABLE (atlasLight.ContextCount == atlasLight.TilesNeeded && atlasLight.Tiles[0] && atlasLight.Tiles[0]->Width == atlasLight.Resolution) + + // Remove incorrect tiles before allocating new ones + for (RenderLightData* light : shadowedLights) + { + auto& atlasLight = shadows.Lights[light->ID]; + if (IS_LIGHT_TILE_REUSABLE) + continue; + + // Remove existing tiles + for (auto& tile : atlasLight.Tiles) + { + if (tile) + { + tile->Free(&shadows); + tile = nullptr; + } + } + } + + // Insert 
tiles into the atlas (already sorted to favor the first ones) + for (RenderLightData* light : shadowedLights) + { + auto& atlasLight = shadows.Lights[light->ID]; + if (IS_LIGHT_TILE_REUSABLE || atlasLight.Resolution < 16) + continue; + + // Try to insert tiles + bool failedToInsert = false; + for (int32 tileIndex = 0; tileIndex < atlasLight.TilesNeeded; tileIndex++) + { + auto tile = shadows.AtlasTiles->Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows); + if (!tile) + { + // Free any previous tiles that were added + for (int32 i = 0; i < tileIndex; i++) + { + atlasLight.Tiles[i]->Free(&shadows); + atlasLight.Tiles[i] = nullptr; + } + failedToInsert = true; + break; + } + atlasLight.Tiles[tileIndex] = tile; + } + if (failedToInsert) + { + if (defragDone) + { + // Already defragmented atlas so scale it down + resolutionScale = 0.8f; + } + else + { + // Defragment atlas without changing scale + defragDone = true; + resolutionScale = 1.0f; + } + + // Rebuild atlas + shadows.ClearTiles(); + shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); + goto RETRY_ATLAS_SETUP; + } + } + + // Setup shadows for all lights + for (RenderLightData* light : shadowedLights) + { + auto& atlasLight = shadows.Lights[light->ID]; + if (atlasLight.Tiles[0] && atlasLight.Tiles[0]->Width == atlasLight.Resolution) + { + light->HasShadow = true; + if (light->IsPointLight) + SetupLight(renderContext, renderContextBatch, *(RenderPointLightData*)light, atlasLight); + else if (light->IsSpotLight) + SetupLight(renderContext, renderContextBatch, *(RenderSpotLightData*)light, atlasLight); + else //if (light->IsDirectionalLight) + SetupLight(renderContext, renderContextBatch, *(RenderDirectionalLightData*)light, atlasLight); + } + } + +#undef IS_LIGHT_TILE_REUSABLE + + // Update shadows buffer (contains packed data with all shadow projections in the atlas) + const float atlasResolutionInv = 1.0f / (float)atlasResolution; + shadows.ShadowsBuffer.Clear(); + 
shadows.ShadowsBuffer.Write(Float4::Zero); // Insert dummy prefix so ShadowsBufferAddress=0 indicates no shadow + for (RenderLightData* light : shadowedLights) + { + auto& atlasLight = shadows.Lights[light->ID]; + if (atlasLight.Tiles[0] == nullptr) + { + light->ShadowsBufferAddress = 0; // Clear to indicate no shadow + continue; + } + + // Cache start of the shadow data for this light + light->ShadowsBufferAddress = shadows.ShadowsBuffer.Data.Count() / sizeof(Float4); + + // Write shadow data (this must match HLSL) + const int32 tilesCount = atlasLight.ContextCount; + { + // Shadow info + auto* packed = shadows.ShadowsBuffer.WriteReserve(2); + Color32 packed0x((byte)(atlasLight.Sharpness * (255.0f / 10.0f)), (byte)(atlasLight.Fade * 255.0f), tilesCount, 0); + packed[0] = Float4(*(const float*)&packed0x, atlasLight.FadeDistance, atlasLight.NormalOffsetScale, atlasLight.Bias); + packed[1] = atlasLight.CascadeSplits; + } + for (int32 tileIndex = 0; tileIndex < tilesCount; tileIndex++) + { + // Shadow projection info + const ShadowsAtlasTile* tile = atlasLight.Tiles[tileIndex]; + ASSERT(tile); + const Matrix& worldToShadow = atlasLight.WorldToShadow[tileIndex]; + auto* packed = shadows.ShadowsBuffer.WriteReserve(5); + packed[0] = Float4(tile->Width, tile->Height, tile->X, tile->Y) * atlasResolutionInv; // UV to AtlasUV via a single MAD instruction + packed[1] = worldToShadow.GetColumn1(); + packed[2] = worldToShadow.GetColumn2(); + packed[3] = worldToShadow.GetColumn3(); + packed[4] = worldToShadow.GetColumn4(); + } + } + GPUContext* context = GPUDevice::Instance->GetMainContext(); + shadows.ShadowsBuffer.Flush(context); + shadows.ShadowsBufferView = shadows.ShadowsBuffer.GetBuffer()->View(); +} + +void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) +{ + const RenderContext& renderContext = renderContextBatch.GetMainContext(); + const ShadowsCustomBuffer* shadowsPtr = renderContext.Buffers->FindCustomBuffer(TEXT("Shadows")); + if (shadowsPtr == 
nullptr || shadowsPtr->Lights.IsEmpty() || shadowsPtr->LastFrameUsed != Engine::FrameCount) + return; + PROFILE_GPU_CPU("ShadowMaps"); + const ShadowsCustomBuffer& shadows = *shadowsPtr; + GPUContext* context = GPUDevice::Instance->GetMainContext(); + context->ResetSR(); + context->SetRenderTarget(shadows.ShadowMapAtlas->View(), (GPUTextureView*)nullptr); + GPUConstantBuffer* quadShaderCB; + if (shadows.ClearShadowMapAtlas) + { + context->ClearDepth(shadows.ShadowMapAtlas->View()); + } + else + { + QuadShaderData quadShaderData; + quadShaderData.Color = Float4::One; // Color.r is used by PS_DepthClear in Quad shader to clear depth + quadShaderCB = GPUDevice::Instance->QuadShader->GetCB(0); + context->UpdateCB(quadShaderCB, &quadShaderData); + } + + // Render depth to all shadow map tiles + for (auto& e : shadows.Lights) + { + const ShadowAtlasLight& atlasLight = e.Value; + for (int32 tileIndex = 0; tileIndex < atlasLight.ContextCount; tileIndex++) + { + const ShadowsAtlasTile* tile = atlasLight.Tiles[tileIndex]; + if (!tile) + break; + + // Set viewport for tile + context->SetViewportAndScissors(Viewport(tile->X, tile->Y, tile->Width, tile->Height)); + + if (!shadows.ClearShadowMapAtlas) + { + // Clear tile depth + context->BindCB(0, quadShaderCB); + context->SetState(_psDepthClear); + context->DrawFullscreenTriangle(); + } + + // Draw objects depth + auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + tileIndex]; + shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth); + shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + } } // Restore GPU context context->ResetSR(); context->ResetRenderTarget(); - const Viewport viewport = renderContext.Task->GetViewport(); - GPUTexture* depthBuffer = renderContext.Buffers->DepthBuffer; - GPUTextureView* depthBufferSRV = EnumHasAnyFlags(depthBuffer->Flags(), GPUTextureFlags::ReadOnlyDepthView) 
? depthBuffer->ViewReadOnlyDepth() : depthBuffer->View(); - context->SetViewportAndScissors(viewport); - context->BindSR(0, renderContext.Buffers->GBuffer0); - context->BindSR(1, renderContext.Buffers->GBuffer1); - context->BindSR(2, renderContext.Buffers->GBuffer2); - context->BindSR(3, depthBufferSRV); - context->BindSR(4, renderContext.Buffers->GBuffer3); - - // Setup shader data - Data sperLight; - GBufferPass::SetInputs(view, sperLight.GBuffer); - light.SetShaderData(sperLight.Light, true); - sperLight.LightShadow = shadowData.Constants; - Matrix::Transpose(view.ViewProjection(), sperLight.ViewProjectionMatrix); - sperLight.ContactShadowsDistance = light.ShadowsDistance; - sperLight.ContactShadowsLength = EnumHasAnyFlags(view.Flags, ViewFlags::ContactShadows) ? light.ContactShadowsLength : 0.0f; - - // Calculate world view projection matrix for the light sphere - Matrix world, wvp, matrix; - Matrix::Scaling(light.Radius * sphereModelScale, wvp); - Matrix::Translation(light.Position, matrix); - Matrix::Multiply(wvp, matrix, world); - Matrix::Multiply(world, view.ViewProjection(), wvp); - Matrix::Transpose(wvp, sperLight.WVP); - - // Render shadow in screen space - context->UpdateCB(shader->GetCB(0), &sperLight); - context->BindCB(0, shader->GetCB(0)); - context->BindCB(1, shader->GetCB(1)); - context->BindSR(5, _shadowMapCube->ViewArray()); - context->SetRenderTarget(shadowMask); - context->SetState(_psShadowPoint.Get(shadowQuality + (sperLight.ContactShadowsLength > ZeroTolerance ? 
4 : 0))); - _sphereModel->Render(context); - - // Cleanup - context->ResetRenderTarget(); - context->UnBindSR(5); - - // Render volumetric light with shadow - VolumetricFogPass::Instance()->RenderLight(renderContext, context, light, _shadowMapCube->ViewArray(), sperLight.LightShadow); -} - -void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderSpotLightData& light, GPUTextureView* shadowMask) -{ - if (light.ShadowDataIndex == -1) - return; - PROFILE_GPU_CPU("Shadow"); - GPUContext* context = GPUDevice::Instance->GetMainContext(); - RenderContext& renderContext = renderContextBatch.GetMainContext(); - ShadowData& shadowData = _shadowData[light.ShadowDataIndex]; - const float sphereModelScale = 3.0f; - auto& view = renderContext.View; - auto shader = _shader->GetShader(); - - // TODO: here we can use lower shadows quality based on light distance to view (LOD switching) and per light setting for max quality - int32 shadowQuality = maxShadowsQuality; - - // Set up GPU context and render view - const auto shadowMapsSizeCube = (float)_shadowMapsSizeCube; - context->SetViewportAndScissors(shadowMapsSizeCube, shadowMapsSizeCube); - - // Render depth to all 1 face of the cube map - constexpr int32 faceIndex = 0; - { - auto rt = _shadowMapCube->View(faceIndex); - context->ResetSR(); - context->SetRenderTarget(rt, static_cast(nullptr)); - context->ClearDepth(rt); - auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex]; - shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth); - shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); - } - - // Restore GPU context - context->ResetSR(); - context->ResetRenderTarget(); - const Viewport viewport = renderContext.Task->GetViewport(); - GPUTexture* depthBuffer = renderContext.Buffers->DepthBuffer; - GPUTextureView* depthBufferSRV = EnumHasAllFlags(depthBuffer->Flags(), 
GPUTextureFlags::ReadOnlyDepthView) ? depthBuffer->ViewReadOnlyDepth() : depthBuffer->View(); - context->SetViewportAndScissors(viewport); - context->BindSR(0, renderContext.Buffers->GBuffer0); - context->BindSR(1, renderContext.Buffers->GBuffer1); - context->BindSR(2, renderContext.Buffers->GBuffer2); - context->BindSR(3, depthBufferSRV); - context->BindSR(4, renderContext.Buffers->GBuffer3); - - // Setup shader data - Data sperLight; - GBufferPass::SetInputs(view, sperLight.GBuffer); - light.SetShaderData(sperLight.Light, true); - sperLight.LightShadow = shadowData.Constants; - Matrix::Transpose(view.ViewProjection(), sperLight.ViewProjectionMatrix); - sperLight.ContactShadowsDistance = light.ShadowsDistance; - sperLight.ContactShadowsLength = EnumHasAnyFlags(view.Flags, ViewFlags::ContactShadows) ? light.ContactShadowsLength : 0.0f; - - // Calculate world view projection matrix for the light sphere - Matrix world, wvp, matrix; - Matrix::Scaling(light.Radius * sphereModelScale, wvp); - Matrix::Translation(light.Position, matrix); - Matrix::Multiply(wvp, matrix, world); - Matrix::Multiply(world, view.ViewProjection(), wvp); - Matrix::Transpose(wvp, sperLight.WVP); - - // Render shadow in screen space - context->UpdateCB(shader->GetCB(0), &sperLight); - context->BindCB(0, shader->GetCB(0)); - context->BindCB(1, shader->GetCB(1)); - context->BindSR(5, _shadowMapCube->View(faceIndex)); - context->SetRenderTarget(shadowMask); - context->SetState(_psShadowSpot.Get(shadowQuality + (sperLight.ContactShadowsLength > ZeroTolerance ? 
4 : 0))); - _sphereModel->Render(context); - - // Cleanup - context->ResetRenderTarget(); - context->UnBindSR(5); - - // Render volumetric light with shadow - VolumetricFogPass::Instance()->RenderLight(renderContext, context, light, _shadowMapCube->View(faceIndex), sperLight.LightShadow); -} - -void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, int32 index, GPUTextureView* shadowMask) -{ - if (light.ShadowDataIndex == -1) - return; - PROFILE_GPU_CPU("Shadow"); - GPUContext* context = GPUDevice::Instance->GetMainContext(); - RenderContext& renderContext = renderContextBatch.GetMainContext(); - ShadowData& shadowData = _shadowData[light.ShadowDataIndex]; - const float shadowMapsSizeCSM = (float)_shadowMapsSizeCSM; - context->SetViewportAndScissors(shadowMapsSizeCSM, shadowMapsSizeCSM); - - // Render shadow map for each projection - for (int32 cascadeIndex = 0; cascadeIndex < shadowData.ContextCount; cascadeIndex++) - { - const auto rt = _shadowMapCSM->View(cascadeIndex); - context->ResetSR(); - context->SetRenderTarget(rt, static_cast(nullptr)); - context->ClearDepth(rt); - auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + cascadeIndex]; - shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth); - shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); - } - - // Restore GPU context - context->ResetSR(); - context->ResetRenderTarget(); - GPUTexture* depthBuffer = renderContext.Buffers->DepthBuffer; - GPUTextureView* depthBufferSRV = EnumHasAnyFlags(depthBuffer->Flags(), GPUTextureFlags::ReadOnlyDepthView) ? 
depthBuffer->ViewReadOnlyDepth() : depthBuffer->View(); context->SetViewportAndScissors(renderContext.Task->GetViewport()); - context->BindSR(0, renderContext.Buffers->GBuffer0); - context->BindSR(1, renderContext.Buffers->GBuffer1); - context->BindSR(2, renderContext.Buffers->GBuffer2); - context->BindSR(3, depthBufferSRV); - context->BindSR(4, renderContext.Buffers->GBuffer3); + shadows.ClearShadowMapAtlas = false; +} + +void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, RenderLightData& light, GPUTextureView* shadowMask) +{ + ASSERT(light.HasShadow); + PROFILE_GPU_CPU("Shadow"); + GPUContext* context = GPUDevice::Instance->GetMainContext(); + RenderContext& renderContext = renderContextBatch.GetMainContext(); + const ShadowsCustomBuffer& shadows = *renderContext.Buffers->FindCustomBuffer(TEXT("Shadows")); + ASSERT(shadows.LastFrameUsed == Engine::FrameCount); + const ShadowAtlasLight& atlasLight = shadows.Lights.At(light.ID); + const float sphereModelScale = 3.0f; + auto& view = renderContext.View; + auto shader = _shader->GetShader(); + const bool isLocalLight = light.IsPointLight || light.IsSpotLight; + + // TODO: here we can use lower shadows quality based on light distance to view (LOD switching) and per light setting for max quality + int32 shadowQuality = maxShadowsQuality; // Setup shader data Data sperLight; - auto& view = renderContext.View; GBufferPass::SetInputs(view, sperLight.GBuffer); - light.SetShaderData(sperLight.Light, true); - sperLight.LightShadow = shadowData.Constants; + if (light.IsDirectionalLight) + ((RenderDirectionalLightData&)light).SetShaderData(sperLight.Light, true); + else if (light.IsPointLight) + ((RenderPointLightData&)light).SetShaderData(sperLight.Light, true); + else if (light.IsSpotLight) + ((RenderSpotLightData&)light).SetShaderData(sperLight.Light, true); Matrix::Transpose(view.ViewProjection(), sperLight.ViewProjectionMatrix); sperLight.ContactShadowsDistance = light.ShadowsDistance; 
sperLight.ContactShadowsLength = EnumHasAnyFlags(view.Flags, ViewFlags::ContactShadows) ? light.ContactShadowsLength : 0.0f; + if (isLocalLight) + { + // Calculate world view projection matrix for the light sphere + Matrix world, wvp, matrix; + Matrix::Scaling(((RenderLocalLightData&)light).Radius * sphereModelScale, wvp); + Matrix::Translation(light.Position, matrix); + Matrix::Multiply(wvp, matrix, world); + Matrix::Multiply(world, view.ViewProjection(), wvp); + Matrix::Transpose(wvp, sperLight.WVP); + } + // TODO: reimplement cascades blending for directional lights (but with dithering) // Render shadow in screen space - auto shader = _shader->GetShader(); - context->UpdateCB(shader->GetCB(0), &sperLight); - context->BindCB(0, shader->GetCB(0)); - context->BindCB(1, shader->GetCB(1)); - context->BindSR(5, _shadowMapCSM->ViewArray()); + GPUConstantBuffer* cb0 = shader->GetCB(0); + context->UpdateCB(cb0, &sperLight); + context->BindCB(0, cb0); + context->BindSR(5, shadows.ShadowsBufferView); + context->BindSR(6, shadows.ShadowMapAtlas); + const int32 permutationIndex = shadowQuality + (sperLight.ContactShadowsLength > ZeroTolerance ? 4 : 0); context->SetRenderTarget(shadowMask); - context->SetState(_psShadowDir.Get(maxShadowsQuality + static_cast(Quality::MAX) * shadowData.BlendCSM + (sperLight.ContactShadowsLength > ZeroTolerance ? 
8 : 0))); - context->DrawFullscreenTriangle(); + if (light.IsPointLight) + { + context->SetState(_psShadowPoint.Get(permutationIndex)); + _sphereModel->Render(context); + } + else if (light.IsSpotLight) + { + context->SetState(_psShadowSpot.Get(permutationIndex)); + _sphereModel->Render(context); + } + else //if (light.IsDirectionalLight) + { + context->SetState(_psShadowDir.Get(permutationIndex)); + context->DrawFullscreenTriangle(); + } // Cleanup context->ResetRenderTarget(); context->UnBindSR(5); - - // Cache params for the volumetric fog or other effects that use dir light shadow sampling - LastDirLightIndex = index; - LastDirLightShadowMap = _shadowMapCSM->ViewArray(); - LastDirLight = sperLight.LightShadow; + context->UnBindSR(6); +} + +void ShadowsPass::GetShadowAtlas(const RenderBuffers* renderBuffers, GPUTexture*& shadowMapAtlas, GPUBufferView*& shadowsBuffer) +{ + const ShadowsCustomBuffer* shadowsPtr = renderBuffers->FindCustomBuffer(TEXT("Shadows")); + if (shadowsPtr && shadowsPtr->ShadowMapAtlas && shadowsPtr->LastFrameUsed == Engine::FrameCount) + { + shadowMapAtlas = shadowsPtr->ShadowMapAtlas; + shadowsBuffer = shadowsPtr->ShadowsBufferView; + } + else + { + shadowMapAtlas = nullptr; + shadowsBuffer = nullptr; + } } diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index fd96900ca..17d21b6d6 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -9,121 +9,31 @@ #include "Engine/Content/Assets/Model.h" #include "Engine/Graphics/RenderTask.h" -/// -/// Pixel format for fullscreen render target used for shadows calculations -/// -#define SHADOWS_PASS_SS_RR_FORMAT PixelFormat::R11G11B10_Float - -template -bool CanRenderShadow(const RenderView& view, const T& light) -{ - bool result = false; - switch ((ShadowsCastingMode)light.ShadowsMode) - { - case ShadowsCastingMode::StaticOnly: - result = view.IsOfflinePass; - break; - case ShadowsCastingMode::DynamicOnly: - result = 
!view.IsOfflinePass; - break; - case ShadowsCastingMode::All: - result = true; - break; - default: - break; - } - return result && light.ShadowsStrength > ZeroTolerance; -} - /// /// Shadows rendering service. /// class ShadowsPass : public RendererPass { private: - - struct ShadowData - { - int32 ContextIndex; - int32 ContextCount; - bool BlendCSM; - ShaderLightShadowData Constants; - }; - - // Shader stuff AssetReference _shader; - GPUPipelineStatePermutationsPs(Quality::MAX) * 2 * 2> _psShadowDir; + AssetReference _sphereModel; + GPUPipelineState* _psDepthClear = nullptr; + GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowDir; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowPoint; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowSpot; - PixelFormat _shadowMapFormat; - - // Shadow maps stuff - int32 _shadowMapsSizeCSM; - int32 _shadowMapsSizeCube; - GPUTexture* _shadowMapCSM; - GPUTexture* _shadowMapCube; - Quality _currentShadowMapsQuality; - - // Shadow map rendering stuff - AssetReference _sphereModel; - Array _shadowData; - - // Cached state for the current frame rendering (setup via Prepare) - int32 maxShadowsQuality; + PixelFormat _shadowMapFormat; // Cached on initialization + int32 maxShadowsQuality = 0; // Cached state for the current frame rendering (setup via Prepare) public: - - /// - /// Init - /// - ShadowsPass(); - -public: - - /// - /// Gets current GPU memory usage by the shadow maps - /// - /// GPU memory used in bytes - uint64 GetShadowMapsMemoryUsage() const; - -public: - - // TODO: use full scene shadow map atlas with dynamic slots allocation - int32 LastDirLightIndex = -1; - GPUTextureView* LastDirLightShadowMap = nullptr; - ShaderLightShadowData LastDirLight; - -public: - void Prepare(); - /// /// Setups the shadows rendering for batched scene drawing. Checks which lights will cast a shadow. 
/// void SetupShadows(RenderContext& renderContext, RenderContextBatch& renderContextBatch); /// - /// Determines whether can render shadow for the specified light. + /// Renders the shadow maps for all lights (into atlas). /// - /// The rendering context. - /// The light. - /// true if can render shadow for the specified light; otherwise, false. - bool CanRenderShadow(const RenderContext& renderContext, const RenderPointLightData& light); - - /// - /// Determines whether can render shadow for the specified light. - /// - /// The rendering context. - /// The light. - /// true if can render shadow for the specified light; otherwise, false. - bool CanRenderShadow(const RenderContext& renderContext, const RenderSpotLightData& light); - - /// - /// Determines whether can render shadow for the specified light. - /// - /// The rendering context. - /// The light. - /// true if can render shadow for the specified light; otherwise, false. - bool CanRenderShadow(const RenderContext& renderContext, const RenderDirectionalLightData& light); + void RenderShadowMaps(RenderContextBatch& renderContextBatch); /// /// Renders the shadow mask for the given light. @@ -131,32 +41,23 @@ public: /// The rendering context batch. /// The light. /// The shadow mask (output). - void RenderShadow(RenderContextBatch& renderContextBatch, RenderPointLightData& light, GPUTextureView* shadowMask); + void RenderShadowMask(RenderContextBatch& renderContextBatch, RenderLightData& light, GPUTextureView* shadowMask); /// - /// Renders the shadow mask for the given light. + /// Gets the shadow atlas texture and shadows buffer for shadow projection in shaders. /// - /// The rendering context batch. - /// The light. - /// The shadow mask (output). - void RenderShadow(RenderContextBatch& renderContextBatch, RenderSpotLightData& light, GPUTextureView* shadowMask); - - /// - /// Renders the shadow mask for the given light. - /// - /// The rendering context batch. - /// The light. - /// The light index. 
- /// The shadow mask (output). - void RenderShadow(RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, int32 index, GPUTextureView* shadowMask); + /// The render buffers that store frame context. + /// The output shadow map atlas texture or null if unused. + /// The output shadows buffer or null if unused. + static void GetShadowAtlas(const RenderBuffers* renderBuffers, GPUTexture*& shadowMapAtlas, GPUBufferView*& shadowsBuffer); private: - - void updateShadowMapSize(); void SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, struct ShadowAtlasLight& atlasLight); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight); + void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight); #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) @@ -169,14 +70,12 @@ private: #endif public: - // [RendererPass] String ToString() const override; bool Init() override; void Dispose() override; protected: - // [RendererPass] bool setupResources() override; }; diff --git 
a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index adbe4c837..fe3b2c2e8 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -6,12 +6,12 @@ #include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" -#include "Engine/Graphics/GPULimits.h" #include "Engine/Graphics/RenderTargetPool.h" +#include "Engine/Graphics/GPULimits.h" +#include "Engine/Graphics/GPUContext.h" #include "Engine/Content/Assets/CubeTexture.h" #include "Engine/Content/Content.h" #include "Engine/Engine/Engine.h" -#include "Engine/Graphics/GPUContext.h" // Must match shader source int32 VolumetricFogGridInjectionGroupSize = 4; @@ -143,38 +143,30 @@ bool VolumetricFogPass::Init(RenderContext& renderContext, GPUContext* context, switch (quality) { case Quality::Low: - { _cache.GridPixelSize = 16; _cache.GridSizeZ = 64; _cache.FogJitter = false; _cache.MissedHistorySamplesCount = 1; break; - } case Quality::Medium: - { _cache.GridPixelSize = 16; _cache.GridSizeZ = 64; _cache.FogJitter = true; _cache.MissedHistorySamplesCount = 4; break; - } case Quality::High: - { _cache.GridPixelSize = 16; _cache.GridSizeZ = 128; _cache.FogJitter = true; _cache.MissedHistorySamplesCount = 4; break; - } case Quality::Ultra: - { _cache.GridPixelSize = 8; _cache.GridSizeZ = 256; _cache.FogJitter = true; _cache.MissedHistorySamplesCount = 8; break; } - } // Prepare const int32 width = renderContext.Buffers->GetWidth(); @@ -202,7 +194,6 @@ bool VolumetricFogPass::Init(RenderContext& renderContext, GPUContext* context, _cache.Data.VolumetricFogMaxDistance = options.Distance; _cache.Data.MissedHistorySamplesCount = Math::Clamp(_cache.MissedHistorySamplesCount, 1, (int32)ARRAY_COUNT(_cache.Data.FrameJitterOffsets)); Matrix::Transpose(view.PrevViewProjection, _cache.Data.PrevWorldToClip); - _cache.Data.DirectionalLightShadow.NumCascades = 0; 
_cache.Data.SkyLight.VolumetricScatteringIntensity = 0; // Fill frame jitter history @@ -262,83 +253,6 @@ GPUTextureView* VolumetricFogPass::GetLocalShadowedLightScattering(RenderContext return renderContext.Buffers->LocalShadowedLightScattering->ViewVolume(); } -template -void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUContext* context, T& light, ShaderLightShadowData& shadow) -{ - // Prepare - VolumetricFogOptions options; - if (Init(renderContext, context, options)) - return; - auto& view = renderContext.View; - - // Calculate light volume bounds in camera frustum depth range (min and max) - const Float3 center = light.Position; - const float radius = light.Radius; - Float3 viewSpaceLightBoundsOrigin = Float3::Transform(center, view.View); - float furthestSliceIndexUnclamped = ComputeZSliceFromDepth(viewSpaceLightBoundsOrigin.Z + radius, options, _cache.GridSizeZ); - float closestSliceIndexUnclamped = ComputeZSliceFromDepth(viewSpaceLightBoundsOrigin.Z - radius, options, _cache.GridSizeZ); - int32 volumeZBoundsMin = (int32)Math::Clamp(closestSliceIndexUnclamped, 0.0f, _cache.GridSize.Z - 1.0f); - int32 volumeZBoundsMax = (int32)Math::Clamp(furthestSliceIndexUnclamped, 0.0f, _cache.GridSize.Z - 1.0f); - - // Cull light - if ((view.Position - center).LengthSquared() >= (options.Distance + radius) * (options.Distance + radius) || volumeZBoundsMin >= volumeZBoundsMax) - return; - - PROFILE_GPU_CPU("Volumetric Fog Light"); - - // Allocate temporary buffer for light scattering injection - auto localShadowedLightScattering = GetLocalShadowedLightScattering(renderContext, context, options); - - // Prepare - PerLight perLight; - auto cb0 = _shader->GetShader()->GetCB(0); - auto cb1 = _shader->GetShader()->GetCB(1); - - // Bind the output - context->SetRenderTarget(localShadowedLightScattering); - context->SetViewportAndScissors(_cache.Data.GridSize.X, _cache.Data.GridSize.Y); - - // Setup data - perLight.SliceToDepth.X = _cache.Data.GridSize.Z; 
- perLight.SliceToDepth.Y = _cache.Data.VolumetricFogMaxDistance; - perLight.MinZ = volumeZBoundsMin; - perLight.LocalLightScatteringIntensity = light.VolumetricScatteringIntensity; - perLight.ViewSpaceBoundingSphere = Float4(viewSpaceLightBoundsOrigin, radius); - Matrix::Transpose(view.Projection, perLight.ViewToVolumeClip); - light.SetShaderData(perLight.LocalLight, true); - perLight.LocalLightShadow = shadow; - - // Upload data - context->UpdateCB(cb1, &perLight); - context->BindCB(0, cb0); - context->BindCB(1, cb1); - - // Ensure to have valid buffers created - if (_vbCircleRasterize == nullptr || _ibCircleRasterize == nullptr) - InitCircleBuffer(); - - // Call rendering to the volume - const int32 psIndex = 1; - context->SetState(_psInjectLight.Get(psIndex)); - const int32 instanceCount = volumeZBoundsMax - volumeZBoundsMin; - const int32 indexCount = _ibCircleRasterize->GetElementsCount(); - ASSERT(instanceCount > 0); - context->BindVB(ToSpan(&_vbCircleRasterize, 1)); - context->BindIB(_ibCircleRasterize); - context->DrawIndexedInstanced(indexCount, instanceCount, 0); - - // Cleanup - context->UnBindCB(0); - context->UnBindCB(1); - auto viewport = renderContext.Task->GetViewport(); - context->SetViewportAndScissors(viewport); - context->ResetRenderTarget(); - context->FlushState(); - - // Mark as rendered - light.RenderedVolumetricFog = 1; -} - template void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUContext* context, RenderView& view, VolumetricFogOptions& options, T& light, PerLight& perLight, GPUConstantBuffer* cb1) { @@ -353,106 +267,67 @@ void VolumetricFogPass::RenderRadialLight(RenderContext& renderContext, GPUConte const float closestSliceIndexUnclamped = ComputeZSliceFromDepth(viewSpaceLightBoundsOrigin.Z - radius, options, cache.GridSizeZ); const int32 volumeZBoundsMin = (int32)Math::Clamp(closestSliceIndexUnclamped, 0.0f, cache.GridSize.Z - 1.0f); const int32 volumeZBoundsMax = 
(int32)Math::Clamp(furthestSliceIndexUnclamped, 0.0f, cache.GridSize.Z - 1.0f); - - if (volumeZBoundsMin < volumeZBoundsMax) - { - // TODO: use full scene shadows atlas and render point/spot lights with shadow into a fog volume - bool withShadow = false; - - // Setup data - perLight.SliceToDepth.X = cache.Data.GridSize.Z; - perLight.SliceToDepth.Y = cache.Data.VolumetricFogMaxDistance; - perLight.MinZ = volumeZBoundsMin; - perLight.LocalLightScatteringIntensity = light.VolumetricScatteringIntensity; - perLight.ViewSpaceBoundingSphere = Float4(viewSpaceLightBoundsOrigin, radius); - Matrix::Transpose(renderContext.View.Projection, perLight.ViewToVolumeClip); - light.SetShaderData(perLight.LocalLight, withShadow); - - // Upload data - context->UpdateCB(cb1, &perLight); - context->BindCB(1, cb1); - - // Ensure to have valid buffers created - if (_vbCircleRasterize == nullptr || _ibCircleRasterize == nullptr) - InitCircleBuffer(); - - // Call rendering to the volume - const int32 psIndex = withShadow ? 
1 : 0; - context->SetState(_psInjectLight.Get(psIndex)); - const int32 instanceCount = volumeZBoundsMax - volumeZBoundsMin; - const int32 indexCount = _ibCircleRasterize->GetElementsCount(); - context->BindVB(ToSpan(&_vbCircleRasterize, 1)); - context->BindIB(_ibCircleRasterize); - context->DrawIndexedInstanced(indexCount, instanceCount, 0); - } -} - -void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow) -{ - // Skip lights with no volumetric light influence or not casting volumetric shadow - if (light.VolumetricScatteringIntensity <= ZeroTolerance || !light.CastVolumetricShadow) + if (volumeZBoundsMin >= volumeZBoundsMax) return; - ASSERT(shadowMap); - context->BindSR(5, shadowMap); + // Setup data + perLight.SliceToDepth.X = cache.Data.GridSize.Z; + perLight.SliceToDepth.Y = cache.Data.VolumetricFogMaxDistance; + perLight.MinZ = volumeZBoundsMin; + perLight.LocalLightScatteringIntensity = light.VolumetricScatteringIntensity; + perLight.ViewSpaceBoundingSphere = Float4(viewSpaceLightBoundsOrigin, radius); + Matrix::Transpose(renderContext.View.Projection, perLight.ViewToVolumeClip); + const bool withShadow = light.CastVolumetricShadow && light.HasShadow; + light.SetShaderData(perLight.LocalLight, withShadow); - RenderRadialLight(renderContext, context, light, shadow); + // Upload data + context->UpdateCB(cb1, &perLight); + context->BindCB(1, cb1); - context->UnBindSR(5); -} + // Ensure to have valid buffers created + if (_vbCircleRasterize == nullptr || _ibCircleRasterize == nullptr) + InitCircleBuffer(); -void VolumetricFogPass::RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow) -{ - // Skip lights with no volumetric light influence or not casting volumetric shadow - if (light.VolumetricScatteringIntensity <= ZeroTolerance || 
!light.CastVolumetricShadow) - return; - ASSERT(shadowMap); - - context->BindSR(6, shadowMap); - - RenderRadialLight(renderContext, context, light, shadow); - - context->UnBindSR(6); + // Call rendering to the volume + const int32 psIndex = withShadow ? 1 : 0; + context->SetState(_psInjectLight.Get(psIndex)); + const int32 instanceCount = volumeZBoundsMax - volumeZBoundsMin; + const int32 indexCount = _ibCircleRasterize->GetElementsCount(); + context->BindVB(ToSpan(&_vbCircleRasterize, 1)); + context->BindIB(_ibCircleRasterize); + context->DrawIndexedInstanced(indexCount, instanceCount, 0); } void VolumetricFogPass::Render(RenderContext& renderContext) { - // Prepare VolumetricFogOptions options; auto context = GPUDevice::Instance->GetMainContext(); if (Init(renderContext, context, options)) return; auto& view = renderContext.View; auto& cache = _cache; - PROFILE_GPU_CPU("Volumetric Fog"); // TODO: test exponential depth distribution (should give better quality near the camera) // TODO: use tiled light culling and render unshadowed lights in single pass + // Try to get shadows atlas + GPUTexture* shadowMap; + GPUBufferView* shadowsBuffer; + ShadowsPass::GetShadowAtlas(renderContext.Buffers, shadowMap, shadowsBuffer); + // Init directional light data - GPUTextureView* dirLightShadowMap = nullptr; + Platform::MemoryClear(&_cache.Data.DirectionalLight, sizeof(_cache.Data.DirectionalLight)); if (renderContext.List->DirectionalLights.HasItems()) { const int32 dirLightIndex = (int32)renderContext.List->DirectionalLights.Count() - 1; const auto& dirLight = renderContext.List->DirectionalLights[dirLightIndex]; const float brightness = dirLight.VolumetricScatteringIntensity; - if (brightness > ZeroTolerance) { - const auto shadowPass = ShadowsPass::Instance(); - const bool useShadow = dirLight.CastVolumetricShadow && shadowPass->LastDirLightIndex == dirLightIndex; + const bool useShadow = shadowMap && dirLight.CastVolumetricShadow && dirLight.HasShadow; 
dirLight.SetShaderData(_cache.Data.DirectionalLight, useShadow); _cache.Data.DirectionalLight.Color *= brightness; - if (useShadow) - { - _cache.Data.DirectionalLightShadow = shadowPass->LastDirLight; - dirLightShadowMap = shadowPass->LastDirLightShadowMap; - } - else - { - _cache.Data.DirectionalLightShadow.NumCascades = 0; - } } } @@ -475,6 +350,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) // Init sky light data GPUTexture* skyLightImage = nullptr; + Platform::MemoryClear(&_cache.Data.SkyLight, sizeof(_cache.Data.SkyLight)); if (renderContext.List->SkyLights.HasItems() && !useDDGI) { const auto& skyLight = renderContext.List->SkyLights.Last(); @@ -510,13 +386,10 @@ void VolumetricFogPass::Render(RenderContext& renderContext) // Initialize fog volume properties { PROFILE_GPU("Initialize"); - context->ResetRenderTarget(); context->BindUA(0, vBufferA->ViewVolume()); context->BindUA(1, vBufferB->ViewVolume()); - context->Dispatch(_csInitialize, groupCountX, groupCountY, groupCountZ); - context->ResetUA(); } @@ -557,7 +430,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) const int32 volumeZBoundsMax = (int32)Math::Clamp(furthestSliceIndexUnclamped, 0.0f, cache.GridSize.Z - 1.0f); // Culling - if ((view.Position - center).LengthSquared() >= (options.Distance + radius) * (options.Distance + radius) || volumeZBoundsMin >= volumeZBoundsMax) + if ((view.Position - center).LengthSquared() >= Math::Square(options.Distance + radius) || volumeZBoundsMin >= volumeZBoundsMax) continue; // Setup material shader data @@ -598,25 +471,17 @@ void VolumetricFogPass::Render(RenderContext& renderContext) Array> spotLights; for (int32 i = 0; i < renderContext.List->PointLights.Count(); i++) { - const auto& light = renderContext.List->PointLights[i]; - if (light.VolumetricScatteringIntensity > ZeroTolerance && !light.RenderedVolumetricFog) - { - if ((view.Position - light.Position).LengthSquared() < (options.Distance + light.Radius) * 
(options.Distance + light.Radius)) - { - pointLights.Add(&light); - } - } + const auto& light = renderContext.List->PointLights.Get()[i]; + if (light.VolumetricScatteringIntensity > ZeroTolerance && + (view.Position - light.Position).LengthSquared() < Math::Square(options.Distance + light.Radius)) + pointLights.Add(&light); } for (int32 i = 0; i < renderContext.List->SpotLights.Count(); i++) { - const auto& light = renderContext.List->SpotLights[i]; - if (light.VolumetricScatteringIntensity > ZeroTolerance && !light.RenderedVolumetricFog) - { - if ((view.Position - light.Position).LengthSquared() < (options.Distance + light.Radius) * (options.Distance + light.Radius)) - { - spotLights.Add(&light); - } - } + const auto& light = renderContext.List->SpotLights.Get()[i]; + if (light.VolumetricScatteringIntensity > ZeroTolerance && + (view.Position - light.Position).LengthSquared() < Math::Square(options.Distance + light.Radius)) + spotLights.Add(&light); } // Skip if no lights to render @@ -638,6 +503,8 @@ void VolumetricFogPass::Render(RenderContext& renderContext) context->SetViewportAndScissors((float)volumeDesc.Width, (float)volumeDesc.Height); // Render them to the volume + context->BindSR(0, shadowMap); + context->BindSR(1, shadowsBuffer); for (int32 i = 0; i < pointLights.Count(); i++) RenderRadialLight(renderContext, context, view, options, *pointLights[i], perLight, cb1); for (int32 i = 0; i < spotLights.Count(); i++) @@ -666,19 +533,19 @@ void VolumetricFogPass::Render(RenderContext& renderContext) context->BindSR(1, vBufferB->ViewVolume()); context->BindSR(2, lightScatteringHistory ? 
lightScatteringHistory->ViewVolume() : nullptr); context->BindSR(3, localShadowedLightScattering); - context->BindSR(4, dirLightShadowMap); - + context->BindSR(4, shadowMap); + context->BindSR(5, shadowsBuffer); int32 csIndex; if (useDDGI) { - context->BindSR(5, bindingDataDDGI.ProbesData); - context->BindSR(6, bindingDataDDGI.ProbesDistance); - context->BindSR(7, bindingDataDDGI.ProbesIrradiance); + context->BindSR(6, bindingDataDDGI.ProbesData); + context->BindSR(7, bindingDataDDGI.ProbesDistance); + context->BindSR(8, bindingDataDDGI.ProbesIrradiance); csIndex = 1; } else { - context->BindSR(5, skyLightImage); + context->BindSR(6, skyLightImage); csIndex = 0; } context->Dispatch(_csLightScattering.Get(csIndex), groupCountX, groupCountY, groupCountZ); diff --git a/Source/Engine/Renderer/VolumetricFogPass.h b/Source/Engine/Renderer/VolumetricFogPass.h index cbec860fc..47757cf47 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.h +++ b/Source/Engine/Renderer/VolumetricFogPass.h @@ -32,7 +32,7 @@ private: Float3 MultiplyColor; float VolumetricScatteringIntensity; Float3 AdditiveColor; - float Dummt0; + float Dummy0; }); PACK_STRUCT(struct Data { @@ -63,7 +63,6 @@ private: Float4 FrameJitterOffsets[8]; ShaderLightData DirectionalLight; - ShaderLightShadowData DirectionalLightShadow; SkyLightData SkyLight; DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; }); @@ -77,7 +76,6 @@ private: Matrix ViewToVolumeClip; ShaderLightData LocalLight; - ShaderLightShadowData LocalLightShadow; }); // Shader stuff @@ -147,27 +145,6 @@ public: VolumetricFogPass(); public: - - /// - /// Renders the light to the volumetric fog light scattering volume texture. Called by the light pass after shadow map rendering. Used by the shadows casting lights. - /// - /// The rendering context. - /// The GPU commands context. - /// The light. - /// The shadow map. - /// The light shadow data. 
- void RenderLight(RenderContext& renderContext, GPUContext* context, RenderPointLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow); - - /// - /// Renders the light to the volumetric fog light scattering volume texture. Called by the light pass after shadow map rendering. Used by the shadows casting lights. - /// - /// The rendering context. - /// The GPU commands context. - /// The light. - /// The shadow map. - /// The light shadow data. - void RenderLight(RenderContext& renderContext, GPUContext* context, RenderSpotLightData& light, GPUTextureView* shadowMap, ShaderLightShadowData& shadow); - /// /// Renders the volumetric fog (generates integrated light scattering 3D texture). Does nothing if feature is disabled or not supported. /// @@ -180,8 +157,6 @@ private: GPUTextureView* GetLocalShadowedLightScattering(RenderContext& renderContext, GPUContext* context, VolumetricFogOptions& options) const; void InitCircleBuffer(); template - void RenderRadialLight(RenderContext& renderContext, GPUContext* context, T& light, ShaderLightShadowData& shadow); - template void RenderRadialLight(RenderContext& renderContext, GPUContext* context, RenderView& view, VolumetricFogOptions& options, T& light, PerLight& perLight, GPUConstantBuffer* cb1); #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index 4765614ae..1af8880bc 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -159,7 +159,7 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target float toLightDst = GLOBAL_SDF_WORLD_SIZE; #endif float4 shadowMask = 1; - if (Light.CastShadows > 0) + if (Light.ShadowsBufferAddress != 0) { float NoL = dot(gBuffer.Normal, L); float shadowBias = 10.0f; diff --git a/Source/Shaders/Lighting.hlsl b/Source/Shaders/Lighting.hlsl index 42512752e..736d6a29c 100644 --- a/Source/Shaders/Lighting.hlsl 
+++ b/Source/Shaders/Lighting.hlsl @@ -5,15 +5,15 @@ #include "./Flax/LightingCommon.hlsl" -ShadowData GetShadow(LightData lightData, GBufferSample gBuffer, float4 shadowMask) +ShadowSample GetShadow(LightData lightData, GBufferSample gBuffer, float4 shadowMask) { - ShadowData shadow; + ShadowSample shadow; shadow.SurfaceShadow = gBuffer.AO * shadowMask.r; shadow.TransmissionShadow = shadowMask.g; return shadow; } -LightingData StandardShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) +LightSample StandardShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) { float3 diffuseColor = GetDiffuseColor(gBuffer); float3 H = normalize(V + L); @@ -22,7 +22,7 @@ LightingData StandardShading(GBufferSample gBuffer, float energy, float3 L, floa float NoH = saturate(dot(N, H)); float VoH = saturate(dot(V, H)); - LightingData lighting; + LightSample lighting; lighting.Diffuse = Diffuse_Lambert(diffuseColor); #if LIGHTING_NO_SPECULAR lighting.Specular = 0; @@ -37,9 +37,9 @@ LightingData StandardShading(GBufferSample gBuffer, float energy, float3 L, floa return lighting; } -LightingData SubsurfaceShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) +LightSample SubsurfaceShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) { - LightingData lighting = StandardShading(gBuffer, energy, L, V, N); + LightSample lighting = StandardShading(gBuffer, energy, L, V, N); #if defined(USE_GBUFFER_CUSTOM_DATA) // Fake effect of the light going through the material float3 subsurfaceColor = gBuffer.CustomData.rgb; @@ -53,9 +53,9 @@ LightingData SubsurfaceShading(GBufferSample gBuffer, float energy, float3 L, fl return lighting; } -LightingData FoliageShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) +LightSample FoliageShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) { - LightingData lighting = StandardShading(gBuffer, energy, L, V, N); + LightSample lighting = 
StandardShading(gBuffer, energy, L, V, N); #if defined(USE_GBUFFER_CUSTOM_DATA) // Fake effect of the light going through the thin foliage float3 subsurfaceColor = gBuffer.CustomData.rgb; @@ -67,7 +67,7 @@ LightingData FoliageShading(GBufferSample gBuffer, float energy, float3 L, float return lighting; } -LightingData SurfaceShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) +LightSample SurfaceShading(GBufferSample gBuffer, float energy, float3 L, float3 V, half3 N) { switch (gBuffer.ShadingModel) { @@ -79,7 +79,7 @@ LightingData SurfaceShading(GBufferSample gBuffer, float energy, float3 L, float case SHADING_MODEL_FOLIAGE: return FoliageShading(gBuffer, energy, L, V, N); default: - return (LightingData)0; + return (LightSample)0; } } @@ -121,7 +121,7 @@ float4 GetLighting(float3 viewPos, LightData lightData, GBufferSample gBuffer, f float3 toLight = lightData.Direction; // Calculate shadow - ShadowData shadow = GetShadow(lightData, gBuffer, shadowMask); + ShadowSample shadow = GetShadow(lightData, gBuffer, shadowMask); // Calculate attenuation if (isRadial) @@ -147,7 +147,7 @@ float4 GetLighting(float3 viewPos, LightData lightData, GBufferSample gBuffer, f float energy = AreaLightSpecular(lightData, gBuffer.Roughness, toLight, L, V, N); // Calculate direct lighting - LightingData lighting = SurfaceShading(gBuffer, energy, L, V, N); + LightSample lighting = SurfaceShading(gBuffer, energy, L, V, N); // Calculate final light color float3 surfaceLight = (lighting.Diffuse + lighting.Specular) * shadow.SurfaceShadow; diff --git a/Source/Shaders/LightingCommon.hlsl b/Source/Shaders/LightingCommon.hlsl index 239f2a076..7011acb5b 100644 --- a/Source/Shaders/LightingCommon.hlsl +++ b/Source/Shaders/LightingCommon.hlsl @@ -27,26 +27,26 @@ struct LightData float MinRoughness; float3 Position; - float CastShadows; + uint ShadowsBufferAddress; float3 Direction; float Radius; float FalloffExponent; float InverseSquared; - float Dummy0; float RadiusInv; + 
float Dummy0; }; -// Structure that contains information about shadow -struct ShadowData +// Structure that contains information about shadow sampling result +struct ShadowSample { float SurfaceShadow; float TransmissionShadow; }; // Structure that contains information about direct lighting calculations result -struct LightingData +struct LightSample { float3 Diffuse; float3 Specular; diff --git a/Source/Shaders/Lights.shader b/Source/Shaders/Lights.shader index 45d2a027f..eb11dc8a8 100644 --- a/Source/Shaders/Lights.shader +++ b/Source/Shaders/Lights.shader @@ -61,7 +61,7 @@ void PS_Directional(Quad_VS2PS input, out float4 output : SV_Target0) // Sample shadow mask float4 shadowMask = 1; BRANCH - if (Light.CastShadows > 0) + if (Light.ShadowsBufferAddress != 0) { shadowMask = SAMPLE_RT(Shadow, input.TexCoord); } @@ -98,7 +98,7 @@ void PS_Point(Model_VS2PS input, out float4 output : SV_Target0) // Sample shadow mask float4 shadowMask = 1; BRANCH - if (Light.CastShadows > 0) + if (Light.ShadowsBufferAddress != 0) { shadowMask = SAMPLE_RT(Shadow, uv); } @@ -140,7 +140,7 @@ void PS_Spot(Model_VS2PS input, out float4 output : SV_Target0) // Sample shadow mask float4 shadowMask = 1; BRANCH - if (Light.CastShadows > 0) + if (Light.ShadowsBufferAddress != 0) { shadowMask = SAMPLE_RT(Shadow, uv); } diff --git a/Source/Shaders/PCFKernels.hlsl b/Source/Shaders/PCFKernels.hlsl deleted file mode 100644 index 6629f9497..000000000 --- a/Source/Shaders/PCFKernels.hlsl +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
- -#ifndef __PCF_KERNELS__ -#define __PCF_KERNELS__ - -// Cascades Shadow Mapping - -#if FilterSizeCSM == 2 - -#elif FilterSizeCSM == 3 - -static const float CSMFilterWeightsSum = 7; -static const float CSMFilterWeights[3][3] = -{ - { 0.5,1.0,0.5 }, - { 1.0,1.0,1.0 }, - { 0.5,1.0,0.5 } -}; - -#elif FilterSizeCSM == 5 - -static const float CSMFilterWeightsSum = 17; -static const float CSMFilterWeights[5][5] = -{ - { 0.0,0.5,1.0,0.5,0.0 }, - { 0.5,1.0,1.0,1.0,0.5 }, - { 1.0,1.0,1.0,1.0,1.0 }, - { 0.5,1.0,1.0,1.0,0.5 }, - { 0.0,0.5,1.0,0.5,0.0 } -}; - -#elif FilterSizeCSM == 7 - -static const float CSMFilterWeightsSum = 33; -static const float CSMFilterWeights[7][7] = -{ - { 0.0,0.0,0.5,1.0,0.5,0.0,0.0 }, - { 0.0,1.0,1.0,1.0,1.0,1.0,0.0 }, - { 0.5,1.0,1.0,1.0,1.0,1.0,0.5 }, - { 1.0,1.0,1.0,1.0,1.0,1.0,1.0 }, - { 0.5,1.0,1.0,1.0,1.0,1.0,0.5 }, - { 0.0,1.0,1.0,1.0,1.0,1.0,0.0 }, - { 0.0,0.0,0.5,1.0,0.5,0.0,0.0 } -}; - -#elif FilterSizeCSM == 9 - -static const float CSMFilterWeightsSum = 53; -static const float CSMFilterWeights[9][9] = -{ - { 0.0,0.0,0.0,0.5,1.0,0.5,0.0,0.0,0.0 }, - { 0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0 }, - { 0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0 }, - { 0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5 }, - { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 }, - { 0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5 }, - { 0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0 }, - { 0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0 }, - { 0.0,0.0,0.0,0.5,1.0,0.5,0.0,0.0,0.0 } -}; - -#endif - -// Cube Map Shadows - -#if FilterSizeCube == 5 - -// 5 random points in disc with radius 2.5 -static const float2 PCFDiscSamples[5] = -{ - float2(0.000000, 2.500000), - float2(2.377641, 0.772542), - float2(1.469463, -2.022543), - float2(-1.469463, -2.022542), - float2(-2.377641, 0.772543), -}; - -#elif FilterSizeCube == 12 - -// 12 random points in disc with radius 2.5 -static const float2 PCFDiscSamples[12] = -{ - float2(0.000000, 2.500000), - float2(1.767767, 1.767767), - float2(2.500000, -0.000000), - float2(1.767767, -1.767767), - 
float2(-0.000000, -2.500000), - float2(-1.767767, -1.767767), - float2(-2.500000, 0.000000), - float2(-1.767766, 1.767768), - float2(-1.006119, -0.396207), - float2(1.000015, 0.427335), - float2(0.416807, -1.006577), - float2(-0.408872, 1.024430), -}; - -#elif FilterSizeCube == 29 - -// 29 random points in disc with radius 2.5 -static const float2 PCFDiscSamples[29] = -{ - float2(0.000000, 2.500000), - float2(1.016842, 2.283864), - float2(1.857862, 1.672826), - float2(2.377641, 0.772542), - float2(2.486305, -0.261321), - float2(2.165063, -1.250000), - float2(1.469463, -2.022543), - float2(0.519779, -2.445369), - float2(-0.519779, -2.445369), - float2(-1.469463, -2.022542), - float2(-2.165064, -1.250000), - float2(-2.486305, -0.261321), - float2(-2.377641, 0.772543), - float2(-1.857862, 1.672827), - float2(-1.016841, 2.283864), - float2(0.091021, -0.642186), - float2(0.698035, 0.100940), - float2(0.959731, -1.169393), - float2(-1.053880, 1.180380), - float2(-1.479156, -0.606937), - float2(-0.839488, -1.320002), - float2(1.438566, 0.705359), - float2(0.067064, -1.605197), - float2(0.728706, 1.344722), - float2(1.521424, -0.380184), - float2(-0.199515, 1.590091), - float2(-1.524323, 0.364010), - float2(-0.692694, -0.086749), - float2(-0.082476, 0.654088), -}; - -#endif - -#endif diff --git a/Source/Shaders/Quad.shader b/Source/Shaders/Quad.shader index 9bebc93c1..da6fce92c 100644 --- a/Source/Shaders/Quad.shader +++ b/Source/Shaders/Quad.shader @@ -56,8 +56,6 @@ float4 PS_CopyLinear(Quad_VS2PS input) : SV_Target #endif -#ifdef _PS_Clear - // Pixel Shader for clearing a render target with a solid color META_PS(true, FEATURE_LEVEL_ES2) float4 PS_Clear(Quad_VS2PS input) : SV_Target @@ -65,4 +63,9 @@ float4 PS_Clear(Quad_VS2PS input) : SV_Target return Color; } -#endif +// Pixel Shader for clearing depth buffer +META_PS(true, FEATURE_LEVEL_ES2) +float PS_DepthClear(Quad_VS2PS input) : SV_Depth +{ + return Color.r; +} diff --git a/Source/Shaders/Shadows.shader 
b/Source/Shaders/Shadows.shader index 9db800f33..aff46ac8e 100644 --- a/Source/Shaders/Shadows.shader +++ b/Source/Shaders/Shadows.shader @@ -10,7 +10,6 @@ META_CB_BEGIN(0, PerLight) GBufferData GBuffer; LightData Light; -LightShadowData LightShadow; float4x4 WVP; float4x4 ViewProjectionMatrix; float2 Dummy0; @@ -18,8 +17,10 @@ float ContactShadowsDistance; float ContactShadowsLength; META_CB_END +Buffer ShadowsBuffer : register(t5); +Texture2D ShadowMap : register(t6); + DECLARE_GBUFFERDATA_ACCESS(GBuffer) -DECLARE_LIGHTSHADOWDATA_ACCESS(LightShadow); #if CONTACT_SHADOWS @@ -67,10 +68,6 @@ Model_VS2PS VS_Model(ModelInput_PosOnly input) return output; } -#ifdef _PS_PointLight - -TextureCube ShadowMapPoint : register(t5); - // Pixel shader for point light shadow rendering META_PS(true, FEATURE_LEVEL_ES2) META_PERMUTATION_2(SHADOWS_QUALITY=0,CONTACT_SHADOWS=0) @@ -83,9 +80,6 @@ META_PERMUTATION_2(SHADOWS_QUALITY=2,CONTACT_SHADOWS=1) META_PERMUTATION_2(SHADOWS_QUALITY=3,CONTACT_SHADOWS=1) float4 PS_PointLight(Model_VS2PS input) : SV_Target0 { - float shadow = 1; - float subsurfaceShadow = 1; - // Obtain texture coordinates corresponding to the current pixel float2 uv = (input.ScreenPos.xy / input.ScreenPos.w) * float2(0.5, -0.5) + float2(0.5, 0.5); @@ -94,68 +88,43 @@ float4 PS_PointLight(Model_VS2PS input) : SV_Target0 GBufferSample gBuffer = SampleGBuffer(gBufferData, uv); // Sample shadow - LightShadowData lightShadowData = GetLightShadowData(); - shadow = SampleShadow(Light, lightShadowData, ShadowMapPoint, gBuffer, subsurfaceShadow); + ShadowSample shadow = SamplePointLightShadow(Light, ShadowsBuffer, ShadowMap, gBuffer); #if CONTACT_SHADOWS // Calculate screen-space contact shadow - shadow *= RayCastScreenSpaceShadow(gBufferData, gBuffer, gBuffer.WorldPos, normalize(Light.Position - gBuffer.WorldPos), ContactShadowsLength); + shadow.SurfaceShadow *= RayCastScreenSpaceShadow(gBufferData, gBuffer, gBuffer.WorldPos, normalize(Light.Position - gBuffer.WorldPos), 
ContactShadowsLength); #endif - return float4(shadow, subsurfaceShadow, 1, 1); + return GetShadowMask(shadow); } -#endif - -#ifdef _PS_DirLight - -Texture2DArray ShadowMapDir : register(t5); - // Pixel shader for directional light shadow rendering META_PS(true, FEATURE_LEVEL_ES2) -META_PERMUTATION_3(SHADOWS_QUALITY=0,CSM_BLENDING=0,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=1,CSM_BLENDING=0,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=2,CSM_BLENDING=0,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=3,CSM_BLENDING=0,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=0,CSM_BLENDING=1,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=1,CSM_BLENDING=1,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=2,CSM_BLENDING=1,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=3,CSM_BLENDING=1,CONTACT_SHADOWS=0) -META_PERMUTATION_3(SHADOWS_QUALITY=0,CSM_BLENDING=0,CONTACT_SHADOWS=1) -META_PERMUTATION_3(SHADOWS_QUALITY=1,CSM_BLENDING=0,CONTACT_SHADOWS=1) -META_PERMUTATION_3(SHADOWS_QUALITY=2,CSM_BLENDING=0,CONTACT_SHADOWS=1) -META_PERMUTATION_3(SHADOWS_QUALITY=3,CSM_BLENDING=0,CONTACT_SHADOWS=1) -META_PERMUTATION_3(SHADOWS_QUALITY=0,CSM_BLENDING=1,CONTACT_SHADOWS=1) -META_PERMUTATION_3(SHADOWS_QUALITY=1,CSM_BLENDING=1,CONTACT_SHADOWS=1) -META_PERMUTATION_3(SHADOWS_QUALITY=2,CSM_BLENDING=1,CONTACT_SHADOWS=1) -META_PERMUTATION_3(SHADOWS_QUALITY=3,CSM_BLENDING=1,CONTACT_SHADOWS=1) +META_PERMUTATION_2(SHADOWS_QUALITY=0,CONTACT_SHADOWS=0) +META_PERMUTATION_2(SHADOWS_QUALITY=1,CONTACT_SHADOWS=0) +META_PERMUTATION_2(SHADOWS_QUALITY=2,CONTACT_SHADOWS=0) +META_PERMUTATION_2(SHADOWS_QUALITY=3,CONTACT_SHADOWS=0) +META_PERMUTATION_2(SHADOWS_QUALITY=0,CONTACT_SHADOWS=1) +META_PERMUTATION_2(SHADOWS_QUALITY=1,CONTACT_SHADOWS=1) +META_PERMUTATION_2(SHADOWS_QUALITY=2,CONTACT_SHADOWS=1) +META_PERMUTATION_2(SHADOWS_QUALITY=3,CONTACT_SHADOWS=1) float4 PS_DirLight(Quad_VS2PS input) : SV_Target0 { - float shadow = 1; - float subsurfaceShadow = 1; - // 
Sample GBuffer GBufferData gBufferData = GetGBufferData(); GBufferSample gBuffer = SampleGBuffer(gBufferData, input.TexCoord); // Sample shadow - LightShadowData lightShadowData = GetLightShadowData(); - shadow = SampleShadow(Light, lightShadowData, ShadowMapDir, gBuffer, subsurfaceShadow); + ShadowSample shadow = SampleDirectionalLightShadow(Light, ShadowsBuffer, ShadowMap, gBuffer); #if CONTACT_SHADOWS // Calculate screen-space contact shadow - shadow *= RayCastScreenSpaceShadow(gBufferData, gBuffer, gBuffer.WorldPos, Light.Direction, ContactShadowsLength); + shadow.SurfaceShadow *= RayCastScreenSpaceShadow(gBufferData, gBuffer, gBuffer.WorldPos, Light.Direction, ContactShadowsLength); #endif - return float4(shadow, subsurfaceShadow, 1, 1); + return GetShadowMask(shadow); } -#endif - -#ifdef _PS_SpotLight - -Texture2D ShadowMapSpot : register(t5); - // Pixel shader for spot light shadow rendering META_PS(true, FEATURE_LEVEL_ES2) META_PERMUTATION_2(SHADOWS_QUALITY=0,CONTACT_SHADOWS=0) @@ -168,9 +137,6 @@ META_PERMUTATION_2(SHADOWS_QUALITY=2,CONTACT_SHADOWS=1) META_PERMUTATION_2(SHADOWS_QUALITY=3,CONTACT_SHADOWS=1) float4 PS_SpotLight(Model_VS2PS input) : SV_Target0 { - float shadow = 1; - float subsurfaceShadow = 1; - // Obtain texture coordinates corresponding to the current pixel float2 uv = (input.ScreenPos.xy / input.ScreenPos.w) * float2(0.5, -0.5) + float2(0.5, 0.5); @@ -179,15 +145,12 @@ float4 PS_SpotLight(Model_VS2PS input) : SV_Target0 GBufferSample gBuffer = SampleGBuffer(gBufferData, uv); // Sample shadow - LightShadowData lightShadowData = GetLightShadowData(); - shadow = SampleShadow(Light, lightShadowData, ShadowMapSpot, gBuffer, subsurfaceShadow); + ShadowSample shadow = SampleSpotLightShadow(Light, ShadowsBuffer, ShadowMap, gBuffer); #if CONTACT_SHADOWS // Calculate screen-space contact shadow - shadow *= RayCastScreenSpaceShadow(gBufferData, gBuffer, gBuffer.WorldPos, normalize(Light.Position - gBuffer.WorldPos), ContactShadowsLength); + 
shadow.SurfaceShadow *= RayCastScreenSpaceShadow(gBufferData, gBuffer, gBuffer.WorldPos, normalize(Light.Position - gBuffer.WorldPos), ContactShadowsLength); #endif - return float4(shadow, subsurfaceShadow, 1, 1); + return GetShadowMask(shadow); } - -#endif diff --git a/Source/Shaders/ShadowsCommon.hlsl b/Source/Shaders/ShadowsCommon.hlsl index 23ddf4214..43fd0c2d5 100644 --- a/Source/Shaders/ShadowsCommon.hlsl +++ b/Source/Shaders/ShadowsCommon.hlsl @@ -12,32 +12,57 @@ #ifndef SHADOWS_QUALITY #define SHADOWS_QUALITY 0 #endif -#ifndef CSM_BLENDING -#define CSM_BLENDING 0 -#endif -// Structure that contains information about light -struct LightShadowData +// Shadow data for the light +struct ShadowData { - float2 ShadowMapSize; float Sharpness; float Fade; - + float FadeDistance; float NormalOffsetScale; float Bias; - float FadeDistance; - uint NumCascades; - + uint TilesCount; float4 CascadeSplits; - float4x4 ShadowVP[6]; }; -#ifdef PLATFORM_ANDROID -// #AdrenoVK_CB_STRUCT_MEMBER_ACCESS_BUG -#define DECLARE_LIGHTSHADOWDATA_ACCESS(uniformName) LightShadowData Get##uniformName##Data() { LightShadowData tmp; tmp.ShadowMapSize = uniformName.ShadowMapSize; tmp.Sharpness = uniformName.Sharpness; tmp.Fade = uniformName.Fade; tmp.NormalOffsetScale = uniformName.NormalOffsetScale; tmp.Bias = uniformName.Bias; tmp.FadeDistance = uniformName.FadeDistance; tmp.NumCascades = uniformName.NumCascades; tmp.CascadeSplits = uniformName.CascadeSplits; tmp.ShadowVP[0] = uniformName.ShadowVP[0]; tmp.ShadowVP[1] = uniformName.ShadowVP[1]; tmp.ShadowVP[2] = uniformName.ShadowVP[2]; tmp.ShadowVP[3] = uniformName.ShadowVP[3]; tmp.ShadowVP[4] = uniformName.ShadowVP[4]; tmp.ShadowVP[5] = uniformName.ShadowVP[5]; return tmp; } -#else -#define DECLARE_LIGHTSHADOWDATA_ACCESS(uniformName) LightShadowData Get##uniformName##Data() { return uniformName; } -#endif +// Shadow projection tile data for the light +struct ShadowTileData +{ + float4 ShadowToAtlas; + float4x4 WorldToShadow; +}; + +// Loads 
the shadow data of the light in the shadow buffer +ShadowData LoadShadowsBuffer(Buffer shadowsBuffer, uint shadowsBufferAddress) +{ + // This must match C++ + float4 vector0 = shadowsBuffer.Load(shadowsBufferAddress + 0); + float4 vector1 = shadowsBuffer.Load(shadowsBufferAddress + 1); + ShadowData shadow; + uint packed0x = asuint(vector0.x); + shadow.Sharpness = (packed0x & 0x000000ff) * (10.0f / 255.0f); + shadow.Fade = ((packed0x & 0x0000ff00) >> 8) * (1.0f / 255.0f); + shadow.TilesCount = ((packed0x & 0x00ff0000) >> 16); + shadow.FadeDistance = vector0.y; + shadow.NormalOffsetScale = vector0.z; + shadow.Bias = vector0.w; + shadow.CascadeSplits = vector1; + return shadow; +} + +// Loads the shadow tile data of the light in the shadow buffer +ShadowTileData LoadShadowsBufferTile(Buffer shadowsBuffer, uint shadowsBufferAddress, uint tileIndex) +{ + // This must match C++ + shadowsBufferAddress += tileIndex * 5 + 2; + ShadowTileData tile; + tile.ShadowToAtlas = shadowsBuffer.Load(shadowsBufferAddress + 0); + tile.WorldToShadow[0] = shadowsBuffer.Load(shadowsBufferAddress + 1); + tile.WorldToShadow[1] = shadowsBuffer.Load(shadowsBufferAddress + 2); + tile.WorldToShadow[2] = shadowsBuffer.Load(shadowsBufferAddress + 3); + tile.WorldToShadow[3] = shadowsBuffer.Load(shadowsBufferAddress + 4); + return tile; +} float3 GetShadowPositionOffset(float offsetScale, float NoL, float3 normal) { @@ -48,8 +73,16 @@ float3 GetShadowPositionOffset(float offsetScale, float NoL, float3 normal) float CalculateSubsurfaceOcclusion(float opacity, float sceneDepth, float shadowMapDepth) { float thickness = max(sceneDepth - shadowMapDepth, 0); - float occlusion = 1 - thickness * lerp(1.0f, 100.0f, opacity); + float occlusion = 1 - saturate(thickness * lerp(1.0f, 100.0f, opacity)); return shadowMapDepth > 0.99f ? 
1 : occlusion; } +float PostProcessShadow(ShadowData lightShadow, float shadow) +{ + // Apply shadow fade and sharpness + shadow = saturate((shadow - 0.5) * lightShadow.Sharpness + 0.5); + shadow = lerp(1.0f, shadow, lightShadow.Fade); + return shadow; +} + #endif diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index e7d10a7b9..d8510011a 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -7,46 +7,24 @@ #include "./Flax/GBufferCommon.hlsl" #include "./Flax/LightingCommon.hlsl" -// Select shadows filter based on quality -// Supported sampling kernel sizes fo each shadowing technique: -// CSM: 2, 3, 5, 7, 9 -// Cube: 2, 5, 12, 29 -// Spot: 2, 5, 12, 29 -#if SHADOWS_QUALITY == 0 - -#define FilterSizeCSM 2 -#define FilterSizeCube 2 -#define FilterSizeSpot 2 - -#elif SHADOWS_QUALITY == 1 - - #define FilterSizeCSM 3 - #define FilterSizeCube 5 - #define FilterSizeSpot 5 - -#elif SHADOWS_QUALITY == 2 - - #define FilterSizeCSM 5 - #define FilterSizeCube 12 - #define FilterSizeSpot 12 - -#else // SHADOWS_QUALITY == 3 - - #define FilterSizeCSM 7 - #define FilterSizeCube 12 - #define FilterSizeSpot 12 - +#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 +#define SAMPLE_SHADOW_MAP(shadowMap, shadowUV, sceneDepth) shadowMap.SampleCmpLevelZero(ShadowSamplerLinear, shadowUV, sceneDepth) +#define SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowUV, texelOffset, sceneDepth) shadowMap.SampleCmpLevelZero(ShadowSamplerLinear, shadowUV, sceneDepth, texelOffset) +#else +#define SAMPLE_SHADOW_MAP(shadowMap, shadowUV, sceneDepth) (sceneDepth < shadowMap.SampleLevel(SamplerLinearClamp, shadowUV, 0).r) +#define SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowUV, texelOffset, sceneDepth) (sceneDepth < shadowMap.SampleLevel(SamplerLinearClamp, shadowUV, 0, texelOffset).r) #endif -#if SHADOWS_QUALITY != 0 -#include "./Flax/PCFKernels.hlsl" -#endif +float4 GetShadowMask(ShadowSample shadow) +{ + return float4(shadow.SurfaceShadow, 
shadow.TransmissionShadow, 1, 1); +} // Gets the cube texture face index to use for shadow map sampling for the given view-to-light direction vector // Where: direction = normalize(worldPosition - lightPosition) -int GetCubeFaceIndex(float3 direction) +uint GetCubeFaceIndex(float3 direction) { - int cubeFaceIndex; + uint cubeFaceIndex; float3 absDirection = abs(direction); float maxDirection = max(absDirection.x, max(absDirection.y, absDirection.z)); if (maxDirection == absDirection.x) @@ -58,666 +36,230 @@ int GetCubeFaceIndex(float3 direction) return cubeFaceIndex; } -// Samples the shadow map with a fixed-size PCF kernel optimized with GatherCmpRed. -// Uses code from "Fast Conventional Shadow Filtering" by Holger Gruen, in GPU Pro. -float SampleShadowMapFixedSizePCF(Texture2DArray shadowMap, float2 shadowMapSize, float sceneDepth, float2 shadowPos, uint cascadeIndex) +float2 GetLightShadowAtlasUV(ShadowData shadow, ShadowTileData shadowTile, float3 samplePosition, out float4 shadowPosition) { -#if FilterSizeCSM == 2 - -#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 - - return shadowMap.SampleCmpLevelZero(ShadowSamplerPCF, float3(shadowPos.xy, cascadeIndex), sceneDepth); - -#else - - return sceneDepth < shadowMap.SampleLevel(SamplerLinearClamp, float3(shadowPos.xy, cascadeIndex), 0).r; - -#endif - -#else - - const int FS_2 = FilterSizeCSM / 2; - float2 tc = shadowPos.xy; - float4 s = 0.0f; - float2 stc = (shadowMapSize * tc.xy) + float2(0.5f, 0.5f); - float2 tcs = floor(stc); - float2 fc; - int row; - int col; - float4 v1[FS_2 + 1]; - float2 v0[FS_2 + 1]; - float3 baseUV = float3(tc.xy, cascadeIndex); - float2 shadowMapSizeInv = 1.0f / shadowMapSize; - - fc.xy = stc - tcs; - tc.xy = tcs * shadowMapSizeInv; - - // Loop over the rows - UNROLL - for (row = -FS_2; row <= FS_2; row += 2) - { - UNROLL - for (col = -FS_2; col <= FS_2; col += 2) - { - float value = CSMFilterWeights[row + FS_2][col + FS_2]; - - if (col > -FS_2) - value += CSMFilterWeights[row + FS_2][col + FS_2 
- 1]; - - if (col < FS_2) - value += CSMFilterWeights[row + FS_2][col + FS_2 + 1]; - - if (row > -FS_2) { - value += CSMFilterWeights[row + FS_2 - 1][col + FS_2]; - - if (col < FS_2) - value += CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1]; - - if (col > -FS_2) - value += CSMFilterWeights[row + FS_2 - 1][col + FS_2 - 1]; - } - - if (value != 0.0f) - { - // Gather returns xyzw which is counter clockwise order starting with the sample to the lower left of the queried location -#if CAN_USE_GATHER - - v1[(col + FS_2) / 2] = shadowMap.GatherCmp(ShadowSampler, baseUV, sceneDepth, int2(col, row)); - -#else - - float4 gather; - - gather.x = sceneDepth < shadowMap.SampleLevel(SamplerPointClamp, float3(tc.xy + float2(0, 1) * shadowMapSizeInv, cascadeIndex), 0, int2(col, row)).r; - gather.y = sceneDepth < shadowMap.SampleLevel(SamplerPointClamp, float3(tc.xy + float2(1, 1) * shadowMapSizeInv, cascadeIndex), 0, int2(col, row)).r; - gather.z = sceneDepth < shadowMap.SampleLevel(SamplerPointClamp, float3(tc.xy + float2(1, 0) * shadowMapSizeInv, cascadeIndex), 0, int2(col, row)).r; - gather.w = sceneDepth < shadowMap.SampleLevel(SamplerPointClamp, float3(tc.xy + float2(0, 0) * shadowMapSizeInv, cascadeIndex), 0, int2(col, row)).r; - - v1[(col + FS_2) / 2] = gather; - -#endif - } - else - v1[(col + FS_2) / 2] = 0.0f; - - if (col == -FS_2) - { - s.x += (1.0f - fc.y) * (v1[0].w * (CSMFilterWeights[row + FS_2][col + FS_2] - - CSMFilterWeights[row + FS_2][col + FS_2] * fc.x) - + v1[0].z * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2] - - CSMFilterWeights[row + FS_2][col + FS_2 + 1]) - + CSMFilterWeights[row + FS_2][col + FS_2 + 1])); - s.y += fc.y * (v1[0].x * (CSMFilterWeights[row + FS_2][col + FS_2] - - CSMFilterWeights[row + FS_2][col + FS_2] * fc.x) - + v1[0].y * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2] - - CSMFilterWeights[row + FS_2][col + FS_2 + 1]) - + CSMFilterWeights[row + FS_2][col + FS_2 + 1])); - if(row > -FS_2) - { - s.z += (1.0f - fc.y) * (v0[0].x * 
(CSMFilterWeights[row + FS_2 - 1][col + FS_2] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2] * fc.x) - + v0[0].y * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1]) - + CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1])); - s.w += fc.y * (v1[0].w * (CSMFilterWeights[row + FS_2 - 1][col + FS_2] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2] * fc.x) - + v1[0].z * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1]) - + CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1])); - } - } - else if (col == FS_2) - { - s.x += (1 - fc.y) * (v1[FS_2].w * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2 - 1] - - CSMFilterWeights[row + FS_2][col + FS_2]) + CSMFilterWeights[row + FS_2][col + FS_2]) - + v1[FS_2].z * fc.x * CSMFilterWeights[row + FS_2][col + FS_2]); - s.y += fc.y * (v1[FS_2].x * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2 - 1] - - CSMFilterWeights[row + FS_2][col + FS_2] ) + CSMFilterWeights[row + FS_2][col + FS_2]) - + v1[FS_2].y * fc.x * CSMFilterWeights[row + FS_2][col + FS_2]); - if(row > -FS_2) { - s.z += (1 - fc.y) * (v0[FS_2].x * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2 - 1] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2]) - + CSMFilterWeights[row + FS_2 - 1][col + FS_2]) - + v0[FS_2].y * fc.x * CSMFilterWeights[row + FS_2 - 1][col + FS_2]); - s.w += fc.y * (v1[FS_2].w * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2 - 1] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2]) - + CSMFilterWeights[row + FS_2 - 1][col + FS_2]) - + v1[FS_2].z * fc.x * CSMFilterWeights[row + FS_2 - 1][col + FS_2]); - } - } - else - { - s.x += (1 - fc.y) * (v1[(col + FS_2) / 2].w * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2 - 1] - - CSMFilterWeights[row + FS_2][col + FS_2 + 0] ) + CSMFilterWeights[row + FS_2][col + FS_2 + 0]) - + v1[(col + FS_2) / 2].z * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2 - 0] - - CSMFilterWeights[row + FS_2][col + 
FS_2 + 1]) + CSMFilterWeights[row + FS_2][col + FS_2 + 1])); - s.y += fc.y * (v1[(col + FS_2) / 2].x * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2-1] - - CSMFilterWeights[row + FS_2][col + FS_2 + 0]) + CSMFilterWeights[row + FS_2][col + FS_2 + 0]) - + v1[(col + FS_2) / 2].y * (fc.x * (CSMFilterWeights[row + FS_2][col + FS_2 - 0] - - CSMFilterWeights[row + FS_2][col + FS_2 + 1]) + CSMFilterWeights[row + FS_2][col + FS_2 + 1])); - if(row > -FS_2) { - s.z += (1 - fc.y) * (v0[(col + FS_2) / 2].x * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2 - 1] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 0]) + CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 0]) - + v0[(col + FS_2) / 2].y * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2 - 0] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1]) + CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1])); - s.w += fc.y * (v1[(col + FS_2) / 2].w * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2 - 1] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 0]) + CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 0]) - + v1[(col + FS_2) / 2].z * (fc.x * (CSMFilterWeights[row + FS_2 - 1][col + FS_2 - 0] - - CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1]) + CSMFilterWeights[row + FS_2 - 1][col + FS_2 + 1])); - } - } - - if (row != FS_2) - v0[(col + FS_2) / 2] = v1[(col + FS_2) / 2].xy; - } - } - - return dot(s, 1.0f) / CSMFilterWeightsSum; - -#endif -} - -// Helper function for SampleShadowMapOptimizedPCF -float SampleShadowMap(Texture2DArray shadowMap, float2 baseUv, float u, float v, float2 shadowMapSizeInv, uint cascadeIndex, float depth) -{ - float2 uv = baseUv + float2(u, v) * shadowMapSizeInv; - return shadowMap.SampleCmpLevelZero(ShadowSamplerPCF, float3(uv, cascadeIndex), depth); -} - -// The method used in The Witness -float SampleShadowMapOptimizedPCF(Texture2DArray shadowMap, float2 shadowMapSize, float sceneDepth, float2 shadowPos, uint cascadeIndex) -{ - float2 uv = shadowPos.xy * shadowMapSize; // 1 unit - 1 texel - 
float2 shadowMapSizeInv = 1.0f / shadowMapSize; - - float2 baseUv; - baseUv.x = floor(uv.x + 0.5); - baseUv.y = floor(uv.y + 0.5); - float s = (uv.x + 0.5 - baseUv.x); - float t = (uv.y + 0.5 - baseUv.y); - baseUv -= float2(0.5, 0.5); - baseUv *= shadowMapSizeInv; - - float sum = 0; - -#if FilterSizeCSM == 2 - - return shadowMap.SampleCmpLevelZero(ShadowSamplerPCF, float3(shadowPos.xy, cascadeIndex), sceneDepth); - -#elif FilterSizeCSM == 3 - - float uw0 = (3 - 2 * s); - float uw1 = (1 + 2 * s); - - float u0 = (2 - s) / uw0 - 1; - float u1 = s / uw1 + 1; - - float vw0 = (3 - 2 * t); - float vw1 = (1 + 2 * t); - - float v0 = (2 - t) / vw0 - 1; - float v1 = t / vw1 + 1; - - sum += uw0 * vw0 * SampleShadowMap(shadowMap, baseUv, u0, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw0 * SampleShadowMap(shadowMap, baseUv, u1, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw0 * vw1 * SampleShadowMap(shadowMap, baseUv, u0, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw1 * SampleShadowMap(shadowMap, baseUv, u1, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - - return sum * 1.0f / 16; - -#elif FilterSizeCSM == 5 - - float uw0 = (4 - 3 * s); - float uw1 = 7; - float uw2 = (1 + 3 * s); - - float u0 = (3 - 2 * s) / uw0 - 2; - float u1 = (3 + s) / uw1; - float u2 = s / uw2 + 2; - - float vw0 = (4 - 3 * t); - float vw1 = 7; - float vw2 = (1 + 3 * t); - - float v0 = (3 - 2 * t) / vw0 - 2; - float v1 = (3 + t) / vw1; - float v2 = t / vw2 + 2; - - sum += uw0 * vw0 * SampleShadowMap(shadowMap, baseUv, u0, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw0 * SampleShadowMap(shadowMap, baseUv, u1, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw2 * vw0 * SampleShadowMap(shadowMap, baseUv, u2, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - - sum += uw0 * vw1 * SampleShadowMap(shadowMap, baseUv, u0, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw1 * SampleShadowMap(shadowMap, baseUv, 
u1, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw2 * vw1 * SampleShadowMap(shadowMap, baseUv, u2, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - - sum += uw0 * vw2 * SampleShadowMap(shadowMap, baseUv, u0, v2, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw2 * SampleShadowMap(shadowMap, baseUv, u1, v2, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw2 * vw2 * SampleShadowMap(shadowMap, baseUv, u2, v2, shadowMapSizeInv, cascadeIndex, sceneDepth); - - return sum * 1.0f / 144; - -#else // FilterSizeCSM == 7 - - float uw0 = (5 * s - 6); - float uw1 = (11 * s - 28); - float uw2 = -(11 * s + 17); - float uw3 = -(5 * s + 1); - - float u0 = (4 * s - 5) / uw0 - 3; - float u1 = (4 * s - 16) / uw1 - 1; - float u2 = -(7 * s + 5) / uw2 + 1; - float u3 = -s / uw3 + 3; - - float vw0 = (5 * t - 6); - float vw1 = (11 * t - 28); - float vw2 = -(11 * t + 17); - float vw3 = -(5 * t + 1); - - float v0 = (4 * t - 5) / vw0 - 3; - float v1 = (4 * t - 16) / vw1 - 1; - float v2 = -(7 * t + 5) / vw2 + 1; - float v3 = -t / vw3 + 3; - - sum += uw0 * vw0 * SampleShadowMap(shadowMap, baseUv, u0, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw0 * SampleShadowMap(shadowMap, baseUv, u1, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw2 * vw0 * SampleShadowMap(shadowMap, baseUv, u2, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw3 * vw0 * SampleShadowMap(shadowMap, baseUv, u3, v0, shadowMapSizeInv, cascadeIndex, sceneDepth); - - sum += uw0 * vw1 * SampleShadowMap(shadowMap, baseUv, u0, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw1 * SampleShadowMap(shadowMap, baseUv, u1, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw2 * vw1 * SampleShadowMap(shadowMap, baseUv, u2, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw3 * vw1 * SampleShadowMap(shadowMap, baseUv, u3, v1, shadowMapSizeInv, cascadeIndex, sceneDepth); - - sum += uw0 * vw2 * SampleShadowMap(shadowMap, baseUv, u0, 
v2, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw2 * SampleShadowMap(shadowMap, baseUv, u1, v2, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw2 * vw2 * SampleShadowMap(shadowMap, baseUv, u2, v2, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw3 * vw2 * SampleShadowMap(shadowMap, baseUv, u3, v2, shadowMapSizeInv, cascadeIndex, sceneDepth); - - sum += uw0 * vw3 * SampleShadowMap(shadowMap, baseUv, u0, v3, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw1 * vw3 * SampleShadowMap(shadowMap, baseUv, u1, v3, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw2 * vw3 * SampleShadowMap(shadowMap, baseUv, u2, v3, shadowMapSizeInv, cascadeIndex, sceneDepth); - sum += uw3 * vw3 * SampleShadowMap(shadowMap, baseUv, u3, v3, shadowMapSizeInv, cascadeIndex, sceneDepth); - - return sum * (1.0f / 2704); - -#endif -} - -// Samples the shadow from the shadow map cascade -float SampleShadowCascade(Texture2DArray shadowMap, float2 shadowMapSize, float sceneDepth, float2 shadowPosition, uint cascadeIndex) -{ - float shadow = SampleShadowMapFixedSizePCF(shadowMap, shadowMapSize, sceneDepth, shadowPosition, cascadeIndex); - //float shadow = SampleShadowMapOptimizedPCF(shadowMap, shadowMapSize, sceneDepth, shadowPosition, cascadeIndex); - return shadow; -} - -// Samples the shadow for the given directional light (cascaded shadow map sampling) -float SampleShadow(LightData light, LightShadowData shadow, Texture2DArray shadowMap, float3 worldPosition, float viewDepth) -{ - // Create a blend factor which is one before and at the fade plane - float fade = saturate((viewDepth - shadow.CascadeSplits[shadow.NumCascades - 1] + shadow.FadeDistance) / shadow.FadeDistance); - BRANCH - if (fade >= 1.0) - { - return 1; - } - - // Figure out which cascade to sample from - uint cascadeIndex = 0; - for (uint i = 0; i < shadow.NumCascades - 1; i++) - { - if (viewDepth > shadow.CascadeSplits[i]) - cascadeIndex = i + 1; - } - - // Project into shadow space - 
float4 shadowPosition = mul(float4(worldPosition, 1.0f), shadow.ShadowVP[cascadeIndex]); - shadowPosition.xy /= shadowPosition.w; + // Project into shadow space (WorldToShadow is pre-multiplied to convert Clip Space to UV Space) + shadowPosition = mul(float4(samplePosition, 1.0f), shadowTile.WorldToShadow); shadowPosition.z -= shadow.Bias; + shadowPosition.xyz /= shadowPosition.w; - // Sample shadow - float result = SampleShadowCascade(shadowMap, shadow.ShadowMapSize, shadowPosition.z, shadowPosition.xy, cascadeIndex); + // UV Space -> Atlas Tile UV Space + float2 shadowMapUV = saturate(shadowPosition.xy); + shadowMapUV = shadowMapUV * shadowTile.ShadowToAtlas.xy + shadowTile.ShadowToAtlas.zw; + return shadowMapUV; +} - // Increase the sharpness for higher cascades to match the filter radius - const float SharpnessScale[MaxNumCascades] = { 1.0f, 1.5f, 3.0f, 3.5f }; - float sharpness = shadow.Sharpness * SharpnessScale[cascadeIndex]; - -#if CSM_BLENDING - // Sample the next cascade, and blend between the two results to smooth the transition - const float BlendThreshold = 0.1f; - float nextSplit = shadow.CascadeSplits[cascadeIndex]; - float splitSize = cascadeIndex == 0 ? 
nextSplit : nextSplit - shadow.CascadeSplits[cascadeIndex - 1]; - float splitDist = (nextSplit - viewDepth) / splitSize; - BRANCH - if (splitDist <= BlendThreshold && cascadeIndex != shadow.NumCascades - 1) - { - // Find the position of this pixel in light space of next cascade - shadowPosition = mul(float4(worldPosition, 1.0f), shadow.ShadowVP[cascadeIndex + 1]); - shadowPosition.xy /= shadowPosition.w; - shadowPosition.z -= shadow.Bias; - - // Sample next cascade and blur result - float nextSplitShadow = SampleShadowCascade(shadowMap, shadow.ShadowMapSize, shadowPosition.z, shadowPosition.xy, cascadeIndex + 1); - float lerpAmount = smoothstep(0.0f, BlendThreshold, splitDist); - lerpAmount = splitDist / BlendThreshold; - result = lerp(nextSplitShadow, result, lerpAmount); - - // Blur sharpness as well - sharpness = lerp(shadow.Sharpness * SharpnessScale[cascadeIndex + 1], sharpness, lerpAmount); - } +float SampleShadowMap(Texture2D shadowMap, float2 shadowMapUV, float sceneDepth) +{ + // Single hardware sample with filtering + float result = SAMPLE_SHADOW_MAP(shadowMap, shadowMapUV, sceneDepth); + +#if SHADOWS_QUALITY == 1 + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, 0), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(0, -1), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(0, 1), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(1, 0), sceneDepth); + result = result * (1.0f / 4.0); +#elif SHADOWS_QUALITY == 2 || SHADOWS_QUALITY == 3 + // TODO: implement Percentage-Closer Soft Shadows (PCSS) for Ultra quality + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, -1), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, 0), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, 1), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(0, -1), sceneDepth); 
+ result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(0, 1), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(1, -1), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(1, 0), sceneDepth); + result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(1, 1), sceneDepth); + result = result * (1.0f / 9.0); #endif - // Apply shadow fade and sharpness - result = saturate((result - 0.5) * sharpness + 0.5); - result = lerp(1.0f, result, (1 - fade) * shadow.Fade); return result; } -// Samples the shadow for the given directional light (cascaded shadow map sampling) for the material surface (supports subsurface shadowing) -float SampleShadow(LightData light, LightShadowData shadow, Texture2DArray shadowMap, GBufferSample gBuffer, out float subsurfaceShadow) +// Samples the shadow for the given directional light on the material surface (supports subsurface shadowing) +ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, GBufferSample gBuffer) { - subsurfaceShadow = 1; - - // Create a blend factor which is one before and at the fade plane - float viewDepth = gBuffer.ViewPos.z; - float fade = saturate((viewDepth - shadow.CascadeSplits[shadow.NumCascades - 1] + shadow.FadeDistance) / shadow.FadeDistance); - BRANCH - if (fade >= 1.0) - { - return 1; - } - - // Figure out which cascade to sample from - uint cascadeIndex = 0; - for (uint i = 0; i < shadow.NumCascades - 1; i++) - { - if (viewDepth > shadow.CascadeSplits[i]) - cascadeIndex = i + 1; - } - -#if defined(USE_GBUFFER_CUSTOM_DATA) - // Subsurface shadowing - BRANCH - if (IsSubsurfaceMode(gBuffer.ShadingModel)) - { - // Get subsurface material info - float opacity = gBuffer.CustomData.a; - - // Project into shadow space - float4 shadowPosition = mul(float4(gBuffer.WorldPos, 1.0f), shadow.ShadowVP[cascadeIndex]); - shadowPosition.xy /= shadowPosition.w; - shadowPosition.z -= shadow.Bias; - - // Sample 
shadow map (single hardware sample with hardware filtering) - float shadowMapDepth = shadowMap.SampleLevel(SamplerLinearClamp, float3(shadowPosition.xy, cascadeIndex), 0).r; - subsurfaceShadow = CalculateSubsurfaceOcclusion(opacity, shadowPosition.z, shadowMapDepth); - - // Apply shadow fade - subsurfaceShadow = lerp(1.0f, subsurfaceShadow, (1 - fade) * shadow.Fade); - } -#endif - - float3 samplePosWS = gBuffer.WorldPos; - #if !LIGHTING_NO_DIRECTIONAL // Skip if surface is in a full shadow float NoL = dot(gBuffer.Normal, light.Direction); BRANCH - if (NoL <= 0) - { - return 0; - } - - // Apply normal offset bias - samplePosWS += GetShadowPositionOffset(shadow.NormalOffsetScale, NoL, gBuffer.Normal); + if (NoL <= 0 +#if defined(USE_GBUFFER_CUSTOM_DATA) + && !IsSubsurfaceMode(gBuffer.ShadingModel) +#endif + ) + return (ShadowSample)0; #endif - // Sample shadow - return SampleShadow(light, shadow, shadowMap, samplePosWS, viewDepth); -} + ShadowSample result; + result.SurfaceShadow = 1; + result.TransmissionShadow = 1; + + // Load shadow data + if (light.ShadowsBufferAddress == 0) + return result; // No shadow assigned + ShadowData shadow = LoadShadowsBuffer(shadowsBuffer, light.ShadowsBufferAddress); -// Samples the shadow for the given spot light (PCF shadow map sampling) -float SampleShadow(LightData light, LightShadowData shadow, Texture2D shadowMap, float3 worldPosition) -{ - float3 toLight = light.Position - worldPosition; - float toLightLength = length(toLight); - float3 L = toLight / toLightLength; -#if LIGHTING_NO_DIRECTIONAL - float dirCheck = 1.0f; -#else - float dirCheck = dot(-light.Direction, L); -#endif - - // Skip pixels outside of the light influence + // Create a blend factor which is one before and at the fade plane + float viewDepth = gBuffer.ViewPos.z; + float fade = saturate((viewDepth - shadow.CascadeSplits[shadow.TilesCount - 1] + shadow.FadeDistance) / shadow.FadeDistance); BRANCH - if (toLightLength > light.Radius || dirCheck < 0) + if (fade 
>= 1.0) + return result; + + // Figure out which cascade to sample from + uint cascadeIndex = 0; + for (uint i = 0; i < shadow.TilesCount - 1; i++) { - return 1; + if (viewDepth > shadow.CascadeSplits[i]) + cascadeIndex = i + 1; } + ShadowTileData shadowTile = LoadShadowsBufferTile(shadowsBuffer, light.ShadowsBufferAddress, cascadeIndex); - // Negate direction and use normalized value - toLight = -L; - - // Project into shadow space - float4 shadowPosition = mul(float4(worldPosition, 1.0f), shadow.ShadowVP[0]); - shadowPosition.z -= shadow.Bias; - shadowPosition.xyz /= shadowPosition.w; - - float2 shadowMapUVs = shadowPosition.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f); - -#if FilterSizeSpot == 2 - - // Use single hardware sample with filtering - float result = shadowMap.SampleCmpLevelZero(ShadowSamplerPCF, shadowMapUVs, shadowPosition.z); - -#else - - float3 sideVector = normalize(cross(toLight, float3(0, 0, 1))); - float3 upVector = cross(sideVector, toLight); - - float shadowMapSizeInv = 1.0f / shadow.ShadowMapSize.x; - sideVector *= shadowMapSizeInv; - upVector *= shadowMapSizeInv; - - // Use PCF filter - float result = 0; - UNROLL - for(int i = 0; i < FilterSizeCube; i++) - { - float2 samplePos = shadowMapUVs + sideVector.xy * PCFDiscSamples[i].x + upVector.xy * PCFDiscSamples[i].y; - result += shadowMap.SampleCmpLevelZero(ShadowSamplerPCF, samplePos, shadowPosition.z); - } - result *= (1.0f / FilterSizeCube); - + float3 samplePosition = gBuffer.WorldPos; +#if !LIGHTING_NO_DIRECTIONAL + // Apply normal offset bias + samplePosition += GetShadowPositionOffset(shadow.NormalOffsetScale, NoL, gBuffer.Normal); #endif - // Apply shadow fade and sharpness - result = saturate((result - 0.5) * shadow.Sharpness + 0.5); - result = lerp(1.0f, result, shadow.Fade); + // Project position into shadow atlas UV + float4 shadowPosition; + float2 shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, samplePosition, shadowPosition); + // Sample shadow map + 
result.SurfaceShadow = SampleShadowMap(shadowMap, shadowMapUV, shadowPosition.z); + + // Increase the sharpness for higher cascades to match the filter radius + const float SharpnessScale[MaxNumCascades] = { 1.0f, 1.5f, 3.0f, 3.5f }; + shadow.Sharpness *= SharpnessScale[cascadeIndex]; + +#if defined(USE_GBUFFER_CUSTOM_DATA) + // Subsurface shadowing + BRANCH + if (IsSubsurfaceMode(gBuffer.ShadingModel)) + { + float opacity = gBuffer.CustomData.a; + shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, gBuffer.WorldPos, shadowPosition); + float shadowMapDepth = shadowMap.SampleLevel(SamplerLinearClamp, shadowMapUV, 0).r; + result.TransmissionShadow = CalculateSubsurfaceOcclusion(opacity, shadowPosition.z, shadowMapDepth); + result.TransmissionShadow = PostProcessShadow(shadow, result.TransmissionShadow); + } +#endif + + result.SurfaceShadow = PostProcessShadow(shadow, result.SurfaceShadow); return result; } -// Samples the shadow for the given spot light (PCF shadow map sampling) for the material surface (supports subsurface shadowing) -float SampleShadow(LightData light, LightShadowData shadow, Texture2D shadowMap, GBufferSample gBuffer, out float subsurfaceShadow) +// Samples the shadow for the given local light on the material surface (supports subsurface shadowing) +ShadowSample SampleLocalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, GBufferSample gBuffer, float3 L, float toLightLength, uint tileIndex) { - subsurfaceShadow = 1; - float3 toLight = light.Position - gBuffer.WorldPos; - float toLightLength = length(toLight); - float3 L = toLight / toLightLength; -#if LIGHTING_NO_DIRECTIONAL - float dirCheck = 1.0f; -#else - float dirCheck = dot(-light.Direction, L); +#if !LIGHTING_NO_DIRECTIONAL + // Skip if surface is in a full shadow + float NoL = dot(gBuffer.Normal, L); + BRANCH + if (NoL <= 0 +#if defined(USE_GBUFFER_CUSTOM_DATA) + && !IsSubsurfaceMode(gBuffer.ShadingModel) #endif + ) + return (ShadowSample)0; +#endif + + 
ShadowSample result; + result.SurfaceShadow = 1; + result.TransmissionShadow = 1; // Skip pixels outside of the light influence BRANCH - if (toLightLength > light.Radius || dirCheck < 0) - { - return 1; - } + if (toLightLength > light.Radius) + return result; + + // Load shadow data + if (light.ShadowsBufferAddress == 0) + return result; // No shadow assigned + ShadowData shadow = LoadShadowsBuffer(shadowsBuffer, light.ShadowsBufferAddress); + ShadowTileData shadowTile = LoadShadowsBufferTile(shadowsBuffer, light.ShadowsBufferAddress, tileIndex); + + float3 samplePosition = gBuffer.WorldPos; +#if !LIGHTING_NO_DIRECTIONAL + // Apply normal offset bias + samplePosition += GetShadowPositionOffset(shadow.NormalOffsetScale, NoL, gBuffer.Normal); +#endif + + // Project position into shadow atlas UV + float4 shadowPosition; + float2 shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, samplePosition, shadowPosition); + + // Sample shadow map + result.SurfaceShadow = SampleShadowMap(shadowMap, shadowMapUV, shadowPosition.z); #if defined(USE_GBUFFER_CUSTOM_DATA) // Subsurface shadowing BRANCH if (IsSubsurfaceMode(gBuffer.ShadingModel)) { - // Get subsurface material info float opacity = gBuffer.CustomData.a; - - // Project into shadow space - float4 shadowPosition = mul(float4(gBuffer.WorldPos, 1.0f), shadow.ShadowVP[0]); - shadowPosition.z -= shadow.Bias; - shadowPosition.xyz /= shadowPosition.w; - - // Sample shadow map (use single hardware sample with filtering) - float shadowMapDepth = shadowMap.SampleLevel(SamplerLinearClamp, shadowPosition.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f), 0).r; - subsurfaceShadow = CalculateSubsurfaceOcclusion(opacity, shadowPosition.z, shadowMapDepth); - - // Apply shadow fade - subsurfaceShadow = lerp(1.0f, subsurfaceShadow, shadow.Fade); + shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, gBuffer.WorldPos, shadowPosition); + float shadowMapDepth = shadowMap.SampleLevel(SamplerLinearClamp, shadowMapUV, 0).r; + 
result.TransmissionShadow = CalculateSubsurfaceOcclusion(opacity, shadowPosition.z, shadowMapDepth); + result.TransmissionShadow = PostProcessShadow(shadow, result.TransmissionShadow); } #endif - - float3 samplePosWS = gBuffer.WorldPos; - -#if !LIGHTING_NO_DIRECTIONAL - // Skip if surface is in a full shadow - float NoL = dot(gBuffer.Normal, L); - BRANCH - if (NoL <= 0) - { - return 0; - } - - // Apply normal offset bias - samplePosWS += GetShadowPositionOffset(shadow.NormalOffsetScale, NoL, gBuffer.Normal); -#endif - - // Sample shadow - return SampleShadow(light, shadow, shadowMap, samplePosWS); -} - -// Samples the shadow for the given point light (PCF shadow map sampling) -float SampleShadow(LightData light, LightShadowData shadow, TextureCube shadowMap, float3 worldPosition) -{ - float3 toLight = light.Position - worldPosition; - float toLightLength = length(toLight); - float3 L = toLight / toLightLength; - - // Skip pixels outside of the light influence - BRANCH - if (toLightLength > light.Radius) - { - return 1; - } - - // Negate direction and use normalized value - toLight = -L; - - // Figure out which cube face we're sampling from - int cubeFaceIndex = GetCubeFaceIndex(toLight); - - // Project into shadow space - float4 shadowPosition = mul(float4(worldPosition, 1.0f), shadow.ShadowVP[cubeFaceIndex]); - shadowPosition.z -= shadow.Bias; - shadowPosition.xyz /= shadowPosition.w; - -#if FilterSizeCube == 2 - - // Use single hardware sample with filtering - float result = shadowMap.SampleCmpLevelZero(ShadowSamplerPCF, toLight, shadowPosition.z); - -#else - - float3 sideVector = normalize(cross(toLight, float3(0, 0, 1))); - float3 upVector = cross(sideVector, toLight); - - float shadowMapSizeInv = 1.0f / shadow.ShadowMapSize.x; - sideVector *= shadowMapSizeInv; - upVector *= shadowMapSizeInv; - - // Use PCF filter - float result = 0; - UNROLL - for (int i = 0; i < FilterSizeCube; i++) - { - float3 cubeSamplePos = toLight + sideVector * PCFDiscSamples[i].x + 
upVector * PCFDiscSamples[i].y; - result += shadowMap.SampleCmpLevelZero(ShadowSamplerPCF, cubeSamplePos, shadowPosition.z); - } - result *= (1.0f / FilterSizeCube); - -#endif - - // Apply shadow fade and sharpness - result = saturate((result - 0.5) * shadow.Sharpness + 0.5); - result = lerp(1.0f, result, shadow.Fade); + result.SurfaceShadow = PostProcessShadow(shadow, result.SurfaceShadow); return result; } -// Samples the shadow for the given point light (PCF shadow map sampling) for the material surface (supports subsurface shadowing) -float SampleShadow(LightData light, LightShadowData shadow, TextureCube shadowMap, GBufferSample gBuffer, out float subsurfaceShadow) +// Samples the shadow for the given spot light on the material surface (supports subsurface shadowing) +ShadowSample SampleSpotLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, GBufferSample gBuffer) +{ + float3 toLight = light.Position - gBuffer.WorldPos; + float toLightLength = length(toLight); + float3 L = toLight / toLightLength; + return SampleLocalLightShadow(light, shadowsBuffer, shadowMap, gBuffer, L, toLightLength, 0); +} + +// Samples the shadow for the given point light on the material surface (supports subsurface shadowing) +ShadowSample SamplePointLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, GBufferSample gBuffer) { - subsurfaceShadow = 1; float3 toLight = light.Position - gBuffer.WorldPos; float toLightLength = length(toLight); float3 L = toLight / toLightLength; - // Skip pixels outside of the light influence - BRANCH - if (toLightLength > light.Radius) - { - return 1; - } - - // Negate direction and use normalized value - toLight = -L; - // Figure out which cube face we're sampling from - int cubeFaceIndex = GetCubeFaceIndex(toLight); + uint cubeFaceIndex = GetCubeFaceIndex(-L); -#if defined(USE_GBUFFER_CUSTOM_DATA) - // Subsurface shadowing - BRANCH - if (IsSubsurfaceMode(gBuffer.ShadingModel)) - { - // Get subsurface material info - 
float opacity = gBuffer.CustomData.a; + return SampleLocalLightShadow(light, shadowsBuffer, shadowMap, gBuffer, L, toLightLength, cubeFaceIndex); +} - // Project into shadow space - float4 shadowPosition = mul(float4(gBuffer.WorldPos, 1.0f), shadow.ShadowVP[cubeFaceIndex]); - shadowPosition.z -= shadow.Bias; - shadowPosition.xyz /= shadowPosition.w; +GBufferSample GetDummyGBufferSample(float3 worldPosition) +{ + GBufferSample gBuffer = (GBufferSample)0; + gBuffer.ShadingModel = SHADING_MODEL_LIT; + gBuffer.WorldPos = worldPosition; + return gBuffer; +} - // Sample shadow map (use single hardware sample with filtering) - float shadowMapDepth = shadowMap.SampleLevel(SamplerLinearClamp, toLight, 0).r; - subsurfaceShadow = CalculateSubsurfaceOcclusion(opacity, shadowPosition.z, shadowMapDepth); +// Samples the shadow for the given directional light at custom location +ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, float3 worldPosition, float viewDepth) +{ + GBufferSample gBuffer = GetDummyGBufferSample(worldPosition); + gBuffer.ViewPos.z = viewDepth; + return SampleDirectionalLightShadow(light, shadowsBuffer, shadowMap, gBuffer); +} - // Apply shadow fade - subsurfaceShadow = lerp(1.0f, subsurfaceShadow, shadow.Fade); - } -#endif - - float3 samplePosWS = gBuffer.WorldPos; +// Samples the shadow for the given spot light at custom location +ShadowSample SampleSpotLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, float3 worldPosition) +{ + GBufferSample gBuffer = GetDummyGBufferSample(worldPosition); + return SampleSpotLightShadow(light, shadowsBuffer, shadowMap, gBuffer); +} -#if !LIGHTING_NO_DIRECTIONAL - // Skip if surface is in a full shadow - float NoL = dot(gBuffer.Normal, L); - BRANCH - if (NoL <= 0) - { - return 0; - } - - // Apply normal offset bias - samplePosWS += GetShadowPositionOffset(shadow.NormalOffsetScale, NoL, gBuffer.Normal); -#endif - - // Sample shadow - return 
SampleShadow(light, shadow, shadowMap, samplePosWS); +// Samples the shadow for the given point light at custom location +ShadowSample SamplePointLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, float3 worldPosition) +{ + GBufferSample gBuffer = GetDummyGBufferSample(worldPosition); + return SamplePointLightShadow(light, shadowsBuffer, shadowMap, gBuffer); } #endif diff --git a/Source/Shaders/VolumetricFog.shader b/Source/Shaders/VolumetricFog.shader index 49f50a8eb..0dbbb208f 100644 --- a/Source/Shaders/VolumetricFog.shader +++ b/Source/Shaders/VolumetricFog.shader @@ -6,6 +6,7 @@ // "Physically Based and Unified Volumetric Rendering in Frostbite" - Sebastien Hillaire at Siggraph 2015 #define NO_GBUFFER_SAMPLING +#define LIGHTING_NO_DIRECTIONAL 1 // Debug voxels world space positions #define DEBUG_VOXEL_WS_POS 0 @@ -24,11 +25,10 @@ struct SkyLightData float3 MultiplyColor; float VolumetricScatteringIntensity; float3 AdditiveColor; - float Dummt0; + float Dummy0; }; META_CB_BEGIN(0, Data) - GBufferData GBuffer; float3 GlobalAlbedo; @@ -54,14 +54,11 @@ float4x4 PrevWorldToClip; float4 FrameJitterOffsets[8]; LightData DirectionalLight; -LightShadowData DirectionalLightShadow; SkyLightData SkyLight; DDGIData DDGI; - META_CB_END META_CB_BEGIN(1, PerLight) - float2 SliceToDepth; int MinZ; float LocalLightScatteringIntensity; @@ -70,8 +67,6 @@ float4 ViewSpaceBoundingSphere; float4x4 ViewToVolumeClip; LightData LocalLight; -LightShadowData LocalLightShadow; - META_CB_END // The Henyey-Greenstein phase function @@ -162,28 +157,8 @@ void GS_WriteToSlice(triangle Quad_VS2GS input[3], inout TriangleStream ShadowMapCube : register(t5); -Texture2D ShadowMapSpot : register(t6); - -float ComputeVolumeShadowing(float3 worldPosition, bool isSpotLight) -{ - float shadow = 1; - - // TODO: use single shadowmaps atlas for whole scene (with slots) - same code path for spot and point lights - if (isSpotLight) - { - shadow = SampleShadow(LocalLight, 
LocalLightShadow, ShadowMapSpot, worldPosition); - } - else - { - shadow = SampleShadow(LocalLight, LocalLightShadow, ShadowMapCube, worldPosition); - } - - return shadow; -} - +Texture2D ShadowMap : register(t0); +Buffer ShadowsBuffer : register(t1); #endif META_PS(true, FEATURE_LEVEL_SM5) @@ -225,15 +200,22 @@ float4 PS_InjectLight(Quad_GS2PS input) : SV_Target0 GetRadialLightAttenuation(LocalLight, isSpotLight, float3(0, 0, 1), distanceSqr, distanceBias * distanceBias, toLight, L, NoL, attenuation); // Peek the shadow - float shadowFactor = 1.0f; + float shadow = 1.0f; #if USE_SHADOW if (attenuation > 0) { - shadowFactor = ComputeVolumeShadowing(positionWS, isSpotLight); + if (isSpotLight) + { + shadow = SampleSpotLightShadow(LocalLight, ShadowsBuffer, ShadowMap, positionWS).SurfaceShadow; + } + else + { + shadow = SamplePointLightShadow(LocalLight, ShadowsBuffer, ShadowMap, positionWS).SurfaceShadow; + } } #endif - scattering.rgb += LocalLight.Color * (GetPhase(PhaseG, dot(L, -cameraVector)) * attenuation * shadowFactor * LocalLightScatteringIntensity); + scattering.rgb += LocalLight.Color * (GetPhase(PhaseG, dot(L, -cameraVector)) * attenuation * shadow * LocalLightScatteringIntensity); } scattering.rgb /= (float)samplesCount; @@ -281,13 +263,14 @@ Texture3D VBufferA : register(t0); Texture3D VBufferB : register(t1); Texture3D LightScatteringHistory : register(t2); Texture3D LocalShadowedLightScattering : register(t3); -Texture2DArray ShadowMapCSM : register(t4); +Texture2D ShadowMap : register(t4); +Buffer ShadowsBuffer : register(t5); #if USE_DDGI -Texture2D ProbesData : register(t5); -Texture2D ProbesDistance : register(t6); -Texture2D ProbesIrradiance : register(t7); +Texture2D ProbesData : register(t6); +Texture2D ProbesDistance : register(t7); +Texture2D ProbesIrradiance : register(t8); #else -TextureCube SkyLightImage : register(t5); +TextureCube SkyLightImage : register(t6); #endif META_CS(true, FEATURE_LEVEL_SM5) @@ -319,16 +302,8 @@ void 
CS_LightScattering(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_ float3 cameraVectorNormalized = cameraVector / cameraVectorLength; // Directional light - BRANCH - if (DirectionalLightShadow.NumCascades < 10) // NumCascades==10 if no dir light - { - // Try to sample CSM shadow at the voxel position - float shadow = 1; - if (DirectionalLightShadow.NumCascades > 0) - { - shadow = SampleShadow(DirectionalLight, DirectionalLightShadow, ShadowMapCSM, positionWS, cameraVectorLength); - } - + { + float shadow = SampleDirectionalLightShadow(DirectionalLight, ShadowsBuffer, ShadowMap, positionWS, cameraVectorLength).SurfaceShadow; lightScattering += DirectionalLight.Color * (8 * shadow * GetPhase(PhaseG, dot(DirectionalLight.Direction, cameraVectorNormalized))); } From 3d0d41ebffbdab04d888b9d5fd42f095a4bc6665 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Apr 2024 13:29:38 +0200 Subject: [PATCH 014/292] Add reducing shadows quality for smaller local lights --- Source/Engine/Renderer/ShadowsPass.cpp | 16 +++++++++++----- Source/Engine/Renderer/ShadowsPass.h | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index bbd036592..64f043d28 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -568,7 +568,7 @@ void ShadowsPass::Dispose() void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& renderContextBatch) { PROFILE_CPU(); - maxShadowsQuality = Math::Clamp(Math::Min((int32)Graphics::ShadowsQuality, (int32)renderContext.View.MaxShadowsQuality), 0, (int32)Quality::MAX - 1); + _maxShadowsQuality = Math::Clamp(Math::Min((int32)Graphics::ShadowsQuality, (int32)renderContext.View.MaxShadowsQuality), 0, (int32)Quality::MAX - 1); // Early out and skip shadows setup if no lights is actively casting shadows // RenderBuffers will automatically free any old ShadowsCustomBuffer after a few frames if 
we don't update LastFrameUsed @@ -902,14 +902,20 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende RenderContext& renderContext = renderContextBatch.GetMainContext(); const ShadowsCustomBuffer& shadows = *renderContext.Buffers->FindCustomBuffer(TEXT("Shadows")); ASSERT(shadows.LastFrameUsed == Engine::FrameCount); - const ShadowAtlasLight& atlasLight = shadows.Lights.At(light.ID); const float sphereModelScale = 3.0f; auto& view = renderContext.View; auto shader = _shader->GetShader(); const bool isLocalLight = light.IsPointLight || light.IsSpotLight; - - // TODO: here we can use lower shadows quality based on light distance to view (LOD switching) and per light setting for max quality - int32 shadowQuality = maxShadowsQuality; + int32 shadowQuality = _maxShadowsQuality; + if (isLocalLight) + { + // Reduce shadows quality for smaller lights + if (light.ScreenSize < 0.25f) + shadowQuality--; + if (light.ScreenSize < 0.1f) + shadowQuality--; + shadowQuality = Math::Max(shadowQuality, 0); + } // Setup shader data Data sperLight; diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index 17d21b6d6..5f8558abd 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -22,7 +22,7 @@ private: GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowPoint; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowSpot; PixelFormat _shadowMapFormat; // Cached on initialization - int32 maxShadowsQuality = 0; // Cached state for the current frame rendering (setup via Prepare) + int32 _maxShadowsQuality = 0; // Cached state for the current frame rendering (setup via Prepare) public: /// From 8bd409e95d5863d717c1689bd0dd9aef5aa862c2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Apr 2024 14:35:22 +0200 Subject: [PATCH 015/292] DIsable certain shader features in Volumetric Fog shader --- Source/Shaders/VolumetricFog.shader | 2 ++ 1 file changed, 2 insertions(+) 
diff --git a/Source/Shaders/VolumetricFog.shader b/Source/Shaders/VolumetricFog.shader index 0dbbb208f..0032149b2 100644 --- a/Source/Shaders/VolumetricFog.shader +++ b/Source/Shaders/VolumetricFog.shader @@ -7,6 +7,8 @@ #define NO_GBUFFER_SAMPLING #define LIGHTING_NO_DIRECTIONAL 1 +#define LIGHTING_NO_SPECULAR 0 +#define SHADOWS_QUALITY 0 // Debug voxels world space positions #define DEBUG_VOXEL_WS_POS 0 From 0cc6669cbdfc58617348bbe7406ebc239aa42d46 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 5 Apr 2024 10:59:34 +0200 Subject: [PATCH 016/292] Reimplement cascaded shadow maps blending via dithering --- .../Editor/Windows/GraphicsQualityWindow.cs | 9 -------- Source/Engine/Core/Config/GraphicsSettings.h | 3 ++- Source/Engine/Graphics/Graphics.cpp | 1 - Source/Engine/Graphics/Graphics.h | 3 ++- Source/Engine/Graphics/RenderTools.cpp | 8 +++++++ Source/Engine/Graphics/RenderTools.h | 4 ++++ .../GI/DynamicDiffuseGlobalIllumination.cpp | 15 ++----------- .../Renderer/ScreenSpaceReflectionsPass.cpp | 9 +------- Source/Engine/Renderer/ShadowsPass.cpp | 18 ++++----------- Source/Shaders/Shadows.shader | 6 +++-- Source/Shaders/ShadowsSampling.hlsl | 22 ++++++++++++++++--- 11 files changed, 46 insertions(+), 52 deletions(-) diff --git a/Source/Editor/Windows/GraphicsQualityWindow.cs b/Source/Editor/Windows/GraphicsQualityWindow.cs index 59cd61f25..a1b6b3eab 100644 --- a/Source/Editor/Windows/GraphicsQualityWindow.cs +++ b/Source/Editor/Windows/GraphicsQualityWindow.cs @@ -1,6 +1,5 @@ // Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
-using System.Collections.Generic; using System.ComponentModel; using FlaxEditor.CustomEditors; using FlaxEngine; @@ -96,14 +95,6 @@ namespace FlaxEditor.Windows set => Graphics.ShadowMapsQuality = value; } - [DefaultValue(false)] - [EditorOrder(1320), EditorDisplay("Quality", "Allow CSM Blending"), Tooltip("Enables cascades splits blending for directional light shadows.")] - public bool AllowCSMBlending - { - get => Graphics.AllowCSMBlending; - set => Graphics.AllowCSMBlending = value; - } - [NoSerialize, DefaultValue(1.0f), Limit(0.05f, 5, 0)] [EditorOrder(1400), EditorDisplay("Quality")] [Tooltip("The scale of the rendering resolution relative to the output dimensions. If lower than 1 the scene and postprocessing will be rendered at a lower resolution and upscaled to the output backbuffer.")] diff --git a/Source/Engine/Core/Config/GraphicsSettings.h b/Source/Engine/Core/Config/GraphicsSettings.h index 97c5f81cc..a3a5e7ded 100644 --- a/Source/Engine/Core/Config/GraphicsSettings.h +++ b/Source/Engine/Core/Config/GraphicsSettings.h @@ -61,9 +61,10 @@ public: /// /// Enables cascades splits blending for directional light shadows. + /// [Deprecated in v1.9] /// API_FIELD(Attributes="EditorOrder(1320), DefaultValue(false), EditorDisplay(\"Quality\", \"Allow CSM Blending\")") - bool AllowCSMBlending = false; + DEPRECATED bool AllowCSMBlending = false; /// /// Default probes cubemap resolution (use for Environment Probes, can be overriden per-actor). 
diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 1e834ff55..954fc7c73 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -65,7 +65,6 @@ void GraphicsSettings::Apply() Graphics::VolumetricFogQuality = VolumetricFogQuality; Graphics::ShadowsQuality = ShadowsQuality; Graphics::ShadowMapsQuality = ShadowMapsQuality; - Graphics::AllowCSMBlending = AllowCSMBlending; Graphics::GlobalSDFQuality = GlobalSDFQuality; Graphics::GIQuality = GIQuality; Graphics::PostProcessSettings = ::PostProcessSettings(); diff --git a/Source/Engine/Graphics/Graphics.h b/Source/Engine/Graphics/Graphics.h index 7f49d450c..55ded56e5 100644 --- a/Source/Engine/Graphics/Graphics.h +++ b/Source/Engine/Graphics/Graphics.h @@ -50,8 +50,9 @@ public: /// /// Enables cascades splits blending for directional light shadows. + /// [Deprecated in v1.9] /// - API_FIELD() static bool AllowCSMBlending; + API_FIELD() DEPRECATED static bool AllowCSMBlending; /// /// The Global SDF quality. Controls the volume texture resolution and amount of cascades to use. 
diff --git a/Source/Engine/Graphics/RenderTools.cpp b/Source/Engine/Graphics/RenderTools.cpp index f43fc8c75..edd30af7a 100644 --- a/Source/Engine/Graphics/RenderTools.cpp +++ b/Source/Engine/Graphics/RenderTools.cpp @@ -552,6 +552,14 @@ void RenderTools::ComputeCascadeUpdateFrequency(int32 cascadeIndex, int32 cascad } } +float RenderTools::ComputeTemporalTime() +{ + const float time = Time::Draw.UnscaledTime.GetTotalSeconds(); + const float scale = 10; + const float integral = roundf(time / scale) * scale; + return time - integral; +} + void RenderTools::CalculateTangentFrame(FloatR10G10B10A2& resultNormal, FloatR10G10B10A2& resultTangent, const Float3& normal) { // Calculate tangent diff --git a/Source/Engine/Graphics/RenderTools.h b/Source/Engine/Graphics/RenderTools.h index 06b059975..07fcab89a 100644 --- a/Source/Engine/Graphics/RenderTools.h +++ b/Source/Engine/Graphics/RenderTools.h @@ -121,6 +121,10 @@ public: return (frameIndex % updateFrequency == updatePhrase) || updateForce; } + // Calculates temporal offset in the dithering factor that gets cleaned out by TAA. + // Returns 0-1 value based on unscaled draw time for temporal effects to reduce artifacts from screen-space dithering when using Temporal Anti-Aliasing. 
+ static float ComputeTemporalTime(); + static void CalculateTangentFrame(FloatR10G10B10A2& resultNormal, FloatR10G10B10A2& resultTangent, const Float3& normal); static void CalculateTangentFrame(FloatR10G10B10A2& resultNormal, FloatR10G10B10A2& resultTangent, const Float3& normal, const Float3& tangent); }; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 56d1194d5..e488bab81 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -13,13 +13,13 @@ #include "Engine/Engine/Engine.h" #include "Engine/Content/Content.h" #include "Engine/Debug/DebugDraw.h" -#include "Engine/Engine/Time.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderTargetPool.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Level/Actors/BrushMode.h" #include "Engine/Renderer/GBufferPass.h" @@ -480,18 +480,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont auto& cascade = ddgiData.Cascades[cascadeIndex]; data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0); } - if (renderContext.List->Setup.UseTemporalAAJitter) - { - // Use temporal offset in the dithering factor (gets cleaned out by TAA) - const float time = Time::Draw.UnscaledTime.GetTotalSeconds(); - const float scale = 10; - const float integral = roundf(time / scale) * scale; - data.TemporalTime = time - integral; - } - else - { - data.TemporalTime = 0.0f; - } + data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? 
RenderTools::ComputeTemporalTime() : 0.0f; GBufferPass::SetInputs(renderContext.View, data.GBuffer); context->UpdateCB(_cb0, &data); context->BindCB(0, _cb0); diff --git a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp index b3d0ea6ba..5e758d148 100644 --- a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp +++ b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp @@ -11,7 +11,6 @@ #include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderBuffers.h" -#include "Engine/Engine/Time.h" #include "Engine/Platform/Window.h" #include "Utils/MultiScaler.h" #include "Engine/Engine/Engine.h" @@ -247,13 +246,7 @@ void ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPUTexture data.TemporalEffect = useTemporal ? 1.0f : 0.0f; if (useTemporal) { - const float time = Time::Draw.UnscaledTime.GetTotalSeconds(); - - // Keep time in smaller range to prevent temporal noise errors - const double scale = 10; - const double integral = round(time / scale) * scale; - data.TemporalTime = static_cast(time - integral); - + data.TemporalTime = RenderTools::ComputeTemporalTime(); buffers->LastFrameTemporalSSR = Engine::FrameCount; if (!buffers->TemporalSSR || buffers->TemporalSSR->Width() != temporalWidth || buffers->TemporalSSR->Height() != temporalHeight) { diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 64f043d28..aa304d43b 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -27,7 +27,8 @@ PACK_STRUCT(struct Data{ ShaderLightData Light; Matrix WVP; Matrix ViewProjectionMatrix; - Float2 Dummy0; + float Dummy0; + float TemporalTime; float ContactShadowsDistance; float ContactShadowsLength; }); @@ -62,7 +63,6 @@ struct ShadowAtlasLight int32 ContextCount; uint16 Resolution; uint16 TilesNeeded; - bool BlendCSM; float Sharpness, Fade, NormalOffsetScale, 
Bias, FadeDistance; Float4 CascadeSplits; ShadowsAtlasTile* Tiles[MaxTiles]; @@ -294,15 +294,7 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r Float3 lightDirection = light.Direction; float shadowsDistance = Math::Min(view.Far, light.ShadowsDistance); int32 csmCount = Math::Clamp(light.CascadeCount, 0, MAX_CSM_CASCADES); - bool blendCSM = Graphics::AllowCSMBlending; const auto shadowMapsSize = (float)atlasLight.Resolution; -#if USE_EDITOR - if (IsRunningRadiancePass) - blendCSM = false; -#elif PLATFORM_SWITCH || PLATFORM_IOS || PLATFORM_ANDROID - // Disable cascades blending on low-end platforms - blendCSM = false; -#endif // Views with orthographic cameras cannot use cascades, we force it to 1 shadow map here if (view.Projection.M44 == 1.0f) @@ -397,7 +389,6 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r // Init shadow data atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); atlasLight.ContextCount = csmCount; - atlasLight.BlendCSM = blendCSM; renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); // Create the different view and projection matrices for each split @@ -413,9 +404,7 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r // Calculate cascade split frustum corners in view space for (int32 j = 0; j < 4; j++) { - float overlap = 0; - if (blendCSM) - overlap = 0.2f * (splitMinRatio - oldSplitMinRatio); + float overlap = 0.1f * (splitMinRatio - oldSplitMinRatio); // CSM blending overlap const auto frustumRangeVS = mainCache->FrustumCornersVs[j + 4] - mainCache->FrustumCornersVs[j]; frustumCorners[j] = mainCache->FrustumCornersVs[j] + frustumRangeVS * (splitMinRatio - overlap); frustumCorners[j + 4] = mainCache->FrustumCornersVs[j] + frustumRangeVS * splitMaxRatio; @@ -927,6 +916,7 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende else if (light.IsSpotLight) 
((RenderSpotLightData&)light).SetShaderData(sperLight.Light, true); Matrix::Transpose(view.ViewProjection(), sperLight.ViewProjectionMatrix); + sperLight.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f; sperLight.ContactShadowsDistance = light.ShadowsDistance; sperLight.ContactShadowsLength = EnumHasAnyFlags(view.Flags, ViewFlags::ContactShadows) ? light.ContactShadowsLength : 0.0f; if (isLocalLight) diff --git a/Source/Shaders/Shadows.shader b/Source/Shaders/Shadows.shader index aff46ac8e..92c2d0815 100644 --- a/Source/Shaders/Shadows.shader +++ b/Source/Shaders/Shadows.shader @@ -1,6 +1,7 @@ // Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. #define USE_GBUFFER_CUSTOM_DATA +#define SHADOWS_CSM_BLENDING 1 #include "./Flax/Common.hlsl" #include "./Flax/GBuffer.hlsl" @@ -12,7 +13,8 @@ GBufferData GBuffer; LightData Light; float4x4 WVP; float4x4 ViewProjectionMatrix; -float2 Dummy0; +float Dummy0; +float TemporalTime; float ContactShadowsDistance; float ContactShadowsLength; META_CB_END @@ -115,7 +117,7 @@ float4 PS_DirLight(Quad_VS2PS input) : SV_Target0 GBufferSample gBuffer = SampleGBuffer(gBufferData, input.TexCoord); // Sample shadow - ShadowSample shadow = SampleDirectionalLightShadow(Light, ShadowsBuffer, ShadowMap, gBuffer); + ShadowSample shadow = SampleDirectionalLightShadow(Light, ShadowsBuffer, ShadowMap, gBuffer, TemporalTime); #if CONTACT_SHADOWS // Calculate screen-space contact shadow diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index d8510011a..20806daf6 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -6,6 +6,9 @@ #include "./Flax/ShadowsCommon.hlsl" #include "./Flax/GBufferCommon.hlsl" #include "./Flax/LightingCommon.hlsl" +#ifdef SHADOWS_CSM_BLENDING +#include "./Flax/Random.hlsl" +#endif #if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 #define SAMPLE_SHADOW_MAP(shadowMap, shadowUV, sceneDepth) 
shadowMap.SampleCmpLevelZero(ShadowSamplerLinear, shadowUV, sceneDepth) @@ -77,7 +80,7 @@ float SampleShadowMap(Texture2D shadowMap, float2 shadowMapUV, float scen } // Samples the shadow for the given directional light on the material surface (supports subsurface shadowing) -ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, GBufferSample gBuffer) +ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, GBufferSample gBuffer, float dither = 0.0f) { #if !LIGHTING_NO_DIRECTIONAL // Skip if surface is in a full shadow @@ -114,6 +117,19 @@ ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadow if (viewDepth > shadow.CascadeSplits[i]) cascadeIndex = i + 1; } +#ifdef SHADOWS_CSM_BLENDING + const float BlendThreshold = 0.05f; + float nextSplit = shadow.CascadeSplits[cascadeIndex]; + float splitSize = cascadeIndex == 0 ? nextSplit : nextSplit - shadow.CascadeSplits[cascadeIndex - 1]; + float splitDist = (nextSplit - viewDepth) / splitSize; + if (splitDist <= BlendThreshold && cascadeIndex != shadow.TilesCount - 1) + { + // Blend with the next cascade but with screen-space dithering (gets cleaned out by TAA) + float lerpAmount = 1 - splitDist / BlendThreshold; + if (step(RandN2(gBuffer.ViewPos.xy + dither).x, lerpAmount)) + cascadeIndex++; + } +#endif ShadowTileData shadowTile = LoadShadowsBufferTile(shadowsBuffer, light.ShadowsBufferAddress, cascadeIndex); float3 samplePosition = gBuffer.WorldPos; @@ -241,11 +257,11 @@ GBufferSample GetDummyGBufferSample(float3 worldPosition) } // Samples the shadow for the given directional light at custom location -ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, float3 worldPosition, float viewDepth) +ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, float3 worldPosition, float viewDepth, float dither = 0.0f) { 
GBufferSample gBuffer = GetDummyGBufferSample(worldPosition); gBuffer.ViewPos.z = viewDepth; - return SampleDirectionalLightShadow(light, shadowsBuffer, shadowMap, gBuffer); + return SampleDirectionalLightShadow(light, shadowsBuffer, shadowMap, gBuffer, dither); } // Samples the shadow for the given spot light at custom location From 3efd1e4e840f1234c494ba8c19c486bd721bf28b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 5 Apr 2024 12:48:09 +0200 Subject: [PATCH 017/292] Optimize local lights sphere mesh rendering to match the area better --- Source/Engine/Core/Math/Matrix.h | 2 +- Source/Engine/Graphics/RenderTools.cpp | 36 ++++++++------ Source/Engine/Graphics/RenderTools.h | 11 +---- Source/Engine/Renderer/LightPass.cpp | 57 +++++++--------------- Source/Engine/Renderer/ReflectionsPass.cpp | 23 +++------ Source/Engine/Renderer/ShadowsPass.cpp | 15 +++--- 6 files changed, 53 insertions(+), 91 deletions(-) diff --git a/Source/Engine/Core/Math/Matrix.h b/Source/Engine/Core/Math/Matrix.h index 0a76a98d1..03a34ecc8 100644 --- a/Source/Engine/Core/Math/Matrix.h +++ b/Source/Engine/Core/Math/Matrix.h @@ -890,7 +890,7 @@ public: /// The translation. /// Angle of rotation in radians. Angles are measured clockwise when looking along the rotation axis toward the origin. /// The scaling. - /// When the method completes, contains the created rotation matrix. + /// When the method completes, contains the created transformation matrix. static void Transformation(const Float3& scaling, const Quaternion& rotation, const Float3& translation, Matrix& result); // Creates a 3D affine transformation matrix. 
diff --git a/Source/Engine/Graphics/RenderTools.cpp b/Source/Engine/Graphics/RenderTools.cpp index edd30af7a..1679326bc 100644 --- a/Source/Engine/Graphics/RenderTools.cpp +++ b/Source/Engine/Graphics/RenderTools.cpp @@ -587,6 +587,26 @@ void RenderTools::CalculateTangentFrame(FloatR10G10B10A2& resultNormal, FloatR10 resultTangent = Float1010102(tangent * 0.5f + 0.5f, sign); } +void RenderTools::ComputeSphereModelDrawMatrix(const RenderView& view, const Float3& position, float radius, Matrix& resultWorld, bool& resultIsViewInside) +{ + // Construct world matrix + constexpr float sphereModelScale = 0.0202f; // Manually tweaked for 'Engine/Models/Sphere' + const float scaling = radius * sphereModelScale; + resultWorld = Matrix::Identity; + resultWorld.M11 = scaling; + resultWorld.M22 = scaling; + resultWorld.M33 = scaling; + resultWorld.M41 = position.X; + resultWorld.M42 = position.Y; + resultWorld.M43 = position.Z; + + // Check if view is inside the sphere + float viewToCenter = Float3::Distance(view.Position, position); + //if (radius + viewToCenter > view.Far) + // radius = view.Far - viewToCenter; // Clamp radius + resultIsViewInside = viewToCenter - radius < 5.0f; // Manually tweaked bias +} + int32 MipLevelsCount(int32 width, bool useMipLevels) { if (!useMipLevels) @@ -644,22 +664,6 @@ int32 MipLevelsCount(int32 width, int32 height, int32 depth, bool useMipLevels) return result; } -float ViewToCenterLessRadius(const RenderView& view, const Float3& center, float radius) -{ - // Calculate distance from view to sphere center - float viewToCenter = Float3::Distance(view.Position, center); - - // Check if need to fix the radius - //if (radius + viewToCenter > view.Far) - { - // Clamp radius - //radius = view.Far - viewToCenter; - } - - // Calculate result - return viewToCenter - radius; -} - void MeshBase::SetMaterialSlotIndex(int32 value) { if (value < 0 || value >= _model->MaterialSlots.Count()) diff --git a/Source/Engine/Graphics/RenderTools.h 
b/Source/Engine/Graphics/RenderTools.h index 07fcab89a..00d3398cd 100644 --- a/Source/Engine/Graphics/RenderTools.h +++ b/Source/Engine/Graphics/RenderTools.h @@ -127,6 +127,8 @@ public: static void CalculateTangentFrame(FloatR10G10B10A2& resultNormal, FloatR10G10B10A2& resultTangent, const Float3& normal); static void CalculateTangentFrame(FloatR10G10B10A2& resultNormal, FloatR10G10B10A2& resultTangent, const Float3& normal, const Float3& tangent); + + static void ComputeSphereModelDrawMatrix(const RenderView& view, const Float3& position, float radius, Matrix& resultWorld, bool& resultIsViewInside); }; // Calculate mip levels count for a texture 1D @@ -149,12 +151,3 @@ extern int32 MipLevelsCount(int32 width, int32 height, bool useMipLevels = true) // @param useMipLevels True if use mip levels, otherwise false (use only 1 mip) // @returns Mip levels count extern int32 MipLevelsCount(int32 width, int32 height, int32 depth, bool useMipLevels = true); - -/// -/// Calculate distance from view center to the sphere center less sphere radius, clamped to fit view far plane -/// -/// Render View -/// Sphere center -/// Sphere radius -/// Distance from view center to the sphere center less sphere radius -extern float ViewToCenterLessRadius(const RenderView& view, const Float3& center, float radius); diff --git a/Source/Engine/Renderer/LightPass.cpp b/Source/Engine/Renderer/LightPass.cpp index e1330ece5..72fcc5a0c 100644 --- a/Source/Engine/Renderer/LightPass.cpp +++ b/Source/Engine/Renderer/LightPass.cpp @@ -40,7 +40,7 @@ bool LightPass::Init() // Load assets _shader = Content::LoadAsyncInternal(TEXT("Shaders/Lights")); - _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/SphereLowPoly")); + _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/Sphere")); if (_shader == nullptr || _sphereModel == nullptr) { return true; @@ -96,7 +96,7 @@ bool LightPass::setupResources() psDesc.BlendMode = BlendingMode::Add; psDesc.BlendMode.RenderTargetWriteMask = 
BlendingMode::ColorWrite::RGB; psDesc.VS = shader->GetVS("VS_Model"); - psDesc.CullMode = CullMode::Inverted; + psDesc.CullMode = CullMode::TwoSided; if (_psLightPointInverted.Create(psDesc, shader, "PS_Point")) return true; psDesc.CullMode = CullMode::Normal; @@ -110,7 +110,7 @@ bool LightPass::setupResources() psDesc.BlendMode = BlendingMode::Add; psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RGB; psDesc.VS = shader->GetVS("VS_Model"); - psDesc.CullMode = CullMode::Inverted; + psDesc.CullMode = CullMode::TwoSided; if (_psLightSpotInverted.Create(psDesc, shader, "PS_Spot")) return true; psDesc.CullMode = CullMode::Normal; @@ -128,7 +128,7 @@ bool LightPass::setupResources() psDesc.PS = shader->GetPS("PS_Sky"); if (_psLightSkyNormal->Init(psDesc)) return true; - psDesc.CullMode = CullMode::Inverted; + psDesc.CullMode = CullMode::TwoSided; if (_psLightSkyInverted->Init(psDesc)) return true; } @@ -183,7 +183,6 @@ void LightPass::SetupLights(RenderContext& renderContext, RenderContextBatch& re void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureView* lightBuffer) { - const float sphereModelScale = 3.0f; if (checkIfSkipPass()) return; PROFILE_GPU_CPU("Lights"); @@ -230,6 +229,7 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV // Temporary data PerLight perLight; PerFrame perFrame; + auto& sphereMesh = _sphereModel->LODs.Get()[0].Meshes.Get()[0]; // Bind output GPUTexture* depthBuffer = renderContext.Buffers->DepthBuffer; @@ -266,19 +266,12 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV { PROFILE_GPU_CPU_NAMED("Point Light"); auto& light = mainCache->PointLights[lightIndex]; - float lightRadius = light.Radius; - Float3 lightPosition = light.Position; bool useIES = light.IESTexture != nullptr; - // Get distance from view center to light center less radius (check if view is inside a sphere) - float distance = ViewToCenterLessRadius(view, lightPosition, 
lightRadius * sphereModelScale); - bool isViewInside = distance < 0; - // Calculate world view projection matrix for the light sphere - Matrix world, wvp, matrix; - Matrix::Scaling(lightRadius * sphereModelScale, wvp); - Matrix::Translation(lightPosition, matrix); - Matrix::Multiply(wvp, matrix, world); + Matrix world, wvp; + bool isViewInside; + RenderTools::ComputeSphereModelDrawMatrix(renderContext.View, light.Position, light.Radius, world, isViewInside); Matrix::Multiply(world, view.ViewProjection(), wvp); // Fullscreen shadow mask rendering @@ -306,7 +299,7 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV context->BindCB(1, cb1); int32 permutationIndex = (disableSpecular ? 1 : 0) + (useIES ? 2 : 0); context->SetState((isViewInside ? _psLightPointInverted : _psLightPointNormal).Get(permutationIndex)); - _sphereModel->Render(context); + sphereMesh.Render(context); } context->UnBindCB(0); @@ -316,19 +309,12 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV { PROFILE_GPU_CPU_NAMED("Spot Light"); auto& light = mainCache->SpotLights[lightIndex]; - float lightRadius = light.Radius; - Float3 lightPosition = light.Position; bool useIES = light.IESTexture != nullptr; - // Get distance from view center to light center less radius (check if view is inside a sphere) - float distance = ViewToCenterLessRadius(view, lightPosition, lightRadius * sphereModelScale); - bool isViewInside = distance < 0; - // Calculate world view projection matrix for the light sphere - Matrix world, wvp, matrix; - Matrix::Scaling(lightRadius * sphereModelScale, wvp); - Matrix::Translation(lightPosition, matrix); - Matrix::Multiply(wvp, matrix, world); + Matrix world, wvp; + bool isViewInside; + RenderTools::ComputeSphereModelDrawMatrix(renderContext.View, light.Position, light.Radius, world, isViewInside); Matrix::Multiply(world, view.ViewProjection(), wvp); // Fullscreen shadow mask rendering @@ -356,7 +342,7 @@ void 
LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV context->BindCB(1, cb1); int32 permutationIndex = (disableSpecular ? 1 : 0) + (useIES ? 2 : 0); context->SetState((isViewInside ? _psLightSpotInverted : _psLightSpotNormal).Get(permutationIndex)); - _sphereModel->Render(context); + sphereMesh.Render(context); } context->UnBindCB(0); @@ -395,21 +381,12 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV for (int32 lightIndex = 0; lightIndex < mainCache->SkyLights.Count(); lightIndex++) { PROFILE_GPU_CPU_NAMED("Sky Light"); - - // Cache data auto& light = mainCache->SkyLights[lightIndex]; - float lightRadius = light.Radius; - Float3 lightPosition = light.Position; - - // Get distance from view center to light center less radius (check if view is inside a sphere) - float distance = ViewToCenterLessRadius(view, lightPosition, lightRadius * sphereModelScale); - bool isViewInside = distance < 0; // Calculate world view projection matrix for the light sphere - Matrix world, wvp, matrix; - Matrix::Scaling(lightRadius * sphereModelScale, wvp); - Matrix::Translation(lightPosition, matrix); - Matrix::Multiply(wvp, matrix, world); + Matrix world, wvp; + bool isViewInside; + RenderTools::ComputeSphereModelDrawMatrix(renderContext.View, light.Position, light.Radius, world, isViewInside); Matrix::Multiply(world, view.ViewProjection(), wvp); // Pack light properties buffer @@ -424,7 +401,7 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV context->BindCB(0, cb0); context->BindCB(1, cb1); context->SetState(isViewInside ? 
_psLightSkyInverted : _psLightSkyNormal); - _sphereModel->Render(context); + sphereMesh.Render(context); } RenderTargetPool::Release(shadowMask); diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index 9a136d354..3a548f8ab 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -267,12 +267,10 @@ bool ReflectionsPass::Init() // Load assets _shader = Content::LoadAsyncInternal(TEXT("Shaders/Reflections")); - _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/SphereLowPoly")); + _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/Sphere")); _preIntegratedGF = Content::LoadAsyncInternal(PRE_INTEGRATED_GF_ASSET_NAME); if (_shader == nullptr || _sphereModel == nullptr || _preIntegratedGF == nullptr) - { return true; - } #if COMPILE_WITH_DEV_ENV _shader.Get()->OnReloading.Bind(this); #endif @@ -305,7 +303,7 @@ bool ReflectionsPass::setupResources() psDesc.PS = shader->GetPS("PS_EnvProbe"); if (_psProbeNormal->Init(psDesc)) return true; - psDesc.CullMode = CullMode::Inverted; + psDesc.CullMode = CullMode::TwoSided; if (_psProbeInverted->Init(psDesc)) return true; } @@ -399,21 +397,15 @@ void ReflectionsPass::Render(RenderContext& renderContext, GPUTextureView* light Sorting::QuickSort(renderContext.List->EnvironmentProbes.Get(), renderContext.List->EnvironmentProbes.Count(), &SortProbes); // Render all env probes + auto& sphereMesh = _sphereModel->LODs.Get()[0].Meshes.Get()[0]; for (int32 i = 0; i < probesCount; i++) { - // Cache data const RenderEnvironmentProbeData& probe = renderContext.List->EnvironmentProbes.Get()[i]; - // Get distance from view center to light center less radius (check if view is inside a sphere) - const float sphereModelScale = 2.0f; - float distance = ViewToCenterLessRadius(view, probe.Position, probe.Radius); - bool isViewInside = distance < 0; - // Calculate world view projection matrix for the light sphere - Matrix world, 
wvp, matrix; - Matrix::Scaling(probe.Radius * sphereModelScale, wvp); - Matrix::Translation(probe.Position, matrix); - Matrix::Multiply(wvp, matrix, world); + Matrix world, wvp; + bool isViewInside; + RenderTools::ComputeSphereModelDrawMatrix(renderContext.View, probe.Position, probe.Radius, world, isViewInside); Matrix::Multiply(world, view.ViewProjection(), wvp); // Pack probe properties buffer @@ -424,9 +416,8 @@ void ReflectionsPass::Render(RenderContext& renderContext, GPUTextureView* light context->UpdateCB(cb, &data); context->BindCB(0, cb); context->BindSR(4, probe.Texture); - context->SetState(isViewInside ? _psProbeInverted : _psProbeNormal); - _sphereModel->Render(context); + sphereMesh.Render(context); } context->UnBindSR(4); diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index aa304d43b..7da06041c 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -158,7 +158,7 @@ bool ShadowsPass::Init() // Load assets _shader = Content::LoadAsyncInternal(TEXT("Shaders/Shadows")); - _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/SphereLowPoly")); + _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/Sphere")); if (_shader == nullptr || _sphereModel == nullptr) return true; @@ -891,7 +891,6 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende RenderContext& renderContext = renderContextBatch.GetMainContext(); const ShadowsCustomBuffer& shadows = *renderContext.Buffers->FindCustomBuffer(TEXT("Shadows")); ASSERT(shadows.LastFrameUsed == Engine::FrameCount); - const float sphereModelScale = 3.0f; auto& view = renderContext.View; auto shader = _shader->GetShader(); const bool isLocalLight = light.IsPointLight || light.IsSpotLight; @@ -922,14 +921,12 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende if (isLocalLight) { // Calculate world view projection matrix for the light sphere - Matrix 
world, wvp, matrix; - Matrix::Scaling(((RenderLocalLightData&)light).Radius * sphereModelScale, wvp); - Matrix::Translation(light.Position, matrix); - Matrix::Multiply(wvp, matrix, world); + Matrix world, wvp; + bool isInside; + RenderTools::ComputeSphereModelDrawMatrix(renderContext.View, light.Position, ((RenderLocalLightData&)light).Radius, world, isInside); Matrix::Multiply(world, view.ViewProjection(), wvp); Matrix::Transpose(wvp, sperLight.WVP); } - // TODO: reimplement cascades blending for directional lights (but with dithering) // Render shadow in screen space GPUConstantBuffer* cb0 = shader->GetCB(0); @@ -942,12 +939,12 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende if (light.IsPointLight) { context->SetState(_psShadowPoint.Get(permutationIndex)); - _sphereModel->Render(context); + _sphereModel->LODs.Get()[0].Meshes.Get()[0].Render(context); } else if (light.IsSpotLight) { context->SetState(_psShadowSpot.Get(permutationIndex)); - _sphereModel->Render(context); + _sphereModel->LODs.Get()[0].Meshes.Get()[0].Render(context); } else //if (light.IsDirectionalLight) { From 4c8528dcae507e43303a61475dda2c4930fde6b4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 5 Apr 2024 12:48:34 +0200 Subject: [PATCH 018/292] Remove branch macro as it's just texture sample in lights shader --- Source/Shaders/Lights.shader | 3 --- 1 file changed, 3 deletions(-) diff --git a/Source/Shaders/Lights.shader b/Source/Shaders/Lights.shader index eb11dc8a8..363bdb9a1 100644 --- a/Source/Shaders/Lights.shader +++ b/Source/Shaders/Lights.shader @@ -60,7 +60,6 @@ void PS_Directional(Quad_VS2PS input, out float4 output : SV_Target0) // Sample shadow mask float4 shadowMask = 1; - BRANCH if (Light.ShadowsBufferAddress != 0) { shadowMask = SAMPLE_RT(Shadow, input.TexCoord); @@ -97,7 +96,6 @@ void PS_Point(Model_VS2PS input, out float4 output : SV_Target0) // Sample shadow mask float4 shadowMask = 1; - BRANCH if (Light.ShadowsBufferAddress != 0) { 
shadowMask = SAMPLE_RT(Shadow, uv); @@ -139,7 +137,6 @@ void PS_Spot(Model_VS2PS input, out float4 output : SV_Target0) // Sample shadow mask float4 shadowMask = 1; - BRANCH if (Light.ShadowsBufferAddress != 0) { shadowMask = SAMPLE_RT(Shadow, uv); From 708fba5136aa07a22bfd84a484a006dfcfe48a3f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 8 Apr 2024 00:04:57 +0200 Subject: [PATCH 019/292] Add variable rate update for shadow maps atlas based on distance to light --- .../Engine/Level/Actors/DirectionalLight.cpp | 2 + Source/Engine/Level/Actors/Light.cpp | 4 + Source/Engine/Level/Actors/Light.h | 12 + Source/Engine/Level/Actors/PointLight.cpp | 2 + Source/Engine/Level/Actors/SpotLight.cpp | 2 + .../Graph/CPU/ParticleEmitterGraph.CPU.cpp | 4 +- Source/Engine/Renderer/RenderList.cpp | 2 +- Source/Engine/Renderer/RenderList.h | 3 + Source/Engine/Renderer/ShadowsPass.cpp | 432 ++++++++++++------ Source/Engine/Renderer/ShadowsPass.h | 12 +- Source/Shaders/ShadowsSampling.hlsl | 4 +- 11 files changed, 328 insertions(+), 151 deletions(-) diff --git a/Source/Engine/Level/Actors/DirectionalLight.cpp b/Source/Engine/Level/Actors/DirectionalLight.cpp index 3a2a44e45..3d198e042 100644 --- a/Source/Engine/Level/Actors/DirectionalLight.cpp +++ b/Source/Engine/Level/Actors/DirectionalLight.cpp @@ -38,6 +38,8 @@ void DirectionalLight::Draw(RenderContext& renderContext) data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; data.IndirectLightingIntensity = IndirectLightingIntensity; data.CastVolumetricShadow = CastVolumetricShadow; + data.ShadowsUpdateRate = ShadowsUpdateRate; + data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; data.ShadowsMode = ShadowsMode; data.CascadeCount = CascadeCount; data.Cascade1Spacing = Cascade1Spacing; diff --git a/Source/Engine/Level/Actors/Light.cpp b/Source/Engine/Level/Actors/Light.cpp index 44e54f9c0..6a0dfa726 100644 --- a/Source/Engine/Level/Actors/Light.cpp +++ b/Source/Engine/Level/Actors/Light.cpp @@ -100,6 
+100,8 @@ void LightWithShadow::Serialize(SerializeStream& stream, const void* otherObj) SERIALIZE(ShadowsDepthBias); SERIALIZE(ShadowsNormalOffsetScale); SERIALIZE(ContactShadowsLength); + SERIALIZE(ShadowsUpdateRate); + SERIALIZE(ShadowsUpdateRateAtDistance); } void LightWithShadow::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) @@ -116,4 +118,6 @@ void LightWithShadow::Deserialize(DeserializeStream& stream, ISerializeModifier* DESERIALIZE(ShadowsDepthBias); DESERIALIZE(ShadowsNormalOffsetScale); DESERIALIZE(ContactShadowsLength); + DESERIALIZE(ShadowsUpdateRate); + DESERIALIZE(ShadowsUpdateRateAtDistance); } diff --git a/Source/Engine/Level/Actors/Light.h b/Source/Engine/Level/Actors/Light.h index ed12f6fbd..afe644c27 100644 --- a/Source/Engine/Level/Actors/Light.h +++ b/Source/Engine/Level/Actors/Light.h @@ -128,6 +128,18 @@ public: API_FIELD(Attributes="EditorOrder(99), EditorDisplay(\"Shadow\"), Limit(0.0f, 0.1f, 0.001f)") float ContactShadowsLength = 0.0f; + /// + /// Frequency of shadow updates. 1 - every frame, 0.5 - every second frame, 0 - on start or change. It's the inverse value of how many frames should happen in-between shadow map updates (eg. inverse of 0.5 is 2 thus shadow will update every 2nd frame). + /// + API_FIELD(Attributes="EditorOrder(100), EditorDisplay(\"Shadow\", \"Update Rate\"), Limit(0.0f, 1.0f)") + float ShadowsUpdateRate = 1.0f; + + /// + /// Frequency of shadow updates at the maximum distance from the view at which shadows are still rendered. This value is multiplied by ShadowsUpdateRate and allows scaling the update rate in-between the shadow range. For example, if light is near view, it will get normal shadow updates but will reduce this rate when far from view. See ShadowsUpdateRate to learn more. 
+ /// + API_FIELD(Attributes="EditorOrder(105), EditorDisplay(\"Shadow\", \"Update Rate At Distance\"), Limit(0.0f, 1.0f)") + float ShadowsUpdateRateAtDistance = 0.5f; + /// /// Describes how a visual element casts shadows. /// diff --git a/Source/Engine/Level/Actors/PointLight.cpp b/Source/Engine/Level/Actors/PointLight.cpp index ff12f8dfd..626008a54 100644 --- a/Source/Engine/Level/Actors/PointLight.cpp +++ b/Source/Engine/Level/Actors/PointLight.cpp @@ -103,6 +103,8 @@ void PointLight::Draw(RenderContext& renderContext) data.ShadowsSharpness = ShadowsSharpness; data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; data.CastVolumetricShadow = CastVolumetricShadow; + data.ShadowsUpdateRate = ShadowsUpdateRate; + data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index ab9cde364..f2b6046ee 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -153,6 +153,8 @@ void SpotLight::Draw(RenderContext& renderContext) data.ShadowsSharpness = ShadowsSharpness; data.VolumetricScatteringIntensity = VolumetricScatteringIntensity; data.CastVolumetricShadow = CastVolumetricShadow; + data.ShadowsUpdateRate = ShadowsUpdateRate; + data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp index d7441c67d..f0cf76ce4 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp @@ -391,7 +391,9 @@ void ParticleEmitterGraphCPUExecutor::Draw(ParticleEmitter* emitter, ParticleEff RenderPointLightData 
lightData; lightData.MinRoughness = 0.04f; lightData.ShadowsDistance = 2000.0f; - lightData.ShadowsStrength = 1.0f; + lightData.ShadowsStrength = 0.0f; + lightData.ShadowsUpdateRate = 1.0f; + lightData.ShadowsUpdateRateAtDistance = 0.5f; lightData.Direction = Float3::Forward; lightData.ShadowsFadeDistance = 50.0f; lightData.ShadowsNormalOffsetScale = 10.0f; diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 8f07180f3..4039c5565 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -80,7 +80,7 @@ bool RenderLocalLightData::CanRenderShadow(const RenderView& view) const const float fadeDistance = Math::Max(ShadowsFadeDistance, 0.1f); const float dstLightToView = Float3::Distance(Position, view.Position); const float fade = 1 - Math::Saturate((dstLightToView - Radius - ShadowsDistance + fadeDistance) / fadeDistance); - return fade > ZeroTolerance && RenderLightData::CanRenderShadow(view); + return fade > ZeroTolerance && Radius > 10 && RenderLightData::CanRenderShadow(view); } void RenderSpotLightData::SetShaderData(ShaderLightData& data, bool useShadow) const diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 3a7ab373e..e84660ced 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -56,6 +56,9 @@ struct RenderLightData float ScreenSize; uint32 ShadowsBufferAddress; + float ShadowsUpdateRate; + float ShadowsUpdateRateAtDistance; + RenderLightData() { Platform::MemoryClear(this, sizeof(RenderLightData)); diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 7da06041c..5f129d0be 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -18,7 +18,8 @@ #include "Engine/Renderer/Lightmaps.h" #endif -#define MaxTiles 6 +#define SHADOWS_MAX_TILES 6 +#define SHADOWS_MIN_RESOLUTION 16 #define NormalOffsetScaleTweak 100.0f 
#define LocalLightNearPlane 10.0f @@ -33,10 +34,10 @@ PACK_STRUCT(struct Data{ float ContactShadowsLength; }); -struct ShadowsAtlasTile : RectPack +struct ShadowsAtlasRectTile : RectPack { - ShadowsAtlasTile(uint16 x, uint16 y, uint16 width, uint16 height) - : RectPack(x, y, width, height) + ShadowsAtlasRectTile(uint16 x, uint16 y, uint16 width, uint16 height) + : RectPack(x, y, width, height) { } @@ -56,26 +57,25 @@ uint16 QuantizeResolution(float input) return output; } -struct ShadowAtlasLight +// State for shadow projection +struct ShadowAtlasLightTile { - uint64 LastFrameUsed; - int32 ContextIndex; - int32 ContextCount; - uint16 Resolution; - uint16 TilesNeeded; - float Sharpness, Fade, NormalOffsetScale, Bias, FadeDistance; - Float4 CascadeSplits; - ShadowsAtlasTile* Tiles[MaxTiles]; - Matrix WorldToShadow[MaxTiles]; + ShadowsAtlasRectTile* RectTile; + Matrix WorldToShadow; + float FramesToUpdate; // Amount of frames (with fraction) until the next shadow update can happen + bool SkipUpdate; + Viewport CachedViewport; // The viewport used the last time to render shadow to the atlas - ShadowAtlasLight() + void Free(ShadowsCustomBuffer* buffer) { - Platform::MemoryClear(this, sizeof(ShadowAtlasLight)); + if (RectTile) + { + RectTile->Free(buffer); + RectTile = nullptr; + } } - POD_COPYABLE(ShadowAtlasLight); - - void SetWorldToShadow(int32 index, const Matrix& shadowViewProjection) + void SetWorldToShadow(const Matrix& shadowViewProjection) { // Transform Clip Space [-1,+1]^2 to UV Space [0,1]^2 (saves MAD instruction in shader) const Matrix ClipToUV( @@ -85,7 +85,120 @@ struct ShadowAtlasLight 0.5f, 0.5f, 0.0f, 1.0f); Matrix m; Matrix::Multiply(shadowViewProjection, ClipToUV, m); - Matrix::Transpose(m, WorldToShadow[index]); + Matrix::Transpose(m, WorldToShadow); + } +}; + +// State for shadow cache sed to invalidate any prerendered shadow depths +struct ShadowAtlasLightCache +{ + bool Valid; + float ShadowsUpdateRate; + float ShadowsUpdateRateAtDistance; + 
Float3 Position; + float Radius; + Float3 Direction; + float Distance; + Float4 CascadeSplits; + + void Set(const RenderView& view, const RenderLightData& light, const Float4& cascadeSplits = Float4::Zero) + { + Valid = true; + Distance = light.ShadowsDistance; + ShadowsUpdateRate = light.ShadowsUpdateRate; + ShadowsUpdateRateAtDistance = light.ShadowsUpdateRateAtDistance; + if (light.IsDirectionalLight) + { + // Sun + Position = view.Position; + Direction = light.Direction; + CascadeSplits = cascadeSplits; + } + else + { + // Local light + const auto& localLight = (const RenderLocalLightData&)light; + Position = light.Position; + Radius = localLight.Radius; + } + } +}; + +// State for light's shadows rendering +struct ShadowAtlasLight +{ + uint64 LastFrameUsed; + int32 ContextIndex; + int32 ContextCount; + uint16 Resolution; + uint8 TilesNeeded; + uint8 TilesCount; + float Sharpness, Fade, NormalOffsetScale, Bias, FadeDistance, Distance; + Float4 CascadeSplits; + ShadowAtlasLightTile Tiles[SHADOWS_MAX_TILES]; + ShadowAtlasLightCache Cache; + + ShadowAtlasLight() + { + Platform::MemoryClear(this, sizeof(ShadowAtlasLight)); + } + + POD_COPYABLE(ShadowAtlasLight); + + float CalculateUpdateRateInv(const RenderLightData& light, float distanceFromView, bool& freezeUpdate) const + { + const float shadowsUpdateRate = light.ShadowsUpdateRate; + const float shadowsUpdateRateAtDistance = shadowsUpdateRate * light.ShadowsUpdateRateAtDistance; + float updateRate = Math::Lerp(shadowsUpdateRate, shadowsUpdateRateAtDistance, Math::Saturate(distanceFromView / Distance)); + // TODO: add global shadows update rate scale to be adjusted per-platform + freezeUpdate = updateRate <= ZeroTolerance; + if (freezeUpdate) + return 0.0f; + return 1.0f / updateRate; + } + + void ValidateCache(const RenderView& view, const RenderLightData& light) + { + if (!Cache.Valid) + return; + if (!Math::NearEqual(Cache.Distance, light.ShadowsDistance) || + !Math::NearEqual(Cache.ShadowsUpdateRate, 
light.ShadowsUpdateRate) || + !Math::NearEqual(Cache.ShadowsUpdateRateAtDistance, light.ShadowsUpdateRateAtDistance)) + { + // Invalidate + Cache.Valid = false; + } + if (light.IsDirectionalLight) + { + // Sun + if (Float3::Dot(Cache.Direction, light.Direction) < 0.999999f || + !Float3::NearEqual(Cache.Position, view.Position, 1.0f) || + !Float4::NearEqual(Cache.CascadeSplits, CascadeSplits)) + { + // Invalidate + Cache.Valid = false; + } + } + else + { + // Local light + const auto& localLight = (const RenderLocalLightData&)light; + if (!Float3::NearEqual(Cache.Position, light.Position, 1.0f) || + !Math::NearEqual(Cache.Radius, localLight.Radius)) + { + // Invalidate + Cache.Valid = false; + } + } + for (int32 i = 0; i < TilesCount && Cache.Valid; i++) + { + auto& tile = Tiles[i]; + if (tile.CachedViewport != Viewport(tile.RectTile->X, tile.RectTile->Y, tile.RectTile->Width, tile.RectTile->Height)) + { + // Invalidate + Cache.Valid = false; + } + } } }; @@ -99,7 +212,7 @@ public: GPUTexture* ShadowMapAtlas = nullptr; DynamicTypedBuffer ShadowsBuffer; GPUBufferView* ShadowsBufferView = nullptr; - ShadowsAtlasTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles + ShadowsAtlasRectTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles Dictionary Lights; ShadowsCustomBuffer() @@ -117,6 +230,7 @@ public: { auto& atlasLight = it->Value; Platform::MemoryClear(atlasLight.Tiles, sizeof(atlasLight.Tiles)); + Platform::MemoryClear(&atlasLight.Cache, sizeof(atlasLight.Cache)); } } @@ -134,12 +248,12 @@ public: } }; -void ShadowsAtlasTile::OnInsert(ShadowsCustomBuffer* buffer) +void ShadowsAtlasRectTile::OnInsert(ShadowsCustomBuffer* buffer) { buffer->AtlasPixelsUsed += (int32)Width * (int32)Height; } -void ShadowsAtlasTile::OnFree(ShadowsCustomBuffer* buffer) +void ShadowsAtlasRectTile::OnFree(ShadowsCustomBuffer* buffer) { buffer->AtlasPixelsUsed -= (int32)Width * (int32)Height; } @@ -262,6 +376,7 @@ void 
ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext shadowContext.List = RenderList::GetFromPool(); shadowContext.Buffers = renderContext.Buffers; shadowContext.Task = renderContext.Task; + shadowContext.List->Clear(); } void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, ShadowAtlasLight& atlasLight) @@ -272,17 +387,47 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r atlasLight.NormalOffsetScale = light.ShadowsNormalOffsetScale * NormalOffsetScaleTweak * (1.0f / (float)atlasLight.Resolution); atlasLight.Bias = light.ShadowsDepthBias; atlasLight.FadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); + atlasLight.Distance = Math::Min(renderContext.View.Far, light.ShadowsDistance); } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight) +bool ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight) { SetupLight(renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); // Fade shadow on distance const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - const float dstLightToView = Float3::Distance(light.Position, renderContext.View.Position); - const float fade = 1 - Math::Saturate((dstLightToView - light.Radius - light.ShadowsDistance + fadeDistance) / fadeDistance); + const float dstLightToView = Float3::Distance(light.Position, renderContext.View.Position) - light.Radius; + const float fade = 1 - Math::Saturate((dstLightToView - atlasLight.Distance + fadeDistance) / fadeDistance); atlasLight.Fade *= fade; + + // Update cached state (invalidate it if the light changed) + atlasLight.ValidateCache(renderContext.View, light); + + // Calculate update rate based on the distance to the view + bool freezeUpdate; + 
const float updateRateInv = atlasLight.CalculateUpdateRateInv(light, dstLightToView, freezeUpdate); + float& framesToUpdate = atlasLight.Tiles[0].FramesToUpdate; // Use the first tile for all local light projections to be in sync + if ((framesToUpdate > 0.0f || freezeUpdate) && atlasLight.Cache.Valid) + { + // Light state matches the cached state and the update rate allows us to reuse the cached shadow map so skip update + if (!freezeUpdate) + framesToUpdate -= 1.0f; + for (auto& tile : atlasLight.Tiles) + tile.SkipUpdate = true; + return true; + } + framesToUpdate += updateRateInv - 1.0f; + + // Cache the current state + atlasLight.Cache.Set(renderContext.View, light); + for (int32 i = 0; i < atlasLight.TilesCount; i++) + { + auto& tile = atlasLight.Tiles[i]; + tile.SkipUpdate = false; + tile.CachedViewport = Viewport(tile.RectTile->X, tile.RectTile->Y, tile.RectTile->Width, tile.RectTile->Height); + } + + return false; } void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight) @@ -290,29 +435,16 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r SetupLight(renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); const RenderView& view = renderContext.View; - auto mainCache = renderContext.List; - Float3 lightDirection = light.Direction; - float shadowsDistance = Math::Min(view.Far, light.ShadowsDistance); - int32 csmCount = Math::Clamp(light.CascadeCount, 0, MAX_CSM_CASCADES); + const int32 csmCount = atlasLight.TilesCount; const auto shadowMapsSize = (float)atlasLight.Resolution; - // Views with orthographic cameras cannot use cascades, we force it to 1 shadow map here - if (view.Projection.M44 == 1.0f) - csmCount = 1; - // Calculate cascade splits - auto cameraNear = view.Near; - auto cameraFar = view.Far; - auto cameraRange = cameraFar - cameraNear; - float minDistance; - float maxDistance; + const float 
minDistance = view.Near; + const float maxDistance = view.Near + atlasLight.Distance; + const float viewRange = view.Far - view.Near; float cascadeSplits[MAX_CSM_CASCADES]; { - minDistance = cameraNear; - maxDistance = cameraNear + shadowsDistance; - PartitionMode partitionMode = light.PartitionMode; - float pssmFactor = 0.5f; float splitDistance0 = light.Cascade1Spacing; float splitDistance1 = Math::Max(splitDistance0, light.Cascade2Spacing); float splitDistance2 = Math::Max(splitDistance1, light.Cascade3Spacing); @@ -346,8 +478,8 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r } else if (partitionMode == PartitionMode::Logarithmic || partitionMode == PartitionMode::PSSM) { + const float pssmFactor = 0.5f; const float lambda = partitionMode == PartitionMode::PSSM ? pssmFactor : 1.0f; - const auto range = maxDistance - minDistance; const auto ratio = maxDistance / minDistance; const auto logRatio = Math::Clamp(1.0f - lambda, 0.0f, 1.0f); @@ -355,7 +487,7 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r { // Compute cascade split (between znear and zfar) const float distribute = static_cast(cascadeLevel + 1) / csmCount; - float logZ = static_cast(minDistance * powf(ratio, distribute)); + float logZ = minDistance * Math::Pow(ratio, distribute); float uniformZ = minDistance + range * distribute; cascadeSplits[cascadeLevel] = Math::Lerp(uniformZ, logZ, logRatio); } @@ -363,9 +495,44 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r // Convert distance splits to ratios cascade in the range [0, 1] for (int32 i = 0; i < MAX_CSM_CASCADES; i++) - cascadeSplits[i] = (cascadeSplits[i] - cameraNear) / cameraRange; + cascadeSplits[i] = (cascadeSplits[i] - view.Near) / viewRange; } - atlasLight.CascadeSplits = view.Near + Float4(cascadeSplits) * cameraRange; + atlasLight.CascadeSplits = view.Near + Float4(cascadeSplits) * viewRange; + + // Update cached state (invalidate it 
if the light changed) + atlasLight.ValidateCache(renderContext.View, light); + + // Update cascades to check which should be updated this frame + atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); + atlasLight.ContextCount = 0; + for (int32 cascadeIndex = 0; cascadeIndex < csmCount; cascadeIndex++) + { + const float dstToCascade = atlasLight.CascadeSplits.Raw[cascadeIndex]; + bool freezeUpdate; + const float updateRateInv = atlasLight.CalculateUpdateRateInv(light, dstToCascade, freezeUpdate); + auto& tile = atlasLight.Tiles[cascadeIndex]; + if ((tile.FramesToUpdate > 0.0f || freezeUpdate) && atlasLight.Cache.Valid) + { + // Light state matches the cached state and the update rate allows us to reuse the cached shadow map so skip update + if (!freezeUpdate) + tile.FramesToUpdate -= 1.0f; + tile.SkipUpdate = true; + continue; + } + tile.FramesToUpdate += updateRateInv - 1.0f; + + // Cache the current state + tile.SkipUpdate = false; + tile.CachedViewport = Viewport(tile.RectTile->X, tile.RectTile->Y, tile.RectTile->Width, tile.RectTile->Height); + atlasLight.ContextCount++; + } + + // Init shadow data + atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); + if (atlasLight.ContextCount == 0) + return; + renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); + atlasLight.Cache.Set(renderContext.View, light, atlasLight.CascadeSplits); // Select best Up vector Float3 side = Float3::UnitX; @@ -374,37 +541,34 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r for (int32 i = 0; i < ARRAY_COUNT(vectorUps); i++) { const Float3 vectorUp = vectorUps[i]; - if (Math::Abs(Float3::Dot(lightDirection, vectorUp)) < (1.0f - 0.0001f)) + if (Math::Abs(Float3::Dot(light.Direction, vectorUp)) < (1.0f - 0.0001f)) { - side = Float3::Normalize(Float3::Cross(vectorUp, lightDirection)); - upDirection = Float3::Normalize(Float3::Cross(lightDirection, side)); + side = Float3::Normalize(Float3::Cross(vectorUp, light.Direction)); + 
upDirection = Float3::Normalize(Float3::Cross(light.Direction, side)); break; } } - // Temporary data - Float3 frustumCorners[8]; - Matrix shadowView, shadowProjection, shadowVP; - - // Init shadow data - atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); - atlasLight.ContextCount = csmCount; - renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); - // Create the different view and projection matrices for each split float splitMinRatio = 0; - float splitMaxRatio = (minDistance - cameraNear) / cameraRange; + float splitMaxRatio = (minDistance - view.Near) / viewRange; + int32 contextIndex = 0; for (int32 cascadeIndex = 0; cascadeIndex < csmCount; cascadeIndex++) { - // Cascade splits const auto oldSplitMinRatio = splitMinRatio; splitMinRatio = splitMaxRatio; splitMaxRatio = cascadeSplits[cascadeIndex]; + auto& tile = atlasLight.Tiles[cascadeIndex]; + if (tile.SkipUpdate) + continue; + // Calculate cascade split frustum corners in view space + Float3 frustumCorners[8]; for (int32 j = 0; j < 4; j++) { float overlap = 0.1f * (splitMinRatio - oldSplitMinRatio); // CSM blending overlap + const RenderList* mainCache = renderContext.List; const auto frustumRangeVS = mainCache->FrustumCornersVs[j + 4] - mainCache->FrustumCornersVs[j]; frustumCorners[j] = mainCache->FrustumCornersVs[j] + frustumRangeVS * (splitMinRatio - overlap); frustumCorners[j + 4] = mainCache->FrustumCornersVs[j] + frustumRangeVS * splitMaxRatio; @@ -438,8 +602,8 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r float shadowMapHalfSize = shadowMapsSize * 0.5f; float x = Math::Ceil(Float3::Dot(target, upDirection) * shadowMapHalfSize / boundingVSRadius) * boundingVSRadius / shadowMapHalfSize; float y = Math::Ceil(Float3::Dot(target, side) * shadowMapHalfSize / boundingVSRadius) * boundingVSRadius / shadowMapHalfSize; - float z = Float3::Dot(target, lightDirection); - target = upDirection * x + side * y + lightDirection * z; + float z = 
Float3::Dot(target, light.Direction); + target = upDirection * x + side * y + light.Direction * z; } } @@ -447,7 +611,8 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r const auto farClip = cascadeMaxBoundLS.Z - cascadeMinBoundLS.Z; // Create shadow view matrix - Matrix::LookAt(target - lightDirection * cascadeMaxBoundLS.Z, target, upDirection, shadowView); + Matrix shadowView, shadowProjection, shadowVP; + Matrix::LookAt(target - light.Direction * cascadeMaxBoundLS.Z, target, upDirection, shadowView); // Create viewport for culling with extended near/far planes due to culling issues Matrix cullingVP; @@ -474,14 +639,13 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r Matrix::Multiply(shadowView, shadowProjection, shadowVP); } - atlasLight.SetWorldToShadow(cascadeIndex, shadowVP); + tile.SetWorldToShadow(shadowVP); // Setup context for cascade - auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + cascadeIndex]; + auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; SetupRenderContext(renderContext, shadowContext); - shadowContext.List->Clear(); - shadowContext.View.Position = -lightDirection * shadowsDistance + view.Position; - shadowContext.View.Direction = lightDirection; + shadowContext.View.Position = light.Direction * -atlasLight.Distance + view.Position; + shadowContext.View.Direction = light.Direction; shadowContext.View.SetUp(shadowView, shadowProjection); shadowContext.View.CullingFrustum.SetMatrix(cullingVP); shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &view); @@ -490,54 +654,40 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight) { - SetupLight(renderContext, renderContextBatch, 
(RenderLocalLightData&)light, atlasLight); + if (SetupLight(renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight)) + return; - // Init shadow data + // Render depth to all 6 faces of the cube map atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); atlasLight.ContextCount = 6; renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); - - const auto& view = renderContext.View; - const auto shadowMapsSize = (float)atlasLight.Resolution; - - // Fade shadow on distance - const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); - const float dstLightToView = Float3::Distance(light.Position, view.Position); - const float fade = 1 - Math::Saturate((dstLightToView - light.Radius - light.ShadowsDistance + fadeDistance) / fadeDistance); - atlasLight.Fade *= fade; - - // Render depth to all 6 faces of the cube map for (int32 faceIndex = 0; faceIndex < 6; faceIndex++) { auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + faceIndex]; SetupRenderContext(renderContext, shadowContext); - shadowContext.List->Clear(); shadowContext.View.SetUpCube(LocalLightNearPlane, light.Radius, light.Position); shadowContext.View.SetFace(faceIndex); - shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &view); - atlasLight.SetWorldToShadow(faceIndex, shadowContext.View.ViewProjection()); + const auto shadowMapsSize = (float)atlasLight.Resolution; + shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &renderContext.View); + atlasLight.Tiles[faceIndex].SetWorldToShadow(shadowContext.View.ViewProjection()); } } void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight) { - SetupLight(renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight); + if (SetupLight(renderContext, renderContextBatch, (RenderLocalLightData&)light, 
atlasLight)) + return; - // Init shadow data + // Render depth to a single projection atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); atlasLight.ContextCount = 1; renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); - - const auto& view = renderContext.View; - const auto shadowMapsSize = (float)atlasLight.Resolution; - - // Render depth to a single projection auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex]; SetupRenderContext(renderContext, shadowContext); - shadowContext.List->Clear(); shadowContext.View.SetProjector(LocalLightNearPlane, light.Radius, light.Position, light.Direction, light.UpVector, light.OuterConeAngle * 2.0f); - shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &view); - atlasLight.SetWorldToShadow(0, shadowContext.View.ViewProjection()); + const auto shadowMapsSize = (float)atlasLight.Resolution; + shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &renderContext.View); + atlasLight.Tiles[0].SetWorldToShadow(shadowContext.View.ViewProjection()); } void ShadowsPass::Dispose() @@ -624,7 +774,7 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& shadows.ViewOrigin = renderContext.View.Origin; } if (!shadows.AtlasTiles) - shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); + shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); // Update/add lights for (const RenderLightData* light : shadowedLights) @@ -637,12 +787,17 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& atlasLight.Resolution = QuantizeResolution(lightResolutionFloat); // Cull too small lights - constexpr uint16 MinResolution = 16; - if (atlasLight.Resolution < MinResolution) + if (atlasLight.Resolution < SHADOWS_MIN_RESOLUTION) continue; if (light->IsDirectionalLight) - atlasLight.TilesNeeded = Math::Clamp(((const 
RenderDirectionalLightData*)light)->CascadeCount, 0, MAX_CSM_CASCADES); + { + atlasLight.TilesNeeded = Math::Clamp(((const RenderDirectionalLightData*)light)->CascadeCount, 1, MAX_CSM_CASCADES); + + // Views with orthographic cameras cannot use cascades, we force it to 1 shadow map here + if (renderContext.View.IsOrthographicProjection()) + atlasLight.TilesNeeded = 1; + } else if (light->IsPointLight) atlasLight.TilesNeeded = 6; else @@ -655,11 +810,8 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& { if (it->Value.LastFrameUsed != currentFrame) { - for (auto& tile : it->Value.Tiles) - { - if (tile) - tile->Free(&shadows); - } + for (ShadowAtlasLightTile& tile : it->Value.Tiles) + tile.Free(&shadows); shadows.Lights.Remove(it); } } @@ -696,50 +848,41 @@ RETRY_ATLAS_SETUP: } // Macro checks if light has proper amount of tiles already assigned and the resolution is matching -#define IS_LIGHT_TILE_REUSABLE (atlasLight.ContextCount == atlasLight.TilesNeeded && atlasLight.Tiles[0] && atlasLight.Tiles[0]->Width == atlasLight.Resolution) +#define IS_LIGHT_TILE_REUSABLE (atlasLight.TilesCount == atlasLight.TilesNeeded && atlasLight.Tiles[0].RectTile && atlasLight.Tiles[0].RectTile->Width == atlasLight.Resolution) // Remove incorrect tiles before allocating new ones for (RenderLightData* light : shadowedLights) { - auto& atlasLight = shadows.Lights[light->ID]; + ShadowAtlasLight& atlasLight = shadows.Lights[light->ID]; if (IS_LIGHT_TILE_REUSABLE) continue; // Remove existing tiles - for (auto& tile : atlasLight.Tiles) - { - if (tile) - { - tile->Free(&shadows); - tile = nullptr; - } - } + for (ShadowAtlasLightTile& tile : atlasLight.Tiles) + tile.Free(&shadows); } // Insert tiles into the atlas (already sorted to favor the first ones) for (RenderLightData* light : shadowedLights) { auto& atlasLight = shadows.Lights[light->ID]; - if (IS_LIGHT_TILE_REUSABLE || atlasLight.Resolution < 16) + if (IS_LIGHT_TILE_REUSABLE || atlasLight.Resolution 
< SHADOWS_MIN_RESOLUTION) continue; // Try to insert tiles bool failedToInsert = false; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesNeeded; tileIndex++) { - auto tile = shadows.AtlasTiles->Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows); - if (!tile) + auto rectTile = shadows.AtlasTiles->Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows); + if (!rectTile) { // Free any previous tiles that were added for (int32 i = 0; i < tileIndex; i++) - { - atlasLight.Tiles[i]->Free(&shadows); - atlasLight.Tiles[i] = nullptr; - } + atlasLight.Tiles[i].Free(&shadows); failedToInsert = true; break; } - atlasLight.Tiles[tileIndex] = tile; + atlasLight.Tiles[tileIndex].RectTile = rectTile; } if (failedToInsert) { @@ -757,7 +900,7 @@ RETRY_ATLAS_SETUP: // Rebuild atlas shadows.ClearTiles(); - shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); + shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); goto RETRY_ATLAS_SETUP; } } @@ -766,9 +909,14 @@ RETRY_ATLAS_SETUP: for (RenderLightData* light : shadowedLights) { auto& atlasLight = shadows.Lights[light->ID]; - if (atlasLight.Tiles[0] && atlasLight.Tiles[0]->Width == atlasLight.Resolution) + if (atlasLight.Tiles[0].RectTile && atlasLight.Tiles[0].RectTile->Width == atlasLight.Resolution) { + // Invalidate cache when whole atlas will be cleared + if (shadows.ClearShadowMapAtlas) + atlasLight.Cache.Valid = false; + light->HasShadow = true; + atlasLight.TilesCount = atlasLight.TilesNeeded; if (light->IsPointLight) SetupLight(renderContext, renderContextBatch, *(RenderPointLightData*)light, atlasLight); else if (light->IsSpotLight) @@ -787,7 +935,7 @@ RETRY_ATLAS_SETUP: for (RenderLightData* light : shadowedLights) { auto& atlasLight = shadows.Lights[light->ID]; - if (atlasLight.Tiles[0] == nullptr) + if (atlasLight.Tiles[0].RectTile == nullptr) { light->ShadowsBufferAddress = 0; // Clear to indicate no shadow continue; @@ -797,26 +945,24 @@ RETRY_ATLAS_SETUP: 
light->ShadowsBufferAddress = shadows.ShadowsBuffer.Data.Count() / sizeof(Float4); // Write shadow data (this must match HLSL) - const int32 tilesCount = atlasLight.ContextCount; { // Shadow info auto* packed = shadows.ShadowsBuffer.WriteReserve(2); - Color32 packed0x((byte)(atlasLight.Sharpness * (255.0f / 10.0f)), (byte)(atlasLight.Fade * 255.0f), tilesCount, 0); + Color32 packed0x((byte)(atlasLight.Sharpness * (255.0f / 10.0f)), (byte)(atlasLight.Fade * 255.0f), (byte)atlasLight.TilesCount, 0); packed[0] = Float4(*(const float*)&packed0x, atlasLight.FadeDistance, atlasLight.NormalOffsetScale, atlasLight.Bias); packed[1] = atlasLight.CascadeSplits; } - for (int32 tileIndex = 0; tileIndex < tilesCount; tileIndex++) + for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) { // Shadow projection info - const ShadowsAtlasTile* tile = atlasLight.Tiles[tileIndex]; - ASSERT(tile); - const Matrix& worldToShadow = atlasLight.WorldToShadow[tileIndex]; + const ShadowAtlasLightTile& tile = atlasLight.Tiles[tileIndex]; + ASSERT(tile.RectTile); auto* packed = shadows.ShadowsBuffer.WriteReserve(5); - packed[0] = Float4(tile->Width, tile->Height, tile->X, tile->Y) * atlasResolutionInv; // UV to AtlasUV via a single MAD instruction - packed[1] = worldToShadow.GetColumn1(); - packed[2] = worldToShadow.GetColumn2(); - packed[3] = worldToShadow.GetColumn3(); - packed[4] = worldToShadow.GetColumn4(); + packed[0] = Float4(tile.RectTile->Width, tile.RectTile->Height, tile.RectTile->X, tile.RectTile->Y) * atlasResolutionInv; // UV to AtlasUV via a single MAD instruction + packed[1] = tile.WorldToShadow.GetColumn1(); + packed[2] = tile.WorldToShadow.GetColumn2(); + packed[3] = tile.WorldToShadow.GetColumn3(); + packed[4] = tile.WorldToShadow.GetColumn4(); } } GPUContext* context = GPUDevice::Instance->GetMainContext(); @@ -852,14 +998,18 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) for (auto& e : shadows.Lights) { const 
ShadowAtlasLight& atlasLight = e.Value; - for (int32 tileIndex = 0; tileIndex < atlasLight.ContextCount; tileIndex++) + int32 contextIndex = 0; + for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) { - const ShadowsAtlasTile* tile = atlasLight.Tiles[tileIndex]; - if (!tile) + const ShadowAtlasLightTile& tile = atlasLight.Tiles[tileIndex]; + if (!tile.RectTile) break; + if (tile.SkipUpdate) + continue; // Set viewport for tile - context->SetViewportAndScissors(Viewport(tile->X, tile->Y, tile->Width, tile->Height)); + ASSERT_LOW_LAYER(tile.CachedViewport == Viewport(tile.RectTile->X, tile.RectTile->Y, tile.RectTile->Width, tile.RectTile->Height)); + context->SetViewportAndScissors(tile.CachedViewport); if (!shadows.ClearShadowMapAtlas) { @@ -870,7 +1020,7 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) } // Draw objects depth - auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + tileIndex]; + auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth); shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); } diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index 5f8558abd..c22a17881 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -52,12 +52,12 @@ public: static void GetShadowAtlas(const RenderBuffers* renderBuffers, GPUTexture*& shadowMapAtlas, GPUBufferView*& shadowsBuffer); private: - void SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, struct ShadowAtlasLight& atlasLight); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, 
RenderLocalLightData& light, ShadowAtlasLight& atlasLight); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight); - void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight); + static void SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext); + static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, struct ShadowAtlasLight& atlasLight); + static bool SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight); + static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight); + static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight); + static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight); #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index 20806daf6..6d9390570 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -146,8 +146,8 @@ ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadow result.SurfaceShadow = SampleShadowMap(shadowMap, shadowMapUV, shadowPosition.z); // Increase the sharpness for higher cascades to match the filter radius - const float SharpnessScale[MaxNumCascades] = { 1.0f, 1.5f, 3.0f, 3.5f }; - shadow.Sharpness *= 
SharpnessScale[cascadeIndex]; + //const float SharpnessScale[MaxNumCascades] = { 1.0f, 1.5f, 3.0f, 3.5f }; + //shadow.Sharpness *= SharpnessScale[cascadeIndex]; #if defined(USE_GBUFFER_CUSTOM_DATA) // Subsurface shadowing From ff7c986fb118d778ac43c2cd4ced4666bb3fb628 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 9 Apr 2024 16:58:22 +0200 Subject: [PATCH 020/292] Add better stability to Cascaded Shadow Maps projection --- Source/Engine/Renderer/ShadowsPass.cpp | 125 ++++++++++--------------- 1 file changed, 50 insertions(+), 75 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 5f129d0be..6625d4f1b 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -534,21 +534,6 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); atlasLight.Cache.Set(renderContext.View, light, atlasLight.CascadeSplits); - // Select best Up vector - Float3 side = Float3::UnitX; - Float3 upDirection = Float3::UnitX; - Float3 vectorUps[] = { Float3::UnitY, Float3::UnitX, Float3::UnitZ }; - for (int32 i = 0; i < ARRAY_COUNT(vectorUps); i++) - { - const Float3 vectorUp = vectorUps[i]; - if (Math::Abs(Float3::Dot(light.Direction, vectorUp)) < (1.0f - 0.0001f)) - { - side = Float3::Normalize(Float3::Cross(vectorUp, light.Direction)); - upDirection = Float3::Normalize(Float3::Cross(light.Direction, side)); - break; - } - } - // Create the different view and projection matrices for each split float splitMinRatio = 0; float splitMaxRatio = (minDistance - view.Near) / viewRange; @@ -564,81 +549,71 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r continue; // Calculate cascade split frustum corners in view space - Float3 frustumCorners[8]; + Float3 frustumCornersVs[8]; for (int32 j = 0; j < 4; j++) { - float overlap = 0.1f * (splitMinRatio - oldSplitMinRatio); // 
CSM blending overlap + float overlapWithPrevSplit = 0.1f * (splitMinRatio - oldSplitMinRatio); // CSM blending overlap const RenderList* mainCache = renderContext.List; const auto frustumRangeVS = mainCache->FrustumCornersVs[j + 4] - mainCache->FrustumCornersVs[j]; - frustumCorners[j] = mainCache->FrustumCornersVs[j] + frustumRangeVS * (splitMinRatio - overlap); - frustumCorners[j + 4] = mainCache->FrustumCornersVs[j] + frustumRangeVS * splitMaxRatio; + frustumCornersVs[j] = mainCache->FrustumCornersVs[j] + frustumRangeVS * (splitMinRatio - overlapWithPrevSplit); + frustumCornersVs[j + 4] = mainCache->FrustumCornersVs[j] + frustumRangeVS * splitMaxRatio; } - // Perform stabilization - enum StabilizationMode - { - None, - ProjectionSnapping, - ViewSnapping, - }; - const StabilizationMode stabilization = ViewSnapping; // TODO: expose to graphics settings maybe - Float3 cascadeMinBoundLS; - Float3 cascadeMaxBoundLS; - Float3 target; - { - // Make sure we are using the same direction when stabilizing - BoundingSphere boundingVS; - BoundingSphere::FromPoints(frustumCorners, ARRAY_COUNT(frustumCorners), boundingVS); + // Transform the frustum from camera view space to world-space + Float3 frustumCornersWs[8]; + for (int32 i = 0; i < 8; i++) + Float3::Transform(frustumCornersVs[i], renderContext.View.IV, frustumCornersWs[i]); - // Compute bounding box center - Float3::TransformCoordinate(boundingVS.Center, view.IV, target); - float boundingVSRadius = (float)boundingVS.Radius; - cascadeMaxBoundLS = Float3(boundingVSRadius); - cascadeMinBoundLS = -cascadeMaxBoundLS; + // Calculate the centroid of the view frustum slice + Float3 frustumCenter = Float3::Zero; + for (int32 i = 0; i < 8; i++) + frustumCenter += frustumCornersWs[i]; + frustumCenter *= 1.0f / 8.0f; - if (stabilization == ViewSnapping) - { - // Snap the target to the texel units (reference: ShaderX7 - Practical Cascaded Shadows Maps) - float shadowMapHalfSize = shadowMapsSize * 0.5f; - float x = 
Math::Ceil(Float3::Dot(target, upDirection) * shadowMapHalfSize / boundingVSRadius) * boundingVSRadius / shadowMapHalfSize; - float y = Math::Ceil(Float3::Dot(target, side) * shadowMapHalfSize / boundingVSRadius) * boundingVSRadius / shadowMapHalfSize; - float z = Float3::Dot(target, light.Direction); - target = upDirection * x + side * y + light.Direction * z; - } - } + // Calculate the radius of a bounding sphere surrounding the frustum corners + float frustumRadius = 0.0f; + for (int32 i = 0; i < 8; i++) + frustumRadius = Math::Max(frustumRadius, (frustumCornersWs[i] - frustumCenter).LengthSquared()); + frustumRadius = Math::Ceil(Math::Sqrt(frustumRadius) * 16.0f) / 16.0f; - const auto nearClip = 0.0f; - const auto farClip = cascadeMaxBoundLS.Z - cascadeMinBoundLS.Z; + // Snap cascade center to the texel size + float texelsPerUnit = (float)atlasLight.Resolution / (frustumRadius * 2.0f); + frustumCenter *= texelsPerUnit; + frustumCenter = Float3::Floor(frustumCenter); + frustumCenter /= texelsPerUnit; - // Create shadow view matrix - Matrix shadowView, shadowProjection, shadowVP; - Matrix::LookAt(target - light.Direction * cascadeMaxBoundLS.Z, target, upDirection, shadowView); + // Cascade bounds are built around the sphere at the frustum center to reduce shadow shimmering + Float3 maxExtents = Float3(frustumRadius); + Float3 minExtents = -maxExtents; + Float3 cascadeExtents = maxExtents - minExtents; - // Create viewport for culling with extended near/far planes due to culling issues - Matrix cullingVP; - { - const float cullRangeExtent = 100000.0f; - Matrix::OrthoOffCenter(cascadeMinBoundLS.X, cascadeMaxBoundLS.X, cascadeMinBoundLS.Y, cascadeMaxBoundLS.Y, -cullRangeExtent, farClip + cullRangeExtent, shadowProjection); - Matrix::Multiply(shadowView, shadowProjection, cullingVP); - } + Matrix shadowView, shadowProjection, shadowVP, cullingVP; - // Create shadow projection matrix - Matrix::OrthoOffCenter(cascadeMinBoundLS.X, cascadeMaxBoundLS.X, 
cascadeMinBoundLS.Y, cascadeMaxBoundLS.Y, nearClip, farClip, shadowProjection); + // Create view matrix + Matrix::LookAt(frustumCenter + light.Direction * minExtents.Z, frustumCenter, Float3::Up, shadowView); - // Construct shadow matrix (View * Projection) + // Create viewport for culling with extended near/far planes due to culling issues (aka pancaking) + const float cullRangeExtent = 100000.0f; + Matrix::OrthoOffCenter(minExtents.X, maxExtents.X, minExtents.Y, maxExtents.Y, -cullRangeExtent, cascadeExtents.Z + cullRangeExtent, shadowProjection); + Matrix::Multiply(shadowView, shadowProjection, cullingVP); + + // Create projection matrix + Matrix::OrthoOffCenter(minExtents.X, maxExtents.X, minExtents.Y, maxExtents.Y, 0.0f, cascadeExtents.Z, shadowProjection); Matrix::Multiply(shadowView, shadowProjection, shadowVP); - // Stabilize the shadow matrix on the projection - if (stabilization == ProjectionSnapping) - { - Float3 shadowPixelPosition = shadowVP.GetTranslation() * (shadowMapsSize * 0.5f); - shadowPixelPosition.Z = 0; - const Float3 shadowPixelPositionRounded(Math::Round(shadowPixelPosition.X), Math::Round(shadowPixelPosition.Y), 0.0f); - const Float4 shadowPixelOffset((shadowPixelPositionRounded - shadowPixelPosition) * (2.0f / shadowMapsSize), 0.0f); - shadowProjection.SetRow4(shadowProjection.GetRow4() + shadowPixelOffset); - Matrix::Multiply(shadowView, shadowProjection, shadowVP); - } + // Round the projection matrix by projecting the world-space origin and calculating the fractional offset in texel space of the shadow map + Float4 shadowOrigin = Float4(0.0f, 0.0f, 0.0f, 1.0f); + shadowOrigin = Float4::Transform(shadowOrigin, shadowVP); + shadowOrigin = shadowOrigin * (shadowMapsSize / 2.0f); + Float4 roundedOrigin = Float4::Round(shadowOrigin); + Float4 roundOffset = roundedOrigin - shadowOrigin; + roundOffset = roundOffset * (2.0f / shadowMapsSize); + roundOffset.Z = 0.0f; + roundOffset.W = 0.0f; + shadowProjection.SetRow4(shadowProjection.GetRow4() 
+ roundOffset); + // Calculate view*projection matrix + Matrix::Multiply(shadowView, shadowProjection, shadowVP); tile.SetWorldToShadow(shadowVP); // Setup context for cascade @@ -958,7 +933,7 @@ RETRY_ATLAS_SETUP: const ShadowAtlasLightTile& tile = atlasLight.Tiles[tileIndex]; ASSERT(tile.RectTile); auto* packed = shadows.ShadowsBuffer.WriteReserve(5); - packed[0] = Float4(tile.RectTile->Width, tile.RectTile->Height, tile.RectTile->X, tile.RectTile->Y) * atlasResolutionInv; // UV to AtlasUV via a single MAD instruction + packed[0] = Float4(tile.RectTile->Width - 1.0f, tile.RectTile->Height - 1.0f, tile.RectTile->X, tile.RectTile->Y) * atlasResolutionInv; // UV to AtlasUV via a single MAD instruction packed[1] = tile.WorldToShadow.GetColumn1(); packed[2] = tile.WorldToShadow.GetColumn2(); packed[3] = tile.WorldToShadow.GetColumn3(); From e7bef5e880de256563250a77d1200771011c1d6b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 9 Apr 2024 17:55:29 +0200 Subject: [PATCH 021/292] Bring back Optimized PCF sampling for shadow maps 61323f85264a40becad17f19f533846803cdd1ed --- Source/Shaders/ShadowsSampling.hlsl | 132 ++++++++++++++++++++++++++-- 1 file changed, 126 insertions(+), 6 deletions(-) diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index 6d9390570..354e5065e 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -54,9 +54,7 @@ float2 GetLightShadowAtlasUV(ShadowData shadow, ShadowTileData shadowTile, float float SampleShadowMap(Texture2D shadowMap, float2 shadowMapUV, float sceneDepth) { - // Single hardware sample with filtering float result = SAMPLE_SHADOW_MAP(shadowMap, shadowMapUV, sceneDepth); - #if SHADOWS_QUALITY == 1 result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, 0), sceneDepth); result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(0, -1), sceneDepth); @@ -64,7 +62,6 @@ float SampleShadowMap(Texture2D shadowMap, float2 shadowMapUV, float 
scen result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(1, 0), sceneDepth); result = result * (1.0f / 4.0); #elif SHADOWS_QUALITY == 2 || SHADOWS_QUALITY == 3 - // TODO: implement Percentage-Closer Soft Shadows (PCSS) for Ultra quality result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, -1), sceneDepth); result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, 0), sceneDepth); result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(-1, 1), sceneDepth); @@ -75,10 +72,133 @@ float SampleShadowMap(Texture2D shadowMap, float2 shadowMapUV, float scen result += SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowMapUV, int2(1, 1), sceneDepth); result = result * (1.0f / 9.0); #endif - return result; } +float SampleShadowMapOptimizedPCFHelper(Texture2D shadowMap, float2 baseUV, float u, float v, float2 shadowMapSizeInv, float sceneDepth) +{ + float2 uv = baseUV + float2(u, v) * shadowMapSizeInv; + return SAMPLE_SHADOW_MAP(shadowMap, uv, sceneDepth); +} + +// [Shadow map sampling method used in The Witness, https://github.com/TheRealMJP/Shadows] +float SampleShadowMapOptimizedPCF(Texture2D shadowMap, float2 shadowMapUV, float sceneDepth) +{ +#if SHADOWS_QUALITY != 0 + float2 shadowMapSize; + shadowMap.GetDimensions(shadowMapSize.x, shadowMapSize.y); + + float2 uv = shadowMapUV.xy * shadowMapSize; // 1 unit - 1 texel + float2 shadowMapSizeInv = 1.0f / shadowMapSize; + + float2 baseUV; + baseUV.x = floor(uv.x + 0.5); + baseUV.y = floor(uv.y + 0.5); + float s = (uv.x + 0.5 - baseUV.x); + float t = (uv.y + 0.5 - baseUV.y); + baseUV -= float2(0.5, 0.5); + baseUV *= shadowMapSizeInv; + + float sum = 0; +#endif +#if SHADOWS_QUALITY == 0 + return SAMPLE_SHADOW_MAP(shadowMap, shadowMapUV, sceneDepth); +#elif SHADOWS_QUALITY == 1 + float uw0 = (3 - 2 * s); + float uw1 = (1 + 2 * s); + + float u0 = (2 - s) / uw0 - 1; + float u1 = s / uw1 + 1; + + float vw0 = (3 - 2 * t); + float vw1 = (1 + 2 * t); + + float v0 = (2 - t) / vw0 - 1; + float v1 = t / 
vw1 + 1; + + sum += uw0 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v0, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v0, shadowMapSizeInv, sceneDepth); + sum += uw0 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v1, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v1, shadowMapSizeInv, sceneDepth); + + return sum * 1.0f / 16; +#elif SHADOWS_QUALITY == 2 + float uw0 = (4 - 3 * s); + float uw1 = 7; + float uw2 = (1 + 3 * s); + + float u0 = (3 - 2 * s) / uw0 - 2; + float u1 = (3 + s) / uw1; + float u2 = s / uw2 + 2; + + float vw0 = (4 - 3 * t); + float vw1 = 7; + float vw2 = (1 + 3 * t); + + float v0 = (3 - 2 * t) / vw0 - 2; + float v1 = (3 + t) / vw1; + float v2 = t / vw2 + 2; + + sum += uw0 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v0, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v0, shadowMapSizeInv, sceneDepth); + sum += uw2 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u2, v0, shadowMapSizeInv, sceneDepth); + + sum += uw0 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v1, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v1, shadowMapSizeInv, sceneDepth); + sum += uw2 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u2, v1, shadowMapSizeInv, sceneDepth); + + sum += uw0 * vw2 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v2, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw2 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v2, shadowMapSizeInv, sceneDepth); + sum += uw2 * vw2 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u2, v2, shadowMapSizeInv, sceneDepth); + + return sum * 1.0f / 144; +#elif SHADOWS_QUALITY == 3 + float uw0 = (5 * s - 6); + float uw1 = (11 * s - 28); + float uw2 = 
-(11 * s + 17); + float uw3 = -(5 * s + 1); + + float u0 = (4 * s - 5) / uw0 - 3; + float u1 = (4 * s - 16) / uw1 - 1; + float u2 = -(7 * s + 5) / uw2 + 1; + float u3 = -s / uw3 + 3; + + float vw0 = (5 * t - 6); + float vw1 = (11 * t - 28); + float vw2 = -(11 * t + 17); + float vw3 = -(5 * t + 1); + + float v0 = (4 * t - 5) / vw0 - 3; + float v1 = (4 * t - 16) / vw1 - 1; + float v2 = -(7 * t + 5) / vw2 + 1; + float v3 = -t / vw3 + 3; + + sum += uw0 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v0, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v0, shadowMapSizeInv, sceneDepth); + sum += uw2 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u2, v0, shadowMapSizeInv, sceneDepth); + sum += uw3 * vw0 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u3, v0, shadowMapSizeInv, sceneDepth); + + sum += uw0 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v1, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v1, shadowMapSizeInv, sceneDepth); + sum += uw2 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u2, v1, shadowMapSizeInv, sceneDepth); + sum += uw3 * vw1 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u3, v1, shadowMapSizeInv, sceneDepth); + + sum += uw0 * vw2 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v2, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw2 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v2, shadowMapSizeInv, sceneDepth); + sum += uw2 * vw2 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u2, v2, shadowMapSizeInv, sceneDepth); + sum += uw3 * vw2 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u3, v2, shadowMapSizeInv, sceneDepth); + + sum += uw0 * vw3 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u0, v3, shadowMapSizeInv, sceneDepth); + sum += uw1 * vw3 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u1, v3, 
shadowMapSizeInv, sceneDepth); + sum += uw2 * vw3 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u2, v3, shadowMapSizeInv, sceneDepth); + sum += uw3 * vw3 * SampleShadowMapOptimizedPCFHelper(shadowMap, baseUV, u3, v3, shadowMapSizeInv, sceneDepth); + + return sum * (1.0f / 2704); +#else + return 0.0f; +#endif +} + // Samples the shadow for the given directional light on the material surface (supports subsurface shadowing) ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadowsBuffer, Texture2D shadowMap, GBufferSample gBuffer, float dither = 0.0f) { @@ -143,7 +263,7 @@ ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadow float2 shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, samplePosition, shadowPosition); // Sample shadow map - result.SurfaceShadow = SampleShadowMap(shadowMap, shadowMapUV, shadowPosition.z); + result.SurfaceShadow = SampleShadowMapOptimizedPCF(shadowMap, shadowMapUV, shadowPosition.z); // Increase the sharpness for higher cascades to match the filter radius //const float SharpnessScale[MaxNumCascades] = { 1.0f, 1.5f, 3.0f, 3.5f }; @@ -207,7 +327,7 @@ ShadowSample SampleLocalLightShadow(LightData light, Buffer shadowsBuffe float2 shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, samplePosition, shadowPosition); // Sample shadow map - result.SurfaceShadow = SampleShadowMap(shadowMap, shadowMapUV, shadowPosition.z); + result.SurfaceShadow = SampleShadowMapOptimizedPCF(shadowMap, shadowMapUV, shadowPosition.z); #if defined(USE_GBUFFER_CUSTOM_DATA) // Subsurface shadowing From 89f7e442f7d27058c075b8ebd952b81bc2cff8f3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 10 Apr 2024 11:03:18 +0200 Subject: [PATCH 022/292] Fix point light seams due to missing shadow map borders --- Source/Engine/Renderer/ShadowsPass.cpp | 47 +++++++++++++++++--------- Source/Engine/Renderer/ShadowsPass.h | 11 +++--- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git 
a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 6625d4f1b..a99a3fce3 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -133,7 +133,7 @@ struct ShadowAtlasLight uint16 Resolution; uint8 TilesNeeded; uint8 TilesCount; - float Sharpness, Fade, NormalOffsetScale, Bias, FadeDistance, Distance; + float Sharpness, Fade, NormalOffsetScale, Bias, FadeDistance, Distance, TileBorder; Float4 CascadeSplits; ShadowAtlasLightTile Tiles[SHADOWS_MAX_TILES]; ShadowAtlasLightCache Cache; @@ -205,6 +205,7 @@ struct ShadowAtlasLight class ShadowsCustomBuffer : public RenderBuffers::CustomBuffer { public: + int32 MaxShadowsQuality = 0; int32 Resolution = 0; int32 AtlasPixelsUsed = 0; mutable bool ClearShadowMapAtlas = true; @@ -379,7 +380,7 @@ void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext shadowContext.List->Clear(); } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, ShadowAtlasLight& atlasLight) +void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, ShadowAtlasLight& atlasLight) { // Copy light properties atlasLight.Sharpness = light.ShadowsSharpness; @@ -390,9 +391,9 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r atlasLight.Distance = Math::Min(renderContext.View.Far, light.ShadowsDistance); } -bool ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight) +bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight) { - SetupLight(renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); + SetupLight(shadows, 
renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); // Fade shadow on distance const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); @@ -430,9 +431,9 @@ bool ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r return false; } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight) +void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight) { - SetupLight(renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); + SetupLight(shadows, renderContext, renderContextBatch, (RenderLightData&)light, atlasLight); const RenderView& view = renderContext.View; const int32 csmCount = atlasLight.TilesCount; @@ -627,11 +628,17 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r } } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight) +void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight) { - if (SetupLight(renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight)) + if (SetupLight(shadows, renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight)) return; + // Prevent sampling shadow map at borders that includes nearby data due to filtering of virtual cubemap sides + atlasLight.TileBorder = 1.0f * (shadows.MaxShadowsQuality + 1); + const float borderScale = (float)atlasLight.Resolution / (atlasLight.Resolution + 2 * atlasLight.TileBorder); + Matrix borderScaleMatrix; + Matrix::Scaling(borderScale, borderScale, 1.0f, borderScaleMatrix); + // Render depth to all 
6 faces of the cube map atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); atlasLight.ContextCount = 6; @@ -641,6 +648,12 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + faceIndex]; SetupRenderContext(renderContext, shadowContext); shadowContext.View.SetUpCube(LocalLightNearPlane, light.Radius, light.Position); + + // Apply border to the projection matrix + shadowContext.View.Projection = shadowContext.View.Projection * borderScaleMatrix; + shadowContext.View.NonJitteredProjection = shadowContext.View.Projection; + Matrix::Invert(shadowContext.View.Projection, shadowContext.View.IP); + shadowContext.View.SetFace(faceIndex); const auto shadowMapsSize = (float)atlasLight.Resolution; shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &renderContext.View); @@ -648,9 +661,9 @@ void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& r } } -void ShadowsPass::SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight) +void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight) { - if (SetupLight(renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight)) + if (SetupLight(shadows, renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight)) return; // Render depth to a single projection @@ -682,7 +695,6 @@ void ShadowsPass::Dispose() void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& renderContextBatch) { PROFILE_CPU(); - _maxShadowsQuality = Math::Clamp(Math::Min((int32)Graphics::ShadowsQuality, (int32)renderContext.View.MaxShadowsQuality), 0, (int32)Quality::MAX - 1); // Early out and skip shadows setup if no lights is 
actively casting shadows // RenderBuffers will automatically free any old ShadowsCustomBuffer after a few frames if we don't update LastFrameUsed @@ -711,6 +723,7 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& auto& shadows = *renderContext.Buffers->GetCustomBuffer(TEXT("Shadows")); const auto currentFrame = Engine::FrameCount; shadows.LastFrameUsed = currentFrame; + shadows.MaxShadowsQuality = Math::Clamp(Math::Min((int32)Graphics::ShadowsQuality, (int32)renderContext.View.MaxShadowsQuality), 0, (int32)Quality::MAX - 1); int32 atlasResolution; switch (Graphics::ShadowMapsQuality) { @@ -893,11 +906,11 @@ RETRY_ATLAS_SETUP: light->HasShadow = true; atlasLight.TilesCount = atlasLight.TilesNeeded; if (light->IsPointLight) - SetupLight(renderContext, renderContextBatch, *(RenderPointLightData*)light, atlasLight); + SetupLight(shadows, renderContext, renderContextBatch, *(RenderPointLightData*)light, atlasLight); else if (light->IsSpotLight) - SetupLight(renderContext, renderContextBatch, *(RenderSpotLightData*)light, atlasLight); + SetupLight(shadows, renderContext, renderContextBatch, *(RenderSpotLightData*)light, atlasLight); else //if (light->IsDirectionalLight) - SetupLight(renderContext, renderContextBatch, *(RenderDirectionalLightData*)light, atlasLight); + SetupLight(shadows, renderContext, renderContextBatch, *(RenderDirectionalLightData*)light, atlasLight); } } @@ -927,13 +940,15 @@ RETRY_ATLAS_SETUP: packed[0] = Float4(*(const float*)&packed0x, atlasLight.FadeDistance, atlasLight.NormalOffsetScale, atlasLight.Bias); packed[1] = atlasLight.CascadeSplits; } + const float tileBorder = atlasLight.TileBorder; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) { // Shadow projection info const ShadowAtlasLightTile& tile = atlasLight.Tiles[tileIndex]; ASSERT(tile.RectTile); auto* packed = shadows.ShadowsBuffer.WriteReserve(5); - packed[0] = Float4(tile.RectTile->Width - 1.0f, tile.RectTile->Height - 1.0f, 
tile.RectTile->X, tile.RectTile->Y) * atlasResolutionInv; // UV to AtlasUV via a single MAD instruction + // UV to AtlasUV via a single MAD instruction + packed[0] = Float4(tile.RectTile->Width - tileBorder * 2, tile.RectTile->Height - tileBorder * 2, tile.RectTile->X + tileBorder, tile.RectTile->Y + tileBorder) * atlasResolutionInv; packed[1] = tile.WorldToShadow.GetColumn1(); packed[2] = tile.WorldToShadow.GetColumn2(); packed[3] = tile.WorldToShadow.GetColumn3(); @@ -1019,7 +1034,7 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende auto& view = renderContext.View; auto shader = _shader->GetShader(); const bool isLocalLight = light.IsPointLight || light.IsSpotLight; - int32 shadowQuality = _maxShadowsQuality; + int32 shadowQuality = shadows.MaxShadowsQuality; if (isLocalLight) { // Reduce shadows quality for smaller lights diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index c22a17881..5e9421d2c 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -22,7 +22,6 @@ private: GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowPoint; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowSpot; PixelFormat _shadowMapFormat; // Cached on initialization - int32 _maxShadowsQuality = 0; // Cached state for the current frame rendering (setup via Prepare) public: /// @@ -53,11 +52,11 @@ public: private: static void SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext); - static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, struct ShadowAtlasLight& atlasLight); - static bool SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight); - static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& 
atlasLight); - static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight); - static void SetupLight(RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight); + static void SetupLight(class ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, struct ShadowAtlasLight& atlasLight); + static bool SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight); + static void SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight); + static void SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight); + static void SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderSpotLightData& light, ShadowAtlasLight& atlasLight); #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) From b4547ec4d2bcf842661435e7dc78be6e5965bedb Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 10 Apr 2024 11:03:33 +0200 Subject: [PATCH 023/292] Minor fixes --- .../Engine/Graphics/Materials/MaterialShader.h | 2 +- Source/Engine/Renderer/RenderList.h | 18 ++++++++++++------ .../MaterialGenerator.Textures.cpp | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 5a4cec20c..4233b4700 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials 
shader version. /// -#define MATERIAL_GRAPH_VERSION 163 +#define MATERIAL_GRAPH_VERSION 164 class Material; class GPUShader; diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index e84660ced..a58eeb0c4 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -59,12 +59,6 @@ struct RenderLightData float ShadowsUpdateRate; float ShadowsUpdateRateAtDistance; - RenderLightData() - { - Platform::MemoryClear(this, sizeof(RenderLightData)); - } - - POD_COPYABLE(RenderLightData); bool CanRenderShadow(const RenderView& view) const; }; @@ -80,9 +74,12 @@ struct RenderDirectionalLightData : RenderLightData RenderDirectionalLightData() { + Platform::MemoryClear(this, sizeof(RenderDirectionalLightData)); IsDirectionalLight = 1; } + POD_COPYABLE(RenderDirectionalLightData); + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; @@ -107,9 +104,12 @@ struct RenderSpotLightData : RenderLocalLightData RenderSpotLightData() { + Platform::MemoryClear(this, sizeof(RenderSpotLightData)); IsSpotLight = 1; } + POD_COPYABLE(RenderSpotLightData); + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; @@ -120,9 +120,12 @@ struct RenderPointLightData : RenderLocalLightData RenderPointLightData() { + Platform::MemoryClear(this, sizeof(RenderPointLightData)); IsPointLight = 1; } + POD_COPYABLE(RenderPointLightData); + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; @@ -135,9 +138,12 @@ struct RenderSkyLightData : RenderLightData RenderSkyLightData() { + Platform::MemoryClear(this, sizeof(RenderSkyLightData)); IsSkyLight = 1; } + POD_COPYABLE(RenderSkyLightData); + void SetShaderData(ShaderLightData& data, bool useShadow) const; }; diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index 9b0a334e6..854f25398 100644 --- 
a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -653,7 +653,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) // Write operations auto framesCount = writeLocal(VariantType::Float, String::Format(TEXT("{0}.x * {1}.y"), framesXY.Value, framesXY.Value), node); - frame = writeLocal(VariantType::Float, String::Format(TEXT("fmod({0}, {1})"), frame.Value, framesCount.Value), node); + frame = writeLocal(VariantType::Float, String::Format(TEXT("fmod(floor({0}), {1})"), frame.Value, framesCount.Value), node); auto framesXYInv = writeOperation2(node, Value::One.AsFloat2(), framesXY, '/'); auto frameY = writeLocal(VariantType::Float, String::Format(TEXT("abs({0} * {1}.y - (floor({2} * {3}.x) + {0} * 1))"), invertY.Value, framesXY.Value, frame.Value, framesXYInv.Value), node); auto frameX = writeLocal(VariantType::Float, String::Format(TEXT("abs({0} * {1}.x - (({2} - {1}.x * floor({2} * {3}.x)) + {0} * 1))"), invertX.Value, framesXY.Value, frame.Value, framesXYInv.Value), node); From 340ef194d378007ed9dfa3c57e8ef9f683703954 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 10 Apr 2024 13:36:59 +0200 Subject: [PATCH 024/292] Add grey out to obsolete/deprecated members in properties panel --- Source/Editor/CustomEditors/CustomEditor.cs | 4 ++++ Source/Editor/CustomEditors/Values/ValueContainer.cs | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/Source/Editor/CustomEditors/CustomEditor.cs b/Source/Editor/CustomEditors/CustomEditor.cs index 7f540b767..c6f62746e 100644 --- a/Source/Editor/CustomEditors/CustomEditor.cs +++ b/Source/Editor/CustomEditors/CustomEditor.cs @@ -377,6 +377,10 @@ namespace FlaxEditor.CustomEditors else if (Values.HasDefaultValue && CanRevertDefaultValue) color = Color.Yellow * 0.8f; LinkedLabel.HighlightStripColor = color; + + // Grey out deprecated members + if (Values.IsObsolete) + LinkedLabel.TextColor = 
LinkedLabel.TextColorHighlighted = FlaxEngine.GUI.Style.Current.ForegroundGrey; } } diff --git a/Source/Editor/CustomEditors/Values/ValueContainer.cs b/Source/Editor/CustomEditors/Values/ValueContainer.cs index 04bc8bd7c..c6efda318 100644 --- a/Source/Editor/CustomEditors/Values/ValueContainer.cs +++ b/Source/Editor/CustomEditors/Values/ValueContainer.cs @@ -139,6 +139,11 @@ namespace FlaxEditor.CustomEditors /// public bool IsArray => Type != ScriptType.Null && Type.IsArray; + /// + /// True if the member or its type has an ObsoleteAttribute that marks it as obsolete. + /// + public bool IsObsolete { get; } + /// /// Gets the values types array (without duplicates). /// @@ -160,6 +165,7 @@ namespace FlaxEditor.CustomEditors { Info = info; Type = Info.ValueType; + IsObsolete = Info.HasAttribute(typeof(ObsoleteAttribute), true); } /// From c4949de28f06b1f5fc19b2023e83e4dfcba87ac6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 10:20:21 +0200 Subject: [PATCH 025/292] Add new Static Flag `Shadow` for cached shadow maps --- Flax.flaxproj | 2 +- Source/Engine/Level/Actor.cpp | 7 +++++-- Source/Engine/Level/Types.h | 7 ++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index 383d9e14e..46f3f93e4 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -4,7 +4,7 @@ "Major": 1, "Minor": 9, "Revision": 0, - "Build": 6600 + "Build": 6601 }, "Company": "Flax", "Copyright": "Copyright (c) 2012-2024 Wojciech Figat.
All rights reserved.", diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 2615f09fa..2e1870a57 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -1062,9 +1062,12 @@ void Actor::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) // StaticFlags update - added StaticFlags::Navigation // [Deprecated on 17.05.2020, expires on 17.05.2021] if (modifier->EngineBuild < 6178 && (int32)_staticFlags == (1 + 2 + 4)) - { _staticFlags |= StaticFlags::Navigation; - } + + // StaticFlags update - added StaticFlags::Shadow + // [Deprecated on 17.05.2020, expires on 17.05.2021] + if (modifier->EngineBuild < 6601 && (int32)_staticFlags == (1 + 2 + 4 + 8)) + _staticFlags |= StaticFlags::Shadow; const auto tag = stream.FindMember("Tag"); if (tag != stream.MemberEnd()) diff --git a/Source/Engine/Level/Types.h b/Source/Engine/Level/Types.h index 920cf65ca..b6ca23526 100644 --- a/Source/Engine/Level/Types.h +++ b/Source/Engine/Level/Types.h @@ -97,10 +97,15 @@ API_ENUM(Attributes="Flags") enum class StaticFlags /// Navigation = 1 << 3, + /// + /// Object is considered to have static shadowing (casting and receiving). + /// + Shadow = 1 << 4, + /// /// Object is fully static in the scene. /// - FullyStatic = Transform | ReflectionProbe | Lightmap | Navigation, + FullyStatic = ReflectionProbe | Lightmap | Transform | Navigation | Shadow, /// /// Maximum value of the enum (force to int). 
From eac1d19a09e5a2e3416360fb8701e78d2f92ba86 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 10:21:13 +0200 Subject: [PATCH 026/292] Add additional `StaticFlagsCompare` to Render View for dynamic or static only drawing --- Source/Engine/Graphics/RenderView.h | 5 +++++ Source/Engine/Level/Scene/SceneRendering.cpp | 6 +++--- Source/Engine/Renderer/ProbesRenderer.cpp | 2 +- Source/Engine/Renderer/RenderList.cpp | 4 +++- Source/Engine/ShadowsOfMordor/Builder.cpp | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Graphics/RenderView.h b/Source/Engine/Graphics/RenderView.h index 0b37e8b28..b44810648 100644 --- a/Source/Engine/Graphics/RenderView.h +++ b/Source/Engine/Graphics/RenderView.h @@ -127,6 +127,11 @@ public: /// API_FIELD() StaticFlags StaticFlagsMask = StaticFlags::None; + /// + /// The right-hand side of the static flags mask comparison. Allows drawing objects that don't pass the static flags mask. Objects are checked with the following formula: (ObjectStaticFlags and StaticFlagsMask) == StaticFlagsCompare. + /// + API_FIELD() StaticFlags StaticFlagsCompare = StaticFlags::None; + /// /// The view flags.
/// diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index 00084eef1..69d8edf4c 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -212,12 +212,12 @@ void SceneRendering::DrawActorsJob(int32) PROFILE_CPU(); auto& mainContext = _drawBatch->GetMainContext(); const auto& view = mainContext.View; - if (view.IsOfflinePass) + if (view.StaticFlagsMask != StaticFlags::None) { - // Offline pass with additional static flags culling + // Static-flags culling FOR_EACH_BATCH_ACTOR e.Bounds.Center -= view.Origin; - if (CHECK_ACTOR && (e.Actor->GetStaticFlags() & view.StaticFlagsMask) != StaticFlags::None) + if (CHECK_ACTOR && (e.Actor->GetStaticFlags() & view.StaticFlagsMask) == view.StaticFlagsCompare) { DRAW_ACTOR(*_drawBatch); } diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index f604f914e..dd88edc8c 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -273,7 +273,7 @@ bool ProbesRenderer::Init() view.Mode = ViewMode::NoPostFx; view.IsOfflinePass = true; view.IsSingleFrame = true; - view.StaticFlagsMask = StaticFlags::ReflectionProbe; + view.StaticFlagsMask = view.StaticFlagsCompare = StaticFlags::ReflectionProbe; view.MaxShadowsQuality = Quality::Low; task->IsCameraCut = true; task->Resize(probeResolution, probeResolution); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 4039c5565..a3a99a3ee 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -569,7 +569,9 @@ void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawP const RenderContext& renderContext = renderContextBatch.Contexts.Get()[i]; ASSERT_LOW_LAYER(renderContext.View.Pass == DrawPass::Depth); drawModes = modes & renderContext.View.Pass; - if (drawModes != DrawPass::None && 
renderContext.View.CullingFrustum.Intersects(bounds)) + if (drawModes != DrawPass::None && + (staticFlags & renderContext.View.StaticFlagsMask) == renderContext.View.StaticFlagsCompare && + renderContext.View.CullingFrustum.Intersects(bounds)) { renderContext.List->ShadowDepthDrawCallsList.Indices.Add(index); } diff --git a/Source/Engine/ShadowsOfMordor/Builder.cpp b/Source/Engine/ShadowsOfMordor/Builder.cpp index b9a9305c4..80c59e225 100644 --- a/Source/Engine/ShadowsOfMordor/Builder.cpp +++ b/Source/Engine/ShadowsOfMordor/Builder.cpp @@ -457,7 +457,7 @@ bool ShadowsOfMordor::Builder::initResources() view.IsOfflinePass = true; view.Near = HEMISPHERES_NEAR_PLANE; view.Far = HEMISPHERES_FAR_PLANE; - view.StaticFlagsMask = StaticFlags::Lightmap; + view.StaticFlagsMask = view.StaticFlagsCompare = StaticFlags::Lightmap; view.MaxShadowsQuality = Quality::Low; _task->Resize(HEMISPHERES_RESOLUTION, HEMISPHERES_RESOLUTION); From 890b2da1089df6cbf11bf28a10d840b81bfd0f79 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 15:35:18 +0200 Subject: [PATCH 027/292] Add **shadows caching for static geometry** --- Source/Engine/Renderer/ShadowsPass.cpp | 480 +++++++++++++++++++++---- Source/Engine/Renderer/ShadowsPass.h | 5 +- Source/Shaders/Quad.shader | 13 + 3 files changed, 433 insertions(+), 65 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index a99a3fce3..32ec246aa 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -20,6 +20,7 @@ #define SHADOWS_MAX_TILES 6 #define SHADOWS_MIN_RESOLUTION 16 +#define SHADOWS_BASE_LIGHT_RESOLUTION(atlasResolution) atlasResolution / MAX_CSM_CASCADES // Allow to store 4 CSM cascades in a single row in all cases #define NormalOffsetScaleTweak 100.0f #define LocalLightNearPlane 10.0f @@ -61,12 +62,14 @@ uint16 QuantizeResolution(float input) struct ShadowAtlasLightTile { ShadowsAtlasRectTile* RectTile; + ShadowsAtlasRectTile* 
StaticRectTile; Matrix WorldToShadow; float FramesToUpdate; // Amount of frames (with fraction) until the next shadow update can happen bool SkipUpdate; + bool HasStaticGeometry; Viewport CachedViewport; // The viewport used the last time to render shadow to the atlas - void Free(ShadowsCustomBuffer* buffer) + void FreeDynamic(ShadowsCustomBuffer* buffer) { if (RectTile) { @@ -75,6 +78,28 @@ struct ShadowAtlasLightTile } } + void FreeStatic(ShadowsCustomBuffer* buffer) + { + if (StaticRectTile) + { + StaticRectTile->Free((ShadowsCustomBuffer*)nullptr); + StaticRectTile = nullptr; + } + } + + void Free(ShadowsCustomBuffer* buffer) + { + FreeDynamic(buffer); + FreeStatic(buffer); + } + + void ClearDynamic() + { + RectTile = nullptr; + FramesToUpdate = 0; + SkipUpdate = false; + } + void SetWorldToShadow(const Matrix& shadowViewProjection) { // Transform Clip Space [-1,+1]^2 to UV Space [0,1]^2 (saves MAD instruction in shader) @@ -92,9 +117,11 @@ struct ShadowAtlasLightTile // State for shadow cache sed to invalidate any prerendered shadow depths struct ShadowAtlasLightCache { - bool Valid; + bool StaticValid; + bool DynamicValid; float ShadowsUpdateRate; float ShadowsUpdateRateAtDistance; + float OuterConeAngle; Float3 Position; float Radius; Float3 Direction; @@ -103,15 +130,16 @@ struct ShadowAtlasLightCache void Set(const RenderView& view, const RenderLightData& light, const Float4& cascadeSplits = Float4::Zero) { - Valid = true; + StaticValid = true; + DynamicValid = true; Distance = light.ShadowsDistance; ShadowsUpdateRate = light.ShadowsUpdateRate; ShadowsUpdateRateAtDistance = light.ShadowsUpdateRateAtDistance; + Direction = light.Direction; if (light.IsDirectionalLight) { // Sun Position = view.Position; - Direction = light.Direction; CascadeSplits = cascadeSplits; } else @@ -120,6 +148,8 @@ struct ShadowAtlasLightCache const auto& localLight = (const RenderLocalLightData&)light; Position = light.Position; Radius = localLight.Radius; + if (light.IsSpotLight) 
+ OuterConeAngle = ((const RenderSpotLightData&)light).OuterConeAngle; } } }; @@ -127,12 +157,37 @@ struct ShadowAtlasLightCache // State for light's shadows rendering struct ShadowAtlasLight { + // Static shadow map is created in 2 passes: + // - once to check if any static objects are in-use per tile (ShadowAtlasLightTile::HasStaticGeometry) + // - then to render those objects into the shadow map. + // When any static objects gets modified in the light range the second step is repeated. + // When light is changed then both steps are repeated. + enum StaticStates + { + // Not using static shadow map at all. + Unused, + // Static objects are rendered separately to dynamic objects to check if light projections need to allocate static shadow map. + WaitForGeometryCheck, + // Static objects will be rendered into static shadow map. + UpdateStaticShadow, + // Static objects are up-to-date and can be copied from static shadow map. + CopyStaticShadow, + // None of the tiles has static geometry nearby. + NoStaticGeometry, + // One of the tiles failed to insert into static atlas so fallback to default dynamic logic. 
+ FailedToInsertTiles, + }; + uint64 LastFrameUsed; int32 ContextIndex; int32 ContextCount; uint16 Resolution; + uint16 StaticResolution; uint8 TilesNeeded; uint8 TilesCount; + bool HasStaticShadowContext; + StaticStates StaticState; + BoundingSphere Bounds; float Sharpness, Fade, NormalOffsetScale, Bias, FadeDistance, Distance, TileBorder; Float4 CascadeSplits; ShadowAtlasLightTile Tiles[SHADOWS_MAX_TILES]; @@ -145,6 +200,16 @@ struct ShadowAtlasLight POD_COPYABLE(ShadowAtlasLight); + bool HasStaticGeometry() const + { + for (auto& tile : Tiles) + { + if (tile.HasStaticGeometry) + return true; + } + return false; + } + float CalculateUpdateRateInv(const RenderLightData& light, float distanceFromView, bool& freezeUpdate) const { const float shadowsUpdateRate = light.ShadowsUpdateRate; @@ -159,24 +224,24 @@ struct ShadowAtlasLight void ValidateCache(const RenderView& view, const RenderLightData& light) { - if (!Cache.Valid) + if (!Cache.StaticValid || !Cache.DynamicValid) return; if (!Math::NearEqual(Cache.Distance, light.ShadowsDistance) || !Math::NearEqual(Cache.ShadowsUpdateRate, light.ShadowsUpdateRate) || - !Math::NearEqual(Cache.ShadowsUpdateRateAtDistance, light.ShadowsUpdateRateAtDistance)) + !Math::NearEqual(Cache.ShadowsUpdateRateAtDistance, light.ShadowsUpdateRateAtDistance) || + Float3::Dot(Cache.Direction, light.Direction) < 0.999999f) { // Invalidate - Cache.Valid = false; + Cache.StaticValid = false; } if (light.IsDirectionalLight) { // Sun - if (Float3::Dot(Cache.Direction, light.Direction) < 0.999999f || - !Float3::NearEqual(Cache.Position, view.Position, 1.0f) || + if (!Float3::NearEqual(Cache.Position, view.Position, 1.0f) || !Float4::NearEqual(Cache.CascadeSplits, CascadeSplits)) { // Invalidate - Cache.Valid = false; + Cache.StaticValid = false; } } else @@ -187,33 +252,43 @@ struct ShadowAtlasLight !Math::NearEqual(Cache.Radius, localLight.Radius)) { // Invalidate - Cache.Valid = false; + Cache.StaticValid = false; + } + if (light.IsSpotLight 
&& !Math::NearEqual(Cache.OuterConeAngle, ((const RenderSpotLightData&)light).OuterConeAngle)) + { + // Invalidate + Cache.StaticValid = false; } } - for (int32 i = 0; i < TilesCount && Cache.Valid; i++) + Cache.DynamicValid &= Cache.StaticValid; + for (int32 i = 0; i < TilesCount && !Cache.DynamicValid; i++) { auto& tile = Tiles[i]; if (tile.CachedViewport != Viewport(tile.RectTile->X, tile.RectTile->Y, tile.RectTile->Width, tile.RectTile->Height)) { // Invalidate - Cache.Valid = false; + Cache.DynamicValid = false; } } } }; -class ShadowsCustomBuffer : public RenderBuffers::CustomBuffer +class ShadowsCustomBuffer : public RenderBuffers::CustomBuffer, public ISceneRenderingListener { public: int32 MaxShadowsQuality = 0; int32 Resolution = 0; int32 AtlasPixelsUsed = 0; + bool EnableStaticShadows = true; mutable bool ClearShadowMapAtlas = true; + mutable bool ClearStaticShadowMapAtlas = false; Vector3 ViewOrigin; GPUTexture* ShadowMapAtlas = nullptr; + GPUTexture* StaticShadowMapAtlas = nullptr; DynamicTypedBuffer ShadowsBuffer; GPUBufferView* ShadowsBufferView = nullptr; ShadowsAtlasRectTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles + ShadowsAtlasRectTile* StaticAtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles Dictionary Lights; ShadowsCustomBuffer() @@ -222,41 +297,106 @@ public: ShadowMapAtlas = GPUDevice::Instance->CreateTexture(TEXT("Shadow Map Atlas")); } - void ClearTiles() + void ClearDynamic() { ClearShadowMapAtlas = true; AtlasPixelsUsed = 0; - SAFE_DELETE(AtlasTiles); for (auto it = Lights.Begin(); it.IsNotEnd(); ++it) { auto& atlasLight = it->Value; - Platform::MemoryClear(atlasLight.Tiles, sizeof(atlasLight.Tiles)); - Platform::MemoryClear(&atlasLight.Cache, sizeof(atlasLight.Cache)); + atlasLight.Cache.DynamicValid = false; + for (int32 i = 0; i < atlasLight.TilesCount; i++) + atlasLight.Tiles[i].ClearDynamic(); } + SAFE_DELETE(AtlasTiles); } void Reset() { Lights.Clear(); - 
ClearTiles(); + SAFE_DELETE(StaticAtlasTiles); + ClearDynamic(); ViewOrigin = Vector3::Zero; } + void InitStaticAtlas() + { + if (StaticAtlasTiles) + return; + const int32 atlasResolution = Resolution * 2; + StaticAtlasTiles = New(0, 0, atlasResolution, atlasResolution); + if (!StaticShadowMapAtlas) + StaticShadowMapAtlas = GPUDevice::Instance->CreateTexture(TEXT("Static Shadow Map Atlas")); + auto desc = ShadowMapAtlas->GetDescription(); + desc.Width = desc.Height = atlasResolution; + if (StaticShadowMapAtlas->Init(desc)) + { + LOG(Fatal, "Failed to setup shadow map of size {0}x{1} and format {2}", desc.Width, desc.Height, ScriptingEnum::ToString(desc.Format)); + return; + } + ClearStaticShadowMapAtlas = true; + } + + void DirtyStaticBounds(const BoundingSphere& bounds) + { + // TODO: use octree to improve bounds-testing + // TODO: build list of modified bounds and dirty them in batch on next frame start (ideally in async within shadows setup job) + for (auto& e : Lights) + { + auto& atlasLight = e.Value; + if (atlasLight.StaticState == ShadowAtlasLight::CopyStaticShadow && atlasLight.Bounds.Intersects(bounds)) + { + // Invalidate static shadow + atlasLight.Cache.StaticValid = false; + } + } + } + ~ShadowsCustomBuffer() { Reset(); SAFE_DELETE_GPU_RESOURCE(ShadowMapAtlas); + SAFE_DELETE_GPU_RESOURCE(StaticShadowMapAtlas); + } + + // [ISceneRenderingListener] + void OnSceneRenderingAddActor(Actor* a) override + { + if (a->HasStaticFlag(StaticFlags::Shadow)) + DirtyStaticBounds(a->GetSphere()); + } + + void OnSceneRenderingUpdateActor(Actor* a, const BoundingSphere& prevBounds) override + { + // Dirty static objects to redraw when changed (eg. 
material modification) + if (a->HasStaticFlag(StaticFlags::Shadow)) + { + DirtyStaticBounds(prevBounds); + DirtyStaticBounds(a->GetSphere()); + } + } + + void OnSceneRenderingRemoveActor(Actor* a) override + { + if (a->HasStaticFlag(StaticFlags::Shadow)) + DirtyStaticBounds(a->GetSphere()); + } + + void OnSceneRenderingClear(SceneRendering* scene) override + { } }; void ShadowsAtlasRectTile::OnInsert(ShadowsCustomBuffer* buffer) { - buffer->AtlasPixelsUsed += (int32)Width * (int32)Height; + if (buffer) + buffer->AtlasPixelsUsed += (int32)Width * (int32)Height; } void ShadowsAtlasRectTile::OnFree(ShadowsCustomBuffer* buffer) { - buffer->AtlasPixelsUsed -= (int32)Width * (int32)Height; + if (buffer) + buffer->AtlasPixelsUsed -= (int32)Width * (int32)Height; } String ShadowsPass::ToString() const @@ -353,11 +493,23 @@ bool ShadowsPass::setupResources() if (_psDepthClear->Init(psDesc)) return true; } + if (_psDepthCopy == nullptr) + { + psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.PS = GPUDevice::Instance->QuadShader->GetPS("PS_DepthCopy"); + psDesc.DepthEnable = true; + psDesc.DepthWriteEnable = true; + psDesc.DepthFunc = ComparisonFunc::Always; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::None; + _psDepthCopy = GPUDevice::Instance->CreatePipelineState(); + if (_psDepthCopy->Init(psDesc)) + return true; + } return false; } -void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext) +void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext, ShadowAtlasLight* atlasLight, RenderContext* dynamicContext) { const auto& view = renderContext.View; @@ -366,14 +518,31 @@ void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext // Prepare properties auto& shadowView = shadowContext.View; - shadowView.Flags = view.Flags; - shadowView.StaticFlagsMask = view.StaticFlagsMask; - shadowView.RenderLayersMask = view.RenderLayersMask; 
- shadowView.IsOfflinePass = view.IsOfflinePass; - shadowView.ModelLODBias = view.ModelLODBias; - shadowView.ModelLODDistanceFactor = view.ModelLODDistanceFactor; - shadowView.Pass = DrawPass::Depth; - shadowView.Origin = view.Origin; + if (dynamicContext) + { + // Duplicate dynamic view but with static only geometry + shadowView = dynamicContext->View; + shadowView.StaticFlagsMask = StaticFlags::Shadow; + shadowView.StaticFlagsCompare = StaticFlags::Shadow; + } + else + { + shadowView.Flags = view.Flags; + shadowView.StaticFlagsMask = view.StaticFlagsMask; + shadowView.StaticFlagsCompare = view.StaticFlagsCompare; + shadowView.RenderLayersMask = view.RenderLayersMask; + shadowView.IsOfflinePass = view.IsOfflinePass; + shadowView.ModelLODBias = view.ModelLODBias; + shadowView.ModelLODDistanceFactor = view.ModelLODDistanceFactor; + shadowView.Pass = DrawPass::Depth; + shadowView.Origin = view.Origin; + if (atlasLight && atlasLight->StaticState != ShadowAtlasLight::Unused && atlasLight->StaticState != ShadowAtlasLight::FailedToInsertTiles) + { + // Draw only dynamic geometry + shadowView.StaticFlagsMask = StaticFlags::Shadow; + shadowView.StaticFlagsCompare = StaticFlags::None; + } + } shadowContext.List = RenderList::GetFromPool(); shadowContext.Buffers = renderContext.Buffers; shadowContext.Task = renderContext.Task; @@ -389,11 +558,14 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render atlasLight.Bias = light.ShadowsDepthBias; atlasLight.FadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); atlasLight.Distance = Math::Min(renderContext.View.Far, light.ShadowsDistance); + atlasLight.Bounds.Center = light.Position + renderContext.View.Position; + atlasLight.Bounds.Radius = 0.0f; } bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight) { SetupLight(shadows, renderContext, renderContextBatch, 
(RenderLightData&)light, atlasLight); + atlasLight.Bounds.Radius = light.Radius; // Fade shadow on distance const float fadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); @@ -404,11 +576,83 @@ bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render // Update cached state (invalidate it if the light changed) atlasLight.ValidateCache(renderContext.View, light); + // Update static shadow logic + atlasLight.HasStaticShadowContext = shadows.EnableStaticShadows && EnumHasAllFlags(light.StaticFlags, StaticFlags::Shadow); + if (!atlasLight.HasStaticShadowContext) + atlasLight.StaticState = ShadowAtlasLight::Unused; + switch (atlasLight.StaticState) + { + case ShadowAtlasLight::Unused: + if (atlasLight.HasStaticShadowContext) + atlasLight.StaticState = ShadowAtlasLight::WaitForGeometryCheck; + break; + case ShadowAtlasLight::WaitForGeometryCheck: + if (atlasLight.HasStaticGeometry()) + { + // Calculate static resolution for the light based on the world-bounds, not view-dependant + shadows.InitStaticAtlas(); + const int32 baseLightResolution = SHADOWS_BASE_LIGHT_RESOLUTION(shadows.Resolution); + int32 staticResolution = Math::RoundToInt(Math::Saturate(light.Radius / 1000.0f) * baseLightResolution); + if (!Math::IsPowerOfTwo(staticResolution)) + staticResolution = Math::RoundUpToPowerOf2(staticResolution); + atlasLight.StaticResolution = staticResolution; + + // Allocate static shadow map slot for all used tiles + for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) + { + auto& tile = atlasLight.Tiles[tileIndex]; + if (tile.StaticRectTile == nullptr) + { + tile.StaticRectTile = shadows.StaticAtlasTiles->Insert(atlasLight.StaticResolution, atlasLight.StaticResolution, 0, (ShadowsCustomBuffer*)nullptr); + if (!tile.StaticRectTile) + { + // Failed to insert tile to switch back to the default rendering + atlasLight.StaticState = ShadowAtlasLight::FailedToInsertTiles; + for (int32 i = 0; i < tileIndex; i++) + 
atlasLight.Tiles[i].FreeStatic(&shadows); + break; + } + } + } + if (atlasLight.StaticState == ShadowAtlasLight::WaitForGeometryCheck) + { + // Now we know the tiles with static geometry and we can render those + atlasLight.StaticState = ShadowAtlasLight::UpdateStaticShadow; + } + } + else + { + // Not using static geometry for this light shadows + atlasLight.StaticState = ShadowAtlasLight::NoStaticGeometry; + } + break; + case ShadowAtlasLight::CopyStaticShadow: + // Light was modified so update the static shadows + if (!atlasLight.Cache.StaticValid && atlasLight.HasStaticShadowContext) + atlasLight.StaticState = ShadowAtlasLight::UpdateStaticShadow; + break; + } + switch (atlasLight.StaticState) + { + case ShadowAtlasLight::CopyStaticShadow: + case ShadowAtlasLight::NoStaticGeometry: + case ShadowAtlasLight::FailedToInsertTiles: + // Skip collecting static draws + atlasLight.HasStaticShadowContext = false; + break; + } + if (atlasLight.HasStaticShadowContext) + { + // If rendering finds any static draws then it's set to true + for (auto& tile : atlasLight.Tiles) + tile.HasStaticGeometry = false; + } + // Calculate update rate based on the distance to the view bool freezeUpdate; const float updateRateInv = atlasLight.CalculateUpdateRateInv(light, dstLightToView, freezeUpdate); float& framesToUpdate = atlasLight.Tiles[0].FramesToUpdate; // Use the first tile for all local light projections to be in sync - if ((framesToUpdate > 0.0f || freezeUpdate) && atlasLight.Cache.Valid) + if ((framesToUpdate > 0.0f || freezeUpdate) && atlasLight.Cache.DynamicValid && !atlasLight.HasStaticShadowContext) { // Light state matches the cached state and the update rate allows us to reuse the cached shadow map so skip update if (!freezeUpdate) @@ -512,7 +756,7 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render bool freezeUpdate; const float updateRateInv = atlasLight.CalculateUpdateRateInv(light, dstToCascade, freezeUpdate); auto& tile = 
atlasLight.Tiles[cascadeIndex]; - if ((tile.FramesToUpdate > 0.0f || freezeUpdate) && atlasLight.Cache.Valid) + if ((tile.FramesToUpdate > 0.0f || freezeUpdate) && atlasLight.Cache.DynamicValid) { // Light state matches the cached state and the update rate allows us to reuse the cached shadow map so skip update if (!freezeUpdate) @@ -639,14 +883,16 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render Matrix borderScaleMatrix; Matrix::Scaling(borderScale, borderScale, 1.0f, borderScaleMatrix); - // Render depth to all 6 faces of the cube map atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); - atlasLight.ContextCount = 6; + atlasLight.ContextCount = atlasLight.HasStaticShadowContext ? 12 : 6; renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); + + // Render depth to all 6 faces of the cube map + int32 contextIndex = 0; for (int32 faceIndex = 0; faceIndex < 6; faceIndex++) { - auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + faceIndex]; - SetupRenderContext(renderContext, shadowContext); + auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; + SetupRenderContext(renderContext, shadowContext, &atlasLight); shadowContext.View.SetUpCube(LocalLightNearPlane, light.Radius, light.Position); // Apply border to the projection matrix @@ -658,6 +904,13 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render const auto shadowMapsSize = (float)atlasLight.Resolution; shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &renderContext.View); atlasLight.Tiles[faceIndex].SetWorldToShadow(shadowContext.View.ViewProjection()); + + // Draw static geometry separately to be cached + if (atlasLight.HasStaticShadowContext) + { + auto& shadowContextStatic = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; + SetupRenderContext(renderContext, shadowContextStatic, &atlasLight, 
&shadowContext); + } } } @@ -666,16 +919,23 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render if (SetupLight(shadows, renderContext, renderContextBatch, (RenderLocalLightData&)light, atlasLight)) return; - // Render depth to a single projection atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); - atlasLight.ContextCount = 1; + atlasLight.ContextCount = atlasLight.HasStaticShadowContext ? 2 : 1; renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); + + // Render depth to a single projection auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex]; - SetupRenderContext(renderContext, shadowContext); + SetupRenderContext(renderContext, shadowContext, &atlasLight); shadowContext.View.SetProjector(LocalLightNearPlane, light.Radius, light.Position, light.Direction, light.UpVector, light.OuterConeAngle * 2.0f); - const auto shadowMapsSize = (float)atlasLight.Resolution; - shadowContext.View.PrepareCache(shadowContext, shadowMapsSize, shadowMapsSize, Float2::Zero, &renderContext.View); + shadowContext.View.PrepareCache(shadowContext, atlasLight.Resolution, atlasLight.Resolution, Float2::Zero, &renderContext.View); atlasLight.Tiles[0].SetWorldToShadow(shadowContext.View.ViewProjection()); + + // Draw static geometry separately to be cached + if (atlasLight.HasStaticShadowContext) + { + auto& shadowContextStatic = renderContextBatch.Contexts[atlasLight.ContextIndex + 1]; + SetupRenderContext(renderContext, shadowContextStatic, &atlasLight, &shadowContext); + } } void ShadowsPass::Dispose() @@ -690,6 +950,7 @@ void ShadowsPass::Dispose() _shader = nullptr; _sphereModel = nullptr; SAFE_DELETE_GPU_RESOURCE(_psDepthClear); + SAFE_DELETE_GPU_RESOURCE(_psDepthCopy); } void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& renderContextBatch) @@ -724,6 +985,7 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& const auto currentFrame = 
Engine::FrameCount; shadows.LastFrameUsed = currentFrame; shadows.MaxShadowsQuality = Math::Clamp(Math::Min((int32)Graphics::ShadowsQuality, (int32)renderContext.View.MaxShadowsQuality), 0, (int32)Quality::MAX - 1); + shadows.EnableStaticShadows = !renderContext.View.IsOfflinePass && !renderContext.View.IsSingleFrame; int32 atlasResolution; switch (Graphics::ShadowMapsQuality) { @@ -742,7 +1004,6 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& default: return; } - const int32 baseLightResolution = atlasResolution / MAX_CSM_CASCADES; // Allow to store 4 CSM cascades in a single row in all cases if (shadows.Resolution != atlasResolution) { shadows.Reset(); @@ -765,6 +1026,7 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); // Update/add lights + const int32 baseLightResolution = SHADOWS_BASE_LIGHT_RESOLUTION(atlasResolution); for (const RenderLightData* light : shadowedLights) { auto& atlasLight = shadows.Lights[light->ID]; @@ -846,8 +1108,9 @@ RETRY_ATLAS_SETUP: continue; // Remove existing tiles + atlasLight.Cache.DynamicValid = false; for (ShadowAtlasLightTile& tile : atlasLight.Tiles) - tile.Free(&shadows); + tile.FreeDynamic(&shadows); } // Insert tiles into the atlas (already sorted to favor the first ones) @@ -866,7 +1129,7 @@ RETRY_ATLAS_SETUP: { // Free any previous tiles that were added for (int32 i = 0; i < tileIndex; i++) - atlasLight.Tiles[i].Free(&shadows); + atlasLight.Tiles[i].FreeDynamic(&shadows); failedToInsert = true; break; } @@ -887,7 +1150,7 @@ RETRY_ATLAS_SETUP: } // Rebuild atlas - shadows.ClearTiles(); + shadows.ClearDynamic(); shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); goto RETRY_ATLAS_SETUP; } @@ -901,7 +1164,9 @@ RETRY_ATLAS_SETUP: { // Invalidate cache when whole atlas will be cleared if (shadows.ClearShadowMapAtlas) - atlasLight.Cache.Valid = false; + atlasLight.Cache.DynamicValid 
= false; + if (shadows.ClearStaticShadowMapAtlas) + atlasLight.Cache.StaticValid = false; light->HasShadow = true; atlasLight.TilesCount = atlasLight.TilesNeeded; @@ -913,6 +1178,12 @@ RETRY_ATLAS_SETUP: SetupLight(shadows, renderContext, renderContextBatch, *(RenderDirectionalLightData*)light, atlasLight); } } + if (shadows.StaticAtlasTiles) + { + // Register for active scenes changes to invalidate static shadows + for (SceneRendering* scene : renderContext.List->Scenes) + shadows.ListenSceneRendering(scene); + } #undef IS_LIGHT_TILE_REUSABLE @@ -970,41 +1241,110 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) const ShadowsCustomBuffer& shadows = *shadowsPtr; GPUContext* context = GPUDevice::Instance->GetMainContext(); context->ResetSR(); - context->SetRenderTarget(shadows.ShadowMapAtlas->View(), (GPUTextureView*)nullptr); - GPUConstantBuffer* quadShaderCB; - if (shadows.ClearShadowMapAtlas) + GPUConstantBuffer* quadShaderCB = GPUDevice::Instance->QuadShader->GetCB(0); + QuadShaderData quadShaderData; + + // Update static shadows + if (shadows.StaticShadowMapAtlas) { - context->ClearDepth(shadows.ShadowMapAtlas->View()); - } - else - { - QuadShaderData quadShaderData; - quadShaderData.Color = Float4::One; // Color.r is used by PS_DepthClear in Quad shader to clear depth - quadShaderCB = GPUDevice::Instance->QuadShader->GetCB(0); - context->UpdateCB(quadShaderCB, &quadShaderData); + PROFILE_GPU_CPU("Static"); + if (shadows.ClearStaticShadowMapAtlas) + context->ClearDepth(shadows.StaticShadowMapAtlas->View()); + bool renderedAny = false; + for (auto& e : shadows.Lights) + { + ShadowAtlasLight& atlasLight = e.Value; + if (atlasLight.StaticState != ShadowAtlasLight::UpdateStaticShadow || !atlasLight.HasStaticShadowContext) + continue; + int32 contextIndex = 0; + for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) + { + ShadowAtlasLightTile& tile = atlasLight.Tiles[tileIndex]; + if (!tile.RectTile) + break; + if 
(!tile.StaticRectTile) + continue; + if (!renderedAny) + { + renderedAny = true; + context->SetRenderTarget(shadows.StaticShadowMapAtlas->View(), (GPUTextureView*)nullptr); + } + + // Set viewport for tile + context->SetViewportAndScissors(Viewport(tile.StaticRectTile->X, tile.StaticRectTile->Y, tile.StaticRectTile->Width, tile.StaticRectTile->Height)); + if (!shadows.ClearStaticShadowMapAtlas) + { + // Color.r is used by PS_DepthClear in Quad shader to clear depth + quadShaderData.Color = Float4::One; + context->UpdateCB(quadShaderCB, &quadShaderData); + context->BindCB(0, quadShaderCB); + + // Clear tile depth + context->SetState(_psDepthClear); + context->DrawFullscreenTriangle(); + } + + // Draw objects depth + contextIndex++; // Skip dynamic context + auto& shadowContextStatic = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; + if (!shadowContextStatic.List->DrawCallsLists[(int32)DrawCallsListType::Depth].IsEmpty() || !shadowContextStatic.List->ShadowDepthDrawCallsList.IsEmpty()) + { + shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, DrawCallsListType::Depth); + shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + tile.HasStaticGeometry = true; + } + } + + // Go into copying shadow for the next draw + atlasLight.StaticState = ShadowAtlasLight::CopyStaticShadow; + } + shadows.ClearStaticShadowMapAtlas = false; + if (renderedAny) + { + context->ResetSR(); + context->ResetRenderTarget(); + } } // Render depth to all shadow map tiles + if (shadows.ClearShadowMapAtlas) + context->ClearDepth(shadows.ShadowMapAtlas->View()); + context->SetRenderTarget(shadows.ShadowMapAtlas->View(), (GPUTextureView*)nullptr); for (auto& e : shadows.Lights) { - const ShadowAtlasLight& atlasLight = e.Value; + ShadowAtlasLight& atlasLight = e.Value; int32 contextIndex = 0; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) 
{ - const ShadowAtlasLightTile& tile = atlasLight.Tiles[tileIndex]; + ShadowAtlasLightTile& tile = atlasLight.Tiles[tileIndex]; if (!tile.RectTile) break; if (tile.SkipUpdate) continue; // Set viewport for tile - ASSERT_LOW_LAYER(tile.CachedViewport == Viewport(tile.RectTile->X, tile.RectTile->Y, tile.RectTile->Width, tile.RectTile->Height)); context->SetViewportAndScissors(tile.CachedViewport); - - if (!shadows.ClearShadowMapAtlas) + if (tile.StaticRectTile && atlasLight.StaticState == ShadowAtlasLight::CopyStaticShadow) { - // Clear tile depth + // Color.xyzw is used by PS_DepthCopy in Quad shader to scale input texture UVs + const float staticAtlasResolutionInv = 1.0f / shadows.StaticShadowMapAtlas->Width(); + quadShaderData.Color = Float4(tile.StaticRectTile->Width, tile.StaticRectTile->Height, tile.StaticRectTile->X, tile.StaticRectTile->Y) * staticAtlasResolutionInv; + context->UpdateCB(quadShaderCB, &quadShaderData); context->BindCB(0, quadShaderCB); + + // Copy tile depth + context->BindSR(0, shadows.StaticShadowMapAtlas->View()); + context->SetState(_psDepthCopy); + context->DrawFullscreenTriangle(); + } + else if (!shadows.ClearShadowMapAtlas) + { + // Color.r is used by PS_DepthClear in Quad shader to clear depth + quadShaderData.Color = Float4::One; + context->UpdateCB(quadShaderCB, &quadShaderData); + context->BindCB(0, quadShaderCB); + + // Clear tile depth context->SetState(_psDepthClear); context->DrawFullscreenTriangle(); } @@ -1013,6 +1353,20 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth); shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + if (atlasLight.HasStaticShadowContext) + { + auto& shadowContextStatic = 
renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; + if (!shadowContextStatic.List->DrawCallsLists[(int32)DrawCallsListType::Depth].IsEmpty() || !shadowContextStatic.List->ShadowDepthDrawCallsList.IsEmpty()) + { + if (atlasLight.StaticState != ShadowAtlasLight::CopyStaticShadow) + { + // Draw static objects directly to the shadow map + shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, DrawCallsListType::Depth); + shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + } + tile.HasStaticGeometry = true; + } + } } } diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index 5e9421d2c..df647aac6 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -18,6 +18,7 @@ private: AssetReference _shader; AssetReference _sphereModel; GPUPipelineState* _psDepthClear = nullptr; + GPUPipelineState* _psDepthCopy = nullptr; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowDir; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowPoint; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowSpot; @@ -51,8 +52,8 @@ public: static void GetShadowAtlas(const RenderBuffers* renderBuffers, GPUTexture*& shadowMapAtlas, GPUBufferView*& shadowsBuffer); private: - static void SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext); - static void SetupLight(class ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, struct ShadowAtlasLight& atlasLight); + static void SetupRenderContext(RenderContext& renderContext, RenderContext& shadowContext, struct ShadowAtlasLight* atlasLight = nullptr, RenderContext* dynamicContext = nullptr); + static void SetupLight(class ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, 
RenderLightData& light, ShadowAtlasLight& atlasLight); static bool SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight); static void SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderDirectionalLightData& light, ShadowAtlasLight& atlasLight); static void SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderPointLightData& light, ShadowAtlasLight& atlasLight); diff --git a/Source/Shaders/Quad.shader b/Source/Shaders/Quad.shader index da6fce92c..dcb401bc9 100644 --- a/Source/Shaders/Quad.shader +++ b/Source/Shaders/Quad.shader @@ -69,3 +69,16 @@ float PS_DepthClear(Quad_VS2PS input) : SV_Depth { return Color.r; } + +#ifdef _PS_DepthCopy + +Texture2D Source : register(t0); + +// Pixel Shader for copying depth buffer +META_PS(true, FEATURE_LEVEL_ES2) +float PS_DepthCopy(Quad_VS2PS input) : SV_Depth +{ + return Source.SampleLevel(SamplerPointClamp, input.TexCoord * Color.xy + Color.zw, 0).r; +} + +#endif From 4e65b76b8ce2c7e99113f15359c191654877babc Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 15:58:34 +0200 Subject: [PATCH 028/292] Optimize `BoundingSphere.Intersects` to be inlined by the compiler --- Source/Engine/Core/Math/BoundingSphere.cpp | 6 +++++- Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Core/Math/BoundingSphere.cpp b/Source/Engine/Core/Math/BoundingSphere.cpp index 5b95ca0cc..dbd312dd2 100644 --- a/Source/Engine/Core/Math/BoundingSphere.cpp +++ b/Source/Engine/Core/Math/BoundingSphere.cpp @@ -51,7 +51,11 @@ bool BoundingSphere::Intersects(const BoundingBox& box) const bool BoundingSphere::Intersects(const BoundingSphere& sphere) const { - return CollisionsHelper::SphereIntersectsSphere(*this, 
sphere); + const Real radiisum = Radius + sphere.Radius; + const Real x = Center.X - sphere.Center.X; + const Real y = Center.Y - sphere.Center.Y; + const Real z = Center.Z - sphere.Center.Z; + return x * x + y * y + z * z <= radiisum * radiisum; } ContainmentType BoundingSphere::Contains(const Vector3& point) const diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp index b6d7ef23a..c36fd9758 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp @@ -76,7 +76,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanEnvironmentProbes.Count(); i++) { const RenderEnvironmentProbeData& probe = cache->EnvironmentProbes.Get()[i]; - if (CollisionsHelper::SphereIntersectsSphere(objectBounds, BoundingSphere(probe.Position, probe.Radius))) + if (objectBounds.Intersects(BoundingSphere(probe.Position, probe.Radius))) { noEnvProbe = false; probe.SetShaderData(data.EnvironmentProbe); @@ -96,7 +96,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanPointLights.Count() && data.LocalLightsCount < MaxLocalLights; i++) { const auto& light = cache->PointLights[i]; - if (CollisionsHelper::SphereIntersectsSphere(objectBounds, BoundingSphere(light.Position, light.Radius))) + if (objectBounds.Intersects(BoundingSphere(light.Position, light.Radius))) { light.SetShaderData(data.LocalLights[data.LocalLightsCount], false); data.LocalLightsCount++; @@ -105,7 +105,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, SpanSpotLights.Count() && data.LocalLightsCount < MaxLocalLights; i++) { const auto& light = cache->SpotLights[i]; - if (CollisionsHelper::SphereIntersectsSphere(objectBounds, BoundingSphere(light.Position, light.Radius))) + if (objectBounds.Intersects(BoundingSphere(light.Position, light.Radius))) { 
light.SetShaderData(data.LocalLights[data.LocalLightsCount], false); data.LocalLightsCount++; From 803249f12624ec30bfb6cd8e4683af0d14805495 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 16:19:35 +0200 Subject: [PATCH 029/292] Minor tweaks --- Source/Engine/Level/Actors/StaticModel.cpp | 6 +++--- Source/Engine/Utilities/RectPack.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Level/Actors/StaticModel.cpp b/Source/Engine/Level/Actors/StaticModel.cpp index 07ff609bd..d85088a69 100644 --- a/Source/Engine/Level/Actors/StaticModel.cpp +++ b/Source/Engine/Level/Actors/StaticModel.cpp @@ -400,13 +400,13 @@ void StaticModel::Draw(RenderContextBatch& renderContextBatch) bool StaticModel::IntersectsItself(const Ray& ray, Real& distance, Vector3& normal) { bool result = false; - if (Model != nullptr && Model->IsLoaded()) { Mesh* mesh; - result = Model->Intersects(ray, _transform, distance, normal, &mesh); + Matrix world; + GetLocalToWorldMatrix(world); + result = Model->Intersects(ray, world, distance, normal, &mesh); } - return result; } diff --git a/Source/Engine/Utilities/RectPack.h b/Source/Engine/Utilities/RectPack.h index 5bef82c14..974295c0a 100644 --- a/Source/Engine/Utilities/RectPack.h +++ b/Source/Engine/Utilities/RectPack.h @@ -142,7 +142,8 @@ struct RectPack template void Free(Args&&...args) { - ASSERT(IsUsed); + if (!IsUsed) + return; IsUsed = false; ((NodeType*)this)->OnFree(Forward(args)...); } From a532ea7b4258970001f5bbf5da5b7d49dd03816b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 16:34:42 +0200 Subject: [PATCH 030/292] Add `InvalidateShadow` for manual shadow cache refresh --- Source/Engine/Level/Actors/DirectionalLight.cpp | 1 + Source/Engine/Level/Actors/Light.cpp | 5 +++++ Source/Engine/Level/Actors/Light.h | 8 ++++++++ Source/Engine/Level/Actors/PointLight.cpp | 1 + Source/Engine/Level/Actors/SpotLight.cpp | 1 + Source/Engine/Renderer/RenderList.h | 1 + 
Source/Engine/Renderer/ShadowsPass.cpp | 3 +++ 7 files changed, 20 insertions(+) diff --git a/Source/Engine/Level/Actors/DirectionalLight.cpp b/Source/Engine/Level/Actors/DirectionalLight.cpp index 3d198e042..cef1eba97 100644 --- a/Source/Engine/Level/Actors/DirectionalLight.cpp +++ b/Source/Engine/Level/Actors/DirectionalLight.cpp @@ -39,6 +39,7 @@ void DirectionalLight::Draw(RenderContext& renderContext) data.IndirectLightingIntensity = IndirectLightingIntensity; data.CastVolumetricShadow = CastVolumetricShadow; data.ShadowsUpdateRate = ShadowsUpdateRate; + data.ShadowFrame = _invalidateShadowFrame; data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; data.ShadowsMode = ShadowsMode; data.CascadeCount = CascadeCount; diff --git a/Source/Engine/Level/Actors/Light.cpp b/Source/Engine/Level/Actors/Light.cpp index 6a0dfa726..81aeb2e4a 100644 --- a/Source/Engine/Level/Actors/Light.cpp +++ b/Source/Engine/Level/Actors/Light.cpp @@ -84,6 +84,11 @@ LightWithShadow::LightWithShadow(const SpawnParams& params) { } +void LightWithShadow::InvalidateShadow() +{ + _invalidateShadowFrame++; +} + void LightWithShadow::Serialize(SerializeStream& stream, const void* otherObj) { // Base diff --git a/Source/Engine/Level/Actors/Light.h b/Source/Engine/Level/Actors/Light.h index afe644c27..8fd40c530 100644 --- a/Source/Engine/Level/Actors/Light.h +++ b/Source/Engine/Level/Actors/Light.h @@ -79,6 +79,9 @@ public: API_CLASS(Abstract) class FLAXENGINE_API LightWithShadow : public Light { DECLARE_SCENE_OBJECT_ABSTRACT(LightWithShadow); +protected: + uint32 _invalidateShadowFrame = 0; + public: /// /// The minimum roughness value used to clamp material surface roughness during shading pixel. @@ -145,6 +148,11 @@ public: /// API_FIELD(Attributes="EditorOrder(60), EditorDisplay(\"Shadow\", \"Mode\")") ShadowsCastingMode ShadowsMode = ShadowsCastingMode::All; + + /// + /// Marks the light shadow to be refreshes during next drawing. 
Invalidates any cached shadow map and redraws static shadows of the object (if any in use). + /// + API_FUNCTION() void InvalidateShadow(); public: // [Light] diff --git a/Source/Engine/Level/Actors/PointLight.cpp b/Source/Engine/Level/Actors/PointLight.cpp index 626008a54..5f2be645b 100644 --- a/Source/Engine/Level/Actors/PointLight.cpp +++ b/Source/Engine/Level/Actors/PointLight.cpp @@ -105,6 +105,7 @@ void PointLight::Draw(RenderContext& renderContext) data.CastVolumetricShadow = CastVolumetricShadow; data.ShadowsUpdateRate = ShadowsUpdateRate; data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; + data.ShadowFrame = _invalidateShadowFrame; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index f2b6046ee..892bdf02e 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -155,6 +155,7 @@ void SpotLight::Draw(RenderContext& renderContext) data.CastVolumetricShadow = CastVolumetricShadow; data.ShadowsUpdateRate = ShadowsUpdateRate; data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; + data.ShadowFrame = _invalidateShadowFrame; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index a58eeb0c4..0fe3bc960 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -58,6 +58,7 @@ struct RenderLightData float ShadowsUpdateRate; float ShadowsUpdateRateAtDistance; + uint32 ShadowFrame; bool CanRenderShadow(const RenderView& view) const; }; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 32ec246aa..d6a40238a 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -121,6 +121,7 @@ struct ShadowAtlasLightCache 
bool DynamicValid; float ShadowsUpdateRate; float ShadowsUpdateRateAtDistance; + uint32 ShadowFrame; float OuterConeAngle; Float3 Position; float Radius; @@ -136,6 +137,7 @@ struct ShadowAtlasLightCache ShadowsUpdateRate = light.ShadowsUpdateRate; ShadowsUpdateRateAtDistance = light.ShadowsUpdateRateAtDistance; Direction = light.Direction; + ShadowFrame = light.ShadowFrame; if (light.IsDirectionalLight) { // Sun @@ -229,6 +231,7 @@ struct ShadowAtlasLight if (!Math::NearEqual(Cache.Distance, light.ShadowsDistance) || !Math::NearEqual(Cache.ShadowsUpdateRate, light.ShadowsUpdateRate) || !Math::NearEqual(Cache.ShadowsUpdateRateAtDistance, light.ShadowsUpdateRateAtDistance) || + Cache.ShadowFrame != light.ShadowFrame || Float3::Dot(Cache.Direction, light.Direction) < 0.999999f) { // Invalidate From 62444315dee14e00e372292be3f1594ae931ce9c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 16:38:43 +0200 Subject: [PATCH 031/292] Add `METERS_TO_UNITS` and impl metric units in shadows rendering --- Source/Engine/Engine/Units.h | 5 +++++ Source/Engine/Renderer/ShadowsPass.cpp | 9 +++++---- 2 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 Source/Engine/Engine/Units.h diff --git a/Source/Engine/Engine/Units.h b/Source/Engine/Engine/Units.h new file mode 100644 index 000000000..6ddd7273e --- /dev/null +++ b/Source/Engine/Engine/Units.h @@ -0,0 +1,5 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+ +#pragma once + +#define METERS_TO_UNITS(meters) (meters * 100.0f) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index d6a40238a..1d9ad83fe 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -10,6 +10,7 @@ #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Content/Content.h" #include "Engine/Engine/Engine.h" +#include "Engine/Engine/Units.h" #include "Engine/Graphics/RenderTools.h" #include "Engine/Level/Scene/SceneRendering.h" #include "Engine/Scripting/Enums.h" @@ -21,8 +22,8 @@ #define SHADOWS_MAX_TILES 6 #define SHADOWS_MIN_RESOLUTION 16 #define SHADOWS_BASE_LIGHT_RESOLUTION(atlasResolution) atlasResolution / MAX_CSM_CASCADES // Allow to store 4 CSM cascades in a single row in all cases -#define NormalOffsetScaleTweak 100.0f -#define LocalLightNearPlane 10.0f +#define NormalOffsetScaleTweak METERS_TO_UNITS(1) +#define LocalLightNearPlane METERS_TO_UNITS(0.1f) PACK_STRUCT(struct Data{ ShaderGBufferData GBuffer; @@ -251,7 +252,7 @@ struct ShadowAtlasLight { // Local light const auto& localLight = (const RenderLocalLightData&)light; - if (!Float3::NearEqual(Cache.Position, light.Position, 1.0f) || + if (!Float3::NearEqual(Cache.Position, light.Position, METERS_TO_UNITS(0.1f)) || !Math::NearEqual(Cache.Radius, localLight.Radius)) { // Invalidate @@ -595,7 +596,7 @@ bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render // Calculate static resolution for the light based on the world-bounds, not view-dependant shadows.InitStaticAtlas(); const int32 baseLightResolution = SHADOWS_BASE_LIGHT_RESOLUTION(shadows.Resolution); - int32 staticResolution = Math::RoundToInt(Math::Saturate(light.Radius / 1000.0f) * baseLightResolution); + int32 staticResolution = Math::RoundToInt(Math::Saturate(light.Radius / METERS_TO_UNITS(10)) * baseLightResolution); if (!Math::IsPowerOfTwo(staticResolution)) staticResolution = 
Math::RoundUpToPowerOf2(staticResolution); atlasLight.StaticResolution = staticResolution; From 6233718b06458ca4ec036fd391a1e8453f3ea373 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 17:33:42 +0200 Subject: [PATCH 032/292] Update engine shaders --- Content/Editor/Camera/M_Camera.flax | 4 ++-- Content/Editor/CubeTexturePreviewMaterial.flax | 4 ++-- Content/Editor/DebugMaterials/DDGIDebugProbes.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Decal.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Particle.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Surface.flax | 4 ++-- .../Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Terrain.flax | 4 ++-- Content/Editor/DefaultFontMaterial.flax | 4 ++-- Content/Editor/Gizmo/FoliageBrushMaterial.flax | 4 ++-- Content/Editor/Gizmo/Material.flax | 4 ++-- Content/Editor/Gizmo/MaterialWire.flax | 4 ++-- Content/Editor/Gizmo/SelectionOutlineMaterial.flax | 4 ++-- Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax | 4 ++-- Content/Editor/Highlight Material.flax | 4 ++-- Content/Editor/Icons/IconsMaterial.flax | 4 ++-- Content/Editor/IesProfilePreviewMaterial.flax | 4 ++-- Content/Editor/Particles/Particle Material Color.flax | 4 ++-- Content/Editor/Particles/Smoke Material.flax | 4 ++-- Content/Editor/SpriteMaterial.flax | 4 ++-- Content/Editor/Terrain/Circle Brush Material.flax | 4 ++-- Content/Editor/Terrain/Highlight Terrain Material.flax | 4 ++-- Content/Editor/TexturePreviewMaterial.flax | 4 ++-- Content/Editor/Wires Debug Material.flax | 4 ++-- Content/Engine/DefaultDeformableMaterial.flax | 4 ++-- Content/Engine/DefaultMaterial.flax | 4 ++-- Content/Engine/DefaultTerrainMaterial.flax | 4 ++-- Content/Engine/SingleColorMaterial.flax | 4 ++-- Content/Engine/SkyboxMaterial.flax | 4 ++-- Content/Shaders/GI/GlobalSurfaceAtlas.flax | 4 ++-- Content/Shaders/GlobalSignDistanceField.flax | 4 ++-- Content/Shaders/Lights.flax | 4 ++-- 
Content/Shaders/Quad.flax | 4 ++-- Content/Shaders/Shadows.flax | 4 ++-- Content/Shaders/VolumetricFog.flax | 4 ++-- 35 files changed, 69 insertions(+), 69 deletions(-) diff --git a/Content/Editor/Camera/M_Camera.flax b/Content/Editor/Camera/M_Camera.flax index 26ba67429..e5c62741b 100644 --- a/Content/Editor/Camera/M_Camera.flax +++ b/Content/Editor/Camera/M_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:966db15e769106e448eedaaf8bff4d5724f63e813fc7f81c9da2962b37c9b57e -size 30094 +oid sha256:96c714dacb46f6e26f5a50b61bc1cf774422cea8b87eefb971e50ea4040b215c +size 30159 diff --git a/Content/Editor/CubeTexturePreviewMaterial.flax b/Content/Editor/CubeTexturePreviewMaterial.flax index 828d620d2..719f0f126 100644 --- a/Content/Editor/CubeTexturePreviewMaterial.flax +++ b/Content/Editor/CubeTexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28e5aaeb274e7590bc9ec13212e041d4d14baef562487507ea3621ec040c393b -size 31807 +oid sha256:0854958ac0b11cb0af8641828ecaf7dbe82b5dd21d592d9211d87074c83fba39 +size 31874 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index cc4a16fe3..41a7b07df 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d0dc041c6b8712c2f892ac3185c1f9e0b3608b774db5590bcaad3ad0775dc93 -size 41042 +oid sha256:2f1bb84b9ac03d93f2144e44d76f6964290368438ec4e4a4cdf6f42f0698f8f2 +size 41107 diff --git a/Content/Editor/DebugMaterials/SingleColor/Decal.flax b/Content/Editor/DebugMaterials/SingleColor/Decal.flax index f5ed08a03..9fdcca698 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Decal.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Decal.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:6854135456c47f2c693ebd875d83812f29a6c93b6cddad9e267de2db261a6133 +oid sha256:4d4d4ca84c767ba0bb1b54f08058820dc08e0070109e758a31069426ba38cbe5 size 7489 diff --git a/Content/Editor/DebugMaterials/SingleColor/Particle.flax b/Content/Editor/DebugMaterials/SingleColor/Particle.flax index e22b6c1fa..e25dfafa3 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Particle.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Particle.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a54c7442f97baa1ab835485f906911372847808e2865790109b0524614663210 -size 31722 +oid sha256:22279958c1a48fc8ffeff65399b48a2370bcbd89218ba5338f0f1a6fbf837800 +size 31681 diff --git a/Content/Editor/DebugMaterials/SingleColor/Surface.flax b/Content/Editor/DebugMaterials/SingleColor/Surface.flax index 95bab955f..363a4bd97 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Surface.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Surface.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:129afa8e75f4e83307e3171993f6e3ae52a205456a629beae4a58ed84e435292 -size 29990 +oid sha256:d3c76781b1c31a1018710e212e0d0f578d4f683ca46f2f4b063fa631f9643651 +size 30055 diff --git a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax index 790690cd8..93358deef 100644 --- a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax +++ b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cd7963263033f8f6e09de86da1ce14aa9baee3f1811986d56441cc62164e231 -size 31882 +oid sha256:9d085a000fa4d2e16992e3dc6beafd68f7b1c31eeb683f5bb9f6a28fa2c469a8 +size 31772 diff --git a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax index 6e85901e7..b5940f5d0 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax +++ 
b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e21ef2afb59c22647d644502c6c0b6316f3e1b75be0b4ac95e4cde0701969d45 -size 20769 +oid sha256:4be04d090787f83bd2fe2d6fe932fe3b308831d51aa16bbda832779a87249431 +size 20826 diff --git a/Content/Editor/DefaultFontMaterial.flax b/Content/Editor/DefaultFontMaterial.flax index 87e8f541c..4535445cd 100644 --- a/Content/Editor/DefaultFontMaterial.flax +++ b/Content/Editor/DefaultFontMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7838428fc9afa03649b25168f2458f7c06fc99c6a50f17579daa19ccc93916e -size 30169 +oid sha256:61f4cc3f4d2076ecac4f51a681428366b51eeec9d97b500c4f0e1dd4cd6b2536 +size 30234 diff --git a/Content/Editor/Gizmo/FoliageBrushMaterial.flax b/Content/Editor/Gizmo/FoliageBrushMaterial.flax index f1fa6e3f5..3ce357477 100644 --- a/Content/Editor/Gizmo/FoliageBrushMaterial.flax +++ b/Content/Editor/Gizmo/FoliageBrushMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f87fe1172cc96b6aaef5653b9d7c5ef336b7f5934c620ae13661fd89b636cfcc -size 37909 +oid sha256:2cfd83a91f02c66e7fc3e566a4628d7888b146a14ee7536ad4555c9d2c78c254 +size 37799 diff --git a/Content/Editor/Gizmo/Material.flax b/Content/Editor/Gizmo/Material.flax index 80d319a03..6af56e65e 100644 --- a/Content/Editor/Gizmo/Material.flax +++ b/Content/Editor/Gizmo/Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc18e2ff1f55cfc41a5b083843b11e1573ac7066bc58153262b2b61ea6105fce -size 32486 +oid sha256:15b45d14202358b368e33274961c9970663a90597ad7c5b4f37f31d925f0d5f1 +size 32376 diff --git a/Content/Editor/Gizmo/MaterialWire.flax b/Content/Editor/Gizmo/MaterialWire.flax index db5910a4f..d56bd8597 100644 --- a/Content/Editor/Gizmo/MaterialWire.flax +++ b/Content/Editor/Gizmo/MaterialWire.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:4dcae1147c6a225a8e5e0613d15950b77c05f3f90f60acdfa3943063ccc582a5 -size 31624 +oid sha256:5572a4037617c2e88bea953c0985e2f0d90a4df3a1a5c02543876c972d118988 +size 31514 diff --git a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax index 1deecbcd9..159fa8933 100644 --- a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax +++ b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:811b50ce2d2672bf7ce12dc161f6dc2038c5cfe8792012632ed89fb951c4058f -size 16202 +oid sha256:6e6f1350af69d3d6d0b36b033dd35c22dccd484744cc187961769e2ec6df807b +size 16166 diff --git a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax index f9f1b97c7..76fb8804e 100644 --- a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax +++ b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30bcf9eb156bafe9b5536c040994877eab6cb0fe942fc31d744e5bff1bb02838 -size 31103 +oid sha256:f59c3bf09d03d65ac2a165948830627b8be532a41e049f1a56f49e7f578bb352 +size 31168 diff --git a/Content/Editor/Highlight Material.flax b/Content/Editor/Highlight Material.flax index c13a1c0d4..8b715b86e 100644 --- a/Content/Editor/Highlight Material.flax +++ b/Content/Editor/Highlight Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb2d2b798b5fe819a0c137603b1c1bb598530ef01efb681690e5ce0901a550a9 -size 30271 +oid sha256:95d9c7d46086c09963581d752d595ad99464d4db80e6769de0a574c30e6ee0e8 +size 30161 diff --git a/Content/Editor/Icons/IconsMaterial.flax b/Content/Editor/Icons/IconsMaterial.flax index 7966e8335..713c85bb4 100644 --- a/Content/Editor/Icons/IconsMaterial.flax +++ b/Content/Editor/Icons/IconsMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d4413df45e276c0cf6b0ede54ecf8dbf1a6f16ee12db3df9b409db2a632adc7 
-size 30199 +oid sha256:aa7e2cf4c9a7d0dc8f7d6465ca221468c689b5dbd0399e9b0150a9785286701b +size 30089 diff --git a/Content/Editor/IesProfilePreviewMaterial.flax b/Content/Editor/IesProfilePreviewMaterial.flax index ba6d02979..d17183c07 100644 --- a/Content/Editor/IesProfilePreviewMaterial.flax +++ b/Content/Editor/IesProfilePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d3c6d38944ae59d48c60c7e937c43b03d4659398ac05ac863fd4da16156529f -size 18217 +oid sha256:ade8f09bce5c137cb9cda74b8bef44eae8b912413dfa1cc32306ecf720fefc6e +size 18205 diff --git a/Content/Editor/Particles/Particle Material Color.flax b/Content/Editor/Particles/Particle Material Color.flax index 3fbacc935..91dcb336c 100644 --- a/Content/Editor/Particles/Particle Material Color.flax +++ b/Content/Editor/Particles/Particle Material Color.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3217b2dc041d3c41e77522a0766d99de0af26fd3ec28142cb9bb505b0fbc7723 -size 29953 +oid sha256:0e323ea10f886fe27064d827e2fabf3ce6cbc2a176080326bd0e03f271d9098c +size 29912 diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index d12139eff..621ebb304 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8657cb23bdf2423f4535cb67da0487be72864a12d53e9521fdc99bce752ff518 -size 37141 +oid sha256:e72b5cbd7ca6b968986ac4e40a3a031ca5bf23d7e59d5b9369fcb6a3a481e657 +size 37107 diff --git a/Content/Editor/SpriteMaterial.flax b/Content/Editor/SpriteMaterial.flax index 75f4d1f89..c73a6d096 100644 --- a/Content/Editor/SpriteMaterial.flax +++ b/Content/Editor/SpriteMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eae8e6b9147611dcf98b181339d3b581128a877b55c8b52b586b7d3fe43d01b2 -size 31182 +oid 
sha256:c4ae3d583979e95557ebe303ec447e4d4808704b86ec39fa9f7f6692bc8502be +size 31247 diff --git a/Content/Editor/Terrain/Circle Brush Material.flax b/Content/Editor/Terrain/Circle Brush Material.flax index 9b21e6c82..c26badef3 100644 --- a/Content/Editor/Terrain/Circle Brush Material.flax +++ b/Content/Editor/Terrain/Circle Brush Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a20f7283220500bb17c8c5afbab5a5c1bfdc03703868b8ced7da462657806fd7 -size 27409 +oid sha256:ecac8c9ea886e3196312daa5cd9b7302640f8aaafbd7867db4045d3ad7e4a33a +size 27498 diff --git a/Content/Editor/Terrain/Highlight Terrain Material.flax b/Content/Editor/Terrain/Highlight Terrain Material.flax index 00560e1cb..cc5b03f54 100644 --- a/Content/Editor/Terrain/Highlight Terrain Material.flax +++ b/Content/Editor/Terrain/Highlight Terrain Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f48c4f43dcf7e05255b41d763e1ca9b21d629df43d35b59b8574ae955d392fc4 -size 20822 +oid sha256:65f5766f9f820cf73d91281f71b81b6a1e87d3d2f05941014815b0f172f57cd6 +size 20879 diff --git a/Content/Editor/TexturePreviewMaterial.flax b/Content/Editor/TexturePreviewMaterial.flax index b21daaa5a..c2f9e30e7 100644 --- a/Content/Editor/TexturePreviewMaterial.flax +++ b/Content/Editor/TexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52921ebe6efaf3a74950e817a4e2224ef2ef54e0698efc042ba9cd2c17693e54 -size 10568 +oid sha256:ec352914d63c8a2df600f4232f54f8c1b603dfbaba2e7b3a409bed1db8075393 +size 10570 diff --git a/Content/Editor/Wires Debug Material.flax b/Content/Editor/Wires Debug Material.flax index caa177eac..1112b3e6a 100644 --- a/Content/Editor/Wires Debug Material.flax +++ b/Content/Editor/Wires Debug Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f8018999baf4b415d400b0b6150de27fbed48e2c93afbd16d074efc39b9c206 -size 30271 +oid 
sha256:1508dde475cb3119373ef35798445ddbe3d8889c4a4cc9f1a2a475ef3fc3530d +size 30161 diff --git a/Content/Engine/DefaultDeformableMaterial.flax b/Content/Engine/DefaultDeformableMaterial.flax index 052d5946e..a14bb5b04 100644 --- a/Content/Engine/DefaultDeformableMaterial.flax +++ b/Content/Engine/DefaultDeformableMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a70603cf2ace948b588924b39a1d31e309238782581f26b3388b8d9d3b44d4d7 -size 18530 +oid sha256:e2a822ec9b556e3e9083067b45aea2c41d5ad52843437c1b675a3b41029001f4 +size 18524 diff --git a/Content/Engine/DefaultMaterial.flax b/Content/Engine/DefaultMaterial.flax index 588f85571..744686ab8 100644 --- a/Content/Engine/DefaultMaterial.flax +++ b/Content/Engine/DefaultMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de63950e622426ed8905d19097a5f9d189da4a6300ae9a77577f0e7fab59a1bb -size 32021 +oid sha256:27327918e9de3efed1709d8631d83e50512379b87d3085536c0102a476bfbf3d +size 32080 diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index e9af0516e..bb8ba3384 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c206df1af51d206c805449d5e6a4ce3fcaf70ac0c5310cf77dc0921a56e8552 -size 22912 +oid sha256:352410f2c8ee23a040618101b3f023b82d204fafe690aa83b5991ba95096a46e +size 22963 diff --git a/Content/Engine/SingleColorMaterial.flax b/Content/Engine/SingleColorMaterial.flax index 31af3b1c8..503871025 100644 --- a/Content/Engine/SingleColorMaterial.flax +++ b/Content/Engine/SingleColorMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c87f422816ea68b3d6524b4f727d007ac4d5d2484931cf7a61cfe722b9c111d7 -size 30191 +oid sha256:1172d0f1d41899aa585d3a2ff1d7ef55ba50f9e42692f0eb648dad4643aea0a2 +size 30256 diff --git a/Content/Engine/SkyboxMaterial.flax 
b/Content/Engine/SkyboxMaterial.flax index df2859f7b..45b32d9ac 100644 --- a/Content/Engine/SkyboxMaterial.flax +++ b/Content/Engine/SkyboxMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e8a62e94c4e265902ed2e95ea059dcc0aca50f79f7a6986ff5a6d99f034f3d3 -size 31389 +oid sha256:d021de8170348e1b7fb73106fd244db949382bf9433d2cfcac9283145d35d6cd +size 31454 diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax index b72123fea..d3f164f56 100644 --- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax +++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3e314b762c684f358646124dcfb91f348574526e1e6c2dcf140dc9f02ad08e5 -size 12616 +oid sha256:5ef0f096465bb267138c7f10ec745e171a6fd642a22801f339eb6da260665f0b +size 12626 diff --git a/Content/Shaders/GlobalSignDistanceField.flax b/Content/Shaders/GlobalSignDistanceField.flax index b51257d54..0affdb165 100644 --- a/Content/Shaders/GlobalSignDistanceField.flax +++ b/Content/Shaders/GlobalSignDistanceField.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6a82529bb03a334f1b40d7e4814d4cd35de447be21b7f29d736fc4cf203bee9 -size 11798 +oid sha256:fb1ffe921c3a317cf6d90562db6ba897baccc43223111ab74b5d1ac23665264e +size 11827 diff --git a/Content/Shaders/Lights.flax b/Content/Shaders/Lights.flax index d44c26b54..94e4ce4e2 100644 --- a/Content/Shaders/Lights.flax +++ b/Content/Shaders/Lights.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:285cd4cdf25bcda8d86274cd2231d482e89fbbd795c3c712b1a281f38c405042 -size 5122 +oid sha256:396724aaead871353657a7962b36f6aa01b68ac20754e9b822fb9971d4fe4b40 +size 5129 diff --git a/Content/Shaders/Quad.flax b/Content/Shaders/Quad.flax index 7a9b98866..1d72311b3 100644 --- a/Content/Shaders/Quad.flax +++ b/Content/Shaders/Quad.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:b5fe22946a9d9dc288593aea28473add3337cea5c82df5909a9d4fcf1f392917 -size 2242 +oid sha256:baeb87ed9d4228f410a7e1228e16d13589e56aa34cbba4233c53a147442aa395 +size 2647 diff --git a/Content/Shaders/Shadows.flax b/Content/Shaders/Shadows.flax index 68f640ba9..ef25d1879 100644 --- a/Content/Shaders/Shadows.flax +++ b/Content/Shaders/Shadows.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bbb6f0c654179cb6613cc50a586e778c86b51ed0d64d99f8ac4ff10d1645745e -size 7654 +oid sha256:236797180d9933b69146c2651d3778997e0da6d055ad74477b254f4cb3767783 +size 6505 diff --git a/Content/Shaders/VolumetricFog.flax b/Content/Shaders/VolumetricFog.flax index ffd6a0487..3613c79b6 100644 --- a/Content/Shaders/VolumetricFog.flax +++ b/Content/Shaders/VolumetricFog.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bcc2890d1b35691a686d81a3779f8a2caa9e4244a724040f774c487252619d3c -size 13633 +oid sha256:9ccc821da8613409f4c829f14958534f87c142edc6ac0f0d73d8a9e6e3fc6efc +size 13299 From 5f860db6a512c800e77ddea0d77f68c32fe84093 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 17:34:21 +0200 Subject: [PATCH 033/292] Fix typo --- Source/Engine/Renderer/ShadowsPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 1d9ad83fe..fe2fa0cd2 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -562,7 +562,7 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render atlasLight.Bias = light.ShadowsDepthBias; atlasLight.FadeDistance = Math::Max(light.ShadowsFadeDistance, 0.1f); atlasLight.Distance = Math::Min(renderContext.View.Far, light.ShadowsDistance); - atlasLight.Bounds.Center = light.Position + renderContext.View.Position; + atlasLight.Bounds.Center = light.Position + renderContext.View.Origin; // Keep bounds in world-space to properly handle 
DirtyStaticBounds atlasLight.Bounds.Radius = 0.0f; } From 7342629a86630577ead19afab8ee4302e9a3cde2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Apr 2024 18:47:32 +0200 Subject: [PATCH 034/292] Add dynamic resolution for static shadow map tiles limited by current dynamic res --- Source/Engine/Renderer/ShadowsPass.cpp | 96 +++++++++++++++++++++----- 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index fe2fa0cd2..e2c1c1f42 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -21,6 +21,7 @@ #define SHADOWS_MAX_TILES 6 #define SHADOWS_MIN_RESOLUTION 16 +#define SHADOWS_MAX_STATIC_ATLAS_CAPACITY_TO_DEFRAG 0.7f #define SHADOWS_BASE_LIGHT_RESOLUTION(atlasResolution) atlasResolution / MAX_CSM_CASCADES // Allow to store 4 CSM cascades in a single row in all cases #define NormalOffsetScaleTweak METERS_TO_UNITS(1) #define LocalLightNearPlane METERS_TO_UNITS(0.1f) @@ -38,12 +39,14 @@ PACK_STRUCT(struct Data{ struct ShadowsAtlasRectTile : RectPack { + bool IsStatic; + ShadowsAtlasRectTile(uint16 x, uint16 y, uint16 width, uint16 height) : RectPack(x, y, width, height) { } - void OnInsert(class ShadowsCustomBuffer* buffer); + void OnInsert(class ShadowsCustomBuffer* buffer, bool isStatic); void OnFree(ShadowsCustomBuffer* buffer); }; @@ -83,7 +86,7 @@ struct ShadowAtlasLightTile { if (StaticRectTile) { - StaticRectTile->Free((ShadowsCustomBuffer*)nullptr); + StaticRectTile->Free(buffer); StaticRectTile = nullptr; } } @@ -101,6 +104,13 @@ struct ShadowAtlasLightTile SkipUpdate = false; } + void ClearStatic() + { + StaticRectTile = nullptr; + FramesToUpdate = 0; + SkipUpdate = false; + } + void SetWorldToShadow(const Matrix& shadowViewProjection) { // Transform Clip Space [-1,+1]^2 to UV Space [0,1]^2 (saves MAD instruction in shader) @@ -283,6 +293,7 @@ public: int32 MaxShadowsQuality = 0; int32 Resolution = 0; int32 
AtlasPixelsUsed = 0; + int32 StaticAtlasPixelsUsed = 0; bool EnableStaticShadows = true; mutable bool ClearShadowMapAtlas = true; mutable bool ClearStaticShadowMapAtlas = false; @@ -304,7 +315,6 @@ public: void ClearDynamic() { ClearShadowMapAtlas = true; - AtlasPixelsUsed = 0; for (auto it = Lights.Begin(); it.IsNotEnd(); ++it) { auto& atlasLight = it->Value; @@ -313,11 +323,27 @@ public: atlasLight.Tiles[i].ClearDynamic(); } SAFE_DELETE(AtlasTiles); + AtlasPixelsUsed = 0; + } + + void ClearStatic() + { + ClearStaticShadowMapAtlas = true; + for (auto it = Lights.Begin(); it.IsNotEnd(); ++it) + { + auto& atlasLight = it->Value; + atlasLight.Cache.StaticValid = false; + for (int32 i = 0; i < atlasLight.TilesCount; i++) + atlasLight.Tiles[i].ClearDynamic(); + } + SAFE_DELETE(StaticAtlasTiles); + StaticAtlasPixelsUsed = 0; } void Reset() { Lights.Clear(); + StaticAtlasPixelsUsed = 0; SAFE_DELETE(StaticAtlasTiles); ClearDynamic(); ViewOrigin = Vector3::Zero; @@ -391,16 +417,23 @@ public: } }; -void ShadowsAtlasRectTile::OnInsert(ShadowsCustomBuffer* buffer) +void ShadowsAtlasRectTile::OnInsert(ShadowsCustomBuffer* buffer, bool isStatic) { - if (buffer) - buffer->AtlasPixelsUsed += (int32)Width * (int32)Height; + IsStatic = isStatic; + const int32 pixels = (int32)Width * (int32)Height; + if (isStatic) + buffer->StaticAtlasPixelsUsed += pixels; + else + buffer->AtlasPixelsUsed += pixels; } void ShadowsAtlasRectTile::OnFree(ShadowsCustomBuffer* buffer) { - if (buffer) - buffer->AtlasPixelsUsed -= (int32)Width * (int32)Height; + const int32 pixels = (int32)Width * (int32)Height; + if (IsStatic) + buffer->StaticAtlasPixelsUsed -= pixels; + else + buffer->AtlasPixelsUsed -= pixels; } String ShadowsPass::ToString() const @@ -582,7 +615,24 @@ bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render // Update static shadow logic atlasLight.HasStaticShadowContext = shadows.EnableStaticShadows && EnumHasAllFlags(light.StaticFlags, StaticFlags::Shadow); - 
if (!atlasLight.HasStaticShadowContext) + if (atlasLight.HasStaticShadowContext) + { + // Calculate static resolution for the light based on the world-bounds, not view-dependant + shadows.InitStaticAtlas(); + const int32 baseLightResolution = SHADOWS_BASE_LIGHT_RESOLUTION(shadows.Resolution) / 2; + int32 staticResolution = Math::RoundToInt(Math::Saturate(light.Radius / METERS_TO_UNITS(10)) * baseLightResolution); + staticResolution = Math::Clamp(staticResolution, atlasLight.Resolution, atlasLight.Resolution * 2); // Limit static shadow to be max x2 the current dynamic shadow res + if (!Math::IsPowerOfTwo(staticResolution)) + staticResolution = Math::RoundUpToPowerOf2(staticResolution); // Round up to power of two to reduce fragmentation of the static atlas and redraws + if (staticResolution != atlasLight.StaticResolution) + { + atlasLight.StaticResolution = staticResolution; + atlasLight.StaticState = ShadowAtlasLight::Unused; + for (auto& tile : atlasLight.Tiles) + tile.FreeStatic(&shadows); + } + } + else atlasLight.StaticState = ShadowAtlasLight::Unused; switch (atlasLight.StaticState) { @@ -593,13 +643,7 @@ bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render case ShadowAtlasLight::WaitForGeometryCheck: if (atlasLight.HasStaticGeometry()) { - // Calculate static resolution for the light based on the world-bounds, not view-dependant shadows.InitStaticAtlas(); - const int32 baseLightResolution = SHADOWS_BASE_LIGHT_RESOLUTION(shadows.Resolution); - int32 staticResolution = Math::RoundToInt(Math::Saturate(light.Radius / METERS_TO_UNITS(10)) * baseLightResolution); - if (!Math::IsPowerOfTwo(staticResolution)) - staticResolution = Math::RoundUpToPowerOf2(staticResolution); - atlasLight.StaticResolution = staticResolution; // Allocate static shadow map slot for all used tiles for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) @@ -607,7 +651,7 @@ bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& 
render auto& tile = atlasLight.Tiles[tileIndex]; if (tile.StaticRectTile == nullptr) { - tile.StaticRectTile = shadows.StaticAtlasTiles->Insert(atlasLight.StaticResolution, atlasLight.StaticResolution, 0, (ShadowsCustomBuffer*)nullptr); + tile.StaticRectTile = shadows.StaticAtlasTiles->Insert(atlasLight.StaticResolution, atlasLight.StaticResolution, 0, &shadows, true); if (!tile.StaticRectTile) { // Failed to insert tile to switch back to the default rendering @@ -1026,6 +1070,24 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& shadows.Reset(); shadows.ViewOrigin = renderContext.View.Origin; } + if (shadows.StaticAtlasTiles && (float)shadows.StaticAtlasPixelsUsed / (shadows.StaticAtlasTiles->Width * shadows.StaticAtlasTiles->Height) < SHADOWS_MAX_STATIC_ATLAS_CAPACITY_TO_DEFRAG) + { + float app = (float)shadows.StaticAtlasPixelsUsed / (shadows.StaticAtlasTiles->Width * shadows.StaticAtlasTiles->Height); + // Defragment static shadow atlas if it failed to insert any light but it's still should have space + bool anyStaticFailed = false; + for (auto& e : shadows.Lights) + { + if (e.Value.StaticState == ShadowAtlasLight::FailedToInsertTiles) + { + anyStaticFailed = true; + break; + } + } + if (anyStaticFailed) + { + shadows.ClearStatic(); + } + } if (!shadows.AtlasTiles) shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); @@ -1128,7 +1190,7 @@ RETRY_ATLAS_SETUP: bool failedToInsert = false; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesNeeded; tileIndex++) { - auto rectTile = shadows.AtlasTiles->Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows); + auto rectTile = shadows.AtlasTiles->Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows, false); if (!rectTile) { // Free any previous tiles that were added From 00f2a0b825c02f8e9fd270a640d016768f3b15aa Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 12 Apr 2024 11:29:21 +0200 Subject: [PATCH 035/292] Improve doc --- 
Source/Engine/Graphics/Enums.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/Enums.h b/Source/Engine/Graphics/Enums.h index 3cbb4a1e1..01ac11667 100644 --- a/Source/Engine/Graphics/Enums.h +++ b/Source/Engine/Graphics/Enums.h @@ -252,7 +252,7 @@ API_ENUM() enum class PartitionMode Logarithmic = 1, /// - /// PSSM cascade splits. + /// Parallel-Split Shadow Maps cascade splits. /// PSSM = 2, }; From 25f3cef8c3b66f81a2a6dfae849310434e632fa1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 14 Apr 2024 23:44:08 +0200 Subject: [PATCH 036/292] Fix artifacts due to light shape culling and use depth test to improve perf --- Source/Engine/Graphics/RenderTools.cpp | 23 ++++--- Source/Engine/Renderer/LightPass.cpp | 70 +++++++++++----------- Source/Engine/Renderer/LightPass.h | 24 ++++---- Source/Engine/Renderer/ReflectionsPass.cpp | 37 ++++++------ Source/Engine/Renderer/ReflectionsPass.h | 27 ++------- Source/Engine/Renderer/ShadowsPass.cpp | 44 +++++++++----- Source/Engine/Renderer/ShadowsPass.h | 2 + 7 files changed, 113 insertions(+), 114 deletions(-) diff --git a/Source/Engine/Graphics/RenderTools.cpp b/Source/Engine/Graphics/RenderTools.cpp index 1679326bc..679555141 100644 --- a/Source/Engine/Graphics/RenderTools.cpp +++ b/Source/Engine/Graphics/RenderTools.cpp @@ -337,14 +337,14 @@ void RenderTools::ComputePitch(PixelFormat format, int32 width, int32 height, ui case PixelFormat::ASTC_8x8_UNorm_sRGB: case PixelFormat::ASTC_10x10_UNorm: case PixelFormat::ASTC_10x10_UNorm_sRGB: - { - const int32 blockSize = PixelFormatExtensions::ComputeBlockSize(format); - uint32 nbw = Math::Max(1, Math::DivideAndRoundUp(width, blockSize)); - uint32 nbh = Math::Max(1, Math::DivideAndRoundUp(height, blockSize)); - rowPitch = nbw * 16; // All ASTC blocks use 128 bits - slicePitch = rowPitch * nbh; - } - break; + { + const int32 blockSize = PixelFormatExtensions::ComputeBlockSize(format); + uint32 nbw = Math::Max(1, 
Math::DivideAndRoundUp(width, blockSize)); + uint32 nbh = Math::Max(1, Math::DivideAndRoundUp(height, blockSize)); + rowPitch = nbw * 16; // All ASTC blocks use 128 bits + slicePitch = rowPitch * nbh; + } + break; case PixelFormat::R8G8_B8G8_UNorm: case PixelFormat::G8R8_G8B8_UNorm: ASSERT(PixelFormatExtensions::IsPacked(format)); @@ -590,7 +590,7 @@ void RenderTools::CalculateTangentFrame(FloatR10G10B10A2& resultNormal, FloatR10 void RenderTools::ComputeSphereModelDrawMatrix(const RenderView& view, const Float3& position, float radius, Matrix& resultWorld, bool& resultIsViewInside) { // Construct world matrix - constexpr float sphereModelScale = 0.0202f; // Manually tweaked for 'Engine/Models/Sphere' + constexpr float sphereModelScale = 0.0205f; // Manually tweaked for 'Engine/Models/Sphere' with some slack const float scaling = radius * sphereModelScale; resultWorld = Matrix::Identity; resultWorld.M11 = scaling; @@ -601,10 +601,7 @@ void RenderTools::ComputeSphereModelDrawMatrix(const RenderView& view, const Flo resultWorld.M43 = position.Z; // Check if view is inside the sphere - float viewToCenter = Float3::Distance(view.Position, position); - //if (radius + viewToCenter > view.Far) - // radius = view.Far - viewToCenter; // Clamp radius - resultIsViewInside = viewToCenter - radius < 5.0f; // Manually tweaked bias + resultIsViewInside = Float3::DistanceSquared(view.Position, position) < Math::Square(radius * 1.1f); // Manually tweaked bias } int32 MipLevelsCount(int32 width, bool useMipLevels) diff --git a/Source/Engine/Renderer/LightPass.cpp b/Source/Engine/Renderer/LightPass.cpp index 72fcc5a0c..93ad7a4a1 100644 --- a/Source/Engine/Renderer/LightPass.cpp +++ b/Source/Engine/Renderer/LightPass.cpp @@ -31,20 +31,18 @@ bool LightPass::Init() { // Create pipeline states _psLightDir.CreatePipelineStates(); - _psLightPointNormal.CreatePipelineStates(); - _psLightPointInverted.CreatePipelineStates(); - _psLightSpotNormal.CreatePipelineStates(); - 
_psLightSpotInverted.CreatePipelineStates(); - _psLightSkyNormal = GPUDevice::Instance->CreatePipelineState(); - _psLightSkyInverted = GPUDevice::Instance->CreatePipelineState(); + _psLightPoint.CreatePipelineStates(); + _psLightPointInside.CreatePipelineStates(); + _psLightSpot.CreatePipelineStates(); + _psLightSpotInside.CreatePipelineStates(); + _psLightSky = GPUDevice::Instance->CreatePipelineState(); + _psLightSkyInside = GPUDevice::Instance->CreatePipelineState(); // Load assets _shader = Content::LoadAsyncInternal(TEXT("Shaders/Lights")); _sphereModel = Content::LoadAsyncInternal(TEXT("Engine/Models/Sphere")); if (_shader == nullptr || _sphereModel == nullptr) - { return true; - } #if COMPILE_WITH_DEV_ENV _shader.Get()->OnReloading.Bind(this); @@ -90,46 +88,50 @@ bool LightPass::setupResources() if (_psLightDir.Create(psDesc, shader, "PS_Directional")) return true; } - if (!_psLightPointNormal.IsValid() || !_psLightPointInverted.IsValid()) + if (!_psLightPoint.IsValid()) { psDesc = GPUPipelineState::Description::DefaultNoDepth; psDesc.BlendMode = BlendingMode::Add; psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RGB; psDesc.VS = shader->GetVS("VS_Model"); - psDesc.CullMode = CullMode::TwoSided; - if (_psLightPointInverted.Create(psDesc, shader, "PS_Point")) - return true; - psDesc.CullMode = CullMode::Normal; psDesc.DepthEnable = true; - if (_psLightPointNormal.Create(psDesc, shader, "PS_Point")) + psDesc.CullMode = CullMode::Normal; + if (_psLightPoint.Create(psDesc, shader, "PS_Point")) + return true; + psDesc.DepthFunc = ComparisonFunc::Greater; + psDesc.CullMode = CullMode::Inverted; + if (_psLightPointInside.Create(psDesc, shader, "PS_Point")) return true; } - if (!_psLightSpotNormal.IsValid() || !_psLightSpotInverted.IsValid()) + if (!_psLightSpot.IsValid()) { psDesc = GPUPipelineState::Description::DefaultNoDepth; psDesc.BlendMode = BlendingMode::Add; psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RGB; psDesc.VS 
= shader->GetVS("VS_Model"); - psDesc.CullMode = CullMode::TwoSided; - if (_psLightSpotInverted.Create(psDesc, shader, "PS_Spot")) - return true; - psDesc.CullMode = CullMode::Normal; psDesc.DepthEnable = true; - if (_psLightSpotNormal.Create(psDesc, shader, "PS_Spot")) + psDesc.CullMode = CullMode::Normal; + if (_psLightSpot.Create(psDesc, shader, "PS_Spot")) + return true; + psDesc.DepthFunc = ComparisonFunc::Greater; + psDesc.CullMode = CullMode::Inverted; + if (_psLightSpotInside.Create(psDesc, shader, "PS_Spot")) return true; } - if (!_psLightSkyNormal->IsValid() || !_psLightSkyInverted->IsValid()) + if (!_psLightSky->IsValid()) { psDesc = GPUPipelineState::Description::DefaultNoDepth; psDesc.BlendMode = BlendingMode::Add; psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RGB; - psDesc.CullMode = CullMode::Normal; psDesc.VS = shader->GetVS("VS_Model"); psDesc.PS = shader->GetPS("PS_Sky"); - if (_psLightSkyNormal->Init(psDesc)) + psDesc.DepthEnable = true; + psDesc.CullMode = CullMode::Normal; + if (_psLightSky->Init(psDesc)) return true; - psDesc.CullMode = CullMode::TwoSided; - if (_psLightSkyInverted->Init(psDesc)) + psDesc.DepthFunc = ComparisonFunc::Greater; + psDesc.CullMode = CullMode::Inverted; + if (_psLightSkyInside->Init(psDesc)) return true; } @@ -143,12 +145,12 @@ void LightPass::Dispose() // Cleanup _psLightDir.Delete(); - _psLightPointNormal.Delete(); - _psLightPointInverted.Delete(); - _psLightSpotNormal.Delete(); - _psLightSpotInverted.Delete(); - SAFE_DELETE_GPU_RESOURCE(_psLightSkyNormal); - SAFE_DELETE_GPU_RESOURCE(_psLightSkyInverted); + _psLightPoint.Delete(); + _psLightPointInside.Delete(); + _psLightSpot.Delete(); + _psLightSpotInside.Delete(); + SAFE_DELETE_GPU_RESOURCE(_psLightSky); + SAFE_DELETE_GPU_RESOURCE(_psLightSkyInside); SAFE_DELETE_GPU_RESOURCE(_psClearDiffuse); _sphereModel = nullptr; } @@ -298,7 +300,7 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV context->BindCB(0, 
cb0); context->BindCB(1, cb1); int32 permutationIndex = (disableSpecular ? 1 : 0) + (useIES ? 2 : 0); - context->SetState((isViewInside ? _psLightPointInverted : _psLightPointNormal).Get(permutationIndex)); + context->SetState((isViewInside ? _psLightPointInside : _psLightPoint).Get(permutationIndex)); sphereMesh.Render(context); } @@ -341,7 +343,7 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV context->BindCB(0, cb0); context->BindCB(1, cb1); int32 permutationIndex = (disableSpecular ? 1 : 0) + (useIES ? 2 : 0); - context->SetState((isViewInside ? _psLightSpotInverted : _psLightSpotNormal).Get(permutationIndex)); + context->SetState((isViewInside ? _psLightSpotInside : _psLightSpot).Get(permutationIndex)); sphereMesh.Render(context); } @@ -400,7 +402,7 @@ void LightPass::RenderLights(RenderContextBatch& renderContextBatch, GPUTextureV context->UpdateCB(cb0, &perLight); context->BindCB(0, cb0); context->BindCB(1, cb1); - context->SetState(isViewInside ? _psLightSkyInverted : _psLightSkyNormal); + context->SetState(isViewInside ? 
_psLightSkyInside : _psLightSky); sphereMesh.Render(context); } diff --git a/Source/Engine/Renderer/LightPass.h b/Source/Engine/Renderer/LightPass.h index a399c7bfd..935052497 100644 --- a/Source/Engine/Renderer/LightPass.h +++ b/Source/Engine/Renderer/LightPass.h @@ -16,12 +16,12 @@ class LightPass : public RendererPass private: AssetReference _shader; GPUPipelineStatePermutationsPs<2> _psLightDir; - GPUPipelineStatePermutationsPs<4> _psLightPointNormal; - GPUPipelineStatePermutationsPs<4> _psLightPointInverted; - GPUPipelineStatePermutationsPs<4> _psLightSpotNormal; - GPUPipelineStatePermutationsPs<4> _psLightSpotInverted; - GPUPipelineState* _psLightSkyNormal = nullptr; - GPUPipelineState* _psLightSkyInverted = nullptr; + GPUPipelineStatePermutationsPs<4> _psLightPoint; + GPUPipelineStatePermutationsPs<4> _psLightPointInside; + GPUPipelineStatePermutationsPs<4> _psLightSpot; + GPUPipelineStatePermutationsPs<4> _psLightSpotInside; + GPUPipelineState* _psLightSky = nullptr; + GPUPipelineState* _psLightSkyInside = nullptr; GPUPipelineState* _psClearDiffuse = nullptr; AssetReference _sphereModel; PixelFormat _shadowMaskFormat; @@ -44,12 +44,12 @@ private: void OnShaderReloading(Asset* obj) { _psLightDir.Release(); - _psLightPointNormal.Release(); - _psLightPointInverted.Release(); - _psLightSpotNormal.Release(); - _psLightSpotInverted.Release(); - _psLightSkyNormal->ReleaseGPU(); - _psLightSkyInverted->ReleaseGPU(); + _psLightPoint.Release(); + _psLightPointInside.Release(); + _psLightSpot.Release(); + _psLightSpotInside.Release(); + _psLightSky->ReleaseGPU(); + _psLightSkyInside->ReleaseGPU(); invalidateResources(); } #endif diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index 3a548f8ab..4f362392f 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -13,6 +13,12 @@ #include "Engine/Graphics/RenderTargetPool.h" #include 
"Engine/Level/Actors/EnvironmentProbe.h" +PACK_STRUCT(struct Data { + ShaderEnvProbeData PData; + Matrix WVP; + ShaderGBufferData GBuffer; + }); + #if GENERATE_GF_CACHE // This code below (PreIntegratedGF namespace) is based on many Siggraph presentations about BRDF shading: @@ -239,13 +245,6 @@ namespace PreIntegratedGF class Model; -ReflectionsPass::ReflectionsPass() - : _psProbeNormal(nullptr) - , _psProbeInverted(nullptr) - , _psCombinePass(nullptr) -{ -} - String ReflectionsPass::ToString() const { return TEXT("ReflectionsPass"); @@ -254,15 +253,13 @@ String ReflectionsPass::ToString() const bool ReflectionsPass::Init() { #if GENERATE_GF_CACHE - // Generate cache PreIntegratedGF::Generate(); - #endif // Create pipeline states - _psProbeNormal = GPUDevice::Instance->CreatePipelineState(); - _psProbeInverted = GPUDevice::Instance->CreatePipelineState(); + _psProbe = GPUDevice::Instance->CreatePipelineState(); + _psProbeInside = GPUDevice::Instance->CreatePipelineState(); _psCombinePass = GPUDevice::Instance->CreatePipelineState(); // Load assets @@ -294,17 +291,19 @@ bool ReflectionsPass::setupResources() // Create pipeline stages GPUPipelineState::Description psDesc; - if (!_psProbeNormal->IsValid() || !_psProbeInverted->IsValid()) + if (!_psProbe->IsValid()) { psDesc = GPUPipelineState::Description::DefaultNoDepth; psDesc.BlendMode = BlendingMode::AlphaBlend; - psDesc.CullMode = CullMode::Normal; psDesc.VS = shader->GetVS("VS_Model"); psDesc.PS = shader->GetPS("PS_EnvProbe"); - if (_psProbeNormal->Init(psDesc)) + psDesc.CullMode = CullMode::Normal; + psDesc.DepthEnable = true; + if (_psProbe->Init(psDesc)) return true; - psDesc.CullMode = CullMode::TwoSided; - if (_psProbeInverted->Init(psDesc)) + psDesc.DepthFunc = ComparisonFunc::Greater; + psDesc.CullMode = CullMode::Inverted; + if (_psProbeInside->Init(psDesc)) return true; } psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; @@ -326,8 +325,8 @@ void ReflectionsPass::Dispose() 
RendererPass::Dispose(); // Cleanup - SAFE_DELETE_GPU_RESOURCE(_psProbeNormal); - SAFE_DELETE_GPU_RESOURCE(_psProbeInverted); + SAFE_DELETE_GPU_RESOURCE(_psProbe); + SAFE_DELETE_GPU_RESOURCE(_psProbeInside); SAFE_DELETE_GPU_RESOURCE(_psCombinePass); _shader = nullptr; _sphereModel = nullptr; @@ -416,7 +415,7 @@ void ReflectionsPass::Render(RenderContext& renderContext, GPUTextureView* light context->UpdateCB(cb, &data); context->BindCB(0, cb); context->BindSR(4, probe.Texture); - context->SetState(isViewInside ? _psProbeInverted : _psProbeNormal); + context->SetState(isViewInside ? _psProbeInside : _psProbe); sphereMesh.Render(context); } diff --git a/Source/Engine/Renderer/ReflectionsPass.h b/Source/Engine/Renderer/ReflectionsPass.h index a88a4eec4..d41b83d49 100644 --- a/Source/Engine/Renderer/ReflectionsPass.h +++ b/Source/Engine/Renderer/ReflectionsPass.h @@ -16,30 +16,15 @@ class ReflectionsPass : public RendererPass { private: - - PACK_STRUCT(struct Data { - ShaderEnvProbeData PData; - Matrix WVP; - ShaderGBufferData GBuffer; - }); - AssetReference _shader; - GPUPipelineState* _psProbeNormal; - GPUPipelineState* _psProbeInverted; - GPUPipelineState* _psCombinePass; + GPUPipelineState* _psProbe = nullptr; + GPUPipelineState* _psProbeInside = nullptr; + GPUPipelineState* _psCombinePass = nullptr; AssetReference _sphereModel; AssetReference _preIntegratedGF; public: - - /// - /// Init - /// - ReflectionsPass(); - -public: - /// /// Perform reflections pass rendering for the input task. 
/// @@ -48,7 +33,6 @@ public: void Render(RenderContext& renderContext, GPUTextureView* lightBuffer); public: - // [RendererPass] String ToString() const override; bool Init() override; @@ -56,15 +40,14 @@ public: #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) { - _psProbeNormal->ReleaseGPU(); - _psProbeInverted->ReleaseGPU(); + _psProbe->ReleaseGPU(); + _psProbeInside->ReleaseGPU(); _psCombinePass->ReleaseGPU(); invalidateResources(); } #endif protected: - // [RendererPass] bool setupResources() override; }; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index e2c1c1f42..205f3301c 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -446,7 +446,9 @@ bool ShadowsPass::Init() // Create pipeline states _psShadowDir.CreatePipelineStates(); _psShadowPoint.CreatePipelineStates(); + _psShadowPointInside.CreatePipelineStates(); _psShadowSpot.CreatePipelineStates(); + _psShadowSpotInside.CreatePipelineStates(); // Load assets _shader = Content::LoadAsyncInternal(TEXT("Shaders/Shadows")); @@ -496,27 +498,40 @@ bool ShadowsPass::setupResources() // Create pipeline stages GPUPipelineState::Description psDesc; - if (!_psShadowPoint.IsValid()) - { - psDesc = GPUPipelineState::Description::DefaultNoDepth; - psDesc.CullMode = CullMode::TwoSided; - psDesc.VS = shader->GetVS("VS_Model"); - if (_psShadowPoint.Create(psDesc, shader, "PS_PointLight")) - return true; - } if (!_psShadowDir.IsValid()) { psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RG; if (_psShadowDir.Create(psDesc, shader, "PS_DirLight")) return true; } + if (!_psShadowPoint.IsValid()) + { + psDesc = GPUPipelineState::Description::DefaultNoDepth; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RG; + psDesc.VS = shader->GetVS("VS_Model"); + psDesc.DepthEnable = true; + psDesc.CullMode = CullMode::Normal; + 
if (_psShadowPoint.Create(psDesc, shader, "PS_PointLight")) + return true; + psDesc.DepthFunc = ComparisonFunc::Greater; + psDesc.CullMode = CullMode::Inverted; + if (_psShadowPointInside.Create(psDesc, shader, "PS_PointLight")) + return true; + } if (!_psShadowSpot.IsValid()) { psDesc = GPUPipelineState::Description::DefaultNoDepth; - psDesc.CullMode = CullMode::TwoSided; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RG; psDesc.VS = shader->GetVS("VS_Model"); + psDesc.DepthEnable = true; + psDesc.CullMode = CullMode::Normal; if (_psShadowSpot.Create(psDesc, shader, "PS_SpotLight")) return true; + psDesc.DepthFunc = ComparisonFunc::Greater; + psDesc.CullMode = CullMode::Inverted; + if (_psShadowSpotInside.Create(psDesc, shader, "PS_SpotLight")) + return true; } if (_psDepthClear == nullptr) { @@ -994,7 +1009,9 @@ void ShadowsPass::Dispose() // Cleanup _psShadowDir.Delete(); _psShadowPoint.Delete(); + _psShadowPointInside.Delete(); _psShadowSpot.Delete(); + _psShadowSpotInside.Delete(); _shader = nullptr; _sphereModel = nullptr; SAFE_DELETE_GPU_RESOURCE(_psDepthClear); @@ -1072,7 +1089,6 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& } if (shadows.StaticAtlasTiles && (float)shadows.StaticAtlasPixelsUsed / (shadows.StaticAtlasTiles->Width * shadows.StaticAtlasTiles->Height) < SHADOWS_MAX_STATIC_ATLAS_CAPACITY_TO_DEFRAG) { - float app = (float)shadows.StaticAtlasPixelsUsed / (shadows.StaticAtlasTiles->Width * shadows.StaticAtlasTiles->Height); // Defragment static shadow atlas if it failed to insert any light but it's still should have space bool anyStaticFailed = false; for (auto& e : shadows.Lights) @@ -1478,12 +1494,12 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende sperLight.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? 
RenderTools::ComputeTemporalTime() : 0.0f; sperLight.ContactShadowsDistance = light.ShadowsDistance; sperLight.ContactShadowsLength = EnumHasAnyFlags(view.Flags, ViewFlags::ContactShadows) ? light.ContactShadowsLength : 0.0f; + bool isViewInside; if (isLocalLight) { // Calculate world view projection matrix for the light sphere Matrix world, wvp; - bool isInside; - RenderTools::ComputeSphereModelDrawMatrix(renderContext.View, light.Position, ((RenderLocalLightData&)light).Radius, world, isInside); + RenderTools::ComputeSphereModelDrawMatrix(renderContext.View, light.Position, ((RenderLocalLightData&)light).Radius, world, isViewInside); Matrix::Multiply(world, view.ViewProjection(), wvp); Matrix::Transpose(wvp, sperLight.WVP); } @@ -1498,12 +1514,12 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende context->SetRenderTarget(shadowMask); if (light.IsPointLight) { - context->SetState(_psShadowPoint.Get(permutationIndex)); + context->SetState((isViewInside ? _psShadowPointInside : _psShadowPoint).Get(permutationIndex)); _sphereModel->LODs.Get()[0].Meshes.Get()[0].Render(context); } else if (light.IsSpotLight) { - context->SetState(_psShadowSpot.Get(permutationIndex)); + context->SetState((isViewInside ? 
_psShadowSpotInside : _psShadowSpot).Get(permutationIndex)); _sphereModel->LODs.Get()[0].Meshes.Get()[0].Render(context); } else //if (light.IsDirectionalLight) diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index df647aac6..551b3a504 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -21,7 +21,9 @@ private: GPUPipelineState* _psDepthCopy = nullptr; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowDir; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowPoint; + GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowPointInside; GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowSpot; + GPUPipelineStatePermutationsPs(Quality::MAX) * 2> _psShadowSpotInside; PixelFormat _shadowMapFormat; // Cached on initialization public: From cf23892bd4606169f3967db494b2a6cc0f9ad4e5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 14 Apr 2024 23:51:20 +0200 Subject: [PATCH 037/292] Fix sun shadows invalidate when view rotates --- Source/Engine/Renderer/ShadowsPass.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 205f3301c..2809c1a6f 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -19,6 +19,8 @@ #include "Engine/Renderer/Lightmaps.h" #endif +#define SHADOWS_POSITION_ERROR METERS_TO_UNITS(0.1f) +#define SHADOWS_ROTATION_ERROR 0.9999f #define SHADOWS_MAX_TILES 6 #define SHADOWS_MIN_RESOLUTION 16 #define SHADOWS_MAX_STATIC_ATLAS_CAPACITY_TO_DEFRAG 0.7f @@ -139,6 +141,7 @@ struct ShadowAtlasLightCache Float3 Direction; float Distance; Float4 CascadeSplits; + Float3 ViewDirection; void Set(const RenderView& view, const RenderLightData& light, const Float4& cascadeSplits = Float4::Zero) { @@ -153,6 +156,7 @@ struct ShadowAtlasLightCache { // Sun Position = view.Position; + ViewDirection = 
view.Direction; CascadeSplits = cascadeSplits; } else @@ -243,7 +247,7 @@ struct ShadowAtlasLight !Math::NearEqual(Cache.ShadowsUpdateRate, light.ShadowsUpdateRate) || !Math::NearEqual(Cache.ShadowsUpdateRateAtDistance, light.ShadowsUpdateRateAtDistance) || Cache.ShadowFrame != light.ShadowFrame || - Float3::Dot(Cache.Direction, light.Direction) < 0.999999f) + Float3::Dot(Cache.Direction, light.Direction) < SHADOWS_ROTATION_ERROR) { // Invalidate Cache.StaticValid = false; @@ -251,8 +255,9 @@ struct ShadowAtlasLight if (light.IsDirectionalLight) { // Sun - if (!Float3::NearEqual(Cache.Position, view.Position, 1.0f) || - !Float4::NearEqual(Cache.CascadeSplits, CascadeSplits)) + if (!Float3::NearEqual(Cache.Position, view.Position, SHADOWS_POSITION_ERROR) || + !Float4::NearEqual(Cache.CascadeSplits, CascadeSplits) || + Float3::Dot(Cache.ViewDirection, view.Direction) < SHADOWS_ROTATION_ERROR) { // Invalidate Cache.StaticValid = false; @@ -262,7 +267,7 @@ struct ShadowAtlasLight { // Local light const auto& localLight = (const RenderLocalLightData&)light; - if (!Float3::NearEqual(Cache.Position, light.Position, METERS_TO_UNITS(0.1f)) || + if (!Float3::NearEqual(Cache.Position, light.Position, SHADOWS_POSITION_ERROR) || !Math::NearEqual(Cache.Radius, localLight.Radius)) { // Invalidate From ebe05d4a51a591f0f0378c51f1471381691d3d20 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Apr 2024 17:35:58 +0200 Subject: [PATCH 038/292] Refactor `RenderToolsDX` to support new pixel formats properly --- Source/Engine/Graphics/Graphics.Build.cs | 7 + .../DirectX/DX11/GPUSwapChainDX11.cpp | 6 +- .../GraphicsDevice/DirectX/RenderToolsDX.cpp | 274 ++++++++++++++++++ .../GraphicsDevice/DirectX/RenderToolsDX.h | 163 +---------- 4 files changed, 287 insertions(+), 163 deletions(-) create mode 100644 Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp diff --git a/Source/Engine/Graphics/Graphics.Build.cs b/Source/Engine/Graphics/Graphics.Build.cs index d740b8a15..3e902e204 
100644 --- a/Source/Engine/Graphics/Graphics.Build.cs +++ b/Source/Engine/Graphics/Graphics.Build.cs @@ -107,5 +107,12 @@ public class Graphics : EngineModule { options.PrivateDefinitions.Add("COMPILE_WITH_GPU_PARTICLES"); } + + // Manually include file with shared DirectX code + if (options.PrivateDependencies.Contains("GraphicsDeviceDX11") || + options.PrivateDependencies.Contains("GraphicsDeviceDX12")) + { + options.SourceFiles.Add(Path.Combine(FolderPath, "../GraphicsDevice/DirectX/RenderToolsDX.cpp")); + } } } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp index 4b247f781..ef7ecdb43 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp @@ -261,11 +261,7 @@ bool GPUSwapChainDX11::Resize(int32 width, int32 height) _width = width; _height = height; -#if PLATFORM_WINDOWS - _memoryUsage = RenderTools::CalculateTextureMemoryUsage(RenderToolsDX::ToPixelFormat(swapChainDesc.BufferDesc.Format), _width, _height, 1) * swapChainDesc.BufferCount; -#else - _memoryUsage = RenderTools::CalculateTextureMemoryUsage(RenderToolsDX::ToPixelFormat(swapChainDesc.Format), _width, _height, 1) * swapChainDesc.BufferCount; -#endif + _memoryUsage = RenderTools::CalculateTextureMemoryUsage(_format, _width, _height, 1) * swapChainDesc.BufferCount; getBackBuffer(); diff --git a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp new file mode 100644 index 000000000..dd4df4b23 --- /dev/null +++ b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp @@ -0,0 +1,274 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+ +#if GRAPHICS_API_DIRECTX11 || GRAPHICS_API_DIRECTX12 + +#include "RenderToolsDX.h" +#include "Engine/Core/Types/StringBuilder.h" +#include "Engine/Graphics/GPUDevice.h" +#include + +// @formatter:off + +DXGI_FORMAT PixelFormatToDXGIFormat[108] = +{ + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_R32G32B32A32_TYPELESS, + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R32G32B32A32_UINT, + DXGI_FORMAT_R32G32B32A32_SINT, + DXGI_FORMAT_R32G32B32_TYPELESS, + DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32_UINT, + DXGI_FORMAT_R32G32B32_SINT, + DXGI_FORMAT_R16G16B16A16_TYPELESS, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R32G32_TYPELESS, + DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_R32G32_UINT, + DXGI_FORMAT_R32G32_SINT, + DXGI_FORMAT_R32G8X24_TYPELESS, + DXGI_FORMAT_D32_FLOAT_S8X24_UINT, + DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, + DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, + DXGI_FORMAT_R10G10B10A2_TYPELESS, + DXGI_FORMAT_R10G10B10A2_UNORM, + DXGI_FORMAT_R10G10B10A2_UINT, + DXGI_FORMAT_R11G11B10_FLOAT, + DXGI_FORMAT_R8G8B8A8_TYPELESS, + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_SNORM, + DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R16G16_TYPELESS, + DXGI_FORMAT_R16G16_FLOAT, + DXGI_FORMAT_R16G16_UNORM, + DXGI_FORMAT_R16G16_UINT, + DXGI_FORMAT_R16G16_SNORM, + DXGI_FORMAT_R16G16_SINT, + DXGI_FORMAT_R32_TYPELESS, + DXGI_FORMAT_D32_FLOAT, + DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R32_UINT, + DXGI_FORMAT_R32_SINT, + DXGI_FORMAT_R24G8_TYPELESS, + DXGI_FORMAT_D24_UNORM_S8_UINT, + DXGI_FORMAT_R24_UNORM_X8_TYPELESS, + DXGI_FORMAT_X24_TYPELESS_G8_UINT, + DXGI_FORMAT_R8G8_TYPELESS, + DXGI_FORMAT_R8G8_UNORM, + DXGI_FORMAT_R8G8_UINT, + DXGI_FORMAT_R8G8_SNORM, + DXGI_FORMAT_R8G8_SINT, + DXGI_FORMAT_R16_TYPELESS, + DXGI_FORMAT_R16_FLOAT, + DXGI_FORMAT_D16_UNORM, + DXGI_FORMAT_R16_UNORM, + 
DXGI_FORMAT_R16_UINT, + DXGI_FORMAT_R16_SNORM, + DXGI_FORMAT_R16_SINT, + DXGI_FORMAT_R8_TYPELESS, + DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R8_UINT, + DXGI_FORMAT_R8_SNORM, + DXGI_FORMAT_R8_SINT, + DXGI_FORMAT_A8_UNORM, + DXGI_FORMAT_R1_UNORM, + DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + DXGI_FORMAT_R8G8_B8G8_UNORM, + DXGI_FORMAT_G8R8_G8B8_UNORM, + DXGI_FORMAT_BC1_TYPELESS, + DXGI_FORMAT_BC1_UNORM, + DXGI_FORMAT_BC1_UNORM_SRGB, + DXGI_FORMAT_BC2_TYPELESS, + DXGI_FORMAT_BC2_UNORM, + DXGI_FORMAT_BC2_UNORM_SRGB, + DXGI_FORMAT_BC3_TYPELESS, + DXGI_FORMAT_BC3_UNORM, + DXGI_FORMAT_BC3_UNORM_SRGB, + DXGI_FORMAT_BC4_TYPELESS, + DXGI_FORMAT_BC4_UNORM, + DXGI_FORMAT_BC4_SNORM, + DXGI_FORMAT_BC5_TYPELESS, + DXGI_FORMAT_BC5_UNORM, + DXGI_FORMAT_BC5_SNORM, + DXGI_FORMAT_B5G6R5_UNORM, + DXGI_FORMAT_B5G5R5A1_UNORM, + DXGI_FORMAT_B8G8R8A8_UNORM, + DXGI_FORMAT_B8G8R8X8_UNORM, + DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, + DXGI_FORMAT_B8G8R8A8_TYPELESS, + DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + DXGI_FORMAT_B8G8R8X8_TYPELESS, + DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, + DXGI_FORMAT_BC6H_TYPELESS, + DXGI_FORMAT_BC6H_UF16, + DXGI_FORMAT_BC6H_SF16, + DXGI_FORMAT_BC7_TYPELESS, + DXGI_FORMAT_BC7_UNORM, + DXGI_FORMAT_BC7_UNORM_SRGB, + DXGI_FORMAT_UNKNOWN, // ASTC_4x4_UNorm + DXGI_FORMAT_UNKNOWN, // ASTC_4x4_UNorm_sRGB + DXGI_FORMAT_UNKNOWN, // ASTC_6x6_UNorm + DXGI_FORMAT_UNKNOWN, // ASTC_6x6_UNorm_sRGB + DXGI_FORMAT_UNKNOWN, // ASTC_8x8_UNorm + DXGI_FORMAT_UNKNOWN, // ASTC_8x8_UNorm_sRGB + DXGI_FORMAT_UNKNOWN, // ASTC_10x10_UNorm + DXGI_FORMAT_UNKNOWN, // ASTC_10x10_UNorm_sRGB +}; + +// @formatter:on + +DXGI_FORMAT RenderToolsDX::ToDxgiFormat(PixelFormat format) +{ + return PixelFormatToDXGIFormat[(int32)format]; +} + +const Char* RenderToolsDX::GetFeatureLevelString(const D3D_FEATURE_LEVEL featureLevel) +{ + switch (featureLevel) + { + case D3D_FEATURE_LEVEL_9_1: + return TEXT("9.1"); + case D3D_FEATURE_LEVEL_9_2: + return TEXT("9.2"); + case D3D_FEATURE_LEVEL_9_3: + return TEXT("9.3"); + case 
D3D_FEATURE_LEVEL_10_0: + return TEXT("10"); + case D3D_FEATURE_LEVEL_10_1: + return TEXT("10.1"); + case D3D_FEATURE_LEVEL_11_0: + return TEXT("11"); + case D3D_FEATURE_LEVEL_11_1: + return TEXT("11.1"); +#if GRAPHICS_API_DIRECTX12 + case D3D_FEATURE_LEVEL_12_0: + return TEXT("12"); + case D3D_FEATURE_LEVEL_12_1: + return TEXT("12.1"); +#endif + default: + return TEXT("?"); + } +} + +String RenderToolsDX::GetD3DErrorString(HRESULT errorCode) +{ + StringBuilder sb(256); + + // Switch error code +#define D3DERR(x) case x: sb.Append(TEXT(#x)); break + switch (errorCode) + { + // Windows + D3DERR(S_OK); + D3DERR(E_FAIL); + D3DERR(E_INVALIDARG); + D3DERR(E_OUTOFMEMORY); + D3DERR(E_NOINTERFACE); + D3DERR(E_NOTIMPL); + + // DirectX +#if WITH_D3DX_LIBS + D3DERR(D3DERR_INVALIDCALL); + D3DERR(D3DERR_WASSTILLDRAWING); +#endif + + // DirectX 11 + D3DERR(D3D11_ERROR_FILE_NOT_FOUND); + D3DERR(D3D11_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS); + + // DirectX 12 + //D3DERR(D3D12_ERROR_FILE_NOT_FOUND); + //D3DERR(D3D12_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS); + //D3DERR(D3D12_ERROR_TOO_MANY_UNIQUE_VIEW_OBJECTS); + + // DXGI + D3DERR(DXGI_ERROR_INVALID_CALL); + D3DERR(DXGI_ERROR_NOT_FOUND); + D3DERR(DXGI_ERROR_MORE_DATA); + D3DERR(DXGI_ERROR_UNSUPPORTED); + D3DERR(DXGI_ERROR_DEVICE_REMOVED); + D3DERR(DXGI_ERROR_DEVICE_HUNG); + D3DERR(DXGI_ERROR_DEVICE_RESET); + D3DERR(DXGI_ERROR_WAS_STILL_DRAWING); + D3DERR(DXGI_ERROR_FRAME_STATISTICS_DISJOINT); + D3DERR(DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE); + D3DERR(DXGI_ERROR_DRIVER_INTERNAL_ERROR); + D3DERR(DXGI_ERROR_NONEXCLUSIVE); + D3DERR(DXGI_ERROR_NOT_CURRENTLY_AVAILABLE); + D3DERR(DXGI_ERROR_REMOTE_CLIENT_DISCONNECTED); + D3DERR(DXGI_ERROR_REMOTE_OUTOFMEMORY); + D3DERR(DXGI_ERROR_ACCESS_LOST); + D3DERR(DXGI_ERROR_WAIT_TIMEOUT); + D3DERR(DXGI_ERROR_SESSION_DISCONNECTED); + D3DERR(DXGI_ERROR_RESTRICT_TO_OUTPUT_STALE); + D3DERR(DXGI_ERROR_CANNOT_PROTECT_CONTENT); + D3DERR(DXGI_ERROR_ACCESS_DENIED); + D3DERR(DXGI_ERROR_NAME_ALREADY_EXISTS); + 
D3DERR(DXGI_ERROR_SDK_COMPONENT_MISSING); +#if GRAPHICS_API_DIRECTX12 + D3DERR(DXGI_ERROR_NOT_CURRENT); + D3DERR(DXGI_ERROR_HW_PROTECTION_OUTOFMEMORY); + D3DERR(D3D12_ERROR_DRIVER_VERSION_MISMATCH); +#endif + + default: + { + sb.AppendFormat(TEXT("0x{0:x}"), static_cast(errorCode)); + } + break; + } +#undef D3DERR + + if (errorCode == DXGI_ERROR_DEVICE_REMOVED || errorCode == DXGI_ERROR_DEVICE_RESET || errorCode == DXGI_ERROR_DRIVER_INTERNAL_ERROR) + { + HRESULT reason = S_OK; + const RendererType rendererType = GPUDevice::Instance ? GPUDevice::Instance->GetRendererType() : RendererType::Unknown; + void* nativePtr = GPUDevice::Instance ? GPUDevice::Instance->GetNativePtr() : nullptr; +#if GRAPHICS_API_DIRECTX12 + if (rendererType == RendererType::DirectX12 && nativePtr) + { + reason = ((ID3D12Device*)nativePtr)->GetDeviceRemovedReason(); + } +#endif +#if GRAPHICS_API_DIRECTX11 + if ((rendererType == RendererType::DirectX11 || + rendererType == RendererType::DirectX10_1 || + rendererType == RendererType::DirectX10) && nativePtr) + { + reason = ((ID3D11Device*)nativePtr)->GetDeviceRemovedReason(); + } +#endif + const Char* reasonStr = nullptr; + switch (reason) + { + case DXGI_ERROR_DEVICE_HUNG: + reasonStr = TEXT("HUNG"); + break; + case DXGI_ERROR_DEVICE_REMOVED: + reasonStr = TEXT("REMOVED"); + break; + case DXGI_ERROR_DEVICE_RESET: + reasonStr = TEXT("RESET"); + break; + case DXGI_ERROR_DRIVER_INTERNAL_ERROR: + reasonStr = TEXT("INTERNAL_ERROR"); + break; + case DXGI_ERROR_INVALID_CALL: + reasonStr = TEXT("INVALID_CALL"); + break; + } + if (reasonStr != nullptr) + sb.AppendFormat(TEXT(", Device Removed Reason: {0}"), reasonStr); + } + + return sb.ToString(); +} + +#endif diff --git a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h index 6036f61e5..3973ec631 100644 --- a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h +++ b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h @@ -8,10 +8,7 @@ 
#include "Engine/Graphics//RenderTools.h" #include "Engine/Graphics/Enums.h" #include "IncludeDirectXHeaders.h" -#include "Engine/Core/Types/StringBuilder.h" #include "Engine/Core/Log.h" -#include "Engine/Utilities/StringConverter.h" -#include /// /// Set of utilities for rendering on DirectX platform. @@ -61,25 +58,12 @@ namespace RenderToolsDX #endif - /// - /// summary DXGI Pixel Format to the Flax Pixel Format. - /// - /// The DXGI format. - /// The Flax Pixel Format - FORCE_INLINE PixelFormat ToPixelFormat(const DXGI_FORMAT format) - { - return static_cast(format); - } - /// /// Converts Flax Pixel Format to the DXGI Format. /// /// The Flax Pixel Format. /// The DXGI Format - FORCE_INLINE DXGI_FORMAT ToDxgiFormat(const PixelFormat format) - { - return static_cast(format); - } + extern DXGI_FORMAT ToDxgiFormat(PixelFormat format); // Aligns location to the next multiple of align value. template @@ -89,34 +73,7 @@ namespace RenderToolsDX return ((location + (align - 1)) & ~(align - 1)); } - static const Char* GetFeatureLevelString(const D3D_FEATURE_LEVEL featureLevel) - { - switch (featureLevel) - { - case D3D_FEATURE_LEVEL_9_1: - return TEXT("9.1"); - case D3D_FEATURE_LEVEL_9_2: - return TEXT("9.2"); - case D3D_FEATURE_LEVEL_9_3: - return TEXT("9.3"); - case D3D_FEATURE_LEVEL_10_0: - return TEXT("10"); - case D3D_FEATURE_LEVEL_10_1: - return TEXT("10.1"); - case D3D_FEATURE_LEVEL_11_0: - return TEXT("11"); - case D3D_FEATURE_LEVEL_11_1: - return TEXT("11.1"); -#if GRAPHICS_API_DIRECTX12 - case D3D_FEATURE_LEVEL_12_0: - return TEXT("12"); - case D3D_FEATURE_LEVEL_12_1: - return TEXT("12.1"); -#endif - default: - return TEXT("?"); - } - } + extern const Char* GetFeatureLevelString(const D3D_FEATURE_LEVEL featureLevel); // Calculate a subresource index for a texture FORCE_INLINE uint32 CalcSubresourceIndex(uint32 mipSlice, uint32 arraySlice, uint32 mipLevels) @@ -140,119 +97,7 @@ namespace RenderToolsDX return count; } - static String GetD3DErrorString(HRESULT 
errorCode) - { - StringBuilder sb(256); - - // Switch error code -#define D3DERR(x) case x: sb.Append(TEXT(#x)); break - switch (errorCode) - { - // Windows - D3DERR(S_OK); - D3DERR(E_FAIL); - D3DERR(E_INVALIDARG); - D3DERR(E_OUTOFMEMORY); - D3DERR(E_NOINTERFACE); - D3DERR(E_NOTIMPL); - - // DirectX -#if WITH_D3DX_LIBS - D3DERR(D3DERR_INVALIDCALL); - D3DERR(D3DERR_WASSTILLDRAWING); -#endif - - // DirectX 11 - D3DERR(D3D11_ERROR_FILE_NOT_FOUND); - D3DERR(D3D11_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS); - - // DirectX 12 - //D3DERR(D3D12_ERROR_FILE_NOT_FOUND); - //D3DERR(D3D12_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS); - //D3DERR(D3D12_ERROR_TOO_MANY_UNIQUE_VIEW_OBJECTS); - - // DXGI - D3DERR(DXGI_ERROR_INVALID_CALL); - D3DERR(DXGI_ERROR_NOT_FOUND); - D3DERR(DXGI_ERROR_MORE_DATA); - D3DERR(DXGI_ERROR_UNSUPPORTED); - D3DERR(DXGI_ERROR_DEVICE_REMOVED); - D3DERR(DXGI_ERROR_DEVICE_HUNG); - D3DERR(DXGI_ERROR_DEVICE_RESET); - D3DERR(DXGI_ERROR_WAS_STILL_DRAWING); - D3DERR(DXGI_ERROR_FRAME_STATISTICS_DISJOINT); - D3DERR(DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE); - D3DERR(DXGI_ERROR_DRIVER_INTERNAL_ERROR); - D3DERR(DXGI_ERROR_NONEXCLUSIVE); - D3DERR(DXGI_ERROR_NOT_CURRENTLY_AVAILABLE); - D3DERR(DXGI_ERROR_REMOTE_CLIENT_DISCONNECTED); - D3DERR(DXGI_ERROR_REMOTE_OUTOFMEMORY); - D3DERR(DXGI_ERROR_ACCESS_LOST); - D3DERR(DXGI_ERROR_WAIT_TIMEOUT); - D3DERR(DXGI_ERROR_SESSION_DISCONNECTED); - D3DERR(DXGI_ERROR_RESTRICT_TO_OUTPUT_STALE); - D3DERR(DXGI_ERROR_CANNOT_PROTECT_CONTENT); - D3DERR(DXGI_ERROR_ACCESS_DENIED); - D3DERR(DXGI_ERROR_NAME_ALREADY_EXISTS); - D3DERR(DXGI_ERROR_SDK_COMPONENT_MISSING); -#if GRAPHICS_API_DIRECTX12 - D3DERR(DXGI_ERROR_NOT_CURRENT); - D3DERR(DXGI_ERROR_HW_PROTECTION_OUTOFMEMORY); - D3DERR(D3D12_ERROR_DRIVER_VERSION_MISMATCH); -#endif - - default: - { - sb.AppendFormat(TEXT("0x{0:x}"), static_cast(errorCode)); - } - break; - } -#undef D3DERR - - if (errorCode == DXGI_ERROR_DEVICE_REMOVED || errorCode == DXGI_ERROR_DEVICE_RESET || errorCode == 
DXGI_ERROR_DRIVER_INTERNAL_ERROR) - { - HRESULT reason = S_OK; - const RendererType rendererType = GPUDevice::Instance ? GPUDevice::Instance->GetRendererType() : RendererType::Unknown; - void* nativePtr = GPUDevice::Instance ? GPUDevice::Instance->GetNativePtr() : nullptr; -#if GRAPHICS_API_DIRECTX12 - if (rendererType == RendererType::DirectX12 && nativePtr) - { - reason = ((ID3D12Device*)nativePtr)->GetDeviceRemovedReason(); - } -#endif -#if GRAPHICS_API_DIRECTX11 - if ((rendererType == RendererType::DirectX11 || - rendererType == RendererType::DirectX10_1 || - rendererType == RendererType::DirectX10) && nativePtr) - { - reason = ((ID3D11Device*)nativePtr)->GetDeviceRemovedReason(); - } -#endif - const Char* reasonStr = nullptr; - switch (reason) - { - case DXGI_ERROR_DEVICE_HUNG: - reasonStr = TEXT("HUNG"); - break; - case DXGI_ERROR_DEVICE_REMOVED: - reasonStr = TEXT("REMOVED"); - break; - case DXGI_ERROR_DEVICE_RESET: - reasonStr = TEXT("RESET"); - break; - case DXGI_ERROR_DRIVER_INTERNAL_ERROR: - reasonStr = TEXT("INTERNAL_ERROR"); - break; - case DXGI_ERROR_INVALID_CALL: - reasonStr = TEXT("INVALID_CALL"); - break; - } - if (reasonStr != nullptr) - sb.AppendFormat(TEXT(", Device Removed Reason: {0}"), reasonStr); - } - - return sb.ToString(); - } + extern String GetD3DErrorString(HRESULT errorCode); inline void ValidateD3DResult(HRESULT result, const char* file = "", uint32 line = 0) { @@ -286,6 +131,8 @@ namespace RenderToolsDX #if GPU_ENABLE_DIAGNOSTICS || COMPILE_WITH_SHADER_COMPILER +#include "Engine/Utilities/StringConverter.h" + // Link DXGI lib #pragma comment(lib, "dxguid.lib") From 3ebf73ec22aec4ad95f978e7557d542095526d18 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Apr 2024 10:26:23 +0200 Subject: [PATCH 039/292] Add video texture format `YUY2` --- Source/Engine/Graphics/PixelFormat.h | 5 ++ .../Engine/Graphics/PixelFormatExtensions.cpp | 24 +++++----- .../Engine/Graphics/PixelFormatExtensions.h | 14 ------ 
Source/Engine/Graphics/RenderTools.cpp | 48 ++++++++----------- .../DirectX/DX11/GPUDeviceDX11.cpp | 22 ++++----- .../DirectX/DX12/GPUDeviceDX12.cpp | 4 -- .../GraphicsDevice/DirectX/RenderToolsDX.cpp | 3 +- .../Vulkan/RenderToolsVulkan.cpp | 3 +- 8 files changed, 49 insertions(+), 74 deletions(-) diff --git a/Source/Engine/Graphics/PixelFormat.h b/Source/Engine/Graphics/PixelFormat.h index 1f96a4d82..9335b10c6 100644 --- a/Source/Engine/Graphics/PixelFormat.h +++ b/Source/Engine/Graphics/PixelFormat.h @@ -553,6 +553,11 @@ API_ENUM() enum class PixelFormat : uint32 /// ASTC_10x10_UNorm_sRGB = 107, + /// + /// Packed YUV 4:2:2 video texture format. The mapping to the view channel in shader is Y0->R8, U0->G8, Y1->B8, and V0->A8. + /// + YUY2 = 108, + /// /// The maximum format value (for internal use only). /// diff --git a/Source/Engine/Graphics/PixelFormatExtensions.cpp b/Source/Engine/Graphics/PixelFormatExtensions.cpp index f77c15683..2b1f68457 100644 --- a/Source/Engine/Graphics/PixelFormatExtensions.cpp +++ b/Source/Engine/Graphics/PixelFormatExtensions.cpp @@ -46,6 +46,7 @@ void PixelFormatExtensions::Init() PixelFormat::BC7_UNorm_sRGB, PixelFormat::ASTC_4x4_UNorm, PixelFormat::ASTC_4x4_UNorm_sRGB, + PixelFormat::YUY2, }; InitFormat(formats2, 8); @@ -373,19 +374,15 @@ bool PixelFormatExtensions::IsCompressedASTC(PixelFormat format) } } -bool PixelFormatExtensions::IsPacked(const PixelFormat format) -{ - return format == PixelFormat::R8G8_B8G8_UNorm || format == PixelFormat::G8R8_G8B8_UNorm; -} - -bool PixelFormatExtensions::IsPlanar(const PixelFormat format) -{ - return false; -} - bool PixelFormatExtensions::IsVideo(const PixelFormat format) { - return false; + switch (format) + { + case PixelFormat::YUY2: + return true; + default: + return false; + } } bool PixelFormatExtensions::IsSRGB(const PixelFormat format) @@ -966,7 +963,6 @@ PixelFormat PixelFormatExtensions::FindShaderResourceFormat(const PixelFormat fo return PixelFormat::R32_Float; case 
PixelFormat::R16_Typeless: return PixelFormat::R16_UNorm; - case PixelFormat::D16_UNorm: return PixelFormat::R16_UNorm; case PixelFormat::D24_UNorm_S8_UInt: @@ -975,6 +971,8 @@ PixelFormat PixelFormatExtensions::FindShaderResourceFormat(const PixelFormat fo return PixelFormat::R32_Float; case PixelFormat::D32_Float_S8X24_UInt: return PixelFormat::R32_Float_X8X24_Typeless; + case PixelFormat::YUY2: + return PixelFormat::R8G8B8A8_UNorm; } return format; } @@ -987,6 +985,8 @@ PixelFormat PixelFormatExtensions::FindUnorderedAccessFormat(const PixelFormat f return PixelFormat::B8G8R8A8_UNorm; case PixelFormat::R8G8B8A8_Typeless: return PixelFormat::R8G8B8A8_UNorm; + case PixelFormat::YUY2: + return PixelFormat::R8G8B8A8_UNorm; } return format; } diff --git a/Source/Engine/Graphics/PixelFormatExtensions.h b/Source/Engine/Graphics/PixelFormatExtensions.h index 498b9270c..d5296ea65 100644 --- a/Source/Engine/Graphics/PixelFormatExtensions.h +++ b/Source/Engine/Graphics/PixelFormatExtensions.h @@ -102,20 +102,6 @@ public: /// True if the is a compressed format from ASTC formats family. API_FUNCTION() static bool IsCompressedASTC(PixelFormat format); - /// - /// Determines whether the specified is packed. - /// - /// The Pixel Format. - /// true if the specified is packed; otherwise, false. - API_FUNCTION() static bool IsPacked(PixelFormat format); - - /// - /// Determines whether the specified is planar. - /// - /// The Pixel Format. - /// true if the specified is planar; otherwise, false. - API_FUNCTION() static bool IsPlanar(PixelFormat format); - /// /// Determines whether the specified is video. 
/// diff --git a/Source/Engine/Graphics/RenderTools.cpp b/Source/Engine/Graphics/RenderTools.cpp index 679555141..e27cacf9c 100644 --- a/Source/Engine/Graphics/RenderTools.cpp +++ b/Source/Engine/Graphics/RenderTools.cpp @@ -298,14 +298,13 @@ void RenderTools::ComputePitch(PixelFormat format, int32 width, int32 height, ui case PixelFormat::BC4_Typeless: case PixelFormat::BC4_UNorm: case PixelFormat::BC4_SNorm: - ASSERT(PixelFormatExtensions::IsCompressed(format)); - { - uint32 nbw = Math::Max(1, (width + 3) / 4); - uint32 nbh = Math::Max(1, (height + 3) / 4); - rowPitch = nbw * 8; - slicePitch = rowPitch * nbh; - } - break; + { + const uint32 nbw = Math::Max(1, (width + 3) / 4); + const uint32 nbh = Math::Max(1, (height + 3) / 4); + rowPitch = nbw * 8; + slicePitch = rowPitch * nbh; + } + break; case PixelFormat::BC2_Typeless: case PixelFormat::BC2_UNorm: case PixelFormat::BC2_UNorm_sRGB: @@ -321,14 +320,13 @@ void RenderTools::ComputePitch(PixelFormat format, int32 width, int32 height, ui case PixelFormat::BC7_Typeless: case PixelFormat::BC7_UNorm: case PixelFormat::BC7_UNorm_sRGB: - ASSERT(PixelFormatExtensions::IsCompressed(format)); - { - uint32 nbw = Math::Max(1, (width + 3) / 4); - uint32 nbh = Math::Max(1, (height + 3) / 4); - rowPitch = nbw * 16; - slicePitch = rowPitch * nbh; - } - break; + { + const uint32 nbw = Math::Max(1, (width + 3) / 4); + const uint32 nbh = Math::Max(1, (height + 3) / 4); + rowPitch = nbw * 16; + slicePitch = rowPitch * nbh; + } + break; case PixelFormat::ASTC_4x4_UNorm: case PixelFormat::ASTC_4x4_UNorm_sRGB: case PixelFormat::ASTC_6x6_UNorm: @@ -339,28 +337,22 @@ void RenderTools::ComputePitch(PixelFormat format, int32 width, int32 height, ui case PixelFormat::ASTC_10x10_UNorm_sRGB: { const int32 blockSize = PixelFormatExtensions::ComputeBlockSize(format); - uint32 nbw = Math::Max(1, Math::DivideAndRoundUp(width, blockSize)); - uint32 nbh = Math::Max(1, Math::DivideAndRoundUp(height, blockSize)); + const uint32 nbw = Math::Max(1, 
Math::DivideAndRoundUp(width, blockSize)); + const uint32 nbh = Math::Max(1, Math::DivideAndRoundUp(height, blockSize)); rowPitch = nbw * 16; // All ASTC blocks use 128 bits slicePitch = rowPitch * nbh; } break; case PixelFormat::R8G8_B8G8_UNorm: case PixelFormat::G8R8_G8B8_UNorm: - ASSERT(PixelFormatExtensions::IsPacked(format)); + case PixelFormat::YUY2: rowPitch = ((width + 1) >> 1) * 4; slicePitch = rowPitch * height; break; default: - ASSERT(PixelFormatExtensions::IsValid(format)); - ASSERT(!PixelFormatExtensions::IsCompressed(format) && !PixelFormatExtensions::IsPacked(format) && !PixelFormatExtensions::IsPlanar(format)); - { - uint32 bpp = PixelFormatExtensions::SizeInBits(format); - - // Default byte alignment - rowPitch = (width * bpp + 7) / 8; - slicePitch = rowPitch * height; - } + // Default byte alignment + rowPitch = (width * PixelFormatExtensions::SizeInBits(format) + 7) / 8; + slicePitch = rowPitch * height; break; } } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index 07bd0ec69..2d90b50ae 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -273,18 +273,6 @@ ID3D11BlendState* GPUDeviceDX11::GetBlendState(const BlendingMode& blending) return blendState; } -static MSAALevel GetMaximumMultisampleCount(ID3D11Device* device, DXGI_FORMAT dxgiFormat) -{ - int32 maxCount = 1; - UINT numQualityLevels; - for (int32 i = 2; i <= 8; i *= 2) - { - if (SUCCEEDED(device->CheckMultisampleQualityLevels(dxgiFormat, i, &numQualityLevels)) && numQualityLevels > 0) - maxCount = i; - } - return static_cast(maxCount); -} - bool GPUDeviceDX11::Init() { HRESULT result; @@ -392,10 +380,16 @@ bool GPUDeviceDX11::Init() { auto format = static_cast(i); auto dxgiFormat = RenderToolsDX::ToDxgiFormat(format); - auto maximumMultisampleCount = GetMaximumMultisampleCount(_device, dxgiFormat); + int32 
maxCount = 1; + UINT numQualityLevels; + for (int32 c = 2; c <= 8; c *= 2) + { + if (SUCCEEDED(_device->CheckMultisampleQualityLevels(dxgiFormat, c, &numQualityLevels)) && numQualityLevels > 0) + maxCount = c; + } UINT formatSupport = 0; _device->CheckFormatSupport(dxgiFormat, &formatSupport); - FeaturesPerFormat[i] = FormatFeatures(format, maximumMultisampleCount, (FormatSupport)formatSupport); + FeaturesPerFormat[i] = FormatFeatures(format, static_cast(maxCount), (FormatSupport)formatSupport); } } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index b070a34d0..05da7c839 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -398,14 +398,10 @@ bool GPUDeviceDX12::Init() { const PixelFormat format = static_cast(i); const DXGI_FORMAT dxgiFormat = RenderToolsDX::ToDxgiFormat(format); - D3D12_FEATURE_DATA_FORMAT_SUPPORT formatInfo = { dxgiFormat }; if (FAILED(_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &formatInfo, sizeof(formatInfo)))) - { formatInfo.Support1 = D3D12_FORMAT_SUPPORT1_NONE; - } const MSAALevel maximumMultisampleCount = GetMaximumMultisampleCount(_device, dxgiFormat); - FeaturesPerFormat[i] = FormatFeatures(format, maximumMultisampleCount, (FormatSupport)formatInfo.Support1); } } diff --git a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp index dd4df4b23..43e751395 100644 --- a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp @@ -9,7 +9,7 @@ // @formatter:off -DXGI_FORMAT PixelFormatToDXGIFormat[108] = +DXGI_FORMAT PixelFormatToDXGIFormat[109] = { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32A32_TYPELESS, @@ -119,6 +119,7 @@ DXGI_FORMAT PixelFormatToDXGIFormat[108] = DXGI_FORMAT_UNKNOWN, // ASTC_8x8_UNorm_sRGB DXGI_FORMAT_UNKNOWN, // 
ASTC_10x10_UNorm DXGI_FORMAT_UNKNOWN, // ASTC_10x10_UNorm_sRGB + DXGI_FORMAT_YUY2, }; // @formatter:on diff --git a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp index cd9d49b6b..d1fe90ee2 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp @@ -8,7 +8,7 @@ // @formatter:off -VkFormat RenderToolsVulkan::PixelFormatToVkFormat[108] = +VkFormat RenderToolsVulkan::PixelFormatToVkFormat[109] = { VK_FORMAT_UNDEFINED, VK_FORMAT_R32G32B32A32_SFLOAT, @@ -118,6 +118,7 @@ VkFormat RenderToolsVulkan::PixelFormatToVkFormat[108] = VK_FORMAT_ASTC_8x8_SRGB_BLOCK, VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_G8B8G8R8_422_UNORM, // YUY2 }; VkBlendFactor RenderToolsVulkan::BlendToVkBlendFactor[20] = From 10c47b8c2aec7913e246c23ca04b191d40cdfe98 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Apr 2024 17:09:54 +0200 Subject: [PATCH 040/292] Fix missing namespace --- Source/Engine/Graphics/Graphics.Build.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Engine/Graphics/Graphics.Build.cs b/Source/Engine/Graphics/Graphics.Build.cs index 3e902e204..e4bad1f87 100644 --- a/Source/Engine/Graphics/Graphics.Build.cs +++ b/Source/Engine/Graphics/Graphics.Build.cs @@ -1,5 +1,6 @@ // Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+using System.IO; using System.Collections.Generic; using Flax.Build; using Flax.Build.NativeCpp; From 97078cda7e9219905ae45fca106772149cd1a308 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Apr 2024 17:10:39 +0200 Subject: [PATCH 041/292] Fix GPU Tasks queue to be executed on frame start, rather than end --- .../Graphics/Async/DefaultGPUTasksExecutor.cpp | 11 +++++------ Source/Engine/Graphics/Async/GPUTask.h | 15 ++++++++------- Source/Engine/Graphics/Async/GPUTasksContext.cpp | 12 ++++++------ Source/Engine/Graphics/Async/GPUTasksContext.h | 3 --- Source/Engine/Graphics/Async/GPUTasksManager.cpp | 5 +++++ 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp index bb7e082bb..ea090e3c9 100644 --- a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp +++ b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp @@ -23,19 +23,18 @@ void DefaultGPUTasksExecutor::FrameBegin() _context = createContext(); _context->OnFrameBegin(); -} -void DefaultGPUTasksExecutor::FrameEnd() -{ - ASSERT(_context != nullptr); - - // Default implementation performs async operations on end of the frame which is synchronized with a rendering thread + // Default implementation performs async operations on start of the frame which is synchronized with a rendering thread GPUTask* buffer[32]; const int32 count = GPUDevice::Instance->GetTasksManager()->RequestWork(buffer, 32); for (int32 i = 0; i < count; i++) { _context->Run(buffer[i]); } +} +void DefaultGPUTasksExecutor::FrameEnd() +{ + ASSERT(_context != nullptr); _context->OnFrameEnd(); } diff --git a/Source/Engine/Graphics/Async/GPUTask.h b/Source/Engine/Graphics/Async/GPUTask.h index 2dde19109..45d21479d 100644 --- a/Source/Engine/Graphics/Async/GPUTask.h +++ b/Source/Engine/Graphics/Async/GPUTask.h @@ -19,7 +19,7 @@ public: /// /// Describes GPU work type /// - DECLARE_ENUM_4(Type, Custom, 
CopyResource, UploadTexture, UploadBuffer); + DECLARE_ENUM_EX_4(Type, byte, 0, Custom, CopyResource, UploadTexture, UploadBuffer); /// /// Describes GPU work result value @@ -32,13 +32,15 @@ private: /// Type _type; + byte _syncLatency; + /// /// Synchronization point when async task has been done /// GPUSyncPoint _syncPoint; /// - /// The context that performed this task, it's should synchronize it. + /// The context that performed this task, it should synchronize it. /// GPUTasksContext* _context; @@ -47,8 +49,10 @@ protected: /// Initializes a new instance of the class. /// /// The type. - GPUTask(const Type type) + /// Amount of frames until async operation is synced with GPU. + GPUTask(const Type type, byte syncLatency = GPU_ASYNC_LATENCY) : _type(type) + , _syncLatency(syncLatency) , _syncPoint(0) , _context(nullptr) { @@ -58,7 +62,6 @@ public: /// /// Gets a task type. /// - /// The type. FORCE_INLINE Type GetType() const { return _type; @@ -67,17 +70,15 @@ public: /// /// Gets work finish synchronization point /// - /// Finish task sync point FORCE_INLINE GPUSyncPoint GetSyncPoint() const { - return _syncPoint; + return _syncPoint + _syncLatency; } public: /// /// Checks if operation is syncing /// - /// True if operation is syncing, otherwise false FORCE_INLINE bool IsSyncing() const { return IsRunning() && _syncPoint != 0; diff --git a/Source/Engine/Graphics/Async/GPUTasksContext.cpp b/Source/Engine/Graphics/Async/GPUTasksContext.cpp index 924276521..e98007a7f 100644 --- a/Source/Engine/Graphics/Async/GPUTasksContext.cpp +++ b/Source/Engine/Graphics/Async/GPUTasksContext.cpp @@ -70,16 +70,16 @@ void GPUTasksContext::OnFrameBegin() ++_currentSyncPoint; // Try to flush done jobs - auto currentSyncPointGPU = _currentSyncPoint - GPU_ASYNC_LATENCY; for (int32 i = 0; i < _tasksDone.Count(); i++) { - if (_tasksDone[i]->GetSyncPoint() <= currentSyncPointGPU) + auto task = _tasksDone[i]; + if (task->GetSyncPoint() <= _currentSyncPoint && task->GetState() != 
TaskState::Finished) { // TODO: add stats counter and count performed jobs, print to log on exit. - - auto job = _tasksDone[i]; - job->Sync(); - + task->Sync(); + } + if (task->GetState() == TaskState::Finished) + { _tasksDone.RemoveAt(i); i--; _totalTasksDoneCount++; diff --git a/Source/Engine/Graphics/Async/GPUTasksContext.h b/Source/Engine/Graphics/Async/GPUTasksContext.h index 1114144c6..1739e17a3 100644 --- a/Source/Engine/Graphics/Async/GPUTasksContext.h +++ b/Source/Engine/Graphics/Async/GPUTasksContext.h @@ -42,7 +42,6 @@ public: /// /// Gets graphics device handle /// - /// Graphics device FORCE_INLINE GPUDevice* GetDevice() const { return GPU->GetDevice(); @@ -51,7 +50,6 @@ public: /// /// Gets current synchronization point of that context (CPU position, GPU has some latency) /// - /// Context sync point FORCE_INLINE GPUSyncPoint GetCurrentSyncPoint() const { return _currentSyncPoint; @@ -60,7 +58,6 @@ public: /// /// Gets total amount of tasks done by this context /// - /// Done tasks count FORCE_INLINE int32 GetTotalTasksDoneCount() const { return _totalTasksDoneCount; diff --git a/Source/Engine/Graphics/Async/GPUTasksManager.cpp b/Source/Engine/Graphics/Async/GPUTasksManager.cpp index 42df38c0c..54cd45ead 100644 --- a/Source/Engine/Graphics/Async/GPUTasksManager.cpp +++ b/Source/Engine/Graphics/Async/GPUTasksManager.cpp @@ -30,6 +30,11 @@ void GPUTask::Execute(GPUTasksContext* context) // Save task completion point (for synchronization) _syncPoint = context->GetCurrentSyncPoint(); _context = context; + if (_syncLatency == 0) + { + // No delay on sync + Sync(); + } } } From 0cdce9dba23ba1bf45d2d41625f0b13c92ada6bd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Apr 2024 23:09:18 +0200 Subject: [PATCH 042/292] Upgrade GDK to `230305` and support `v143` MSVC toolset --- .../Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs | 9 ++------- Source/Tools/Flax.Build/Platforms/GDK/GDK.cs | 2 +- Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs | 10 
+++++++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs index 318223228..3cc80f888 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs @@ -73,13 +73,8 @@ namespace Flax.Deps.Dependencies case TargetPlatform.XboxOne: case TargetPlatform.XboxScarlett: { - var solutionPath = Path.Combine(root, "DirectXTex_GXDK_2019.sln"); - File.Copy(Path.Combine(GetBinariesFolder(options, platform), "DirectXTex_GXDK_2019.sln"), solutionPath, true); - var projectFileContents = File.ReadAllText(Path.Combine(GetBinariesFolder(options, platform), "DirectXTex_GXDK_2019.vcxproj")); - projectFileContents = projectFileContents.Replace("___VS_TOOLSET___", "v142"); - var projectPath = Path.Combine(root, "DirectXTex", "DirectXTex_GXDK_2019.vcxproj"); - File.WriteAllText(projectPath, projectFileContents); - var binFolder = Path.Combine(root, "DirectXTex", "Bin", "GXDK_2019"); + var solutionPath = Path.Combine(root, "DirectXTex_GDK_2022.sln"); + var binFolder = Path.Combine(root, "DirectXTex", "Bin", "GDK_2022"); var xboxName = platform == TargetPlatform.XboxOne ? 
"Gaming.Xbox.XboxOne.x64" : "Gaming.Xbox.Scarlett.x64"; Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, xboxName); var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); diff --git a/Source/Tools/Flax.Build/Platforms/GDK/GDK.cs b/Source/Tools/Flax.Build/Platforms/GDK/GDK.cs index 59f73b20d..a21a4bb1b 100644 --- a/Source/Tools/Flax.Build/Platforms/GDK/GDK.cs +++ b/Source/Tools/Flax.Build/Platforms/GDK/GDK.cs @@ -53,7 +53,7 @@ namespace Flax.Build.Platforms var versionText = contents.Substring(start, end - start); Version = new Version(int.Parse(versionText), 0); - var minEdition = 200500; + var minEdition = 230305; if (Version.Major < minEdition) { Log.Error(string.Format("Unsupported GDK version {0}. Minimum supported is edition {1}.", Version.Major, minEdition)); diff --git a/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs b/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs index 9da10ccc0..8e56950e7 100644 --- a/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs @@ -26,8 +26,9 @@ namespace Flax.Build.Platforms SystemIncludePaths.Add(Path.Combine(GDK.Instance.RootPath, "GRDK\\GameKit\\Include")); SystemLibraryPaths.Add(Path.Combine(GDK.Instance.RootPath, "GRDK\\GameKit\\Lib\\amd64")); var xboxServicesPath = Path.Combine(GDK.Instance.RootPath, "GRDK\\ExtensionLibraries\\Xbox.Services.API.C\\DesignTime\\CommonConfiguration\\Neutral\\"); + var xboxServicesToolset = Toolset > WindowsPlatformToolset.v142 ? 
WindowsPlatformToolset.v142 : Toolset; SystemIncludePaths.Add(xboxServicesPath + "Include"); - SystemLibraryPaths.Add(xboxServicesPath + "Lib\\Release\\" + Toolset); + SystemLibraryPaths.Add(xboxServicesPath + "Lib\\Release\\" + xboxServicesToolset); } /// @@ -42,7 +43,8 @@ namespace Flax.Build.Platforms options.LinkEnv.InputLibraries.Add("xgameruntime.lib"); options.LinkEnv.InputLibraries.Add("xgameplatform.lib"); - options.LinkEnv.InputLibraries.Add($"Microsoft.Xbox.Services.{(int)Toolset}.GDK.C.lib"); + var xboxServicesToolset = Toolset > WindowsPlatformToolset.v142 ? WindowsPlatformToolset.v142 : Toolset; + options.LinkEnv.InputLibraries.Add($"Microsoft.Xbox.Services.{(int)xboxServicesToolset}.GDK.C.lib"); var toolsetPath = WindowsPlatformBase.GetToolsets()[Toolset]; var toolsPath = WindowsPlatformBase.GetVCToolPath64(Toolset); @@ -50,7 +52,9 @@ namespace Flax.Build.Platforms throw new Exception("Don't use debug CRT on GDK."); var name = Path.GetFileName(toolsetPath); var redistToolsPath = Path.Combine(toolsPath, "..", "..", "..", "..", "..", "..", "Redist/MSVC/"); - var paths = Directory.GetDirectories(redistToolsPath, name.Substring(0, 5) + "*"); + var paths = Directory.GetDirectories(redistToolsPath, name.Substring(0, 2) + "*"); + if (paths.Length == 0) + throw new Exception($"Failed to find MSVC redistribute binaries for toolset '{Toolset}' inside folder '{toolsPath}'"); redistToolsPath = Path.Combine(paths[0], "x64", "Microsoft.VC" + (int)Toolset + ".CRT"); redistToolsPath = Utilities.RemovePathRelativeParts(redistToolsPath); options.DependencyFiles.Add(Path.Combine(redistToolsPath, "concrt140.dll")); From 8a45dda98c8e69db2122d1b7d12dde069724f90e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 1 May 2024 01:05:15 +0200 Subject: [PATCH 043/292] Add support for custom file proxies in Editor --- Source/Editor/Content/Proxy/AssetProxy.cs | 4 +-- .../Editor/Content/Proxy/CSharpScriptProxy.cs | 6 ++++ Source/Editor/Content/Proxy/ContentProxy.cs | 10 
+++++++ Source/Editor/Content/Proxy/CppProxy.cs | 6 ++++ Source/Editor/Content/Proxy/FileProxy.cs | 6 ++++ .../Editor/Modules/ContentDatabaseModule.cs | 30 +++++-------------- 6 files changed, 36 insertions(+), 26 deletions(-) diff --git a/Source/Editor/Content/Proxy/AssetProxy.cs b/Source/Editor/Content/Proxy/AssetProxy.cs index a752f697c..35a9c1213 100644 --- a/Source/Editor/Content/Proxy/AssetProxy.cs +++ b/Source/Editor/Content/Proxy/AssetProxy.cs @@ -30,9 +30,7 @@ namespace FlaxEditor.Content /// /// Determines whether [is virtual proxy]. /// - /// - /// true if [is virtual proxy]; otherwise, false. - /// + /// true if [is virtual proxy]; otherwise, false. public bool IsVirtualProxy() { return IsVirtual && CanExport == false; diff --git a/Source/Editor/Content/Proxy/CSharpScriptProxy.cs b/Source/Editor/Content/Proxy/CSharpScriptProxy.cs index 0ea4df501..afab13f5d 100644 --- a/Source/Editor/Content/Proxy/CSharpScriptProxy.cs +++ b/Source/Editor/Content/Proxy/CSharpScriptProxy.cs @@ -29,6 +29,12 @@ namespace FlaxEditor.Content return item is CSharpScriptItem; } + /// + public override ContentItem ConstructItem(string path) + { + return new CSharpScriptItem(path); + } + /// public override void Create(string outputPath, object arg) { diff --git a/Source/Editor/Content/Proxy/ContentProxy.cs b/Source/Editor/Content/Proxy/ContentProxy.cs index 1bb354717..3a8f66b2f 100644 --- a/Source/Editor/Content/Proxy/ContentProxy.cs +++ b/Source/Editor/Content/Proxy/ContentProxy.cs @@ -39,6 +39,16 @@ namespace FlaxEditor.Content return false; } + /// + /// Constructs the item for the file. + /// + /// The file path. + /// Created item or null. + public virtual ContentItem ConstructItem(string path) + { + return null; + } + /// /// Gets a value indicating whether this proxy if for assets. 
/// diff --git a/Source/Editor/Content/Proxy/CppProxy.cs b/Source/Editor/Content/Proxy/CppProxy.cs index f4b92322b..0a32ccec6 100644 --- a/Source/Editor/Content/Proxy/CppProxy.cs +++ b/Source/Editor/Content/Proxy/CppProxy.cs @@ -87,6 +87,12 @@ namespace FlaxEditor.Content return item is CppScriptItem; } + /// + public override ContentItem ConstructItem(string path) + { + return new CppScriptItem(path); + } + /// protected override void GetTemplatePaths(out string headerTemplate, out string sourceTemplate) { diff --git a/Source/Editor/Content/Proxy/FileProxy.cs b/Source/Editor/Content/Proxy/FileProxy.cs index 5f77dbd8f..4e12ab588 100644 --- a/Source/Editor/Content/Proxy/FileProxy.cs +++ b/Source/Editor/Content/Proxy/FileProxy.cs @@ -20,6 +20,12 @@ namespace FlaxEditor.Content return item is FileItem; } + /// + public override ContentItem ConstructItem(string path) + { + return new FileItem(path); + } + /// public override string FileExtension => string.Empty; diff --git a/Source/Editor/Modules/ContentDatabaseModule.cs b/Source/Editor/Modules/ContentDatabaseModule.cs index 0e4e96d30..9596f2df4 100644 --- a/Source/Editor/Modules/ContentDatabaseModule.cs +++ b/Source/Editor/Modules/ContentDatabaseModule.cs @@ -129,12 +129,9 @@ namespace FlaxEditor.Modules for (int i = 0; i < Proxy.Count; i++) { if (Proxy[i].IsProxyFor(item)) - { return Proxy[i]; - } } } - return null; } @@ -147,11 +144,8 @@ namespace FlaxEditor.Modules for (int i = 0; i < Proxy.Count; i++) { if (Proxy[i].IsProxyFor()) - { return Proxy[i]; - } } - return null; } @@ -164,17 +158,12 @@ namespace FlaxEditor.Modules { if (string.IsNullOrEmpty(extension)) throw new ArgumentNullException(); - extension = StringUtils.NormalizeExtension(extension); - for (int i = 0; i < Proxy.Count; i++) { - if (Proxy[i].FileExtension == extension) - { + if (string.Equals(Proxy[i].FileExtension, extension, StringComparison.Ordinal)) return Proxy[i]; - } } - return null; } @@ -189,30 +178,23 @@ namespace FlaxEditor.Modules for 
(int i = 0; i < Proxy.Count; i++) { if (Proxy[i] is AssetProxy proxy && proxy.AcceptsAsset(typeName, path)) - { return proxy; - } } - return null; } + /// /// Gets the virtual proxy object from given path. - ///

use case if the asset u trying to display is not a flax asset but u like to add custom functionality - ///

to context menu,or display it the asset ///
/// The asset path. /// Asset proxy or null if cannot find. - public AssetProxy GetAssetVirtuallProxy(string path) + public AssetProxy GetAssetVirtualProxy(string path) { for (int i = 0; i < Proxy.Count; i++) { if (Proxy[i] is AssetProxy proxy && proxy.IsVirtualProxy() && path.EndsWith(proxy.FileExtension, StringComparison.OrdinalIgnoreCase)) - { return proxy; - } } - return null; } @@ -1016,11 +998,13 @@ namespace FlaxEditor.Modules } if (item == null) { - var proxy = GetAssetVirtuallProxy(path); + var proxy = GetAssetVirtualProxy(path); item = proxy?.ConstructItem(path, assetInfo.TypeName, ref assetInfo.ID); if (item == null) { - item = new FileItem(path); + item = GetProxy(Path.GetExtension(path))?.ConstructItem(path); + if (item == null) + item = new FileItem(path); } } From 0d8c9f662600b7d544c0a00d6ae64a864a5a0df2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 1 May 2024 01:25:16 +0200 Subject: [PATCH 044/292] Add `Video` module --- Source/Editor/Content/Items/VideoItem.cs | 28 + Source/Editor/Content/Proxy/VideoProxy.cs | 48 ++ .../CustomEditors/Editors/IBrushEditor.cs | 1 + .../Editor/Modules/ContentDatabaseModule.cs | 1 + .../Utilities/ViewportIconsRenderer.cpp | 2 + Source/Editor/Windows/Assets/VideoWindow.cs | 232 +++++++++ Source/Engine/Engine/Engine.Build.cs | 1 + .../Engine/Graphics/Async/GPUTasksContext.cpp | 7 +- Source/Engine/Graphics/GPUDevice.cpp | 14 + Source/Engine/Graphics/GPUDevice.h | 5 + .../Engine/UI/GUI/Brushes/GPUTextureBrush.cs | 4 +- Source/Engine/UI/GUI/Brushes/VideoBrush.cs | 61 +++ Source/Engine/Video/MF/VideoBackendMF.cpp | 489 ++++++++++++++++++ Source/Engine/Video/MF/VideoBackendMF.h | 30 ++ Source/Engine/Video/Types.h | 56 ++ Source/Engine/Video/Video.Build.cs | 45 ++ Source/Engine/Video/Video.cpp | 245 +++++++++ Source/Engine/Video/Video.h | 14 + Source/Engine/Video/VideoBackend.h | 43 ++ Source/Engine/Video/VideoPlayer.cpp | 186 +++++++ Source/Engine/Video/VideoPlayer.h | 162 ++++++ Source/Shaders/Quad.shader | 29 ++ 22 
files changed, 1699 insertions(+), 4 deletions(-) create mode 100644 Source/Editor/Content/Items/VideoItem.cs create mode 100644 Source/Editor/Content/Proxy/VideoProxy.cs create mode 100644 Source/Editor/Windows/Assets/VideoWindow.cs create mode 100644 Source/Engine/UI/GUI/Brushes/VideoBrush.cs create mode 100644 Source/Engine/Video/MF/VideoBackendMF.cpp create mode 100644 Source/Engine/Video/MF/VideoBackendMF.h create mode 100644 Source/Engine/Video/Types.h create mode 100644 Source/Engine/Video/Video.Build.cs create mode 100644 Source/Engine/Video/Video.cpp create mode 100644 Source/Engine/Video/Video.h create mode 100644 Source/Engine/Video/VideoBackend.h create mode 100644 Source/Engine/Video/VideoPlayer.cpp create mode 100644 Source/Engine/Video/VideoPlayer.h diff --git a/Source/Editor/Content/Items/VideoItem.cs b/Source/Editor/Content/Items/VideoItem.cs new file mode 100644 index 000000000..e4e761ab7 --- /dev/null +++ b/Source/Editor/Content/Items/VideoItem.cs @@ -0,0 +1,28 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +using FlaxEngine; + +namespace FlaxEditor.Content +{ + /// + /// Content item that contains video media file. + /// + /// + public sealed class VideoItem : FileItem + { + /// + /// Initializes a new instance of the class. + /// + /// The file path. + public VideoItem(string path) + : base(path) + { + } + + /// + public override string TypeDescription => "Video"; + + /// + public override SpriteHandle DefaultThumbnail => Editor.Instance.Icons.ColorWheel128; + } +} diff --git a/Source/Editor/Content/Proxy/VideoProxy.cs b/Source/Editor/Content/Proxy/VideoProxy.cs new file mode 100644 index 000000000..fd6df593b --- /dev/null +++ b/Source/Editor/Content/Proxy/VideoProxy.cs @@ -0,0 +1,48 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +using FlaxEditor.Windows; +using FlaxEditor.Windows.Assets; +using FlaxEngine; + +namespace FlaxEditor.Content +{ + /// + /// A video media file proxy object. 
+ /// + public class VideoProxy : ContentProxy + { + private readonly string _extension; + + internal VideoProxy(string extension) + { + _extension = extension; + } + + /// + public override string Name => "Video"; + + /// + public override string FileExtension => _extension; + + /// + public override Color AccentColor => Color.FromRGB(0x11f7f1); + + /// + public override bool IsProxyFor(ContentItem item) + { + return item is VideoItem; + } + + /// + public override ContentItem ConstructItem(string path) + { + return new VideoItem(path); + } + + /// + public override EditorWindow Open(Editor editor, ContentItem item) + { + return new VideoWindow(editor, (VideoItem)item); + } + } +} diff --git a/Source/Editor/CustomEditors/Editors/IBrushEditor.cs b/Source/Editor/CustomEditors/Editors/IBrushEditor.cs index edf3296d2..59d09f37c 100644 --- a/Source/Editor/CustomEditors/Editors/IBrushEditor.cs +++ b/Source/Editor/CustomEditors/Editors/IBrushEditor.cs @@ -25,6 +25,7 @@ namespace FlaxEditor.CustomEditors.Editors new OptionType("Linear Gradient", typeof(LinearGradientBrush)), new OptionType("Texture 9-Slicing", typeof(Texture9SlicingBrush)), new OptionType("Sprite 9-Slicing", typeof(Sprite9SlicingBrush)), + new OptionType("Video", typeof(VideoBrush)), }; } } diff --git a/Source/Editor/Modules/ContentDatabaseModule.cs b/Source/Editor/Modules/ContentDatabaseModule.cs index 9596f2df4..052ca14c5 100644 --- a/Source/Editor/Modules/ContentDatabaseModule.cs +++ b/Source/Editor/Modules/ContentDatabaseModule.cs @@ -1090,6 +1090,7 @@ namespace FlaxEditor.Modules Proxy.Add(new VisualScriptProxy()); Proxy.Add(new BehaviorTreeProxy()); Proxy.Add(new LocalizedStringTableProxy()); + Proxy.Add(new VideoProxy("mp4")); Proxy.Add(new WidgetProxy()); Proxy.Add(new FileProxy()); Proxy.Add(new SpawnableJsonAssetProxy()); diff --git a/Source/Editor/Utilities/ViewportIconsRenderer.cpp b/Source/Editor/Utilities/ViewportIconsRenderer.cpp index 797b4edea..5f178fe34 100644 --- 
a/Source/Editor/Utilities/ViewportIconsRenderer.cpp +++ b/Source/Editor/Utilities/ViewportIconsRenderer.cpp @@ -21,6 +21,7 @@ #include "Engine/Level/Actors/Sky.h" #include "Engine/Level/Actors/SkyLight.h" #include "Engine/Level/Actors/SpotLight.h" +#include "Engine/Video/VideoPlayer.h" #define ICON_RADIUS 7.0f @@ -283,6 +284,7 @@ bool ViewportIconsRendererService::Init() MAP_TYPE(Sky, Skybox); MAP_TYPE(SkyLight, SkyLight); MAP_TYPE(SpotLight, PointLight); + MAP_TYPE(VideoPlayer, SceneAnimationPlayer); #undef MAP_TYPE return false; diff --git a/Source/Editor/Windows/Assets/VideoWindow.cs b/Source/Editor/Windows/Assets/VideoWindow.cs new file mode 100644 index 000000000..7bd6c4adc --- /dev/null +++ b/Source/Editor/Windows/Assets/VideoWindow.cs @@ -0,0 +1,232 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +using FlaxEditor.Content; +using FlaxEngine; +using FlaxEngine.GUI; + +namespace FlaxEditor.Windows.Assets +{ + /// + /// Editor window to view video media. + /// + public sealed class VideoWindow : EditorWindow, IContentItemOwner + { + private VideoItem _item; + private Image _frame; + private VideoPlayer _videoPlayer; + private Image _seekBegin, _seekEnd, _seekLeft, _seekRight, _playPause, _stop; + + /// + public VideoWindow(Editor editor, VideoItem item) + : base(editor, false, ScrollBars.None) + { + _item = item; + _item.AddReference(this); + Title = _item.ShortName; + + // Setup video player + _videoPlayer = new VideoPlayer + { + PlayOnStart = false, + Url = item.Path, + }; + + // Setup UI + var style = Style.Current; + var icons = Editor.Icons; + var playbackButtonsSize = 24.0f; + var playbackButtonsMouseOverColor = Color.FromBgra(0xFFBBBBBB); + _frame = new Image + { + Brush = new VideoBrush(_videoPlayer), + AnchorPreset = AnchorPresets.StretchAll, + Offsets = new Margin(0.0f, 0.0f, 0.0f, playbackButtonsSize), + Parent = this, + }; + var playbackButtonsArea = new ContainerControl + { + AutoFocus = false, + ClipChildren = false, + 
BackgroundColor = style.LightBackground, + AnchorPreset = AnchorPresets.HorizontalStretchBottom, + Offsets = new Margin(0, 0, -playbackButtonsSize, playbackButtonsSize), + Parent = this + }; + var playbackButtonsPanel = new ContainerControl + { + AutoFocus = false, + ClipChildren = false, + AnchorPreset = AnchorPresets.VerticalStretchCenter, + Offsets = Margin.Zero, + Parent = playbackButtonsArea, + }; + _seekBegin = new Image(playbackButtonsPanel.Width, 0, playbackButtonsSize, playbackButtonsSize) + { + TooltipText = "Rewind to timeline start (Home)", + Brush = new SpriteBrush(icons.Skip64), + MouseOverColor = playbackButtonsMouseOverColor, + Rotation = 180.0f, + Parent = playbackButtonsPanel + }; + _seekBegin.Clicked += (image, button) => SeekBegin(); + playbackButtonsPanel.Width += playbackButtonsSize; + _seekLeft = new Image(playbackButtonsPanel.Width, 0, playbackButtonsSize, playbackButtonsSize) + { + TooltipText = "Move one frame back (Left Arrow)", + Brush = new SpriteBrush(icons.Left32), + MouseOverColor = playbackButtonsMouseOverColor, + Parent = playbackButtonsPanel + }; + _seekLeft.Clicked += (image, button) => SeekLeft(); + playbackButtonsPanel.Width += playbackButtonsSize; + _stop = new Image(playbackButtonsPanel.Width, 0, playbackButtonsSize, playbackButtonsSize) + { + TooltipText = "Stop playback", + Brush = new SpriteBrush(icons.Stop64), + MouseOverColor = playbackButtonsMouseOverColor, + Parent = playbackButtonsPanel + }; + _stop.Clicked += (image, button) => Stop(); + playbackButtonsPanel.Width += playbackButtonsSize; + _playPause = new Image(playbackButtonsPanel.Width, 0, playbackButtonsSize, playbackButtonsSize) + { + TooltipText = "Play/pause playback (Space)", + Brush = new SpriteBrush(icons.Play64), + MouseOverColor = playbackButtonsMouseOverColor, + Parent = playbackButtonsPanel + }; + _playPause.Clicked += (image, button) => PlayPause(); + playbackButtonsPanel.Width += playbackButtonsSize; + _seekRight = new 
Image(playbackButtonsPanel.Width, 0, playbackButtonsSize, playbackButtonsSize) + { + TooltipText = "Move one frame forward (Right Arrow)", + Brush = new SpriteBrush(icons.Right32), + MouseOverColor = playbackButtonsMouseOverColor, + Parent = playbackButtonsPanel + }; + _seekRight.Clicked += (image, button) => SeekRight(); + playbackButtonsPanel.Width += playbackButtonsSize; + _seekEnd = new Image(playbackButtonsPanel.Width, 0, playbackButtonsSize, playbackButtonsSize) + { + TooltipText = "Rewind to timeline end (End)", + Brush = new SpriteBrush(icons.Skip64), + MouseOverColor = playbackButtonsMouseOverColor, + Parent = playbackButtonsPanel + }; + _seekEnd.Clicked += (image, button) => SeekEnd(); + playbackButtonsPanel.Width += playbackButtonsSize; + playbackButtonsPanel.X = (playbackButtonsPanel.Parent.Width - playbackButtonsPanel.Width) * 0.5f; + } + + private void PlayPause() + { + if (_videoPlayer.State == VideoPlayer.States.Playing) + _videoPlayer.Pause(); + else + _videoPlayer.Play(); + } + + private void Stop() + { + _videoPlayer.Stop(); + } + + private void SeekBegin() + { + _videoPlayer.Time = 0.0f; + } + + private void SeekEnd() + { + _videoPlayer.Time = _videoPlayer.Duration; + } + + private void SeekLeft() + { + if (_videoPlayer.State == VideoPlayer.States.Paused) + _videoPlayer.Time -= 1.0f / _videoPlayer.FrameRate; + } + + private void SeekRight() + { + if (_videoPlayer.State == VideoPlayer.States.Paused) + _videoPlayer.Time += 1.0f / _videoPlayer.FrameRate; + } + + /// + public override bool OnKeyDown(KeyboardKeys key) + { + if (base.OnKeyDown(key)) + return true; + + switch (key) + { + case KeyboardKeys.ArrowLeft: + SeekLeft(); + return true; + case KeyboardKeys.ArrowRight: + SeekRight(); + return true; + case KeyboardKeys.Home: + SeekBegin(); + return true; + case KeyboardKeys.End: + SeekEnd(); + return true; + case KeyboardKeys.Spacebar: + PlayPause(); + return true; + } + + return false; + } + + /// + public override void Update(float deltaTime) + 
{ + base.Update(deltaTime); + + // Update UI + var state = _videoPlayer.State; + var icons = Editor.Icons; + _stop.Enabled = state != VideoPlayer.States.Stopped; + _seekLeft.Enabled = _seekRight.Enabled = state != VideoPlayer.States.Playing; + ((SpriteBrush)_playPause.Brush).Sprite = state == VideoPlayer.States.Playing ? icons.Pause64 : icons.Play64; + } + + /// + public override void OnDestroy() + { + if (IsDisposing) + return; + _item.RemoveReference(this); + _item = null; + + base.OnDestroy(); + } + + /// + public void OnItemDeleted(ContentItem item) + { + if (item == _item) + Close(); + } + + /// + public void OnItemRenamed(ContentItem item) + { + } + + /// + public void OnItemReimported(ContentItem item) + { + } + + /// + public void OnItemDispose(ContentItem item) + { + if (item == _item) + Close(); + } + } +} diff --git a/Source/Engine/Engine/Engine.Build.cs b/Source/Engine/Engine/Engine.Build.cs index e5893d30d..43d7c8bf0 100644 --- a/Source/Engine/Engine/Engine.Build.cs +++ b/Source/Engine/Engine/Engine.Build.cs @@ -17,6 +17,7 @@ public class Engine : EngineModule options.PublicDependencies.Add("AI"); options.PublicDependencies.Add("Animations"); options.PublicDependencies.Add("Audio"); + options.PublicDependencies.Add("Video"); options.PublicDependencies.Add("Content"); options.PublicDependencies.Add("Debug"); options.PublicDependencies.Add("Foliage"); diff --git a/Source/Engine/Graphics/Async/GPUTasksContext.cpp b/Source/Engine/Graphics/Async/GPUTasksContext.cpp index e98007a7f..f4f58f4d0 100644 --- a/Source/Engine/Graphics/Async/GPUTasksContext.cpp +++ b/Source/Engine/Graphics/Async/GPUTasksContext.cpp @@ -34,8 +34,11 @@ GPUTasksContext::~GPUTasksContext() for (int32 i = 0; i < tasks.Count(); i++) { auto task = tasks[i]; - LOG(Warning, "{0} has been canceled before a sync", task->ToString()); - tasks[i]->CancelSync(); + if (task->GetSyncPoint() <= _currentSyncPoint && task->GetState() != TaskState::Finished) + { + LOG(Warning, "{0} has been canceled 
before a sync", task->ToString()); + task->CancelSync(); + } } #if GPU_TASKS_USE_DEDICATED_CONTEXT diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index 0a4277a40..c2aa0c82d 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -296,6 +296,7 @@ struct GPUDevice::PrivateData AssetReference QuadShader; GPUPipelineState* PS_CopyLinear = nullptr; GPUPipelineState* PS_Clear = nullptr; + GPUPipelineState* PS_DecodeYUY2 = nullptr; GPUBuffer* FullscreenTriangleVB = nullptr; AssetReference DefaultMaterial; SoftAssetReference DefaultDeformableMaterial; @@ -489,6 +490,7 @@ void GPUDevice::preDispose() _res->DefaultBlackTexture = nullptr; SAFE_DELETE_GPU_RESOURCE(_res->PS_CopyLinear); SAFE_DELETE_GPU_RESOURCE(_res->PS_Clear); + SAFE_DELETE_GPU_RESOURCE(_res->PS_DecodeYUY2); SAFE_DELETE_GPU_RESOURCE(_res->FullscreenTriangleVB); Locker.Unlock(); @@ -701,6 +703,18 @@ GPUPipelineState* GPUDevice::GetClearPS() const return _res->PS_Clear; } +GPUPipelineState* GPUDevice::GetDecodeYUY2PS() const +{ + if (_res->PS_DecodeYUY2 == nullptr) + { + auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.PS = QuadShader->GetPS("PS_DecodeYUY2"); + _res->PS_DecodeYUY2 = const_cast(this)->CreatePipelineState(); + _res->PS_DecodeYUY2->Init(psDesc); + } + return _res->PS_DecodeYUY2; +} + GPUBuffer* GPUDevice::GetFullscreenTriangleVB() const { return _res->FullscreenTriangleVB; diff --git a/Source/Engine/Graphics/GPUDevice.h b/Source/Engine/Graphics/GPUDevice.h index 6b0de95ed..f1dfecf60 100644 --- a/Source/Engine/Graphics/GPUDevice.h +++ b/Source/Engine/Graphics/GPUDevice.h @@ -270,6 +270,11 @@ public: ///
GPUPipelineState* GetClearPS() const; + /// + /// Gets the shader pipeline state object for YUY2 frame decoding to RGBA. + /// + GPUPipelineState* GetDecodeYUY2PS() const; + /// /// Gets the fullscreen-triangle vertex buffer. /// diff --git a/Source/Engine/UI/GUI/Brushes/GPUTextureBrush.cs b/Source/Engine/UI/GUI/Brushes/GPUTextureBrush.cs index 03244cc3d..c33c62789 100644 --- a/Source/Engine/UI/GUI/Brushes/GPUTextureBrush.cs +++ b/Source/Engine/UI/GUI/Brushes/GPUTextureBrush.cs @@ -17,7 +17,7 @@ namespace FlaxEngine.GUI /// /// The texture sampling filter mode. /// - [ExpandGroups, Tooltip("The texture sampling filter mode.")] + [ExpandGroups] public BrushFilter Filter = BrushFilter.Linear; /// @@ -37,7 +37,7 @@ namespace FlaxEngine.GUI } /// - public Float2 Size => Texture != null ? Texture.Size : Float2.Zero; + public Float2 Size => Texture != null ? Texture.Size : Float2.One; /// public void Draw(Rectangle rect, Color color) diff --git a/Source/Engine/UI/GUI/Brushes/VideoBrush.cs b/Source/Engine/UI/GUI/Brushes/VideoBrush.cs new file mode 100644 index 000000000..035ba7a0f --- /dev/null +++ b/Source/Engine/UI/GUI/Brushes/VideoBrush.cs @@ -0,0 +1,61 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +namespace FlaxEngine.GUI +{ + /// + /// Implementation of for frame displaying. + /// + /// + public sealed class VideoBrush : IBrush + { + /// + /// The video player to display frame from it. + /// + public VideoPlayer Player; + + /// + /// The texture sampling filter mode. + /// + [ExpandGroups] + public BrushFilter Filter = BrushFilter.Linear; + + /// + /// Initializes a new instance of the class. + /// + public VideoBrush() + { + } + + /// + /// Initializes a new instance of the struct. + /// + /// The video player to preview. 
+ public VideoBrush(VideoPlayer player) + { + Player = player; + } + + /// + public Float2 Size + { + get + { + if (Player && Player.Size.LengthSquared > 0) + return (Float2)Player.Size; + return new Float2(1920, 1080); + } + } + + /// + public void Draw(Rectangle rect, Color color) + { + var texture = Player?.Frame; + if (texture == null || !texture.IsAllocated) + texture = GPUDevice.Instance.DefaultBlackTexture; + if (Filter == BrushFilter.Point) + Render2D.DrawTexturePoint(texture, rect, color); + else + Render2D.DrawTexture(texture, rect, color); + } + } +} diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp new file mode 100644 index 000000000..8ccefb403 --- /dev/null +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -0,0 +1,489 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +#if VIDEO_API_MF + +#include "VideoBackendMF.h" +#include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Core/Log.h" +#include "Engine/Engine/Time.h" +#include "Engine/Audio/Types.h" +#if USE_EDITOR +#include "Editor/Editor.h" +#endif +#include +#include +#include + +#define VIDEO_API_MF_ERROR(api, err) LOG(Warning, "[VideoBackendMF] {} failed with error 0x{:x}", TEXT(#api), (uint64)err) + +struct VideoPlayerMF +{ + IMFSourceReader* SourceReader; + uint8 Loop : 1; + uint8 Playing : 1; + uint8 FirstFrame : 1; + uint8 Seek : 1; + TimeSpan Time; +}; + +namespace +{ + Array Players; + + bool Configure(VideoBackendPlayer& player, VideoPlayerMF& playerMF, DWORD streamIndex) + { + PROFILE_CPU_NAMED("Configure"); + IMFMediaType *mediaType = nullptr, *nativeType = nullptr; + bool result = true; + + // Find the native format of the stream + HRESULT hr = playerMF.SourceReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetNativeMediaType, hr); + goto END; + } + hr = playerMF.SourceReader->GetCurrentMediaType(streamIndex, 
&mediaType); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetCurrentMediaType, hr); + goto END; + } + GUID majorType, subtype; + hr = mediaType->GetGUID(MF_MT_MAJOR_TYPE, &majorType); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetGUID, hr); + goto END; + } + hr = mediaType->GetGUID(MF_MT_SUBTYPE, &subtype); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetGUID, hr); + goto END; + } + + // Extract media information + if (majorType == MFMediaType_Video) + { + UINT32 width, height; + hr = MFGetAttributeSize(mediaType, MF_MT_FRAME_SIZE, &width, &height); + if (SUCCEEDED(hr)) + { + player.Width = player.VideoFrameWidth = width; + player.Height = player.VideoFrameHeight = height; + } + MFVideoArea videoArea; + hr = mediaType->GetBlob(MF_MT_MINIMUM_DISPLAY_APERTURE, (UINT8*)&videoArea, sizeof(MFVideoArea), NULL); + if (SUCCEEDED(hr) && videoArea.Area.cx > 0 && videoArea.Area.cy > 0) + { + // Video frame has different size in memory than for display (eg. 1080p video will use 1088 height due to H264 decoding) + player.Width = videoArea.Area.cx; + player.Height = videoArea.Area.cy; + } + player.AvgBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0); + uint64_t fpsValue; + hr = mediaType->GetUINT64(MF_MT_FRAME_RATE, &fpsValue); + if (SUCCEEDED(hr)) + { + player.FrameRate = (float)HI32(fpsValue) / (float)LO32(fpsValue); + } + if (subtype == MFVideoFormat_RGB32) + player.Format = PixelFormat::B8G8R8X8_UNorm; + else if (subtype == MFVideoFormat_ARGB32) + player.Format = PixelFormat::B8G8R8A8_UNorm; + else if (subtype == MFVideoFormat_RGB555) + player.Format = PixelFormat::B5G6R5_UNorm; + else if (subtype == MFVideoFormat_RGB555) + player.Format = PixelFormat::B5G5R5A1_UNorm; + else if (subtype == MFVideoFormat_YUY2) + player.Format = PixelFormat::YUY2; +#if (WDK_NTDDI_VERSION >= NTDDI_WIN10) + else if (subtype == MFVideoFormat_A2R10G10B10) + player.Format = PixelFormat::R10G10B10A2_UNorm; + else if (subtype == MFVideoFormat_A16B16G16R16F) + player.Format = 
PixelFormat::R16G16B16A16_Float; +#endif + else + { + // Reconfigure decoder to output supported format by force + IMFMediaType* customType = nullptr; + hr = MFCreateMediaType(&customType); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(MFCreateMediaType, hr); + goto END; + } + customType->SetGUID(MF_MT_MAJOR_TYPE, majorType); + customType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_YUY2); + MFSetAttributeSize(customType, MF_MT_FRAME_SIZE, width, height); + hr = playerMF.SourceReader->SetCurrentMediaType(streamIndex, nullptr, customType); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(SetCurrentMediaType, hr); + goto END; + } + player.Format = PixelFormat::YUY2; + customType->Release(); + } + } + else if (majorType == MFMediaType_Audio) + { + player.AudioInfo.SampleRate = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_SAMPLES_PER_SECOND, 0); + player.AudioInfo.NumChannels = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_NUM_CHANNELS, 0); + player.AudioInfo.BitDepth = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_BITS_PER_SAMPLE, 16); + } + + result = false; + END: + SAFE_RELEASE(mediaType); + return result; + } +} + +bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) +{ + PROFILE_CPU(); + player = VideoBackendPlayer(); + auto& playerMF = player.GetBackendState(); + + // Load media + IMFAttributes* attributes = nullptr; + HRESULT hr = MFCreateAttributes(&attributes, 1); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(MFCreateAttributes, hr); + return true; + } + attributes->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, 1); + attributes->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, 1); + IMFSourceReader* sourceReader = nullptr; + hr = MFCreateSourceReaderFromURL(*info.Url, attributes, &sourceReader); + attributes->Release(); + if (FAILED(hr) || !sourceReader) + { + VIDEO_API_MF_ERROR(MFCreateSourceReaderFromURL, hr); + return true; + } + sourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 1); + 
sourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, 1); + playerMF.SourceReader = sourceReader; + + // Read media info + if (Configure(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM) || + Configure(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM)) + return true; + PROPVARIANT var; + hr = sourceReader->GetPresentationAttribute(MF_SOURCE_READER_MEDIASOURCE, MF_PD_DURATION, &var); + if (SUCCEEDED(hr)) + { + player.Duration.Ticks = var.vt == VT_UI8 ? var.uhVal.QuadPart : 0; + PropVariantClear(&var); + } + + // Setup player data + player.Backend = this; + playerMF.Loop = info.Loop; + playerMF.FirstFrame = 1; + Players.Add(&player); + + return false; +} + +void VideoBackendMF::Player_Destroy(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + player.ReleaseResources(); + auto& playerMF = player.GetBackendState(); + playerMF.SourceReader->Release(); + Players.Remove(&player); + player = VideoBackendPlayer(); +} + +void VideoBackendMF::Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) +{ + PROFILE_CPU(); + auto& playerMF = player.GetBackendState(); + playerMF.Loop = true; +} + +void VideoBackendMF::Player_Play(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerMF = player.GetBackendState(); + playerMF.Playing = 1; +} + +void VideoBackendMF::Player_Pause(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerMF = player.GetBackendState(); + playerMF.Playing = 0; +} + +void VideoBackendMF::Player_Stop(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerMF = player.GetBackendState(); + playerMF.Time = TimeSpan::Zero(); + playerMF.Playing = 0; + playerMF.FirstFrame = 1; + playerMF.Seek = 1; +} + +void VideoBackendMF::Player_Seek(VideoBackendPlayer& player, TimeSpan time) +{ + PROFILE_CPU(); + auto& playerMF = player.GetBackendState(); + playerMF.Time = time; + playerMF.Seek = 1; +} + +TimeSpan VideoBackendMF::Player_GetTime(const VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& 
playerMF = player.GetBackendState(); + return playerMF.Time; +} + +const Char* VideoBackendMF::Base_Name() +{ + return TEXT("Media Foundation"); +} + +bool VideoBackendMF::Base_Init() +{ + PROFILE_CPU(); + + // Init COM + HRESULT hr = CoInitializeEx(0, COINIT_MULTITHREADED); + if (FAILED(hr) && hr != 0x80010106) // 0x80010106 = Cannot change thread mode after it is set. + { + VIDEO_API_MF_ERROR(CoInitializeEx, hr); + return true; + } + + // Init Media Foundation + hr = MFStartup(MF_VERSION); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(MFStartup, hr); + return true; + } + + return false; +} + +void VideoBackendMF::Base_Update() +{ + PROFILE_CPU(); + // TODO: use async Task Graph to update videos + HRESULT hr; + for (auto* e : Players) + { + auto& player = *e; + auto& playerMF = player.GetBackendState(); + + // Skip paused player + if (!playerMF.Playing && !playerMF.Seek) + continue; + + bool useTimeScale = true; +#if USE_EDITOR + if (!Editor::IsPlayMode) + useTimeScale = false; +#endif + TimeSpan dt = useTimeScale ? Time::Update.DeltaTime : Time::Update.UnscaledDeltaTime; + + // Update playback time + if (playerMF.FirstFrame) + { + playerMF.FirstFrame = 0; + playerMF.Seek = 1; + } + else if (playerMF.Playing) + { + playerMF.Time += dt; + } + if (playerMF.Time > player.Duration) + { + if (playerMF.Loop) + { + // Loop + playerMF.Time.Ticks %= player.Duration.Ticks; + playerMF.Seek = 1; + } + else + { + // End + playerMF.Time = player.Duration; + } + } + + // Update current position + int32 seeks = 0; + SEEK_START: + if (playerMF.Seek) + { + seeks++; + playerMF.Seek = 0; + PROPVARIANT var; + PropVariantInit(&var); + var.vt = VT_I8; + var.hVal.QuadPart = playerMF.Time.Ticks; + PROFILE_CPU_NAMED("SetCurrentPosition"); + playerMF.SourceReader->SetCurrentPosition(GUID_NULL, var); + + // Note: + // SetCurrentPosition method does not guarantee exact seeking. + // The accuracy of the seek depends on the media content. 
+ // If the media content contains a video stream, the SetCurrentPosition method typically seeks to the nearest key frame before the desired position. + // After seeking, the application should call ReadSample and advance to the desired position. + } + + // Check if the current frame is valid (eg. when playing 24fps video at 60fps) + if (player.VideoFrameDuration.Ticks > 0 && + Math::IsInRange(playerMF.Time, player.VideoFrameTime, player.VideoFrameTime + player.VideoFrameDuration)) + { + continue; + } + + // Read samples until frame is matching the current time + int32 samplesLeft = 500; + for (; samplesLeft > 0; samplesLeft--) + { + // Read sample + DWORD streamIndex = 0, flags = 0; + LONGLONG samplePos = 0, sampleDuration = 0; + IMFSample* videoSample = nullptr; + { + PROFILE_CPU_NAMED("ReadSample"); + hr = playerMF.SourceReader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, &streamIndex, &flags, &samplePos, &videoSample); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(ReadSample, hr); + break; + } + } + TimeSpan frameTime((int64)samplePos); + TimeSpan franeDuration = player.FrameRate > 0 ? 
TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt; + if (videoSample && videoSample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0) + { + franeDuration.Ticks = sampleDuration; + } + //const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / franeDuration.Ticks; + const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration); + + // Process sample + if (videoSample && isGoodSample) + { + PROFILE_CPU_NAMED("ProcessSample"); + + // Lock sample buffer memory (try to use 2D buffer for more direct memory access) + IMFMediaBuffer* buffer = nullptr; + IMF2DBuffer* buffer2D = nullptr; + BYTE* bufferData = nullptr; + LONG bufferStride = 0; + if (videoSample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK) + { + LONG bufferPitch = 0; + hr = buffer2D->Lock2D(&bufferData, &bufferPitch); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetCurrentLength, hr); + goto PROCESS_SAMPLE_END; + } + if (bufferPitch < 0) + bufferPitch = -bufferPitch; // Flipped image + bufferStride = bufferPitch * player.VideoFrameHeight; + } + else + { + if (buffer) + { + buffer->Release(); + buffer = nullptr; + } + DWORD bufferLength; + hr = videoSample->ConvertToContiguousBuffer(&buffer); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr); + goto PROCESS_SAMPLE_END; + } + hr = buffer->GetCurrentLength(&bufferLength); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetCurrentLength, hr); + goto PROCESS_SAMPLE_END; + } + DWORD bufferMaxLen = 0, bufferCurrentLength = 0; + hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(Lock, hr); + goto PROCESS_SAMPLE_END; + } + bufferStride = bufferCurrentLength; + } + + // Send pixels to the texture + player.UpdateVideoFrame(Span(bufferData, bufferStride), frameTime, franeDuration); + + // Unlock sample buffer memory + if (buffer2D) + { + hr = buffer2D->Unlock2D(); + if (FAILED(hr)) 
+ { + VIDEO_API_MF_ERROR(Unlock2D, hr); + } + } + else + { + hr = buffer->Unlock(); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(Unlock, hr); + } + } + + PROCESS_SAMPLE_END: + buffer->Release(); + } + if (videoSample) + videoSample->Release(); + + if (flags & MF_SOURCE_READERF_ENDOFSTREAM) + { + // Media ended + break; + } + if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED) + { + // Format/metadata might have changed so update the stream + Configure(player, playerMF, streamIndex); + } + + // End loop if got good sample or need to seek back + if (isGoodSample) + break; + } + if (samplesLeft == 0 && seeks < 2) + { + // Failed to pick a valid sample so try again with seeking + playerMF.Seek = 1; + goto SEEK_START; + } + } +} + +void VideoBackendMF::Base_Dispose() +{ + PROFILE_CPU(); + + // Shutdown + MFShutdown(); +} + +#endif diff --git a/Source/Engine/Video/MF/VideoBackendMF.h b/Source/Engine/Video/MF/VideoBackendMF.h new file mode 100644 index 000000000..7b97008de --- /dev/null +++ b/Source/Engine/Video/MF/VideoBackendMF.h @@ -0,0 +1,30 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +#pragma once + +#if VIDEO_API_MF + +#include "../VideoBackend.h" + +/// +/// The Media Foundation video backend. 
/// <summary>
/// The Media Foundation video backend.
/// </summary>
class VideoBackendMF : public VideoBackend
{
public:
    // [VideoBackend]
    // Player_* methods operate on a single player instance; Base_* methods manage the backend itself.
    bool Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) override;
    void Player_Destroy(VideoBackendPlayer& player) override;
    void Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) override;
    void Player_Play(VideoBackendPlayer& player) override;
    void Player_Pause(VideoBackendPlayer& player) override;
    void Player_Stop(VideoBackendPlayer& player) override;
    void Player_Seek(VideoBackendPlayer& player, TimeSpan time) override;
    TimeSpan Player_GetTime(const VideoBackendPlayer& player) override;
    const Char* Base_Name() override;
    bool Base_Init() override;
    void Base_Update() override;
    void Base_Dispose() override;
};
/// <summary>
/// Video player instance created by backend.
/// </summary>
struct VideoBackendPlayer
{
    // Backend that owns this player (null when the player is not created)
    VideoBackend* Backend = nullptr;
    // Output texture with the current video frame (created by InitVideoFrame)
    GPUTexture* Frame = nullptr;
    // GPU buffer used to upload frame data before decoding it into Frame
    GPUBuffer* FrameUpload = nullptr;
    // Width/Height are the dimensions used for the output texture; FramesCount is the amount of frames uploaded to GPU
    int32 Width = 0, Height = 0, AvgBitRate = 0, FramesCount = 0;
    // Dimensions of the frame data in memory (used to validate the incoming frame size)
    int32 VideoFrameWidth = 0, VideoFrameHeight = 0;
    PixelFormat Format = PixelFormat::Unknown;
    float FrameRate = 0.0f;
    TimeSpan Duration = TimeSpan(0);
    // Time range of the frame passed in the last UpdateVideoFrame call
    TimeSpan VideoFrameTime = TimeSpan(0), VideoFrameDuration = TimeSpan(0);
    AudioDataInfo AudioInfo = {};
    // CPU copy of the latest frame data waiting for the upload
    BytesContainer VideoFrameMemory;
    // Pending GPU upload task (null if none is active)
    class GPUUploadVideoFrameTask* UploadVideoFrameTask = nullptr;
    // Raw storage for the backend-specific player state (accessed via GetBackendState)
    uintptr BackendState[8] = {};

    // Reinterprets the BackendState storage as the backend-specific state type (size is validated at compile time).
    template
    FORCE_INLINE T& GetBackendState()
    {
        static_assert(sizeof(T) <= sizeof(BackendState), "Increase state data to fit per-backend storage.");
        return *(T*)BackendState;
    }

    // Read-only variant of GetBackendState.
    template
    FORCE_INLINE const T& GetBackendState() const
    {
        static_assert(sizeof(T) <= sizeof(BackendState), "Increase state data to fit per-backend storage.");
        return *(const T*)BackendState;
    }

    // Creates the output frame texture object (if missing).
    void InitVideoFrame();
    // Copies the frame data and schedules its upload to the GPU.
    void UpdateVideoFrame(Span frame, TimeSpan time, TimeSpan duration);
    // Cancels the pending upload and releases CPU/GPU resources.
    void ReleaseResources();
};
/// <summary>
/// Video module.
/// </summary>
public class Video : EngineModule
{
    /// <summary>
    /// Configures the module sources and, on Microsoft platforms, the Media Foundation backend.
    /// </summary>
    public override void Setup(BuildOptions options)
    {
        base.Setup(options);

        // Only the files placed directly in the module folder are compiled by default
        options.SourcePaths.Clear();
        options.SourceFiles.AddRange(Directory.GetFiles(FolderPath, "*.*", SearchOption.TopDirectoryOnly));

        var target = options.Platform.Target;
        var useMediaFoundation = target == TargetPlatform.Windows
                                 || target == TargetPlatform.UWP
                                 || target == TargetPlatform.XboxOne
                                 || target == TargetPlatform.XboxScarlett;
        if (!useMediaFoundation)
            return;

        // Media Foundation
        options.SourcePaths.Add(Path.Combine(FolderPath, "MF"));
        options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_MF");
        var libraries = new[]
        {
            "mf.lib",
            "mfcore.lib",
            "mfplat.lib",
            "mfplay.lib",
            "mfreadwrite.lib",
            "mfuuid.lib",
        };
        foreach (var library in libraries)
            options.OutputFiles.Add(library);
    }

    /// <summary>
    /// Collects the module header files placed directly in the module folder.
    /// </summary>
    public override void GetFilesToDeploy(List files)
    {
        var headers = Directory.GetFiles(FolderPath, "*.h", SearchOption.TopDirectoryOnly);
        files.AddRange(headers);
    }
}
/// <summary>
/// Video frame upload task to the GPU.
/// </summary>
class GPUUploadVideoFrameTask : public GPUTask
{
private:
    // Player to upload the frame for (cleared in OnEnd)
    VideoBackendPlayer* _player;

public:
    GPUUploadVideoFrameTask(VideoBackendPlayer* player)
        : GPUTask(Type::UploadTexture, 0)
        , _player(player)
    {
    }

public:
    // [GPUTask]
    bool HasReference(Object* resource) const override
    {
        return _player && _player->Frame == resource;
    }

protected:
    // [GPUTask]
    // Uploads the latest frame memory of the player into its Frame texture (decodes it on GPU for video formats).
    Result run(GPUTasksContext* context) override
    {
        if (!_player || _player->VideoFrameMemory.IsInvalid())
            return Result::MissingResources;
        GPUTexture* frame = _player->Frame;
        if (!frame->IsAllocated())
            return Result::MissingResources;

        if (PixelFormatExtensions::IsVideo(_player->Format))
        {
            // Allocate compressed frame uploading texture
            if (!_player->FrameUpload)
                _player->FrameUpload = GPUDevice::Instance->CreateBuffer(TEXT("VideoFrameUpload"));
            auto desc = GPUBufferDescription::Buffer(_player->VideoFrameMemory.Length(), GPUBufferFlags::ShaderResource, PixelFormat::R32_UInt, nullptr, 4, GPUResourceUsage::Dynamic);
            // TODO: add support for Transient textures (single frame data upload)
            if (_player->FrameUpload->GetDescription() != desc)
            {
                if (_player->FrameUpload->Init(desc))
                    return Result::Failed;
            }

            // Upload compressed texture data
            context->GPU->UpdateBuffer(_player->FrameUpload, _player->VideoFrameMemory.Get(), _player->VideoFrameMemory.Length());

            // Decompress data into RGBA texture
            auto cb = GPUDevice::Instance->QuadShader->GetCB(0);
            QuadShaderData cbData;
            // The decode shader reads the frame size from Color.xy
            cbData.Color = Float4((float)_player->Width, (float)_player->Height, 0, 0);
            context->GPU->UpdateCB(cb, &cbData);
            context->GPU->BindCB(0, cb);
            context->GPU->SetViewportAndScissors((float)_player->Width, (float)_player->Height);
            context->GPU->SetRenderTarget(frame->View());
            context->GPU->BindSR(0, _player->FrameUpload->View());
            // Only YUY2 has a decoding shader
            ASSERT_LOW_LAYER(_player->Format == PixelFormat::YUY2);
            context->GPU->SetState(GPUDevice::Instance->GetDecodeYUY2PS());
            context->GPU->DrawFullscreenTriangle();
        }
        else
        {
            // Raw texture data upload
            uint32 rowPitch, slicePitch;
            frame->ComputePitch(0, rowPitch, slicePitch);
            context->GPU->UpdateTexture(frame, 0, 0, _player->VideoFrameMemory.Get(), rowPitch, slicePitch);
        }

        // Frame has been updated
        _player->FramesCount++;

        return Result::Ok;
    }

    void OnEnd() override
    {
        // Unlink
        if (_player && _player->UploadVideoFrameTask == this)
            _player->UploadVideoFrameTask = nullptr;
        _player = nullptr;

        GPUTask::OnEnd();
    }
};

// Engine service that owns the video backends and updates them.
class VideoService : public EngineService
{
public:
    VideoService()
        : EngineService(TEXT("Video"), -40)
    {
    }

    // Created backends (null entries are unused slots)
    VideoBackend* Backends[4] = {};

    // Initializes the backend and registers it in the given slot (the backend is stored even if the initialization fails).
    void InitBackend(int32 index, VideoBackend* backend)
    {
        LOG(Info, "Video initialization... (backend: {0})", backend->Base_Name());
        if (backend->Base_Init())
        {
            LOG(Warning, "Failed to initialize Video backend.");
        }
        Backends[index] = backend;
    }

    void Update() override;
    void Dispose() override;
};

VideoService VideoServiceInstance;

void VideoService::Update()
{
    PROFILE_CPU_NAMED("Video.Update");

    // Update backends
    for (VideoBackend*& backend : VideoServiceInstance.Backends)
    {
        if (backend)
            backend->Base_Update();
    }
}

void VideoService::Dispose()
{
    PROFILE_CPU_NAMED("Video.Dispose");

    // Dispose backends
    // NOTE(review): Base_Dispose is not called before deleting the backend here - confirm whether it is invoked elsewhere
    for (VideoBackend*& backend : VideoServiceInstance.Backends)
    {
        if (backend)
        {
            delete backend;
            backend = nullptr;
        }
    }
}

// Creates the player using the first backend that accepts the given info. Returns true on failure and false on success.
bool Video::CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player)
{
    // Pick the first backend to support the player info
    // NOTE(review): index is never incremented so every TRY_USE_BACKEND use would share slot 0 - fine while there is a single backend
    int32 index = 0;
    VideoBackend* backend;
#define TRY_USE_BACKEND(type) \
    backend = VideoServiceInstance.Backends[index]; \
    if (!backend) \
        VideoServiceInstance.InitBackend(index, backend = new type()); \
    if (!backend->Player_Create(info, player)) \
        return false;
#if VIDEO_API_MF
    TRY_USE_BACKEND(VideoBackendMF);
#endif
#undef TRY_USE_BACKEND

    LOG(Error, "Failed to setup Video playback backend for '{}'", info.Url);
    return true;
}

// Creates the output frame texture object if missing (does nothing without a GPU device).
void VideoBackendPlayer::InitVideoFrame()
{
    if (!GPUDevice::Instance)
        return;
    if (!Frame)
        Frame = GPUDevice::Instance->CreateTexture(TEXT("VideoFrame"));
}

// Stores the new frame data (with its time range) and starts the GPU upload task if none is pending.
void VideoBackendPlayer::UpdateVideoFrame(Span frame, TimeSpan time, TimeSpan duration)
{
    PROFILE_CPU();
    VideoFrameTime = time;
    VideoFrameDuration = duration;
    if (!GPUDevice::Instance || GPUDevice::Instance->GetRendererType() == RendererType::Null)
        return;

    // Ensure that sampled frame data matches the target texture size
    uint32 rowPitch, slicePitch;
    RenderTools::ComputePitch(Format, VideoFrameWidth, VideoFrameHeight, rowPitch, slicePitch);
    if (slicePitch != frame.Length())
    {
        LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", frame.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format));
        return;
    }

    // Copy frame into buffer for video frames uploading
    if (VideoFrameMemory.Length() < (int32)slicePitch)
    {
        VideoFrameMemory.Allocate(slicePitch);
        if (VideoFrameMemory.IsInvalid())
        {
            OUT_OF_MEMORY;
            return;
        }
    }
    Platform::MemoryCopy(VideoFrameMemory.Get(), frame.Get(), slicePitch);

    // Update output frame texture
    InitVideoFrame();
    auto desc = GPUTextureDescription::New2D(Width, Height, PixelFormat::R8G8B8A8_UNorm, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget);
    if (Frame->GetDescription() != desc)
    {
        if (Frame->Init(desc))
        {
            LOG(Error, "Failed to allocate video frame texture");
            return;
        }
    }

    // Start texture upload task (if not already - only one is needed to upload the latest frame)
    if (!UploadVideoFrameTask)
    {
        UploadVideoFrameTask = New(this);
        UploadVideoFrameTask->Start();
    }
}

// Cancels the pending upload task and frees the frame memory and GPU resources.
void VideoBackendPlayer::ReleaseResources()
{
    if (UploadVideoFrameTask)
        UploadVideoFrameTask->Cancel();
    VideoFrameMemory.Release();
    SAFE_DELETE_GPU_RESOURCE(Frame);
    SAFE_DELETE_GPU_RESOURCE(FrameUpload);
}

// Declared in Source/Engine/Video/Video.h
/// <summary>
/// The video service used for video media playback.
/// </summary>
class Video
{
public:
    // Creates the player with the first backend that supports the info. Returns true on failure.
    static bool CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player);
};

// Declared in Source/Engine/Video/VideoBackend.h
/// <summary>
/// Description for new video player creation by backend.
/// </summary>
struct VideoBackendPlayerInfo
{
    // Media source location
    StringView Url;
    // True if the playback should loop
    bool Loop;
};
/// <summary>
/// The helper class that handles active Video backend operations.
/// </summary>
class VideoBackend
{
public:
    virtual ~VideoBackend()
    {
    }

    // Player
    // Player_Create returns true on failure (the Media Foundation backend and Video::CreatePlayerBackend follow this convention).
    virtual bool Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) = 0;
    virtual void Player_Destroy(VideoBackendPlayer& player) = 0;
    virtual void Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) = 0;
    virtual void Player_Play(VideoBackendPlayer& player) = 0;
    virtual void Player_Pause(VideoBackendPlayer& player) = 0;
    virtual void Player_Stop(VideoBackendPlayer& player) = 0;
    virtual void Player_Seek(VideoBackendPlayer& player, TimeSpan time) = 0;
    virtual TimeSpan Player_GetTime(const VideoBackendPlayer& player) = 0;

    // Base
    // Base_Init returns true on failure (a warning is logged by the video service in that case).
    virtual const Char* Base_Name() = 0;
    virtual bool Base_Init() = 0;
    virtual void Base_Update() = 0;
    virtual void Base_Dispose() = 0;
};
#include "VideoPlayer.h"
#include "Video.h"
#include "VideoBackend.h"
#include "Engine/Core/Log.h"
#include "Engine/Core/Math/Vector2.h"
#if USE_EDITOR
#include "Engine/Engine/Time.h"
#include "Engine/Level/Scene/SceneRendering.h"
#endif

VideoPlayer::VideoPlayer(const SpawnParams& params)
    : Actor(params)
{
}

VideoPlayer::~VideoPlayer()
{
    // Ensure to free player memory
    Stop();
    if (_player.Backend)
        _player.Backend->Player_Destroy(_player);
}

void VideoPlayer::SetIsLooping(bool value)
{
    if (_loop == value)
        return;
    _loop = value;
    // Pass the new looping mode to the backend player (if created)
    UpdateInfo();
}

void VideoPlayer::Play()
{
    auto state = _state;
    if (state == States::Playing)
        return;

    // The backend player is created lazily on the first play
    if (!_player.Backend)
    {
        if (Url.IsEmpty())
        {
            LOG(Warning, "Cannot play Video source without an url ({0})", GetNamePath());
            return;
        }

        // Create video player
        VideoBackendPlayerInfo info;
        GetInfo(info);
        if (Video::CreatePlayerBackend(info, _player))
            return;

        // Pre-allocate output video texture
        _player.InitVideoFrame();
    }

    _player.Backend->Player_Play(_player);
    _state = States::Playing;
}

void VideoPlayer::Pause()
{
    if (_state != States::Playing)
        return;
    _state = States::Paused;
    if (_player.Backend)
        _player.Backend->Player_Pause(_player);
}

void VideoPlayer::Stop()
{
    if (_state == States::Stopped)
        return;
    _state = States::Stopped;
    if (_player.Backend)
        _player.Backend->Player_Stop(_player);
}

float VideoPlayer::GetTime() const
{
    // Stopped player (or one without a backend) always reports zero
    if (_state == States::Stopped || _player.Backend == nullptr)
        return 0.0f;
    return _player.Backend->Player_GetTime(_player).GetTotalSeconds();
}

void VideoPlayer::SetTime(float time)
{
    // Seeking is ignored when stopped or when the backend player is missing
    if (_state == States::Stopped || _player.Backend == nullptr)
        return;
    TimeSpan timeSpan = TimeSpan::FromSeconds(time);
    // Keep the time within the media duration
    timeSpan.Ticks = Math::Clamp(timeSpan.Ticks, 0, _player.Duration.Ticks);
    _player.Backend->Player_Seek(_player, timeSpan);
}

float VideoPlayer::GetDuration() const
{
    return _player.Duration.GetTotalSeconds();
}

float VideoPlayer::GetFrameRate() const
{
    return _player.FrameRate;
}

int32 VideoPlayer::GetFramesCount() const
{
    return _player.FramesCount;
}

bool VideoPlayer::IntersectsItself(const Ray& ray, Real& distance, Vector3& normal)
{
    // The player reports no ray hit of its own
    return false;
}

Int2 VideoPlayer::GetSize() const
{
    return Int2(_player.Width, _player.Height);
}

GPUTexture* VideoPlayer::GetFrame() const
{
    return _player.Frame;
}

// Fills the backend player description from the actor properties.
void VideoPlayer::GetInfo(VideoBackendPlayerInfo& info) const
{
    info.Url = Url;
    info.Loop = _loop;
}

// Sends the current player description to the backend (if the player is created).
void VideoPlayer::UpdateInfo()
{
    if (_player.Backend)
    {
        VideoBackendPlayerInfo info;
        GetInfo(info);
        _player.Backend->Player_UpdateInfo(_player, info);
    }
}

void VideoPlayer::OnEnable()
{
#if USE_EDITOR
    GetSceneRendering()->AddViewportIcon(this);
#endif

    Actor::OnEnable();
}

void VideoPlayer::OnDisable()
{
    // Stop playback and destroy the backend player when the actor gets disabled
    Stop();
    if (_player.Backend)
        _player.Backend->Player_Destroy(_player);
#if USE_EDITOR
    GetSceneRendering()->RemoveViewportIcon(this);
#endif

    Actor::OnDisable();
}

void VideoPlayer::OnTransformChanged()
{
    Actor::OnTransformChanged();

    // Bounds are a single point at the actor location
    _box = BoundingBox(_transform.Translation);
    _sphere = BoundingSphere(_transform.Translation, 0.0f);
}

void VideoPlayer::BeginPlay(SceneBeginData* data)
{
    Actor::BeginPlay(data);

    // Play on start
    if (IsActiveInHierarchy() && PlayOnStart)
    {
#if USE_EDITOR
        if (Time::GetGamePaused())
            return;
#endif
        Play();
        if (StartTime > 0)
            SetTime(StartTime);
    }
}
/// <summary>
/// Video playback utility. Video content can be presented in UI (via VideoBrush), used in materials (via texture parameter bind) or used manually in shaders.
/// </summary>
API_CLASS(Attributes="ActorContextMenu(\"New/Visuals/Video Player\"), ActorToolbox(\"Visuals\")")
class FLAXENGINE_API VideoPlayer : public Actor
{
    DECLARE_SCENE_OBJECT(VideoPlayer);
    API_AUTO_SERIALIZATION();

public:
    /// <summary>
    /// Valid states in which VideoPlayer can be in.
    /// </summary>
    API_ENUM() enum class States
    {
        /// <summary>
        /// The video is currently stopped (play will resume from start).
        /// </summary>
        Stopped = 0,

        /// <summary>
        /// The video is currently playing.
        /// </summary>
        Playing = 1,

        /// <summary>
        /// The video is currently paused (play will resume from paused point).
        /// </summary>
        Paused = 2,
    };

private:
    // Backend player instance (its Backend is null until the first Play)
    VideoBackendPlayer _player;
    States _state = States::Stopped;
    bool _loop = false;

public:
    ~VideoPlayer();

    /// <summary>
    /// The video clip Url path used as a source of the media. Can be local file (absolute or relative path), or streamed resource ('http://').
    /// </summary>
    API_FIELD(Attributes="EditorOrder(10), DefaultValue(\"\"), EditorDisplay(\"Video Player\")")
    String Url;

    /// <summary>
    /// Determines whether the video clip should loop when it finishes playing.
    /// </summary>
    API_PROPERTY(Attributes="EditorOrder(20), DefaultValue(false), EditorDisplay(\"Video Player\")")
    FORCE_INLINE bool GetIsLooping() const
    {
        return _loop;
    }

    /// <summary>
    /// Determines whether the video clip should loop when it finishes playing.
    /// </summary>
    API_PROPERTY() void SetIsLooping(bool value);

    /// <summary>
    /// Determines whether the video clip should auto play on level start.
    /// </summary>
    API_FIELD(Attributes="EditorOrder(30), DefaultValue(false), EditorDisplay(\"Video Player\", \"Play On Start\")")
    bool PlayOnStart = false;

    /// <summary>
    /// Determines the time (in seconds) at which the video clip starts playing if Play On Start is enabled.
    /// </summary>
    API_FIELD(Attributes = "EditorOrder(35), DefaultValue(0.0f), Limit(0, float.MaxValue, 0.01f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(PlayOnStart))")
    float StartTime = 0.0f;

public:
    /// <summary>
    /// Starts playing the currently assigned video Url.
    /// </summary>
    API_FUNCTION() void Play();

    /// <summary>
    /// Pauses the video playback.
    /// </summary>
    API_FUNCTION() void Pause();

    /// <summary>
    /// Stops video playback, rewinding it to the start.
    /// </summary>
    API_FUNCTION() void Stop();

    /// <summary>
    /// Gets the current state of the video playback (playing/paused/stopped).
    /// </summary>
    API_PROPERTY() FORCE_INLINE VideoPlayer::States GetState() const
    {
        return _state;
    }

    /// <summary>
    /// Gets the current time of playback. If playback has not yet started, it specifies the time at which playback will start at. The time is in seconds, in range [0, Duration].
    /// </summary>
    API_PROPERTY(Attributes="HideInEditor, NoSerialize") float GetTime() const;

    /// <summary>
    /// Sets the current time of playback. If playback has not yet started, it specifies the time at which playback will start at. The time is in seconds, in range [0, Duration].
    /// </summary>
    /// <param name="time">The time.</param>
    API_PROPERTY() void SetTime(float time);

    /// <summary>
    /// Gets the media duration of playback (in seconds).
    /// </summary>
    API_PROPERTY() float GetDuration() const;

    /// <summary>
    /// Gets the media frame rate of playback (amount of frames to be played per second).
    /// </summary>
    API_PROPERTY() float GetFrameRate() const;

    /// <summary>
    /// Gets the amount of video frames decoded and sent to GPU during playback. Can be used to detect if video has started playback with any visible changes (for video frame texture contents).
    /// </summary>
    API_PROPERTY() int32 GetFramesCount() const;

    /// <summary>
    /// Gets the video frame dimensions (in pixels).
    /// </summary>
    API_PROPERTY() Int2 GetSize() const;

    /// <summary>
    /// Gets the video frame texture (GPU resource). Created on the playback start. Can be bound to materials and shaders to display the video image.
    /// </summary>
    API_PROPERTY() GPUTexture* GetFrame() const;

private:
    // Fills the backend player description from the actor properties
    void GetInfo(VideoBackendPlayerInfo& info) const;
    // Sends the current description to the backend player (if created)
    void UpdateInfo();

public:
    // [Actor]
#if USE_EDITOR
    BoundingBox GetEditorBox() const override
    {
        const Vector3 size(50);
        return BoundingBox(_transform.Translation - size, _transform.Translation + size);
    }
#endif
    bool IntersectsItself(const Ray& ray, Real& distance, Vector3& normal) override;

protected:
    // [Actor]
    void OnEnable() override;
    void OnDisable() override;
    void OnTransformChanged() override;
    void BeginPlay(SceneBeginData* data) override;
};

// Added to Source/Shaders/Quad.shader
#ifdef _PS_DecodeYUY2

// Raw memory with texture of format YUY2 and size passed in Color.xy
Buffer SourceYUY2 : register(t0);

// Pixel Shader for decoding YUY2 video frame data into RGB color
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_DecodeYUY2(Quad_VS2PS input) : SV_Target
{
	// Read YUY2 pixel (each 32-bit element of the buffer packs two pixels)
	uint p = (uint)input.Position.y * (uint)Color.x + (uint)input.Position.x;
	uint data = SourceYUY2[p / 2];

	// Unpack YUY components (pixel pair shares u and v, each pixel has own luma)
	uint v = (data & 0xff000000) >> 24;
	uint y1 = (data & 0xff0000) >> 16;
	uint u = (data & 0xff00) >> 8;
	uint y0 = data & 0x000000FF;
	uint y = p % 2 == 0 ? y0: y1;

	// Convert yuv to rgb
	float r = (y + 1.402 * (v - 128.0));
	float g = (y - 0.344 * (u - 128.0) - 0.714 * (v - 128.0));
	float b = (y + 1.772 * (u - 128.0));
	// Normalize from byte range (alpha becomes 1)
	return float4(r, g, b, 256.0f) / 256.0f;
}

#endif
    /// <summary>
    /// Custom editor for VideoPlayer.
    /// </summary>
    [CustomEditor(typeof(VideoPlayer)), DefaultEditor]
    public class VideoPlayerEditor : ActorEditor
    {
        // Label with the playback information (null when not created, eg. outside of the play mode)
        private Label _infoLabel;

        /// <summary>
        /// Builds the editor layout and adds the playback group (info label and Play/Pause/Stop buttons) in play mode.
        /// </summary>
        public override void Initialize(LayoutElementsContainer layout)
        {
            base.Initialize(layout);

            // Show playback options during simulation
            if (Editor.IsPlayMode)
            {
                var playbackGroup = layout.Group("Playback");
                playbackGroup.Panel.Open();

                _infoLabel = playbackGroup.Label(string.Empty).Label;
                _infoLabel.AutoHeight = true;

                // Single row with three buttons
                var grid = playbackGroup.CustomContainer();
                var gridControl = grid.CustomControl;
                gridControl.ClipChildren = false;
                gridControl.Height = Button.DefaultHeight;
                gridControl.SlotsHorizontally = 3;
                gridControl.SlotsVertically = 1;
                grid.Button("Play").Button.Clicked += () => Foreach(x => x.Play());
                grid.Button("Pause").Button.Clicked += () => Foreach(x => x.Pause());
                grid.Button("Stop").Button.Clicked += () => Foreach(x => x.Stop());
            }
        }

        /// <summary>
        /// Updates the info label text with time, resolution and frame rate of the edited players.
        /// </summary>
        public override void Refresh()
        {
            base.Refresh();

            if (_infoLabel != null)
            {
                var text = string.Empty;
                foreach (var value in Values)
                {
                    if (value is VideoPlayer player)
                        text += $"Time: {player.Time:##0.0}s / {player.Duration:##0.0}s\nResolution: {player.Size.X}x{player.Size.Y}, Frame Rate: {player.FrameRate}";
                }
                _infoLabel.Text = text;
            }
        }

        // Invokes the action on every edited value that is a VideoPlayer
        private void Foreach(Action func)
        {
            foreach (var value in Values)
            {
                if (value is VideoPlayer player)
                    func(player);
            }
        }
    }
+23,6 @@ namespace FlaxEditor.Content public override string TypeDescription => "Video"; /// - public override SpriteHandle DefaultThumbnail => Editor.Instance.Icons.ColorWheel128; + public override SpriteHandle DefaultThumbnail => Editor.Instance.Icons.Document128; } } From 754ed56119de2847deb4210ed8f1fd8c63d65da3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 3 May 2024 12:26:03 +0200 Subject: [PATCH 047/292] Add `FilePathEditor` custom editor for path-based editing asset/url refs with a file picker --- Source/Editor/Content/AssetPickerValidator.cs | 30 +-- .../CustomEditors/Editors/AssetRefEditor.cs | 215 +++++++++++++++--- Source/Editor/GUI/AssetPicker.cs | 2 - Source/Editor/Utilities/Utils.cs | 22 ++ Source/Engine/Video/VideoPlayer.h | 2 +- 5 files changed, 212 insertions(+), 59 deletions(-) diff --git a/Source/Editor/Content/AssetPickerValidator.cs b/Source/Editor/Content/AssetPickerValidator.cs index f43ab6a29..109ea2f79 100644 --- a/Source/Editor/Content/AssetPickerValidator.cs +++ b/Source/Editor/Content/AssetPickerValidator.cs @@ -1,3 +1,5 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + using System; using System.IO; using FlaxEditor.Scripting; @@ -94,30 +96,8 @@ public class AssetPickerValidator : IContentItemOwner /// public string SelectedPath { - get - { - string path = _selectedItem?.Path ?? _selected?.Path; - if (path != null) - { - // Convert into path relative to the project (cross-platform) - var projectFolder = Globals.ProjectFolder; - if (path.StartsWith(projectFolder)) - path = path.Substring(projectFolder.Length + 1); - } - return path; - } - set - { - if (string.IsNullOrEmpty(value)) - { - SelectedItem = null; - } - else - { - var path = StringUtils.IsRelative(value) ? Path.Combine(Globals.ProjectFolder, value) : value; - SelectedItem = Editor.Instance.ContentDatabase.Find(path); - } - } + get => Utilities.Utils.ToPathProject(_selectedItem?.Path ?? 
_selected?.Path); + set => SelectedItem = string.IsNullOrEmpty(value) ? null : Editor.Instance.ContentDatabase.Find(Utilities.Utils.ToPathAbsolute(value)); } /// @@ -242,7 +222,7 @@ public class AssetPickerValidator : IContentItemOwner /// /// Initializes a new instance of the class. /// - /// The assets types that this picker accepts. + /// The asset types that this picker accepts. public AssetPickerValidator(ScriptType assetType) { _type = assetType; diff --git a/Source/Editor/CustomEditors/Editors/AssetRefEditor.cs b/Source/Editor/CustomEditors/Editors/AssetRefEditor.cs index 26e1b9842..76f0e12cd 100644 --- a/Source/Editor/CustomEditors/Editors/AssetRefEditor.cs +++ b/Source/Editor/CustomEditors/Editors/AssetRefEditor.cs @@ -6,6 +6,7 @@ using FlaxEditor.Content; using FlaxEditor.GUI; using FlaxEditor.Scripting; using FlaxEngine; +using FlaxEngine.GUI; using FlaxEngine.Utilities; namespace FlaxEditor.CustomEditors.Editors @@ -50,7 +51,6 @@ namespace FlaxEditor.CustomEditors.Editors if (HasDifferentTypes) return; Picker = layout.Custom().CustomControl; - var value = Values[0]; _valueType = Values.Type.Type != typeof(object) || value == null ? 
Values.Type : TypeUtils.GetObjectType(value); var assetType = _valueType; @@ -58,37 +58,8 @@ namespace FlaxEditor.CustomEditors.Editors assetType = new ScriptType(typeof(Asset)); else if (_valueType.Type != null && _valueType.Type.Name == typeof(JsonAssetReference<>).Name) assetType = new ScriptType(_valueType.Type.GenericTypeArguments[0]); - - float height = 48; - var attributes = Values.GetAttributes(); - var assetReference = (AssetReferenceAttribute)attributes?.FirstOrDefault(x => x is AssetReferenceAttribute); - if (assetReference != null) - { - if (assetReference.UseSmallPicker) - height = 32; - - if (string.IsNullOrEmpty(assetReference.TypeName)) - { - } - else if (assetReference.TypeName.Length > 1 && assetReference.TypeName[0] == '.') - { - // Generic file picker - assetType = ScriptType.Null; - Picker.Validator.FileExtension = assetReference.TypeName; - } - else - { - var customType = TypeUtils.GetType(assetReference.TypeName); - if (customType != ScriptType.Null) - assetType = customType; - else if (!Content.Settings.GameSettings.OptionalPlatformSettings.Contains(assetReference.TypeName)) - Debug.LogWarning(string.Format("Unknown asset type '{0}' to use for asset picker filter.", assetReference.TypeName)); - else - assetType = ScriptType.Void; - } - } - Picker.Validator.AssetType = assetType; + ApplyAssetReferenceAttribute(Values, out var height, Picker.Validator); Picker.Height = height; Picker.SelectedItemChanged += OnSelectedItemChanged; } @@ -115,6 +86,37 @@ namespace FlaxEditor.CustomEditors.Editors SetValue(Picker.Validator.SelectedAsset); } + internal static void ApplyAssetReferenceAttribute(ValueContainer values, out float height, AssetPickerValidator validator) + { + height = 48; + var attributes = values.GetAttributes(); + var assetReference = (AssetReferenceAttribute)attributes?.FirstOrDefault(x => x is AssetReferenceAttribute); + if (assetReference != null) + { + if (assetReference.UseSmallPicker) + height = 32; + if 
(string.IsNullOrEmpty(assetReference.TypeName)) + { + } + else if (assetReference.TypeName.Length > 1 && assetReference.TypeName[0] == '.') + { + // Generic file picker + validator.AssetType = ScriptType.Null; + validator.FileExtension = assetReference.TypeName; + } + else + { + var customType = TypeUtils.GetType(assetReference.TypeName); + if (customType != ScriptType.Null) + validator.AssetType = customType; + else if (!Content.Settings.GameSettings.OptionalPlatformSettings.Contains(assetReference.TypeName)) + Debug.LogWarning(string.Format("Unknown asset type '{0}' to use for asset picker filter.", assetReference.TypeName)); + else + validator.AssetType = ScriptType.Void; + } + } + } + /// public override void Refresh() { @@ -140,4 +142,155 @@ namespace FlaxEditor.CustomEditors.Editors } } } + + /// + /// Default implementation of the inspector used to edit reference to the files via path (absolute or relative to the project). + /// + /// Supports editing reference to the asset via path using various containers: or or . + public class FilePathEditor : CustomEditor + { + private sealed class TextBoxWithPicker : TextBox + { + private const float DropdownIconMargin = 3.0f; + private const float DropdownIconSize = 12.0f; + private Rectangle DropdownRect => new Rectangle(Width - DropdownIconSize - DropdownIconMargin, DropdownIconMargin, DropdownIconSize, DropdownIconSize); + + public Action ShowPicker; + + public override void Draw() + { + base.Draw(); + + var style = FlaxEngine.GUI.Style.Current; + var dropdownRect = DropdownRect; + Render2D.DrawSprite(style.ArrowDown, dropdownRect, Enabled ? (DropdownRect.Contains(PointFromWindow(RootWindow.MousePosition)) ? 
style.BorderSelected : style.Foreground) : style.ForegroundDisabled); + } + + public override bool OnMouseDown(Float2 location, MouseButton button) + { + if (DropdownRect.Contains(ref location)) + { + Focus(); + ShowPicker(); + return true; + } + + return base.OnMouseDown(location, button); + } + + public override void OnMouseMove(Float2 location) + { + base.OnMouseMove(location); + + if (DropdownRect.Contains(ref location)) + Cursor = CursorType.Default; + else + Cursor = CursorType.IBeam; + } + + protected override Rectangle TextRectangle + { + get + { + var result = base.TextRectangle; + result.Size.X -= DropdownIconSize + DropdownIconMargin * 2; + return result; + } + } + + protected override Rectangle TextClipRectangle + { + get + { + var result = base.TextClipRectangle; + result.Size.X -= DropdownIconSize + DropdownIconMargin * 2; + return result; + } + } + } + + private TextBoxWithPicker _textBox; + private AssetPickerValidator _validator; + private bool _isRefreshing; + + /// + public override DisplayStyle Style => DisplayStyle.Inline; + + /// + public override void Initialize(LayoutElementsContainer layout) + { + if (HasDifferentTypes) + return; + _textBox = layout.Custom().CustomControl; + _textBox.ShowPicker = OnShowPicker; + _textBox.EditEnd += OnEditEnd; + _validator = new AssetPickerValidator(ScriptType.Null); + AssetRefEditor.ApplyAssetReferenceAttribute(Values, out _, _validator); + } + + private void OnShowPicker() + { + if (_validator.AssetType != ScriptType.Null) + AssetSearchPopup.Show(_textBox, _textBox.BottomLeft, _validator.IsValid, SetPickerPath); + else + ContentSearchPopup.Show(_textBox, _textBox.BottomLeft, _validator.IsValid, SetPickerPath); + } + + private void SetPickerPath(ContentItem item) + { + var path = Utilities.Utils.ToPathProject(item.Path); + SetPath(path); + + _isRefreshing = true; + _textBox.Defocus(); + _textBox.Text = path; + _isRefreshing = false; + + _textBox.RootWindow.Focus(); + _textBox.Focus(); + } + + private void 
OnEditEnd() + { + SetPath(_textBox.Text); + } + + private string GetPath() + { + var value = Values[0]; + if (value is AssetItem assetItem) + return Utilities.Utils.ToPathProject(assetItem.Path); + if (value is Asset asset) + return Utilities.Utils.ToPathProject(asset.Path); + if (value is string str) + return str; + return null; + } + + private void SetPath(string path) + { + if (_isRefreshing) + return; + var value = Values[0]; + if (value is AssetItem) + SetValue(Editor.Instance.ContentDatabase.Find(Utilities.Utils.ToPathAbsolute(path))); + else if (value is Asset) + SetValue(FlaxEngine.Content.LoadAsync(path)); + else if (value is string) + SetValue(path); + } + + /// + public override void Refresh() + { + base.Refresh(); + + if (!HasDifferentValues) + { + _isRefreshing = true; + _textBox.Text = GetPath(); + _isRefreshing = false; + } + } + } } diff --git a/Source/Editor/GUI/AssetPicker.cs b/Source/Editor/GUI/AssetPicker.cs index 5c8f02a06..b630836ab 100644 --- a/Source/Editor/GUI/AssetPicker.cs +++ b/Source/Editor/GUI/AssetPicker.cs @@ -5,10 +5,8 @@ using System.IO; using FlaxEditor.Content; using FlaxEditor.GUI.Drag; using FlaxEditor.Scripting; -using FlaxEditor.Utilities; using FlaxEngine; using FlaxEngine.GUI; -using FlaxEngine.Utilities; namespace FlaxEditor.GUI { diff --git a/Source/Editor/Utilities/Utils.cs b/Source/Editor/Utilities/Utils.cs index fbbbd71bf..b14d58eb6 100644 --- a/Source/Editor/Utilities/Utils.cs +++ b/Source/Editor/Utilities/Utils.cs @@ -1471,5 +1471,27 @@ namespace FlaxEditor.Utilities inputActions.Add(options => options.GenerateScriptsProject, () => Editor.Instance.ProgressReporting.GenerateScriptsProjectFiles.RunAsync()); inputActions.Add(options => options.RecompileScripts, ScriptsBuilder.Compile); } + + internal static string ToPathProject(string path) + { + if (path != null) + { + // Convert into path relative to the project (cross-platform) + var projectFolder = Globals.ProjectFolder; + if (path.StartsWith(projectFolder)) + path 
= path.Substring(projectFolder.Length + 1); + } + return path; + } + + internal static string ToPathAbsolute(string path) + { + if (path != null) + { + // Convert into global path to if relative to the project + path = StringUtils.IsRelative(path) ? Path.Combine(Globals.ProjectFolder, path) : path; + } + return path; + } } } diff --git a/Source/Engine/Video/VideoPlayer.h b/Source/Engine/Video/VideoPlayer.h index 34507626b..dd5fccd28 100644 --- a/Source/Engine/Video/VideoPlayer.h +++ b/Source/Engine/Video/VideoPlayer.h @@ -49,7 +49,7 @@ public: /// /// The video clip Url path used as a source of the media. Can be local file (absolute or relative path), or streamed resource ('http://'). /// - API_FIELD(Attributes="EditorOrder(10), DefaultValue(\"\"), EditorDisplay(\"Video Player\")") + API_FIELD(Attributes="EditorOrder(10), DefaultValue(\"\"), EditorDisplay(\"Video Player\"), AssetReference(\".mp4\"), CustomEditorAlias(\"FlaxEditor.CustomEditors.Editors.FilePathEditor\")") String Url; /// From 388a0f4196170e6b6521d884f68263afc07ba3c8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 3 May 2024 14:32:23 +0200 Subject: [PATCH 048/292] Add initial audio buffer support in video player --- Source/Engine/Video/MF/VideoBackendMF.cpp | 308 ++++++++++++---------- Source/Engine/Video/Types.h | 38 ++- Source/Engine/Video/Video.cpp | 34 ++- 3 files changed, 230 insertions(+), 150 deletions(-) diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 8ccefb403..84715db45 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -37,7 +37,7 @@ namespace bool result = true; // Find the native format of the stream - HRESULT hr = playerMF.SourceReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType); + HRESULT hr = playerMF.SourceReader->GetNativeMediaType(streamIndex, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType); if 
(FAILED(hr)) { VIDEO_API_MF_ERROR(GetNativeMediaType, hr); @@ -81,7 +81,7 @@ namespace player.Width = videoArea.Area.cx; player.Height = videoArea.Area.cy; } - player.AvgBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0); + player.AvgVideoBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0); uint64_t fpsValue; hr = mediaType->GetUINT64(MF_MT_FRAME_RATE, &fpsValue); if (SUCCEEDED(hr)) @@ -132,6 +132,26 @@ namespace player.AudioInfo.SampleRate = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_SAMPLES_PER_SECOND, 0); player.AudioInfo.NumChannels = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_NUM_CHANNELS, 0); player.AudioInfo.BitDepth = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_BITS_PER_SAMPLE, 16); + if (subtype != MFAudioFormat_PCM) + { + // Reconfigure decoder to output audio data in PCM format + IMFMediaType* customType = nullptr; + hr = MFCreateMediaType(&customType); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(MFCreateMediaType, hr); + goto END; + } + customType->SetGUID(MF_MT_MAJOR_TYPE, majorType); + customType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM); + hr = playerMF.SourceReader->SetCurrentMediaType(streamIndex, nullptr, customType); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(SetCurrentMediaType, hr); + goto END; + } + customType->Release(); + } } result = false; @@ -139,6 +159,156 @@ namespace SAFE_RELEASE(mediaType); return result; } + + bool ReadStream(VideoBackendPlayer& player, VideoPlayerMF& playerMF, DWORD streamIndex, TimeSpan dt) + { + const bool isVideo = streamIndex == MF_SOURCE_READER_FIRST_VIDEO_STREAM; + const bool isAudio = streamIndex == MF_SOURCE_READER_FIRST_AUDIO_STREAM; + const TimeSpan lastFrameTime = isVideo ? player.VideoFrameTime : player.AudioBufferTime; + const TimeSpan lastFrameDuration = isVideo ? player.VideoFrameDuration : player.AudioBufferDuration; + + // Check if the current frame is valid (eg. 
when playing 24fps video at 60fps) + if (lastFrameDuration.Ticks > 0 && + Math::IsInRange(playerMF.Time, lastFrameTime, lastFrameTime + lastFrameDuration)) + { + return false; + } + + // Read samples until frame is matching the current time + int32 samplesLeft = 500; + HRESULT hr; + for (; samplesLeft > 0; samplesLeft--) + { + // Read sample + DWORD flags = 0; + LONGLONG samplePos = 0, sampleDuration = 0; + IMFSample* sample = nullptr; + { + PROFILE_CPU_NAMED("ReadSample"); + hr = playerMF.SourceReader->ReadSample(streamIndex, 0, &streamIndex, &flags, &samplePos, &sample); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(ReadSample, hr); + break; + } + } + TimeSpan frameTime((int64)samplePos); + TimeSpan franeDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt; + if (sample && sample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0) + { + franeDuration.Ticks = sampleDuration; + } + //const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / franeDuration.Ticks; + const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration); + + // Process sample + if (sample && isGoodSample) + { + PROFILE_CPU_NAMED("ProcessSample"); + + // Lock sample buffer memory (try to use 2D buffer for more direct memory access) + IMFMediaBuffer* buffer = nullptr; + IMF2DBuffer* buffer2D = nullptr; + BYTE* bufferData = nullptr; + LONG bufferStride = 0; + if (isVideo && sample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK) + { + LONG bufferPitch = 0; + hr = buffer2D->Lock2D(&bufferData, &bufferPitch); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetCurrentLength, hr); + goto PROCESS_SAMPLE_END; + } + if (bufferPitch < 0) + bufferPitch = -bufferPitch; // Flipped image + bufferStride = bufferPitch * player.VideoFrameHeight; + } + else + { + if (buffer) + { + buffer->Release(); + buffer = nullptr; + } + DWORD bufferLength; + hr = 
sample->ConvertToContiguousBuffer(&buffer); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr); + goto PROCESS_SAMPLE_END; + } + hr = buffer->GetCurrentLength(&bufferLength); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(GetCurrentLength, hr); + goto PROCESS_SAMPLE_END; + } + DWORD bufferMaxLen = 0, bufferCurrentLength = 0; + hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(Lock, hr); + goto PROCESS_SAMPLE_END; + } + bufferStride = bufferCurrentLength; + } + + Span bufferSpan(bufferData, bufferStride); + if (isVideo) + { + // Send pixels to the texture + player.UpdateVideoFrame(bufferSpan, frameTime, franeDuration); + } + else if (isAudio) + { + // Send PCM data + player.UpdateAudioBuffer(bufferSpan, frameTime, franeDuration); + } + + // Unlock sample buffer memory + if (buffer2D) + { + hr = buffer2D->Unlock2D(); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(Unlock2D, hr); + } + } + else + { + hr = buffer->Unlock(); + if (FAILED(hr)) + { + VIDEO_API_MF_ERROR(Unlock, hr); + } + } + + PROCESS_SAMPLE_END: + buffer->Release(); + } + if (sample) + sample->Release(); + + if (flags & MF_SOURCE_READERF_ENDOFSTREAM) + { + // Media ended + break; + } + if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED) + { + // Format/metadata might have changed so update the stream + Configure(player, playerMF, streamIndex); + } + + // End loop if got good sample or need to seek back + if (isGoodSample) + break; + } + + // True if run out of samples and failed to get frame for the current time + return samplesLeft == 0; + } } bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) @@ -278,7 +448,6 @@ void VideoBackendMF::Base_Update() { PROFILE_CPU(); // TODO: use async Task Graph to update videos - HRESULT hr; for (auto* e : Players) { auto& player = *e; @@ -341,140 +510,15 @@ void VideoBackendMF::Base_Update() // 
After seeking, the application should call ReadSample and advance to the desired position. } - // Check if the current frame is valid (eg. when playing 24fps video at 60fps) - if (player.VideoFrameDuration.Ticks > 0 && - Math::IsInRange(playerMF.Time, player.VideoFrameTime, player.VideoFrameTime + player.VideoFrameDuration)) - { - continue; - } - - // Read samples until frame is matching the current time - int32 samplesLeft = 500; - for (; samplesLeft > 0; samplesLeft--) - { - // Read sample - DWORD streamIndex = 0, flags = 0; - LONGLONG samplePos = 0, sampleDuration = 0; - IMFSample* videoSample = nullptr; - { - PROFILE_CPU_NAMED("ReadSample"); - hr = playerMF.SourceReader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, &streamIndex, &flags, &samplePos, &videoSample); - if (FAILED(hr)) - { - VIDEO_API_MF_ERROR(ReadSample, hr); - break; - } - } - TimeSpan frameTime((int64)samplePos); - TimeSpan franeDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt; - if (videoSample && videoSample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0) - { - franeDuration.Ticks = sampleDuration; - } - //const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / franeDuration.Ticks; - const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration); - - // Process sample - if (videoSample && isGoodSample) - { - PROFILE_CPU_NAMED("ProcessSample"); - - // Lock sample buffer memory (try to use 2D buffer for more direct memory access) - IMFMediaBuffer* buffer = nullptr; - IMF2DBuffer* buffer2D = nullptr; - BYTE* bufferData = nullptr; - LONG bufferStride = 0; - if (videoSample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK) - { - LONG bufferPitch = 0; - hr = buffer2D->Lock2D(&bufferData, &bufferPitch); - if (FAILED(hr)) - { - VIDEO_API_MF_ERROR(GetCurrentLength, hr); - goto PROCESS_SAMPLE_END; - } - if (bufferPitch < 0) - bufferPitch = -bufferPitch; 
// Flipped image - bufferStride = bufferPitch * player.VideoFrameHeight; - } - else - { - if (buffer) - { - buffer->Release(); - buffer = nullptr; - } - DWORD bufferLength; - hr = videoSample->ConvertToContiguousBuffer(&buffer); - if (FAILED(hr)) - { - VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr); - goto PROCESS_SAMPLE_END; - } - hr = buffer->GetCurrentLength(&bufferLength); - if (FAILED(hr)) - { - VIDEO_API_MF_ERROR(GetCurrentLength, hr); - goto PROCESS_SAMPLE_END; - } - DWORD bufferMaxLen = 0, bufferCurrentLength = 0; - hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength); - if (FAILED(hr)) - { - VIDEO_API_MF_ERROR(Lock, hr); - goto PROCESS_SAMPLE_END; - } - bufferStride = bufferCurrentLength; - } - - // Send pixels to the texture - player.UpdateVideoFrame(Span(bufferData, bufferStride), frameTime, franeDuration); - - // Unlock sample buffer memory - if (buffer2D) - { - hr = buffer2D->Unlock2D(); - if (FAILED(hr)) - { - VIDEO_API_MF_ERROR(Unlock2D, hr); - } - } - else - { - hr = buffer->Unlock(); - if (FAILED(hr)) - { - VIDEO_API_MF_ERROR(Unlock, hr); - } - } - - PROCESS_SAMPLE_END: - buffer->Release(); - } - if (videoSample) - videoSample->Release(); - - if (flags & MF_SOURCE_READERF_ENDOFSTREAM) - { - // Media ended - break; - } - if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED) - { - // Format/metadata might have changed so update the stream - Configure(player, playerMF, streamIndex); - } - - // End loop if got good sample or need to seek back - if (isGoodSample) - break; - } - if (samplesLeft == 0 && seeks < 2) + // Update streams + if (ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM, dt)) { // Failed to pick a valid sample so try again with seeking playerMF.Seek = 1; goto SEEK_START; } + if (player.AudioInfo.BitDepth != 0) + ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt); } } diff --git a/Source/Engine/Video/Types.h 
b/Source/Engine/Video/Types.h index 1a1a3fad1..ee03b0238 100644 --- a/Source/Engine/Video/Types.h +++ b/Source/Engine/Video/Types.h @@ -6,6 +6,7 @@ #include "Engine/Core/Types/TimeSpan.h" #include "Engine/Core/Types/DataContainer.h" #include "Engine/Audio/Types.h" +#include "Engine/Audio/Config.h" #include "Engine/Graphics/PixelFormat.h" class Video; @@ -22,19 +23,29 @@ class GPUPipelineState; /// struct VideoBackendPlayer { - VideoBackend* Backend = nullptr; - GPUTexture* Frame = nullptr; - GPUBuffer* FrameUpload = nullptr; - int32 Width = 0, Height = 0, AvgBitRate = 0, FramesCount = 0; - int32 VideoFrameWidth = 0, VideoFrameHeight = 0; - PixelFormat Format = PixelFormat::Unknown; - float FrameRate = 0.0f; - TimeSpan Duration = TimeSpan(0); - TimeSpan VideoFrameTime = TimeSpan(0), VideoFrameDuration = TimeSpan(0); - AudioDataInfo AudioInfo = {}; + VideoBackend* Backend; + GPUTexture* Frame; + GPUBuffer* FrameUpload; + int32 Width, Height, AvgVideoBitRate, FramesCount; + int32 VideoFrameWidth, VideoFrameHeight; + PixelFormat Format; + float FrameRate; + TimeSpan Duration; + TimeSpan VideoFrameTime, VideoFrameDuration; + TimeSpan AudioBufferTime, AudioBufferDuration; + AudioDataInfo AudioInfo; BytesContainer VideoFrameMemory; - class GPUUploadVideoFrameTask* UploadVideoFrameTask = nullptr; - uintptr BackendState[8] = {}; + AUDIO_BUFFER_ID_TYPE AudioBuffer; + AUDIO_SOURCE_ID_TYPE AudioSource; + class GPUUploadVideoFrameTask* UploadVideoFrameTask; + uintptr BackendState[8]; + + VideoBackendPlayer() + { + Platform::MemoryClear(this, sizeof(VideoBackendPlayer)); + } + + POD_COPYABLE(VideoBackendPlayer); template FORCE_INLINE T& GetBackendState() @@ -51,6 +62,7 @@ struct VideoBackendPlayer } void InitVideoFrame(); - void UpdateVideoFrame(Span frame, TimeSpan time, TimeSpan duration); + void UpdateVideoFrame(Span data, TimeSpan time, TimeSpan duration); + void UpdateAudioBuffer(Span data, TimeSpan time, TimeSpan duration); void ReleaseResources(); }; diff --git 
a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index fe6eb40d3..814713c3b 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -2,13 +2,13 @@ #include "Video.h" #include "VideoBackend.h" +#include "Engine/Audio/AudioBackend.h" #include "Engine/Core/Log.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Engine/EngineService.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUBuffer.h" #include "Engine/Graphics/GPUResource.h" -#include "Engine/Graphics/GPUPipelineState.h" #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/Async/GPUTask.h" @@ -186,7 +186,7 @@ void VideoBackendPlayer::InitVideoFrame() Frame = GPUDevice::Instance->CreateTexture(TEXT("VideoFrame")); } -void VideoBackendPlayer::UpdateVideoFrame(Span frame, TimeSpan time, TimeSpan duration) +void VideoBackendPlayer::UpdateVideoFrame(Span data, TimeSpan time, TimeSpan duration) { PROFILE_CPU(); VideoFrameTime = time; @@ -197,9 +197,9 @@ void VideoBackendPlayer::UpdateVideoFrame(Span frame, TimeSpan time, TimeS // Ensure that sampled frame data matches the target texture size uint32 rowPitch, slicePitch; RenderTools::ComputePitch(Format, VideoFrameWidth, VideoFrameHeight, rowPitch, slicePitch); - if (slicePitch != frame.Length()) + if (slicePitch != data.Length()) { - LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", frame.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format)); + LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", data.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format)); return; } @@ -213,7 +213,7 @@ void VideoBackendPlayer::UpdateVideoFrame(Span frame, TimeSpan time, TimeS return; } } - Platform::MemoryCopy(VideoFrameMemory.Get(), frame.Get(), slicePitch); + Platform::MemoryCopy(VideoFrameMemory.Get(), data.Get(), 
slicePitch); // Update output frame texture InitVideoFrame(); @@ -235,8 +235,32 @@ void VideoBackendPlayer::UpdateVideoFrame(Span frame, TimeSpan time, TimeS } } +void VideoBackendPlayer::UpdateAudioBuffer(Span data, TimeSpan time, TimeSpan duration) +{ + PROFILE_CPU(); + AudioBufferTime = time; + AudioBufferDuration = duration; + auto start = time.GetTotalMilliseconds(); + auto dur = duration.GetTotalMilliseconds(); + auto end = (time + duration).GetTotalMilliseconds(); + if (!AudioBackend::Instance) + return; + + // Update audio buffer + if (!AudioBuffer) + AudioBuffer = AudioBackend::Buffer::Create(); + AudioDataInfo dataInfo = AudioInfo; + const uint32 samplesPerSecond = dataInfo.SampleRate * dataInfo.NumChannels; + const uint32 maxSamplesInData = (uint32)data.Length() * 8 / dataInfo.BitDepth; + const uint32 maxSamplesInDuration = (uint32)Math::CeilToInt(samplesPerSecond * duration.GetTotalSeconds()); + dataInfo.NumSamples = Math::Min(maxSamplesInData, maxSamplesInDuration); + AudioBackend::Buffer::Write(AudioBuffer, data.Get(), dataInfo); +} + void VideoBackendPlayer::ReleaseResources() { + if (AudioBuffer) + AudioBackend::Buffer::Delete(AudioBuffer); if (UploadVideoFrameTask) UploadVideoFrameTask->Cancel(); VideoFrameMemory.Release(); From f6045035661577ff9ec21e1eb69047fb29156649 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 4 May 2024 21:47:47 +0200 Subject: [PATCH 049/292] Refactor Audio Backend to support single listener only --- Source/Engine/Audio/AudioClip.cpp | 16 +- Source/Engine/Audio/AudioClip.h | 2 +- Source/Engine/Audio/AudioSource.cpp | 41 +- Source/Engine/Audio/AudioSource.h | 9 +- Source/Engine/Audio/Config.h | 11 +- .../Engine/Audio/OpenAL/AudioBackendOAL.cpp | 361 ++++++------------ .../Audio/XAudio2/AudioBackendXAudio2.cpp | 9 +- Source/Engine/Engine/Units.h | 2 + Source/Engine/Video/MF/VideoBackendMF.cpp | 5 +- Source/Engine/Video/Types.h | 6 +- Source/Engine/Video/VideoPlayer.h | 1 + 11 files changed, 164 insertions(+), 299 
deletions(-) diff --git a/Source/Engine/Audio/AudioClip.cpp b/Source/Engine/Audio/AudioClip.cpp index 0204dd222..0abcc5479 100644 --- a/Source/Engine/Audio/AudioClip.cpp +++ b/Source/Engine/Audio/AudioClip.cpp @@ -32,7 +32,7 @@ bool AudioClip::StreamingTask::Run() { const auto idx = queue[i]; uint32& bufferId = clip->Buffers[idx]; - if (bufferId == AUDIO_BUFFER_ID_INVALID) + if (bufferId == 0) { bufferId = AudioBackend::Buffer::Create(); } @@ -40,7 +40,7 @@ bool AudioClip::StreamingTask::Run() { // Release unused data AudioBackend::Buffer::Delete(bufferId); - bufferId = AUDIO_BUFFER_ID_INVALID; + bufferId = 0; } } @@ -267,7 +267,7 @@ Task* AudioClip::CreateStreamingTask(int32 residency) for (int32 i = 0; i < StreamingQueue.Count(); i++) { const int32 idx = StreamingQueue[i]; - if (Buffers[idx] == AUDIO_BUFFER_ID_INVALID) + if (Buffers[idx] == 0) { const auto task = (Task*)RequestChunkDataAsync(idx); if (task) @@ -383,8 +383,8 @@ Asset::LoadResult AudioClip::load() void AudioClip::unload(bool isReloading) { bool hasAnyBuffer = false; - for (const AUDIO_BUFFER_ID_TYPE bufferId : Buffers) - hasAnyBuffer |= bufferId != AUDIO_BUFFER_ID_INVALID; + for (const uint32 bufferId : Buffers) + hasAnyBuffer |= bufferId != 0; // Stop any audio sources that are using this clip right now // TODO: find better way to collect audio sources using audio clip and impl it for AudioStreamingHandler too @@ -399,9 +399,9 @@ void AudioClip::unload(bool isReloading) StreamingQueue.Clear(); if (hasAnyBuffer && AudioBackend::Instance) { - for (AUDIO_BUFFER_ID_TYPE bufferId : Buffers) + for (uint32 bufferId : Buffers) { - if (bufferId != AUDIO_BUFFER_ID_INVALID) + if (bufferId != 0) AudioBackend::Buffer::Delete(bufferId); } } @@ -414,7 +414,7 @@ bool AudioClip::WriteBuffer(int32 chunkIndex) { // Ignore if buffer is not created const uint32 bufferId = Buffers[chunkIndex]; - if (bufferId == AUDIO_BUFFER_ID_INVALID) + if (bufferId == 0) return false; // Ensure audio backend exists diff --git 
a/Source/Engine/Audio/AudioClip.h b/Source/Engine/Audio/AudioClip.h index eee1557ca..f5061ed6f 100644 --- a/Source/Engine/Audio/AudioClip.h +++ b/Source/Engine/Audio/AudioClip.h @@ -88,7 +88,7 @@ public: /// /// The audio backend buffers (internal ids) collection used by this audio clip. /// - Array> Buffers; + Array> Buffers; /// /// The streaming cache. Contains indices of chunks to stream. If empty no streaming required. Managed by AudioStreamingHandler and used by the Audio streaming tasks. diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index c990332a0..dbe57e150 100644 --- a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -32,7 +32,7 @@ void AudioSource::SetVolume(float value) if (Math::NearEqual(_volume, value)) return; _volume = value; - if (SourceIDs.HasItems()) + if (SourceID) AudioBackend::Source::VolumeChanged(this); } @@ -42,7 +42,7 @@ void AudioSource::SetPitch(float value) if (Math::NearEqual(_pitch, value)) return; _pitch = value; - if (SourceIDs.HasItems()) + if (SourceID) AudioBackend::Source::PitchChanged(this); } @@ -52,7 +52,7 @@ void AudioSource::SetPan(float value) if (Math::NearEqual(_pan, value)) return; _pan = value; - if (SourceIDs.HasItems()) + if (SourceID) AudioBackend::Source::PanChanged(this); } @@ -63,7 +63,7 @@ void AudioSource::SetIsLooping(bool value) _loop = value; // When streaming we handle looping manually by the proper buffers submission - if (SourceIDs.HasItems() && !UseStreaming()) + if (SourceID && !UseStreaming()) AudioBackend::Source::IsLoopingChanged(this); } @@ -83,7 +83,7 @@ void AudioSource::SetMinDistance(float value) if (Math::NearEqual(_minDistance, value)) return; _minDistance = value; - if (SourceIDs.HasItems()) + if (SourceID) AudioBackend::Source::SpatialSetupChanged(this); } @@ -93,7 +93,7 @@ void AudioSource::SetAttenuation(float value) if (Math::NearEqual(_attenuation, value)) return; _attenuation = value; - if (SourceIDs.HasItems()) + 
if (SourceID) AudioBackend::Source::SpatialSetupChanged(this); } @@ -103,7 +103,7 @@ void AudioSource::SetDopplerFactor(float value) if (Math::NearEqual(_dopplerFactor, value)) return; _dopplerFactor = value; - if (SourceIDs.HasItems()) + if (SourceID) AudioBackend::Source::SpatialSetupChanged(this); } @@ -112,7 +112,7 @@ void AudioSource::SetAllowSpatialization(bool value) if (_allowSpatialization == value) return; _allowSpatialization = value; - if (SourceIDs.HasItems()) + if (SourceID) AudioBackend::Source::SpatialSetupChanged(this); } @@ -152,7 +152,7 @@ void AudioSource::Play() RequestStreamingBuffersUpdate(); } } - else if (SourceIDs.HasItems()) + else if (SourceID) { // Play it right away SetNonStreamingBuffer(); @@ -187,14 +187,13 @@ void AudioSource::Stop() _state = States::Stopped; _isActuallyPlayingSth = false; _streamingFirstChunk = 0; - - if (SourceIDs.HasItems()) + if (SourceID) AudioBackend::Source::Stop(this); } float AudioSource::GetTime() const { - if (_state == States::Stopped || SourceIDs.IsEmpty() || !Clip->IsLoaded()) + if (_state == States::Stopped || SourceID == 0 || !Clip->IsLoaded()) return 0.0f; float time = AudioBackend::Source::GetCurrentBufferTime(this); @@ -265,10 +264,10 @@ void AudioSource::Cleanup() _savedTime = GetTime(); Stop(); - if (SourceIDs.HasItems()) + if (SourceID) { AudioBackend::Source::Cleanup(this); - SourceIDs.Clear(); + SourceID = 0; } } @@ -283,7 +282,7 @@ void AudioSource::OnClipLoaded() AudioBackend::Source::ClipLoaded(this); // Start playing if source was waiting for the clip to load - if (SourceIDs.HasItems() && _state == States::Playing && !_isActuallyPlayingSth) + if (SourceID && _state == States::Playing && !_isActuallyPlayingSth) { if (Clip->IsStreamable()) { @@ -329,7 +328,7 @@ void AudioSource::SetNonStreamingBuffer() void AudioSource::PlayInternal() { - if (_clipChanged && SourceIDs.HasItems()) + if (_clipChanged && SourceID != 0) { // If clip was changed between source setup (OnEnable) and actual 
playback start then ensure to flush any runtime properties with the audio backend _clipChanged = false; @@ -420,13 +419,13 @@ void AudioSource::Update() } // Skip other update logic if it's not valid streamable source - if (!UseStreaming() || SourceIDs.IsEmpty()) + if (!UseStreaming() || SourceID == 0) return; auto clip = Clip.Get(); clip->Locker.Lock(); // Handle streaming buffers queue submit (ensure that clip has loaded the first chunk with audio data) - if (_needToUpdateStreamingBuffers && clip->Buffers[_streamingFirstChunk] != AUDIO_BUFFER_ID_INVALID) + if (_needToUpdateStreamingBuffers && clip->Buffers[_streamingFirstChunk] != 0) { // Get buffers in a queue count int32 numQueuedBuffers; @@ -434,11 +433,11 @@ void AudioSource::Update() // Queue missing buffers uint32 bufferId; - if (numQueuedBuffers < 1 && (bufferId = clip->Buffers[_streamingFirstChunk]) != AUDIO_BUFFER_ID_INVALID) + if (numQueuedBuffers < 1 && (bufferId = clip->Buffers[_streamingFirstChunk]) != 0) { AudioBackend::Source::QueueBuffer(this, bufferId); } - if (numQueuedBuffers < 2 && _streamingFirstChunk + 1 < clip->Buffers.Count() && (bufferId = clip->Buffers[_streamingFirstChunk + 1]) != AUDIO_BUFFER_ID_INVALID) + if (numQueuedBuffers < 2 && _streamingFirstChunk + 1 < clip->Buffers.Count() && (bufferId = clip->Buffers[_streamingFirstChunk + 1]) != 0) { AudioBackend::Source::QueueBuffer(this, bufferId); } @@ -535,7 +534,7 @@ void AudioSource::OnTransformChanged() _box = BoundingBox(_transform.Translation); _sphere = BoundingSphere(_transform.Translation, 0.0f); - if (IsActiveInHierarchy() && SourceIDs.HasItems()) + if (IsActiveInHierarchy() && SourceID) { AudioBackend::Source::TransformChanged(this); } diff --git a/Source/Engine/Audio/AudioSource.h b/Source/Engine/Audio/AudioSource.h index 7e3c17d80..f94e04fb9 100644 --- a/Source/Engine/Audio/AudioSource.h +++ b/Source/Engine/Audio/AudioSource.h @@ -8,7 +8,7 @@ #include "Config.h" /// -/// Represents a source for emitting audio. 
Audio can be played spatially (gun shot), or normally (music). Each audio source must have an AudioClip to play - back, and it can also have a position in the case of spatial(3D) audio. +/// Represents a source for emitting audio. Audio can be played spatially (gun shot), or normally (music). Each audio source must have an AudioClip to play - back, and it can also have a position in the case of spatial (3D) audio. /// /// /// Whether or not an audio source is spatial is controlled by the assigned AudioClip.The volume and the pitch of a spatial audio source is controlled by its position and the AudioListener's position/direction/velocity. @@ -19,6 +19,7 @@ class FLAXENGINE_API AudioSource : public Actor DECLARE_SCENE_OBJECT(AudioSource); friend class AudioStreamingHandler; friend class AudioClip; + public: /// /// Valid states in which AudioSource can be in. @@ -66,9 +67,9 @@ private: public: /// - /// The internal IDs of this audio source used by the audio backend (unique ID per context/listener). + /// The internal ID of this audio source used by the audio backend. Empty if 0. /// - Array> SourceIDs; + uint32 SourceID = 0; /// /// The audio clip asset used as a source of the sound. @@ -260,7 +261,7 @@ public: API_PROPERTY() void SetTime(float time); /// - /// Returns true if the sound source is three dimensional (volume and pitch varies based on listener distance and velocity). + /// Returns true if the sound source is three-dimensional (volume and pitch varies based on listener distance and velocity). 
/// API_PROPERTY() bool Is3D() const; diff --git a/Source/Engine/Audio/Config.h b/Source/Engine/Audio/Config.h index 287e25d2d..e824b8e0f 100644 --- a/Source/Engine/Audio/Config.h +++ b/Source/Engine/Audio/Config.h @@ -5,16 +5,7 @@ #include "Engine/Core/Config.h" // The maximum amount of listeners used at once -#define AUDIO_MAX_LISTENERS 8 +#define AUDIO_MAX_LISTENERS 1 // The maximum amount of audio emitter buffers #define AUDIO_MAX_SOURCE_BUFFERS (ASSET_FILE_DATA_CHUNKS) - -// The type of the audio source IDs used to identify it (per listener) -#define AUDIO_SOURCE_ID_TYPE uint32 - -// The type of the audio buffer IDs used to identify it -#define AUDIO_BUFFER_ID_TYPE uint32 - -// The buffer ID that is invalid (unused) -#define AUDIO_BUFFER_ID_INVALID 0 diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp index ea22143fd..86850e0c4 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp @@ -6,6 +6,7 @@ #include "Engine/Platform/StringUtils.h" #include "Engine/Core/Log.h" #include "Engine/Tools/AudioTool/AudioTool.h" +#include "Engine/Engine/Units.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Audio/Audio.h" #include "Engine/Audio/AudioListener.h" @@ -19,12 +20,9 @@ #include #include -#define ALC_MULTIPLE_LISTENERS 0 - -#define FLAX_COORD_SCALE 0.01f // units are meters -#define FLAX_DST_TO_OAL(x) x * FLAX_COORD_SCALE -#define FLAX_POS_TO_OAL(vec) ((ALfloat)vec.X * -FLAX_COORD_SCALE), ((ALfloat)vec.Y * FLAX_COORD_SCALE), ((ALfloat)vec.Z * FLAX_COORD_SCALE) -#define FLAX_VEL_TO_OAL(vec) ((ALfloat)vec.X * -(FLAX_COORD_SCALE*FLAX_COORD_SCALE)), ((ALfloat)vec.Y * (FLAX_COORD_SCALE*FLAX_COORD_SCALE)), ((ALfloat)vec.Z * (FLAX_COORD_SCALE*FLAX_COORD_SCALE)) +#define FLAX_DST_TO_OAL(x) x * UNITS_TO_METERS_SCALE +#define FLAX_POS_TO_OAL(vec) ((ALfloat)vec.X * -UNITS_TO_METERS_SCALE), ((ALfloat)vec.Y * UNITS_TO_METERS_SCALE), ((ALfloat)vec.Z * 
UNITS_TO_METERS_SCALE) +#define FLAX_VEL_TO_OAL(vec) ((ALfloat)vec.X * -(UNITS_TO_METERS_SCALE*UNITS_TO_METERS_SCALE)), ((ALfloat)vec.Y * (UNITS_TO_METERS_SCALE*UNITS_TO_METERS_SCALE)), ((ALfloat)vec.Z * (UNITS_TO_METERS_SCALE*UNITS_TO_METERS_SCALE)) #if BUILD_RELEASE #define ALC_CHECK_ERROR(method) #else @@ -39,75 +37,30 @@ } #endif -#if ALC_MULTIPLE_LISTENERS -#define ALC_FOR_EACH_CONTEXT() \ - for (int32 i = 0; i < Contexts.Count(); i++) - { \ - if (Contexts.Count() > 1) \ - alcMakeContextCurrent(Contexts[i]); -#define ALC_GET_DEFAULT_CONTEXT() \ - if (Contexts.Count() > 1) \ - alcMakeContextCurrent(Contexts[0]); -#define ALC_GET_LISTENER_CONTEXT(listener) \ - if (Contexts.Count() > 1) \ - alcMakeContextCurrent(ALC::GetContext(listener))); -#else -#define ALC_FOR_EACH_CONTEXT() { int32 i = 0; -#define ALC_GET_DEFAULT_CONTEXT() -#define ALC_GET_LISTENER_CONTEXT(listener) -#endif - namespace ALC { ALCdevice* Device = nullptr; - Array> Contexts; + ALCcontext* Context = nullptr; AudioBackend::FeatureFlags Features = AudioBackend::FeatureFlags::None; bool IsExtensionSupported(const char* extension) { if (Device == nullptr) return false; - const int32 length = StringUtils::Length(extension); if ((length > 2) && (StringUtils::Compare(extension, "ALC", 3) == 0)) return alcIsExtensionPresent(Device, extension) != AL_FALSE; return alIsExtensionPresent(extension) != AL_FALSE; } - ALCcontext* GetContext(const class AudioListener* listener) + void ClearContext() { -#if ALC_MULTIPLE_LISTENERS - const auto& listeners = Audio::Listeners; - if (listeners.HasItems()) + if (Context) { - ASSERT(listeners.Count() == Contexts.Count()); - - const int32 numContexts = Contexts.Count(); - ALC_FOR_EACH_CONTEXT() - { - if (listeners[i] == listener) - return Contexts[i]; - } + alcMakeContextCurrent(nullptr); + alcDestroyContext(Context); + Context = nullptr; } - ASSERT(Contexts.HasItems()); -#else - ASSERT(Contexts.Count() == 1); -#endif - return Contexts[0]; - } - - FORCE_INLINE const 
Array>& GetContexts() - { - return Contexts; - } - - void ClearContexts() - { - alcMakeContextCurrent(nullptr); - - for (ALCcontext* context : Contexts) - alcDestroyContext(context); - Contexts.Clear(); } namespace Listener @@ -126,58 +79,51 @@ namespace ALC { void Rebuild(AudioSource* source) { - ASSERT(source->SourceIDs.IsEmpty()); + ASSERT(source->SourceID == 0); const bool is3D = source->Is3D(); const bool loop = source->GetIsLooping() && !source->UseStreaming(); - ALC_FOR_EACH_CONTEXT() - uint32 sourceID = 0; - alGenSources(1, &sourceID); + uint32 sourceID = 0; + alGenSources(1, &sourceID); + source->SourceID = sourceID; - source->SourceIDs.Add(sourceID); - } - - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - - alSourcef(sourceID, AL_GAIN, source->GetVolume()); - alSourcef(sourceID, AL_PITCH, source->GetPitch()); - alSourcef(sourceID, AL_SEC_OFFSET, 0.0f); - alSourcei(sourceID, AL_LOOPING, loop); - alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); - alSourcei(sourceID, AL_BUFFER, 0); - if (is3D) - { + alSourcef(sourceID, AL_GAIN, source->GetVolume()); + alSourcef(sourceID, AL_PITCH, source->GetPitch()); + alSourcef(sourceID, AL_SEC_OFFSET, 0.0f); + alSourcei(sourceID, AL_LOOPING, loop); + alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); + alSourcei(sourceID, AL_BUFFER, 0); + if (is3D) + { #ifdef AL_SOFT_source_spatialize - alSourcei(sourceID, AL_SOURCE_SPATIALIZE_SOFT, AL_TRUE); -#endif - alSourcef(sourceID, AL_ROLLOFF_FACTOR, source->GetAttenuation()); - alSourcef(sourceID, AL_DOPPLER_FACTOR, source->GetDopplerFactor()); - alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(source->GetMinDistance())); - alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(source->GetPosition())); - alSource3f(sourceID, AL_VELOCITY, FLAX_VEL_TO_OAL(source->GetVelocity())); - } - else - { - alSourcef(sourceID, AL_ROLLOFF_FACTOR, 0.0f); - alSourcef(sourceID, AL_DOPPLER_FACTOR, 1.0f); - alSourcef(sourceID, AL_REFERENCE_DISTANCE, 0.0f); - 
alSource3f(sourceID, AL_POSITION, 0.0f, 0.0f, 0.0f); - alSource3f(sourceID, AL_VELOCITY, 0.0f, 0.0f, 0.0f); - } -#ifdef AL_EXT_STEREO_ANGLES - const float panAngle = source->GetPan() * PI_HALF; - const ALfloat panAngles[2] = { (ALfloat)(PI / 6.0 - panAngle), (ALfloat)(-PI / 6.0 - panAngle) }; // Angles are specified counter-clockwise in radians - alSourcefv(sourceID, AL_STEREO_ANGLES, panAngles); + alSourcei(sourceID, AL_SOURCE_SPATIALIZE_SOFT, AL_TRUE); #endif + alSourcef(sourceID, AL_ROLLOFF_FACTOR, source->GetAttenuation()); + alSourcef(sourceID, AL_DOPPLER_FACTOR, source->GetDopplerFactor()); + alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(source->GetMinDistance())); + alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(source->GetPosition())); + alSource3f(sourceID, AL_VELOCITY, FLAX_VEL_TO_OAL(source->GetVelocity())); } + else + { + alSourcef(sourceID, AL_ROLLOFF_FACTOR, 0.0f); + alSourcef(sourceID, AL_DOPPLER_FACTOR, 1.0f); + alSourcef(sourceID, AL_REFERENCE_DISTANCE, 0.0f); + alSource3f(sourceID, AL_POSITION, 0.0f, 0.0f, 0.0f); + alSource3f(sourceID, AL_VELOCITY, 0.0f, 0.0f, 0.0f); + } +#ifdef AL_EXT_STEREO_ANGLES + const float panAngle = source->GetPan() * PI_HALF; + const ALfloat panAngles[2] = { (ALfloat)(PI / 6.0 - panAngle), (ALfloat)(-PI / 6.0 - panAngle) }; // Angles are specified counter-clockwise in radians + alSourcefv(sourceID, AL_STEREO_ANGLES, panAngles); +#endif // Restore state after Cleanup source->Restore(); } } - void RebuildContexts(bool isChangingDevice) + void RebuildContext(bool isChangingDevice) { LOG(Info, "Rebuilding audio contexts"); @@ -187,7 +133,7 @@ namespace ALC source->Cleanup(); } - ClearContexts(); + ClearContext(); if (Device == nullptr) return; @@ -200,23 +146,8 @@ namespace ALC attrList = attrsHrtf; } -#if ALC_MULTIPLE_LISTENERS - const int32 numListeners = Audio::Listeners.Count(); - const int32 numContexts = numListeners > 1 ? 
numListeners : 1; - Contexts.Resize(numContexts); - - ALC_FOR_EACH_CONTEXT() - ALCcontext* context = alcCreateContext(Device, attrList); - Contexts[i] = context; - } -#else - Contexts.Resize(1); - Contexts[0] = alcCreateContext(Device, attrList); -#endif - - // If only one context is available keep it active as an optimization. - // Audio listeners and sources will avoid excessive context switching in such case. - alcMakeContextCurrent(Contexts[0]); + Context = alcCreateContext(Device, attrList); + alcMakeContextCurrent(Context); for (AudioListener* listener : Audio::Listeners) Listener::Rebuild(listener); @@ -315,33 +246,22 @@ const Char* GetOpenALErrorString(int error) void AudioBackendOAL::Listener_OnAdd(AudioListener* listener) { -#if ALC_MULTIPLE_LISTENERS - ALC::RebuildContexts(false); -#else AudioBackend::Listener::TransformChanged(listener); alListenerf(AL_GAIN, Audio::GetVolume()); -#endif } void AudioBackendOAL::Listener_OnRemove(AudioListener* listener) { -#if ALC_MULTIPLE_LISTENERS - ALC::RebuildContexts(false); -#endif } void AudioBackendOAL::Listener_VelocityChanged(AudioListener* listener) { - ALC_GET_LISTENER_CONTEXT(listener) - const Float3 velocity = listener->GetVelocity(); alListener3f(AL_VELOCITY, FLAX_VEL_TO_OAL(velocity)); } void AudioBackendOAL::Listener_TransformChanged(AudioListener* listener) { - ALC_GET_LISTENER_CONTEXT(listener) - const Float3 position = listener->GetPosition(); const Quaternion orientation = listener->GetOrientation(); const Float3 flipX(-1, 1, 1); @@ -359,7 +279,7 @@ void AudioBackendOAL::Listener_TransformChanged(AudioListener* listener) void AudioBackendOAL::Listener_ReinitializeAll() { - ALC::RebuildContexts(false); + ALC::RebuildContext(false); } void AudioBackendOAL::Source_OnAdd(AudioSource* source) @@ -376,36 +296,28 @@ void AudioBackendOAL::Source_VelocityChanged(AudioSource* source) { if (!source->Is3D()) return; - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSource3f(sourceID, 
AL_VELOCITY, FLAX_VEL_TO_OAL(source->GetVelocity())); - } + const uint32 sourceID = source->SourceID; + alSource3f(sourceID, AL_VELOCITY, FLAX_VEL_TO_OAL(source->GetVelocity())); } void AudioBackendOAL::Source_TransformChanged(AudioSource* source) { if (!source->Is3D()) return; - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(source->GetPosition())); - } + const uint32 sourceID = source->SourceID; + alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(source->GetPosition())); } void AudioBackendOAL::Source_VolumeChanged(AudioSource* source) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSourcef(sourceID, AL_GAIN, source->GetVolume()); - } + const uint32 sourceID = source->SourceID; + alSourcef(sourceID, AL_GAIN, source->GetVolume()); } void AudioBackendOAL::Source_PitchChanged(AudioSource* source) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSourcef(sourceID, AL_PITCH, source->GetPitch()); - } + const uint32 sourceID = source->SourceID; + alSourcef(sourceID, AL_PITCH, source->GetPitch()); } void AudioBackendOAL::Source_PanChanged(AudioSource* source) @@ -413,131 +325,106 @@ void AudioBackendOAL::Source_PanChanged(AudioSource* source) #ifdef AL_EXT_STEREO_ANGLES const float panAngle = source->GetPan() * PI_HALF; const ALfloat panAngles[2] = { (ALfloat)(PI / 6.0 - panAngle), (ALfloat)(-PI / 6.0 - panAngle) }; // Angles are specified counter-clockwise in radians - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSourcefv(sourceID, AL_STEREO_ANGLES, panAngles); - } + const uint32 sourceID = source->SourceID; + alSourcefv(sourceID, AL_STEREO_ANGLES, panAngles); #endif } void AudioBackendOAL::Source_IsLoopingChanged(AudioSource* source) { const bool loop = source->GetIsLooping() && !source->UseStreaming(); - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSourcei(sourceID, 
AL_LOOPING, loop); - } + const uint32 sourceID = source->SourceID; + alSourcei(sourceID, AL_LOOPING, loop); } void AudioBackendOAL::Source_SpatialSetupChanged(AudioSource* source) { const bool is3D = source->Is3D(); - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); - if (is3D) - { + const uint32 sourceID = source->SourceID; + alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); + if (is3D) + { #ifdef AL_SOFT_source_spatialize - alSourcei(sourceID, AL_SOURCE_SPATIALIZE_SOFT, AL_TRUE); + alSourcei(sourceID, AL_SOURCE_SPATIALIZE_SOFT, AL_TRUE); #endif - alSourcef(sourceID, AL_ROLLOFF_FACTOR, source->GetAttenuation()); - alSourcef(sourceID, AL_DOPPLER_FACTOR, source->GetDopplerFactor()); - alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(source->GetMinDistance())); - } - else - { - alSourcef(sourceID, AL_ROLLOFF_FACTOR, 0.0f); - alSourcef(sourceID, AL_DOPPLER_FACTOR, 1.0f); - alSourcef(sourceID, AL_REFERENCE_DISTANCE, 0.0f); - } + alSourcef(sourceID, AL_ROLLOFF_FACTOR, source->GetAttenuation()); + alSourcef(sourceID, AL_DOPPLER_FACTOR, source->GetDopplerFactor()); + alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(source->GetMinDistance())); + } + else + { + alSourcef(sourceID, AL_ROLLOFF_FACTOR, 0.0f); + alSourcef(sourceID, AL_DOPPLER_FACTOR, 1.0f); + alSourcef(sourceID, AL_REFERENCE_DISTANCE, 0.0f); } } void AudioBackendOAL::Source_ClipLoaded(AudioSource* source) { - if (source->SourceIDs.Count() < ALC::Contexts.Count()) + if (source->SourceID == 0) return; const auto clip = source->Clip.Get(); const bool is3D = source->Is3D(); const bool loop = source->GetIsLooping() && !clip->IsStreamable(); - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); - alSourcei(sourceID, AL_LOOPING, loop); - } + const uint32 sourceID = source->SourceID; + alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); + alSourcei(sourceID, 
AL_LOOPING, loop); } void AudioBackendOAL::Source_Cleanup(AudioSource* source) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - alSourcei(sourceID, AL_BUFFER, 0); - ALC_CHECK_ERROR(alSourcei); - alDeleteSources(1, &sourceID); - ALC_CHECK_ERROR(alDeleteSources); - } + const uint32 sourceID = source->SourceID; + alSourcei(sourceID, AL_BUFFER, 0); + ALC_CHECK_ERROR(alSourcei); + alDeleteSources(1, &sourceID); + ALC_CHECK_ERROR(alDeleteSources); } void AudioBackendOAL::Source_Play(AudioSource* source) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - - // Play - alSourcePlay(sourceID); - ALC_CHECK_ERROR(alSourcePlay); - } + const uint32 sourceID = source->SourceID; + alSourcePlay(sourceID); + ALC_CHECK_ERROR(alSourcePlay); } void AudioBackendOAL::Source_Pause(AudioSource* source) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - - // Pause - alSourcePause(sourceID); - ALC_CHECK_ERROR(alSourcePause); - } + const uint32 sourceID = source->SourceID; + alSourcePause(sourceID); + ALC_CHECK_ERROR(alSourcePause); } void AudioBackendOAL::Source_Stop(AudioSource* source) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; + const uint32 sourceID = source->SourceID; - // Stop and rewind - alSourceRewind(sourceID); - ALC_CHECK_ERROR(alSourceRewind); - alSourcef(sourceID, AL_SEC_OFFSET, 0.0f); + // Stop and rewind + alSourceRewind(sourceID); + ALC_CHECK_ERROR(alSourceRewind); + alSourcef(sourceID, AL_SEC_OFFSET, 0.0f); - // Unset streaming buffers - alSourcei(sourceID, AL_BUFFER, 0); - ALC_CHECK_ERROR(alSourcei); - } + // Unset streaming buffers + alSourcei(sourceID, AL_BUFFER, 0); + ALC_CHECK_ERROR(alSourcei); } void AudioBackendOAL::Source_SetCurrentBufferTime(AudioSource* source, float value) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - - alSourcef(sourceID, AL_SEC_OFFSET, value); - } + const uint32 sourceID = source->SourceID; + 
alSourcef(sourceID, AL_SEC_OFFSET, value); } float AudioBackendOAL::Source_GetCurrentBufferTime(const AudioSource* source) { - ALC_GET_DEFAULT_CONTEXT() - #if 0 float time; - alGetSourcef(source->SourceIDs[0], AL_SEC_OFFSET, &time); + alGetSourcef(source->SourceID, AL_SEC_OFFSET, &time); #else ASSERT(source->Clip && source->Clip->IsLoaded()); const AudioDataInfo& clipInfo = source->Clip->AudioHeader.Info; ALint samplesPlayed; - alGetSourcei(source->SourceIDs[0], AL_SAMPLE_OFFSET, &samplesPlayed); + alGetSourcei(source->SourceID, AL_SAMPLE_OFFSET, &samplesPlayed); const uint32 totalSamples = clipInfo.NumSamples / clipInfo.NumChannels; const float time = (samplesPlayed % totalSamples) / static_cast(Math::Max(1U, clipInfo.SampleRate)); #endif @@ -548,56 +435,44 @@ float AudioBackendOAL::Source_GetCurrentBufferTime(const AudioSource* source) void AudioBackendOAL::Source_SetNonStreamingBuffer(AudioSource* source) { const uint32 bufferId = source->Clip->Buffers[0]; - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - - alSourcei(sourceID, AL_BUFFER, bufferId); - ALC_CHECK_ERROR(alSourcei); - } + const uint32 sourceID = source->SourceID; + alSourcei(sourceID, AL_BUFFER, bufferId); + ALC_CHECK_ERROR(alSourcei); } void AudioBackendOAL::Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) { - ALC_GET_DEFAULT_CONTEXT() - // Check the first context only - const uint32 sourceID = source->SourceIDs[0]; + const uint32 sourceID = source->SourceID; alGetSourcei(sourceID, AL_BUFFERS_PROCESSED, &processedBuffersCount); ALC_CHECK_ERROR(alGetSourcei); } void AudioBackendOAL::Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) { - ALC_GET_DEFAULT_CONTEXT() - // Check the first context only - const uint32 sourceID = source->SourceIDs[0]; + const uint32 sourceID = source->SourceID; alGetSourcei(sourceID, AL_BUFFERS_QUEUED, &queuedBuffersCount); ALC_CHECK_ERROR(alGetSourcei); } void 
AudioBackendOAL::Source_QueueBuffer(AudioSource* source, uint32 bufferId) { - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; + const uint32 sourceID = source->SourceID; - // Queue new buffer - alSourceQueueBuffers(sourceID, 1, &bufferId); - ALC_CHECK_ERROR(alSourceQueueBuffers); - } + // Queue new buffer + alSourceQueueBuffers(sourceID, 1, &bufferId); + ALC_CHECK_ERROR(alSourceQueueBuffers); } void AudioBackendOAL::Source_DequeueProcessedBuffers(AudioSource* source) { ALuint buffers[AUDIO_MAX_SOURCE_BUFFERS]; - ALC_FOR_EACH_CONTEXT() - const uint32 sourceID = source->SourceIDs[i]; - - int32 numProcessedBuffers; - alGetSourcei(sourceID, AL_BUFFERS_PROCESSED, &numProcessedBuffers); - alSourceUnqueueBuffers(sourceID, numProcessedBuffers, buffers); - ALC_CHECK_ERROR(alSourceUnqueueBuffers); - } + const uint32 sourceID = source->SourceID; + int32 numProcessedBuffers; + alGetSourcei(sourceID, AL_BUFFERS_PROCESSED, &numProcessedBuffers); + alSourceUnqueueBuffers(sourceID, numProcessedBuffers, buffers); + ALC_CHECK_ERROR(alSourceUnqueueBuffers); } uint32 AudioBackendOAL::Buffer_Create() @@ -729,7 +604,7 @@ void AudioBackendOAL::Base_OnActiveDeviceChanged() // Cleanup for (AudioSource* source : Audio::Sources) source->Cleanup(); - ALC::ClearContexts(); + ALC::ClearContext(); if (ALC::Device != nullptr) { alcCloseDevice(ALC::Device); @@ -746,7 +621,7 @@ void AudioBackendOAL::Base_OnActiveDeviceChanged() } // Setup - ALC::RebuildContexts(true); + ALC::RebuildContext(true); } void AudioBackendOAL::Base_SetDopplerFactor(float value) @@ -756,9 +631,7 @@ void AudioBackendOAL::Base_SetDopplerFactor(float value) void AudioBackendOAL::Base_SetVolume(float value) { - ALC_FOR_EACH_CONTEXT() - alListenerf(AL_GAIN, value); - } + alListenerf(AL_GAIN, value); } bool AudioBackendOAL::Base_Init() @@ -862,7 +735,7 @@ bool AudioBackendOAL::Base_Init() int32 clampedIndex = Math::Clamp(activeDeviceIndex, -1, Audio::Devices.Count() - 1); if (clampedIndex == 
Audio::GetActiveDeviceIndex()) { - ALC::RebuildContexts(true); + ALC::RebuildContext(true); } Audio::SetActiveDeviceIndex(activeDeviceIndex); #ifdef AL_SOFT_source_spatialize diff --git a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp index 7c16f05ca..37fab4f1d 100644 --- a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp +++ b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp @@ -218,9 +218,9 @@ namespace XAudio2 Source* GetSource(const AudioSource* source) { - if (source->SourceIDs.Count() == 0) + if (source->SourceID == 0) return nullptr; - const AUDIO_SOURCE_ID_TYPE sourceId = source->SourceIDs[0]; + const uint32 sourceId = source->SourceID; // 0 is invalid ID so shift them return &Sources[sourceId - 1]; } @@ -333,7 +333,7 @@ void AudioBackendXAudio2::Source_OnAdd(AudioSource* source) // Get first free source XAudio2::Source* aSource = nullptr; - AUDIO_SOURCE_ID_TYPE sourceID; + uint32 sourceID; for (int32 i = 0; i < XAudio2::Sources.Count(); i++) { if (XAudio2::Sources[i].IsFree()) @@ -377,8 +377,7 @@ void AudioBackendXAudio2::Source_OnAdd(AudioSource* source) if (FAILED(hr)) return; - sourceID++; // 0 is invalid ID so shift them - source->SourceIDs.Add(sourceID); + source->SourceID = sourceID + 1; // 0 is invalid ID so shift them // Prepare source state aSource->Callback.Source = source; diff --git a/Source/Engine/Engine/Units.h b/Source/Engine/Engine/Units.h index 6ddd7273e..1b1c0a55a 100644 --- a/Source/Engine/Engine/Units.h +++ b/Source/Engine/Engine/Units.h @@ -3,3 +3,5 @@ #pragma once #define METERS_TO_UNITS(meters) (meters * 100.0f) +#define UNITS_TO_METERS(units) (units * 0.01f) +#define UNITS_TO_METERS_SCALE 0.01f diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 84715db45..6396c7379 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -254,16 +254,15 @@ namespace bufferStride = 
bufferCurrentLength; } - Span bufferSpan(bufferData, bufferStride); if (isVideo) { // Send pixels to the texture - player.UpdateVideoFrame(bufferSpan, frameTime, franeDuration); + player.UpdateVideoFrame(Span(bufferData, bufferStride), frameTime, franeDuration); } else if (isAudio) { // Send PCM data - player.UpdateAudioBuffer(bufferSpan, frameTime, franeDuration); + player.UpdateAudioBuffer(Span(bufferData, bufferStride), frameTime, franeDuration); } // Unlock sample buffer memory diff --git a/Source/Engine/Video/Types.h b/Source/Engine/Video/Types.h index ee03b0238..40ccb8e0f 100644 --- a/Source/Engine/Video/Types.h +++ b/Source/Engine/Video/Types.h @@ -2,11 +2,11 @@ #pragma once +#include "Engine/Core/Core.h" #include "Engine/Core/Types/BaseTypes.h" #include "Engine/Core/Types/TimeSpan.h" #include "Engine/Core/Types/DataContainer.h" #include "Engine/Audio/Types.h" -#include "Engine/Audio/Config.h" #include "Engine/Graphics/PixelFormat.h" class Video; @@ -35,8 +35,8 @@ struct VideoBackendPlayer TimeSpan AudioBufferTime, AudioBufferDuration; AudioDataInfo AudioInfo; BytesContainer VideoFrameMemory; - AUDIO_BUFFER_ID_TYPE AudioBuffer; - AUDIO_SOURCE_ID_TYPE AudioSource; + uint32 AudioBuffer; + uint32 AudioSource; class GPUUploadVideoFrameTask* UploadVideoFrameTask; uintptr BackendState[8]; diff --git a/Source/Engine/Video/VideoPlayer.h b/Source/Engine/Video/VideoPlayer.h index dd5fccd28..75416e36b 100644 --- a/Source/Engine/Video/VideoPlayer.h +++ b/Source/Engine/Video/VideoPlayer.h @@ -2,6 +2,7 @@ #pragma once +#include "Engine/Core/Math/Vector2.h" #include "Engine/Level/Actor.h" #include "Engine/Content/AssetReference.h" #include "Types.h" From 5b2af6b3d55518b5e4f17c43d73830b290823265 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 4 May 2024 22:16:20 +0200 Subject: [PATCH 050/292] Refactor Audio Backend to not depend on `AudioListener` object --- Source/Engine/Audio/Audio.cpp | 6 +- Source/Engine/Audio/AudioBackend.h | 31 ++--- 
Source/Engine/Audio/AudioBackendTools.h | 7 ++ Source/Engine/Audio/AudioListener.cpp | 5 +- Source/Engine/Audio/None/AudioBackendNone.cpp | 10 +- Source/Engine/Audio/None/AudioBackendNone.h | 8 +- .../Engine/Audio/OpenAL/AudioBackendOAL.cpp | 27 ++--- Source/Engine/Audio/OpenAL/AudioBackendOAL.h | 8 +- .../Audio/XAudio2/AudioBackendXAudio2.cpp | 107 ++---------------- .../Audio/XAudio2/AudioBackendXAudio2.h | 8 +- 10 files changed, 53 insertions(+), 164 deletions(-) diff --git a/Source/Engine/Audio/Audio.cpp b/Source/Engine/Audio/Audio.cpp index 6db9baa87..25b6f39e4 100644 --- a/Source/Engine/Audio/Audio.cpp +++ b/Source/Engine/Audio/Audio.cpp @@ -2,6 +2,7 @@ #include "Audio.h" #include "AudioBackend.h" +#include "AudioListener.h" #include "AudioSettings.h" #include "FlaxEngine.Gen.h" #include "Engine/Scripting/ScriptingType.h" @@ -159,14 +160,15 @@ void Audio::OnAddListener(AudioListener* listener) } Listeners.Add(listener); - AudioBackend::Listener::OnAdd(listener); + AudioBackend::Listener::Reset(); + AudioBackend::Listener::TransformChanged(listener->GetPosition(), listener->GetOrientation()); } void Audio::OnRemoveListener(AudioListener* listener) { if (!Listeners.Remove(listener)) { - AudioBackend::Listener::OnRemove(listener); + AudioBackend::Listener::Reset(); } } diff --git a/Source/Engine/Audio/AudioBackend.h b/Source/Engine/Audio/AudioBackend.h index 7726bd627..e7424e1ad 100644 --- a/Source/Engine/Audio/AudioBackend.h +++ b/Source/Engine/Audio/AudioBackend.h @@ -15,7 +15,6 @@ class AudioBackend friend class AudioService; public: - enum class FeatureFlags { None = 0, @@ -26,12 +25,10 @@ public: static AudioBackend* Instance; private: - // Listener - virtual void Listener_OnAdd(AudioListener* listener) = 0; - virtual void Listener_OnRemove(AudioListener* listener) = 0; - virtual void Listener_VelocityChanged(AudioListener* listener) = 0; - virtual void Listener_TransformChanged(AudioListener* listener) = 0; + virtual void Listener_Reset() = 0; + virtual 
void Listener_VelocityChanged(const Vector3& velocity) = 0; + virtual void Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) = 0; virtual void Listener_ReinitializeAll() = 0; // Source @@ -73,35 +70,27 @@ private: virtual void Base_Dispose() = 0; public: - virtual ~AudioBackend() { } public: - class Listener { public: - - FORCE_INLINE static void OnAdd(AudioListener* listener) + FORCE_INLINE static void Reset() { - Instance->Listener_OnAdd(listener); + Instance->Listener_Reset(); } - FORCE_INLINE static void OnRemove(AudioListener* listener) + FORCE_INLINE static void VelocityChanged(const Vector3& velocity) { - Instance->Listener_OnRemove(listener); + Instance->Listener_VelocityChanged(velocity); } - FORCE_INLINE static void VelocityChanged(AudioListener* listener) + FORCE_INLINE static void TransformChanged(const Vector3& position, const Quaternion& orientation) { - Instance->Listener_VelocityChanged(listener); - } - - FORCE_INLINE static void TransformChanged(AudioListener* listener) - { - Instance->Listener_TransformChanged(listener); + Instance->Listener_TransformChanged(position, orientation); } FORCE_INLINE static void ReinitializeAll() @@ -113,7 +102,6 @@ public: class Source { public: - FORCE_INLINE static void OnAdd(AudioSource* source) { Instance->Source_OnAdd(source); @@ -223,7 +211,6 @@ public: class Buffer { public: - FORCE_INLINE static uint32 Create() { return Instance->Buffer_Create(); diff --git a/Source/Engine/Audio/AudioBackendTools.h b/Source/Engine/Audio/AudioBackendTools.h index 2630dca6f..8ef709144 100644 --- a/Source/Engine/Audio/AudioBackendTools.h +++ b/Source/Engine/Audio/AudioBackendTools.h @@ -21,6 +21,13 @@ public: Vector3 Velocity; Vector3 Position; Quaternion Orientation; + + void Reset() + { + Velocity = Vector3::Zero; + Position = Vector3::Zero; + Orientation = Quaternion::Identity; + } }; struct Source diff --git a/Source/Engine/Audio/AudioListener.cpp b/Source/Engine/Audio/AudioListener.cpp index 
5202b62ec..ce5243869 100644 --- a/Source/Engine/Audio/AudioListener.cpp +++ b/Source/Engine/Audio/AudioListener.cpp @@ -9,6 +9,7 @@ AudioListener::AudioListener(const SpawnParams& params) : Actor(params) , _velocity(Vector3::Zero) + , _prevPos(Vector3::Zero) { } @@ -27,7 +28,7 @@ void AudioListener::Update() _prevPos = pos; if (_velocity != prevVelocity) { - AudioBackend::Listener::VelocityChanged(this); + AudioBackend::Listener::VelocityChanged(_velocity); } } @@ -68,6 +69,6 @@ void AudioListener::OnTransformChanged() if (IsActiveInHierarchy() && IsDuringPlay()) { - AudioBackend::Listener::TransformChanged(this); + AudioBackend::Listener::TransformChanged(GetPosition(), GetOrientation()); } } diff --git a/Source/Engine/Audio/None/AudioBackendNone.cpp b/Source/Engine/Audio/None/AudioBackendNone.cpp index 1b86872a8..a28fb91b2 100644 --- a/Source/Engine/Audio/None/AudioBackendNone.cpp +++ b/Source/Engine/Audio/None/AudioBackendNone.cpp @@ -6,19 +6,15 @@ #include "Engine/Audio/Audio.h" #include "Engine/Audio/AudioSource.h" -void AudioBackendNone::Listener_OnAdd(AudioListener* listener) +void AudioBackendNone::Listener_Reset() { } -void AudioBackendNone::Listener_OnRemove(AudioListener* listener) +void AudioBackendNone::Listener_VelocityChanged(const Vector3& velocity) { } -void AudioBackendNone::Listener_VelocityChanged(AudioListener* listener) -{ -} - -void AudioBackendNone::Listener_TransformChanged(AudioListener* listener) +void AudioBackendNone::Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) { } diff --git a/Source/Engine/Audio/None/AudioBackendNone.h b/Source/Engine/Audio/None/AudioBackendNone.h index 8206c5942..5a28331b6 100644 --- a/Source/Engine/Audio/None/AudioBackendNone.h +++ b/Source/Engine/Audio/None/AudioBackendNone.h @@ -12,12 +12,10 @@ class AudioBackendNone : public AudioBackend { public: - // [AudioBackend] - void Listener_OnAdd(AudioListener* listener) override; - void Listener_OnRemove(AudioListener* listener) 
override; - void Listener_VelocityChanged(AudioListener* listener) override; - void Listener_TransformChanged(AudioListener* listener) override; + void Listener_Reset() override; + void Listener_VelocityChanged(const Vector3& velocity) override; + void Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) override; void Listener_ReinitializeAll() override; void Source_OnAdd(AudioSource* source) override; void Source_OnRemove(AudioSource* source) override; diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp index 86850e0c4..77d8f43c9 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp @@ -65,13 +65,11 @@ namespace ALC namespace Listener { - void Rebuild(AudioListener* listener) + void Rebuild(const AudioListener* listener) { - AudioBackend::Listener::TransformChanged(listener); - - const Float3 velocity = listener->GetVelocity(); - alListener3f(AL_VELOCITY, FLAX_VEL_TO_OAL(velocity)); - alListenerf(AL_GAIN, Audio::GetVolume()); + AudioBackend::Listener::Reset(); + AudioBackend::Listener::TransformChanged(listener->GetPosition(), listener->GetOrientation()); + AudioBackend::Listener::VelocityChanged(listener->GetVelocity()); } } @@ -244,35 +242,24 @@ const Char* GetOpenALErrorString(int error) return TEXT("???"); } -void AudioBackendOAL::Listener_OnAdd(AudioListener* listener) +void AudioBackendOAL::Listener_Reset() { - AudioBackend::Listener::TransformChanged(listener); alListenerf(AL_GAIN, Audio::GetVolume()); } -void AudioBackendOAL::Listener_OnRemove(AudioListener* listener) +void AudioBackendOAL::Listener_VelocityChanged(const Vector3& velocity) { -} - -void AudioBackendOAL::Listener_VelocityChanged(AudioListener* listener) -{ - const Float3 velocity = listener->GetVelocity(); alListener3f(AL_VELOCITY, FLAX_VEL_TO_OAL(velocity)); } -void AudioBackendOAL::Listener_TransformChanged(AudioListener* listener) +void 
AudioBackendOAL::Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) { - const Float3 position = listener->GetPosition(); - const Quaternion orientation = listener->GetOrientation(); const Float3 flipX(-1, 1, 1); const Float3 alOrientation[2] = { - // Forward orientation * Float3::Forward * flipX, - // Up orientation * Float3::Up * flipX }; - alListenerfv(AL_ORIENTATION, (float*)alOrientation); alListener3f(AL_POSITION, FLAX_POS_TO_OAL(position)); } diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.h b/Source/Engine/Audio/OpenAL/AudioBackendOAL.h index 23e8c08cf..3375251a4 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.h +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.h @@ -12,12 +12,10 @@ class AudioBackendOAL : public AudioBackend { public: - // [AudioBackend] - void Listener_OnAdd(AudioListener* listener) override; - void Listener_OnRemove(AudioListener* listener) override; - void Listener_VelocityChanged(AudioListener* listener) override; - void Listener_TransformChanged(AudioListener* listener) override; + void Listener_Reset() override; + void Listener_VelocityChanged(const Vector3& velocity) override; + void Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) override; void Listener_ReinitializeAll() override; void Source_OnAdd(AudioSource* source) override; void Source_OnRemove(AudioSource* source) override; diff --git a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp index 37fab4f1d..4f24013e1 100644 --- a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp +++ b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp @@ -9,7 +9,6 @@ #include "Engine/Core/Log.h" #include "Engine/Audio/Audio.h" #include "Engine/Audio/AudioSource.h" -#include "Engine/Audio/AudioListener.h" #include "Engine/Threading/Threading.h" #if PLATFORM_WINDOWS @@ -42,33 +41,6 @@ namespace XAudio2 { struct Listener : AudioBackendTools::Listener { - 
AudioListener* AudioListener; - - Listener() - { - Init(); - } - - void Init() - { - AudioListener = nullptr; - } - - bool IsFree() const - { - return AudioListener == nullptr; - } - - void UpdateTransform() - { - Position = AudioListener->GetPosition(); - Orientation = AudioListener->GetOrientation(); - } - - void UpdateVelocity() - { - Velocity = AudioListener->GetVelocity(); - } }; class VoiceCallback : public IXAudio2VoiceCallback @@ -188,41 +160,17 @@ namespace XAudio2 int32 Channels; bool ForceDirty = true; AudioBackendTools::Settings Settings; - Listener Listeners[AUDIO_MAX_LISTENERS]; + Listener Listener; CriticalSection Locker; ChunkedArray Sources; ChunkedArray Buffers; // TODO: use ChunkedArray for better performance or use buffers pool? EngineCallback Callback; - Listener* GetListener() - { - for (int32 i = 0; i < AUDIO_MAX_LISTENERS; i++) - { - if (Listeners[i].AudioListener) - return &Listeners[i]; - } - - return nullptr; - } - - Listener* GetListener(const AudioListener* listener) - { - for (int32 i = 0; i < AUDIO_MAX_LISTENERS; i++) - { - if (Listeners[i].AudioListener == listener) - return &Listeners[i]; - } - - return nullptr; - } - Source* GetSource(const AudioSource* source) { if (source->SourceID == 0) return nullptr; - const uint32 sourceId = source->SourceID; - // 0 is invalid ID so shift them - return &Sources[sourceId - 1]; + return &Sources[source->SourceID - 1]; // 0 is invalid ID so shift them } void MarkAllDirty() @@ -266,56 +214,23 @@ namespace XAudio2 } } -void AudioBackendXAudio2::Listener_OnAdd(AudioListener* listener) +void AudioBackendXAudio2::Listener_Reset() { - // Get first free listener - XAudio2::Listener* aListener = nullptr; - for (int32 i = 0; i < AUDIO_MAX_LISTENERS; i++) - { - if (XAudio2::Listeners[i].IsFree()) - { - aListener = &XAudio2::Listeners[i]; - break; - } - } - ASSERT(aListener); - - // Setup - aListener->AudioListener = listener; - aListener->UpdateTransform(); - aListener->UpdateVelocity(); - + 
XAudio2::Listener->Reset(); XAudio2::MarkAllDirty(); } -void AudioBackendXAudio2::Listener_OnRemove(AudioListener* listener) +void AudioBackendXAudio2::Listener_VelocityChanged(const Vector3& velocity) { - XAudio2::Listener* aListener = XAudio2::GetListener(listener); - if (aListener) - { - aListener->Init(); - XAudio2::MarkAllDirty(); - } + XAudio2::Listener.Velocity = velocity; + XAudio2::MarkAllDirty(); } -void AudioBackendXAudio2::Listener_VelocityChanged(AudioListener* listener) +void AudioBackendXAudio2::Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) { - XAudio2::Listener* aListener = XAudio2::GetListener(listener); - if (aListener) - { - aListener->UpdateVelocity(); - XAudio2::MarkAllDirty(); - } -} - -void AudioBackendXAudio2::Listener_TransformChanged(AudioListener* listener) -{ - XAudio2::Listener* aListener = XAudio2::GetListener(listener); - if (aListener) - { - aListener->UpdateTransform(); - XAudio2::MarkAllDirty(); - } + XAudio2::Listener.Position = position; + XAudio2::Listener.Orientation = orientation; + XAudio2::MarkAllDirty(); } void AudioBackendXAudio2::Listener_ReinitializeAll() diff --git a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h index 6d9961775..073dbfdc3 100644 --- a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h +++ b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h @@ -12,12 +12,10 @@ class AudioBackendXAudio2 : public AudioBackend { public: - // [AudioBackend] - void Listener_OnAdd(AudioListener* listener) override; - void Listener_OnRemove(AudioListener* listener) override; - void Listener_VelocityChanged(AudioListener* listener) override; - void Listener_TransformChanged(AudioListener* listener) override; + void Listener_Reset() override; + void Listener_VelocityChanged(const Vector3& velocity) override; + void Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) override; void Listener_ReinitializeAll() 
override; void Source_OnAdd(AudioSource* source) override; void Source_OnRemove(AudioSource* source) override; From f43cd979073d6a2a24430f0eb5af2690333a9835 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 6 May 2024 10:36:36 +0200 Subject: [PATCH 051/292] Refactor Audio Backend to not depend on `AudioSource` object --- Source/Engine/Audio/Audio.cpp | 40 --- Source/Engine/Audio/Audio.h | 7 - Source/Engine/Audio/AudioBackend.h | 138 ++++----- Source/Engine/Audio/AudioClip.cpp | 26 +- Source/Engine/Audio/AudioListener.cpp | 23 +- Source/Engine/Audio/AudioSource.cpp | 162 +++++----- Source/Engine/Audio/AudioSource.h | 16 - Source/Engine/Audio/Config.h | 1 + Source/Engine/Audio/None/AudioBackendNone.cpp | 53 ++-- Source/Engine/Audio/None/AudioBackendNone.h | 44 ++- .../Engine/Audio/OpenAL/AudioBackendOAL.cpp | 269 ++++++++-------- Source/Engine/Audio/OpenAL/AudioBackendOAL.h | 44 ++- .../Audio/XAudio2/AudioBackendXAudio2.cpp | 292 ++++++++---------- .../Audio/XAudio2/AudioBackendXAudio2.h | 44 ++- 14 files changed, 532 insertions(+), 627 deletions(-) diff --git a/Source/Engine/Audio/Audio.cpp b/Source/Engine/Audio/Audio.cpp index 25b6f39e4..e279f4d44 100644 --- a/Source/Engine/Audio/Audio.cpp +++ b/Source/Engine/Audio/Audio.cpp @@ -2,7 +2,6 @@ #include "Audio.h" #include "AudioBackend.h" -#include "AudioListener.h" #include "AudioSettings.h" #include "FlaxEngine.Gen.h" #include "Engine/Scripting/ScriptingType.h" @@ -149,45 +148,6 @@ void Audio::SetEnableHRTF(bool value) AudioBackend::Listener::ReinitializeAll(); } -void Audio::OnAddListener(AudioListener* listener) -{ - ASSERT(!Listeners.Contains(listener)); - - if (Listeners.Count() >= AUDIO_MAX_LISTENERS) - { - LOG(Error, "Unsupported amount of the audio listeners!"); - return; - } - - Listeners.Add(listener); - AudioBackend::Listener::Reset(); - AudioBackend::Listener::TransformChanged(listener->GetPosition(), listener->GetOrientation()); -} - -void Audio::OnRemoveListener(AudioListener* listener) -{ - if 
(!Listeners.Remove(listener)) - { - AudioBackend::Listener::Reset(); - } -} - -void Audio::OnAddSource(AudioSource* source) -{ - ASSERT(!Sources.Contains(source)); - - Sources.Add(source); - AudioBackend::Source::OnAdd(source); -} - -void Audio::OnRemoveSource(AudioSource* source) -{ - if (!Sources.Remove(source)) - { - AudioBackend::Source::OnRemove(source); - } -} - bool AudioService::Init() { PROFILE_CPU_NAMED("Audio.Init"); diff --git a/Source/Engine/Audio/Audio.h b/Source/Engine/Audio/Audio.h index a074fefda..5a82a6da8 100644 --- a/Source/Engine/Audio/Audio.h +++ b/Source/Engine/Audio/Audio.h @@ -97,11 +97,4 @@ public: /// /// The value. API_PROPERTY() static void SetEnableHRTF(bool value); - -public: - static void OnAddListener(AudioListener* listener); - static void OnRemoveListener(AudioListener* listener); - - static void OnAddSource(AudioSource* source); - static void OnRemoveSource(AudioSource* source); }; diff --git a/Source/Engine/Audio/AudioBackend.h b/Source/Engine/Audio/AudioBackend.h index e7424e1ad..ee94e21a3 100644 --- a/Source/Engine/Audio/AudioBackend.h +++ b/Source/Engine/Audio/AudioBackend.h @@ -32,32 +32,30 @@ private: virtual void Listener_ReinitializeAll() = 0; // Source - virtual void Source_OnAdd(AudioSource* source) = 0; - virtual void Source_OnRemove(AudioSource* source) = 0; - virtual void Source_VelocityChanged(AudioSource* source) = 0; - virtual void Source_TransformChanged(AudioSource* source) = 0; - virtual void Source_VolumeChanged(AudioSource* source) = 0; - virtual void Source_PitchChanged(AudioSource* source) = 0; - virtual void Source_PanChanged(AudioSource* source) = 0; - virtual void Source_IsLoopingChanged(AudioSource* source) = 0; - virtual void Source_SpatialSetupChanged(AudioSource* source) = 0; - virtual void Source_ClipLoaded(AudioSource* source) = 0; - virtual void Source_Cleanup(AudioSource* source) = 0; - virtual void Source_Play(AudioSource* source) = 0; - virtual void Source_Pause(AudioSource* source) = 0; - 
virtual void Source_Stop(AudioSource* source) = 0; - virtual void Source_SetCurrentBufferTime(AudioSource* source, float value) = 0; - virtual float Source_GetCurrentBufferTime(const AudioSource* source) = 0; - virtual void Source_SetNonStreamingBuffer(AudioSource* source) = 0; - virtual void Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) = 0; - virtual void Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) = 0; - virtual void Source_QueueBuffer(AudioSource* source, uint32 bufferId) = 0; - virtual void Source_DequeueProcessedBuffers(AudioSource* source) = 0; + virtual uint32 Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) = 0; + virtual void Source_Remove(uint32 sourceID) = 0; + virtual void Source_VelocityChanged(uint32 sourceID, const Vector3& velocity) = 0; + virtual void Source_TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) = 0; + virtual void Source_VolumeChanged(uint32 sourceID, float volume) = 0; + virtual void Source_PitchChanged(uint32 sourceID, float pitch) = 0; + virtual void Source_PanChanged(uint32 sourceID, float pan) = 0; + virtual void Source_IsLoopingChanged(uint32 sourceID, bool loop) = 0; + virtual void Source_SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) = 0; + virtual void Source_Play(uint32 sourceID) = 0; + virtual void Source_Pause(uint32 sourceID) = 0; + virtual void Source_Stop(uint32 sourceID) = 0; + virtual void Source_SetCurrentBufferTime(uint32 sourceID, float value) = 0; + virtual float Source_GetCurrentBufferTime(uint32 id) = 0; + virtual void Source_SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) = 0; + virtual void Source_GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) = 0; + 
virtual void Source_GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) = 0; + virtual void Source_QueueBuffer(uint32 sourceID, uint32 bufferID) = 0; + virtual void Source_DequeueProcessedBuffers(uint32 sourceID) = 0; // Buffer virtual uint32 Buffer_Create() = 0; - virtual void Buffer_Delete(uint32 bufferId) = 0; - virtual void Buffer_Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) = 0; + virtual void Buffer_Delete(uint32 bufferID) = 0; + virtual void Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) = 0; // Base virtual const Char* Base_Name() = 0; @@ -102,109 +100,99 @@ public: class Source { public: - FORCE_INLINE static void OnAdd(AudioSource* source) + FORCE_INLINE static uint32 Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) { - Instance->Source_OnAdd(source); + return Instance->Source_Add(format, position, orientation, volume, pitch, pan, loop, spatial, attenuation, minDistance, doppler); } - FORCE_INLINE static void OnRemove(AudioSource* source) + FORCE_INLINE static void Remove(uint32 sourceID) { - Instance->Source_OnRemove(source); + Instance->Source_Remove(sourceID); } - FORCE_INLINE static void VelocityChanged(AudioSource* source) + FORCE_INLINE static void VelocityChanged(uint32 sourceID, const Vector3& velocity) { - Instance->Source_VelocityChanged(source); + Instance->Source_VelocityChanged(sourceID, velocity); } - FORCE_INLINE static void TransformChanged(AudioSource* source) + FORCE_INLINE static void TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) { - Instance->Source_TransformChanged(source); + Instance->Source_TransformChanged(sourceID, position, orientation); } - FORCE_INLINE static void VolumeChanged(AudioSource* source) + FORCE_INLINE static void VolumeChanged(uint32 sourceID, float volume) { - 
Instance->Source_VolumeChanged(source); + Instance->Source_VolumeChanged(sourceID, volume); } - FORCE_INLINE static void PitchChanged(AudioSource* source) + FORCE_INLINE static void PitchChanged(uint32 sourceID, float pitch) { - Instance->Source_PitchChanged(source); + Instance->Source_PitchChanged(sourceID, pitch); } - FORCE_INLINE static void PanChanged(AudioSource* source) + FORCE_INLINE static void PanChanged(uint32 sourceID, float pan) { - Instance->Source_PanChanged(source); + Instance->Source_PanChanged(sourceID, pan); } - FORCE_INLINE static void IsLoopingChanged(AudioSource* source) + FORCE_INLINE static void IsLoopingChanged(uint32 sourceID, bool loop) { - Instance->Source_IsLoopingChanged(source); + Instance->Source_IsLoopingChanged(sourceID, loop); } - FORCE_INLINE static void SpatialSetupChanged(AudioSource* source) + FORCE_INLINE static void SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) { - Instance->Source_SpatialSetupChanged(source); + Instance->Source_SpatialSetupChanged(sourceID, spatial, attenuation, minDistance, doppler); } - FORCE_INLINE static void ClipLoaded(AudioSource* source) + FORCE_INLINE static void Play(uint32 sourceID) { - Instance->Source_ClipLoaded(source); + Instance->Source_Play(sourceID); } - FORCE_INLINE static void Cleanup(AudioSource* source) + FORCE_INLINE static void Pause(uint32 sourceID) { - Instance->Source_Cleanup(source); + Instance->Source_Pause(sourceID); } - FORCE_INLINE static void Play(AudioSource* source) + FORCE_INLINE static void Stop(uint32 sourceID) { - Instance->Source_Play(source); + Instance->Source_Stop(sourceID); } - FORCE_INLINE static void Pause(AudioSource* source) + FORCE_INLINE static void SetCurrentBufferTime(uint32 sourceID, float value) { - Instance->Source_Pause(source); + Instance->Source_SetCurrentBufferTime(sourceID, value); } - FORCE_INLINE static void Stop(AudioSource* source) + FORCE_INLINE static float GetCurrentBufferTime(uint32 
sourceID) { - Instance->Source_Stop(source); + return Instance->Source_GetCurrentBufferTime(sourceID); } - FORCE_INLINE static void SetCurrentBufferTime(AudioSource* source, float value) + FORCE_INLINE static void SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) { - Instance->Source_SetCurrentBufferTime(source, value); + Instance->Source_SetNonStreamingBuffer(sourceID, bufferID); } - FORCE_INLINE static float GetCurrentBufferTime(const AudioSource* source) + FORCE_INLINE static void GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) { - return Instance->Source_GetCurrentBufferTime(source); + Instance->Source_GetProcessedBuffersCount(sourceID, processedBuffersCount); } - FORCE_INLINE static void SetNonStreamingBuffer(AudioSource* source) + FORCE_INLINE static void GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) { - Instance->Source_SetNonStreamingBuffer(source); + Instance->Source_GetQueuedBuffersCount(sourceID, queuedBuffersCount); } - FORCE_INLINE static void GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) + FORCE_INLINE static void QueueBuffer(uint32 sourceID, uint32 bufferID) { - Instance->Source_GetProcessedBuffersCount(source, processedBuffersCount); + Instance->Source_QueueBuffer(sourceID, bufferID); } - FORCE_INLINE static void GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) + FORCE_INLINE static void DequeueProcessedBuffers(uint32 sourceID) { - Instance->Source_GetQueuedBuffersCount(source, queuedBuffersCount); - } - - FORCE_INLINE static void QueueBuffer(AudioSource* source, uint32 bufferId) - { - Instance->Source_QueueBuffer(source, bufferId); - } - - FORCE_INLINE static void DequeueProcessedBuffers(AudioSource* source) - { - Instance->Source_DequeueProcessedBuffers(source); + Instance->Source_DequeueProcessedBuffers(sourceID); } }; @@ -216,14 +204,14 @@ public: return Instance->Buffer_Create(); } - FORCE_INLINE static void Delete(uint32 bufferId) + 
FORCE_INLINE static void Delete(uint32 bufferID) { - Instance->Buffer_Delete(bufferId); + Instance->Buffer_Delete(bufferID); } - FORCE_INLINE static void Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) + FORCE_INLINE static void Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) { - Instance->Buffer_Write(bufferId, samples, info); + Instance->Buffer_Write(bufferID, samples, info); } }; diff --git a/Source/Engine/Audio/AudioClip.cpp b/Source/Engine/Audio/AudioClip.cpp index 0abcc5479..93e85f800 100644 --- a/Source/Engine/Audio/AudioClip.cpp +++ b/Source/Engine/Audio/AudioClip.cpp @@ -31,16 +31,16 @@ bool AudioClip::StreamingTask::Run() for (int32 i = 0; i < queue.Count(); i++) { const auto idx = queue[i]; - uint32& bufferId = clip->Buffers[idx]; - if (bufferId == 0) + uint32& bufferID = clip->Buffers[idx]; + if (bufferID == 0) { - bufferId = AudioBackend::Buffer::Create(); + bufferID = AudioBackend::Buffer::Create(); } else { // Release unused data - AudioBackend::Buffer::Delete(bufferId); - bufferId = 0; + AudioBackend::Buffer::Delete(bufferID); + bufferID = 0; } } @@ -383,8 +383,8 @@ Asset::LoadResult AudioClip::load() void AudioClip::unload(bool isReloading) { bool hasAnyBuffer = false; - for (const uint32 bufferId : Buffers) - hasAnyBuffer |= bufferId != 0; + for (const uint32 bufferID : Buffers) + hasAnyBuffer |= bufferID != 0; // Stop any audio sources that are using this clip right now // TODO: find better way to collect audio sources using audio clip and impl it for AudioStreamingHandler too @@ -399,10 +399,10 @@ void AudioClip::unload(bool isReloading) StreamingQueue.Clear(); if (hasAnyBuffer && AudioBackend::Instance) { - for (uint32 bufferId : Buffers) + for (uint32 bufferID : Buffers) { - if (bufferId != 0) - AudioBackend::Buffer::Delete(bufferId); + if (bufferID != 0) + AudioBackend::Buffer::Delete(bufferID); } } Buffers.Clear(); @@ -413,8 +413,8 @@ void AudioClip::unload(bool isReloading) bool AudioClip::WriteBuffer(int32 
chunkIndex) { // Ignore if buffer is not created - const uint32 bufferId = Buffers[chunkIndex]; - if (bufferId == 0) + const uint32 bufferID = Buffers[chunkIndex]; + if (bufferID == 0) return false; // Ensure audio backend exists @@ -475,6 +475,6 @@ bool AudioClip::WriteBuffer(int32 chunkIndex) } // Write samples to the audio buffer - AudioBackend::Buffer::Write(bufferId, data.Get(), info); + AudioBackend::Buffer::Write(bufferID, data.Get(), info); return false; } diff --git a/Source/Engine/Audio/AudioListener.cpp b/Source/Engine/Audio/AudioListener.cpp index ce5243869..48cac4205 100644 --- a/Source/Engine/Audio/AudioListener.cpp +++ b/Source/Engine/Audio/AudioListener.cpp @@ -3,6 +3,7 @@ #include "AudioListener.h" #include "Engine/Engine/Time.h" #include "Engine/Level/Scene/Scene.h" +#include "Engine/Core/Log.h" #include "AudioBackend.h" #include "Audio.h" @@ -36,9 +37,18 @@ void AudioListener::OnEnable() { _prevPos = GetPosition(); _velocity = Vector3::Zero; - - Audio::OnAddListener(this); - GetScene()->Ticking.Update.AddTick(this); + if (Audio::Listeners.Count() >= AUDIO_MAX_LISTENERS) + { + LOG(Error, "Unsupported amount of the audio listeners!"); + } + else + { + ASSERT(!Audio::Listeners.Contains(this)); + Audio::Listeners.Add(this); + AudioBackend::Listener::Reset(); + AudioBackend::Listener::TransformChanged(GetPosition(), GetOrientation()); + GetScene()->Ticking.Update.AddTick(this); + } #if USE_EDITOR GetSceneRendering()->AddViewportIcon(this); #endif @@ -52,8 +62,11 @@ void AudioListener::OnDisable() #if USE_EDITOR GetSceneRendering()->RemoveViewportIcon(this); #endif - GetScene()->Ticking.Update.RemoveTick(this); - Audio::OnRemoveListener(this); + if (!Audio::Listeners.Remove(this)) + { + GetScene()->Ticking.Update.RemoveTick(this); + AudioBackend::Listener::Reset(); + } // Base Actor::OnDisable(); diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index dbe57e150..b9e0ed92d 100644 --- 
a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -33,7 +33,7 @@ void AudioSource::SetVolume(float value) return; _volume = value; if (SourceID) - AudioBackend::Source::VolumeChanged(this); + AudioBackend::Source::VolumeChanged(SourceID, _volume); } void AudioSource::SetPitch(float value) @@ -43,7 +43,7 @@ void AudioSource::SetPitch(float value) return; _pitch = value; if (SourceID) - AudioBackend::Source::PitchChanged(this); + AudioBackend::Source::PitchChanged(SourceID, _pitch); } void AudioSource::SetPan(float value) @@ -53,7 +53,7 @@ void AudioSource::SetPan(float value) return; _pan = value; if (SourceID) - AudioBackend::Source::PanChanged(this); + AudioBackend::Source::PanChanged(SourceID, _pan); } void AudioSource::SetIsLooping(bool value) @@ -64,7 +64,7 @@ void AudioSource::SetIsLooping(bool value) // When streaming we handle looping manually by the proper buffers submission if (SourceID && !UseStreaming()) - AudioBackend::Source::IsLoopingChanged(this); + AudioBackend::Source::IsLoopingChanged(SourceID, _loop); } void AudioSource::SetPlayOnStart(bool value) @@ -84,7 +84,7 @@ void AudioSource::SetMinDistance(float value) return; _minDistance = value; if (SourceID) - AudioBackend::Source::SpatialSetupChanged(this); + AudioBackend::Source::SpatialSetupChanged(SourceID, Is3D(), _attenuation, _minDistance, _dopplerFactor); } void AudioSource::SetAttenuation(float value) @@ -94,7 +94,7 @@ void AudioSource::SetAttenuation(float value) return; _attenuation = value; if (SourceID) - AudioBackend::Source::SpatialSetupChanged(this); + AudioBackend::Source::SpatialSetupChanged(SourceID, Is3D(), _attenuation, _minDistance, _dopplerFactor); } void AudioSource::SetDopplerFactor(float value) @@ -104,7 +104,7 @@ void AudioSource::SetDopplerFactor(float value) return; _dopplerFactor = value; if (SourceID) - AudioBackend::Source::SpatialSetupChanged(this); + AudioBackend::Source::SpatialSetupChanged(SourceID, Is3D(), _attenuation, 
_minDistance, _dopplerFactor); } void AudioSource::SetAllowSpatialization(bool value) @@ -113,7 +113,7 @@ void AudioSource::SetAllowSpatialization(bool value) return; _allowSpatialization = value; if (SourceID) - AudioBackend::Source::SpatialSetupChanged(this); + AudioBackend::Source::SpatialSetupChanged(SourceID, Is3D(), _attenuation, _minDistance, _dopplerFactor); } void AudioSource::Play() @@ -121,19 +121,26 @@ void AudioSource::Play() auto state = _state; if (state == States::Playing) return; - if (Clip == nullptr) + if (Clip == nullptr || Clip->WaitForLoaded()) { LOG(Warning, "Cannot play audio source without a clip ({0})", GetNamePath()); return; } + if (SourceID == 0) + { + // Create audio source + SourceID = AudioBackend::Source::Add(Clip->Info(), GetPosition(), GetOrientation(), GetVolume(), GetPitch(), GetPan(), GetIsLooping() && !UseStreaming(), Is3D(), GetAttenuation(), GetMinDistance(), GetDopplerFactor()); + if (SourceID == 0) + { + LOG(Warning, "Cannot create audio source ({0})", GetNamePath()); + return; + } + } + _state = States::Playing; _isActuallyPlayingSth = false; - // Don't block scripting if audio is not loaded or has missing streaming data - if (!Clip->IsLoaded()) - return; - // Audio clips with disabled streaming are controlled by audio source, otherwise streaming manager will play it if (Clip->IsStreamable()) { @@ -155,7 +162,7 @@ void AudioSource::Play() else if (SourceID) { // Play it right away - SetNonStreamingBuffer(); + AudioBackend::Source::SetNonStreamingBuffer(SourceID, Clip->Buffers[0]); PlayInternal(); } else @@ -171,10 +178,9 @@ void AudioSource::Pause() return; _state = States::Paused; - if (_isActuallyPlayingSth) { - AudioBackend::Source::Pause(this); + AudioBackend::Source::Pause(SourceID); _isActuallyPlayingSth = false; } } @@ -188,7 +194,7 @@ void AudioSource::Stop() _isActuallyPlayingSth = false; _streamingFirstChunk = 0; if (SourceID) - AudioBackend::Source::Stop(this); + AudioBackend::Source::Stop(SourceID); } float 
AudioSource::GetTime() const @@ -196,13 +202,13 @@ float AudioSource::GetTime() const if (_state == States::Stopped || SourceID == 0 || !Clip->IsLoaded()) return 0.0f; - float time = AudioBackend::Source::GetCurrentBufferTime(this); + float time = AudioBackend::Source::GetCurrentBufferTime(SourceID); if (UseStreaming()) { // Apply time offset to the first streaming buffer binded to the source including the already queued buffers int32 numProcessedBuffers = 0; - AudioBackend::Source::GetProcessedBuffersCount(const_cast(this), numProcessedBuffers); + AudioBackend::Source::GetProcessedBuffersCount(SourceID, numProcessedBuffers); time += Clip->GetBufferStartTime(_streamingFirstChunk + numProcessedBuffers); } @@ -234,7 +240,7 @@ void AudioSource::SetTime(float time) time = relativeTime; } - AudioBackend::Source::SetCurrentBufferTime(this, time); + AudioBackend::Source::SetCurrentBufferTime(SourceID, time); // Restore state if was stopped if (isActuallyPlayingSth) @@ -258,31 +264,29 @@ void AudioSource::RequestStreamingBuffersUpdate() _needToUpdateStreamingBuffers = true; } -void AudioSource::Cleanup() +void AudioSource::OnClipChanged() { - _savedState = GetState(); - _savedTime = GetTime(); Stop(); + // Destroy current source (will be created on the next play), because clip might use different spatial options or audio data format if (SourceID) { - AudioBackend::Source::Cleanup(this); + AudioBackend::Source::Remove(SourceID); SourceID = 0; } } -void AudioSource::OnClipChanged() -{ - Stop(); - _clipChanged = true; -} - void AudioSource::OnClipLoaded() { - AudioBackend::Source::ClipLoaded(this); + if (!SourceID) + return; + + // Reset spatial and playback + AudioBackend::Source::IsLoopingChanged(SourceID, _loop && !UseStreaming()); + AudioBackend::Source::SpatialSetupChanged(SourceID, Is3D(), _attenuation, _minDistance, _dopplerFactor); // Start playing if source was waiting for the clip to load - if (SourceID && _state == States::Playing && !_isActuallyPlayingSth) + if 
(_state == States::Playing && !_isActuallyPlayingSth) { if (Clip->IsStreamable()) { @@ -292,7 +296,7 @@ void AudioSource::OnClipLoaded() else { // Play it right away - SetNonStreamingBuffer(); + AudioBackend::Source::SetNonStreamingBuffer(SourceID, Clip->Buffers[0]); PlayInternal(); } } @@ -300,42 +304,14 @@ void AudioSource::OnClipLoaded() bool AudioSource::UseStreaming() const { - return Clip && Clip->IsLoaded() && Clip->IsStreamable(); -} - -void AudioSource::Restore() -{ - if (Clip) - { - if (_savedState != States::Stopped) - Play(); - if (_savedState == States::Paused) - Pause(); - - SetTime(_savedTime); - - if (_savedState != States::Stopped && UseStreaming()) - RequestStreamingBuffersUpdate(); - } -} - -void AudioSource::SetNonStreamingBuffer() -{ - ASSERT(Clip && !Clip->IsStreamable()); - - AudioBackend::Source::SetNonStreamingBuffer(this); + if (Clip == nullptr || Clip->WaitForLoaded()) + return false; + return Clip->IsStreamable(); } void AudioSource::PlayInternal() { - if (_clipChanged && SourceID != 0) - { - // If clip was changed between source setup (OnEnable) and actual playback start then ensure to flush any runtime properties with the audio backend - _clipChanged = false; - AudioBackend::Source::SpatialSetupChanged(this); - } - AudioBackend::Source::Play(this); - + AudioBackend::Source::Play(SourceID); _isActuallyPlayingSth = true; } @@ -413,9 +389,9 @@ void AudioSource::Update() const auto prevVelocity = _velocity; _velocity = (pos - _prevPos) / dt; _prevPos = pos; - if (_velocity != prevVelocity) + if (_velocity != prevVelocity && Is3D()) { - AudioBackend::Source::VelocityChanged(this); + AudioBackend::Source::VelocityChanged(SourceID, _velocity); } // Skip other update logic if it's not valid streamable source @@ -429,17 +405,17 @@ void AudioSource::Update() { // Get buffers in a queue count int32 numQueuedBuffers; - AudioBackend::Source::GetQueuedBuffersCount(this, numQueuedBuffers); + AudioBackend::Source::GetQueuedBuffersCount(SourceID, 
numQueuedBuffers); // Queue missing buffers - uint32 bufferId; - if (numQueuedBuffers < 1 && (bufferId = clip->Buffers[_streamingFirstChunk]) != 0) + uint32 bufferID; + if (numQueuedBuffers < 1 && (bufferID = clip->Buffers[_streamingFirstChunk]) != 0) { - AudioBackend::Source::QueueBuffer(this, bufferId); + AudioBackend::Source::QueueBuffer(SourceID, bufferID); } - if (numQueuedBuffers < 2 && _streamingFirstChunk + 1 < clip->Buffers.Count() && (bufferId = clip->Buffers[_streamingFirstChunk + 1]) != 0) + if (numQueuedBuffers < 2 && _streamingFirstChunk + 1 < clip->Buffers.Count() && (bufferID = clip->Buffers[_streamingFirstChunk + 1]) != 0) { - AudioBackend::Source::QueueBuffer(this, bufferId); + AudioBackend::Source::QueueBuffer(SourceID, bufferID); } // Clear flag @@ -457,13 +433,13 @@ void AudioSource::Update() { // Get the processed buffers count int32 numProcessedBuffers = 0; - AudioBackend::Source::GetProcessedBuffersCount(this, numProcessedBuffers); + AudioBackend::Source::GetProcessedBuffersCount(SourceID, numProcessedBuffers); if (numProcessedBuffers > 0) { ASSERT(numProcessedBuffers <= ASSET_FILE_DATA_CHUNKS); // Unbind processed buffers from the source - AudioBackend::Source::DequeueProcessedBuffers(this); + AudioBackend::Source::DequeueProcessedBuffers(SourceID); // Move the chunk pointer (AudioStreamingHandler will request new chunks streaming) _streamingFirstChunk += numProcessedBuffers; @@ -500,27 +476,53 @@ void AudioSource::OnEnable() { _prevPos = GetPosition(); _velocity = Vector3::Zero; - _clipChanged = false; - Audio::OnAddSource(this); + // Add source + ASSERT_LOW_LAYER(!Audio::Sources.Contains(this)); + Audio::Sources.Add(this); GetScene()->Ticking.Update.AddTick(this); #if USE_EDITOR GetSceneRendering()->AddViewportIcon(this); #endif + // Restore playback state + if (Clip) + { + if (_savedState != States::Stopped) + Play(); + if (_savedState == States::Paused) + Pause(); + + SetTime(_savedTime); + + if (_savedState != States::Stopped && 
UseStreaming()) + RequestStreamingBuffersUpdate(); + } + // Base Actor::OnEnable(); } void AudioSource::OnDisable() { + // Cache playback state + _savedState = GetState(); + _savedTime = GetTime(); + + // End playback Stop(); + // Remove source #if USE_EDITOR GetSceneRendering()->RemoveViewportIcon(this); #endif GetScene()->Ticking.Update.RemoveTick(this); - Audio::OnRemoveSource(this); + if (SourceID) + { + AudioBackend::Source::Remove(SourceID); + SourceID = 0; + } + Audio::Sources.Remove(this); // Base Actor::OnDisable(); @@ -534,9 +536,9 @@ void AudioSource::OnTransformChanged() _box = BoundingBox(_transform.Translation); _sphere = BoundingSphere(_transform.Translation, 0.0f); - if (IsActiveInHierarchy() && SourceID) + if (IsActiveInHierarchy() && SourceID && Is3D()) { - AudioBackend::Source::TransformChanged(this); + AudioBackend::Source::TransformChanged(SourceID, _transform.Translation, _transform.Orientation); } } diff --git a/Source/Engine/Audio/AudioSource.h b/Source/Engine/Audio/AudioSource.h index f94e04fb9..682c07563 100644 --- a/Source/Engine/Audio/AudioSource.h +++ b/Source/Engine/Audio/AudioSource.h @@ -55,7 +55,6 @@ private: bool _playOnStart; float _startTime; bool _allowSpatialization; - bool _clipChanged = false; bool _isActuallyPlayingSth = false; bool _needToUpdateStreamingBuffers = false; @@ -270,11 +269,6 @@ public: /// API_PROPERTY() bool UseStreaming() const; - /// - /// Restores the saved time position and resumes/pauses the playback based on the state before. Used to restore audio source state after data rebuild (eg. by audio backend). - /// - void Restore(); - public: /// /// Determines whether this audio source started playing audio via audio backend. After audio play it may wait for audio clip data to be loaded or streamed. @@ -289,20 +283,10 @@ public: /// void RequestStreamingBuffersUpdate(); - /// - /// Cleanups the cached data. Called by the Audio manager. 
- /// - void Cleanup(); - private: void OnClipChanged(); void OnClipLoaded(); - /// - /// Sets the single buffer from the audio clip that is not using dynamic streaming - /// - void SetNonStreamingBuffer(); - /// /// Plays the audio source. Should have buffer(s) binded before. /// diff --git a/Source/Engine/Audio/Config.h b/Source/Engine/Audio/Config.h index e824b8e0f..764468ae5 100644 --- a/Source/Engine/Audio/Config.h +++ b/Source/Engine/Audio/Config.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Core/Config.h" +#include "Engine/Content/Config.h" // The maximum amount of listeners used at once #define AUDIO_MAX_LISTENERS 1 diff --git a/Source/Engine/Audio/None/AudioBackendNone.cpp b/Source/Engine/Audio/None/AudioBackendNone.cpp index a28fb91b2..4bf2df88f 100644 --- a/Source/Engine/Audio/None/AudioBackendNone.cpp +++ b/Source/Engine/Audio/None/AudioBackendNone.cpp @@ -22,91 +22,82 @@ void AudioBackendNone::Listener_ReinitializeAll() { } -void AudioBackendNone::Source_OnAdd(AudioSource* source) +uint32 AudioBackendNone::Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) { - source->Restore(); + return 1; } -void AudioBackendNone::Source_OnRemove(AudioSource* source) -{ - source->Cleanup(); -} - -void AudioBackendNone::Source_VelocityChanged(AudioSource* source) +void AudioBackendNone::Source_Remove(uint32 sourceID) { } -void AudioBackendNone::Source_TransformChanged(AudioSource* source) +void AudioBackendNone::Source_VelocityChanged(uint32 sourceID, const Vector3& velocity) { } -void AudioBackendNone::Source_VolumeChanged(AudioSource* source) +void AudioBackendNone::Source_TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) { } -void AudioBackendNone::Source_PitchChanged(AudioSource* source) +void AudioBackendNone::Source_VolumeChanged(uint32 sourceID, float volume) { 
} -void AudioBackendNone::Source_PanChanged(AudioSource* source) +void AudioBackendNone::Source_PitchChanged(uint32 sourceID, float pitch) { } -void AudioBackendNone::Source_IsLoopingChanged(AudioSource* source) +void AudioBackendNone::Source_PanChanged(uint32 sourceID, float pan) { } -void AudioBackendNone::Source_SpatialSetupChanged(AudioSource* source) +void AudioBackendNone::Source_IsLoopingChanged(uint32 sourceID, bool loop) { } -void AudioBackendNone::Source_ClipLoaded(AudioSource* source) +void AudioBackendNone::Source_SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) { } -void AudioBackendNone::Source_Cleanup(AudioSource* source) +void AudioBackendNone::Source_Play(uint32 sourceID) { } -void AudioBackendNone::Source_Play(AudioSource* source) +void AudioBackendNone::Source_Pause(uint32 sourceID) { } -void AudioBackendNone::Source_Pause(AudioSource* source) +void AudioBackendNone::Source_Stop(uint32 sourceID) { } -void AudioBackendNone::Source_Stop(AudioSource* source) +void AudioBackendNone::Source_SetCurrentBufferTime(uint32 sourceID, float value) { } -void AudioBackendNone::Source_SetCurrentBufferTime(AudioSource* source, float value) -{ -} - -float AudioBackendNone::Source_GetCurrentBufferTime(const AudioSource* source) +float AudioBackendNone::Source_GetCurrentBufferTime(uint32 sourceID) { return 0.0f; } -void AudioBackendNone::Source_SetNonStreamingBuffer(AudioSource* source) +void AudioBackendNone::Source_SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) { } -void AudioBackendNone::Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) +void AudioBackendNone::Source_GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) { processedBuffersCount = 0; } -void AudioBackendNone::Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) +void AudioBackendNone::Source_GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) { } 
-void AudioBackendNone::Source_QueueBuffer(AudioSource* source, uint32 bufferId) +void AudioBackendNone::Source_QueueBuffer(uint32 sourceID, uint32 bufferID) { } -void AudioBackendNone::Source_DequeueProcessedBuffers(AudioSource* source) +void AudioBackendNone::Source_DequeueProcessedBuffers(uint32 sourceID) { } @@ -115,11 +106,11 @@ uint32 AudioBackendNone::Buffer_Create() return 1; } -void AudioBackendNone::Buffer_Delete(uint32 bufferId) +void AudioBackendNone::Buffer_Delete(uint32 bufferID) { } -void AudioBackendNone::Buffer_Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) +void AudioBackendNone::Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) { } diff --git a/Source/Engine/Audio/None/AudioBackendNone.h b/Source/Engine/Audio/None/AudioBackendNone.h index 5a28331b6..6ceb3149e 100644 --- a/Source/Engine/Audio/None/AudioBackendNone.h +++ b/Source/Engine/Audio/None/AudioBackendNone.h @@ -17,30 +17,28 @@ public: void Listener_VelocityChanged(const Vector3& velocity) override; void Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) override; void Listener_ReinitializeAll() override; - void Source_OnAdd(AudioSource* source) override; - void Source_OnRemove(AudioSource* source) override; - void Source_VelocityChanged(AudioSource* source) override; - void Source_TransformChanged(AudioSource* source) override; - void Source_VolumeChanged(AudioSource* source) override; - void Source_PitchChanged(AudioSource* source) override; - void Source_PanChanged(AudioSource* source) override; - void Source_IsLoopingChanged(AudioSource* source) override; - void Source_SpatialSetupChanged(AudioSource* source) override; - void Source_ClipLoaded(AudioSource* source) override; - void Source_Cleanup(AudioSource* source) override; - void Source_Play(AudioSource* source) override; - void Source_Pause(AudioSource* source) override; - void Source_Stop(AudioSource* source) override; - void 
Source_SetCurrentBufferTime(AudioSource* source, float value) override; - float Source_GetCurrentBufferTime(const AudioSource* source) override; - void Source_SetNonStreamingBuffer(AudioSource* source) override; - void Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) override; - void Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) override; - void Source_QueueBuffer(AudioSource* source, uint32 bufferId) override; - void Source_DequeueProcessedBuffers(AudioSource* source) override; + uint32 Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) override; + void Source_Remove(uint32 sourceID) override; + void Source_VelocityChanged(uint32 sourceID, const Vector3& velocity) override; + void Source_TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) override; + void Source_VolumeChanged(uint32 sourceID, float volume) override; + void Source_PitchChanged(uint32 sourceID, float pitch) override; + void Source_PanChanged(uint32 sourceID, float pan) override; + void Source_IsLoopingChanged(uint32 sourceID, bool loop) override; + void Source_SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) override; + void Source_Play(uint32 sourceID) override; + void Source_Pause(uint32 sourceID) override; + void Source_Stop(uint32 sourceID) override; + void Source_SetCurrentBufferTime(uint32 sourceID, float value) override; + float Source_GetCurrentBufferTime(uint32 sourceID) override; + void Source_SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) override; + void Source_GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) override; + void Source_GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) override; + void Source_QueueBuffer(uint32 
sourceID, uint32 bufferID) override; + void Source_DequeueProcessedBuffers(uint32 sourceID) override; uint32 Buffer_Create() override; - void Buffer_Delete(uint32 bufferId) override; - void Buffer_Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) override; + void Buffer_Delete(uint32 bufferID) override; + void Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) override; const Char* Base_Name() override; FeatureFlags Base_Features() override; void Base_OnActiveDeviceChanged() override; diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp index 77d8f43c9..d29e3a31b 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp @@ -5,6 +5,7 @@ #include "AudioBackendOAL.h" #include "Engine/Platform/StringUtils.h" #include "Engine/Core/Log.h" +#include "Engine/Core/Collections/Dictionary.h" #include "Engine/Tools/AudioTool/AudioTool.h" #include "Engine/Engine/Units.h" #include "Engine/Profiler/ProfilerCPU.h" @@ -42,6 +43,8 @@ namespace ALC ALCdevice* Device = nullptr; ALCcontext* Context = nullptr; AudioBackend::FeatureFlags Features = AudioBackend::FeatureFlags::None; + CriticalSection Locker; + Dictionary SourceIDtoFormat; bool IsExtensionSupported(const char* extension) { @@ -75,32 +78,28 @@ namespace ALC namespace Source { - void Rebuild(AudioSource* source) + void Rebuild(uint32& sourceID, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) { - ASSERT(source->SourceID == 0); - const bool is3D = source->Is3D(); - const bool loop = source->GetIsLooping() && !source->UseStreaming(); - - uint32 sourceID = 0; + ASSERT_LOW_LAYER(sourceID == 0); alGenSources(1, &sourceID); - source->SourceID = sourceID; + ASSERT_LOW_LAYER(sourceID != 0); - alSourcef(sourceID, AL_GAIN, source->GetVolume()); - alSourcef(sourceID, 
AL_PITCH, source->GetPitch()); + alSourcef(sourceID, AL_GAIN, volume); + alSourcef(sourceID, AL_PITCH, pitch); alSourcef(sourceID, AL_SEC_OFFSET, 0.0f); alSourcei(sourceID, AL_LOOPING, loop); - alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); + alSourcei(sourceID, AL_SOURCE_RELATIVE, !spatial); alSourcei(sourceID, AL_BUFFER, 0); - if (is3D) + if (spatial) { #ifdef AL_SOFT_source_spatialize alSourcei(sourceID, AL_SOURCE_SPATIALIZE_SOFT, AL_TRUE); #endif - alSourcef(sourceID, AL_ROLLOFF_FACTOR, source->GetAttenuation()); - alSourcef(sourceID, AL_DOPPLER_FACTOR, source->GetDopplerFactor()); - alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(source->GetMinDistance())); - alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(source->GetPosition())); - alSource3f(sourceID, AL_VELOCITY, FLAX_VEL_TO_OAL(source->GetVelocity())); + alSourcef(sourceID, AL_ROLLOFF_FACTOR, attenuation); + alSourcef(sourceID, AL_DOPPLER_FACTOR, doppler); + alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(minDistance)); + alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(position)); + alSource3f(sourceID, AL_VELOCITY, FLAX_VEL_TO_OAL(Vector3::Zero)); } else { @@ -111,26 +110,23 @@ namespace ALC alSource3f(sourceID, AL_VELOCITY, 0.0f, 0.0f, 0.0f); } #ifdef AL_EXT_STEREO_ANGLES - const float panAngle = source->GetPan() * PI_HALF; + const float panAngle = pan * PI_HALF; const ALfloat panAngles[2] = { (ALfloat)(PI / 6.0 - panAngle), (ALfloat)(-PI / 6.0 - panAngle) }; // Angles are specified counter-clockwise in radians alSourcefv(sourceID, AL_STEREO_ANGLES, panAngles); #endif - - // Restore state after Cleanup - source->Restore(); } } - void RebuildContext(bool isChangingDevice) + struct AudioSourceState + { + AudioSource::States State; + float Time; + }; + + void RebuildContext(const Array& states) { LOG(Info, "Rebuilding audio contexts"); - if (!isChangingDevice) - { - for (AudioSource* source : Audio::Sources) - source->Cleanup(); - } - ClearContext(); if (Device == nullptr) @@ 
-150,8 +146,39 @@ namespace ALC for (AudioListener* listener : Audio::Listeners) Listener::Rebuild(listener); - for (AudioSource* source : Audio::Sources) - Source::Rebuild(source); + for (int32 i = 0; i < states.Count(); i++) + { + AudioSource* source = Audio::Sources[i]; + Source::Rebuild(source->SourceID, source->GetPosition(), source->GetOrientation(), source->GetVolume(), source->GetPitch(), source->GetPan(), source->GetIsLooping() && !source->UseStreaming(), source->Is3D(), source->GetAttenuation(), source->GetMinDistance(), source->GetDopplerFactor()); + + if (states.HasItems()) + { + // Restore playback state + auto& state = states[i]; + if (state.State != AudioSource::States::Stopped) + source->Play(); + if (state.State == AudioSource::States::Paused) + source->Pause(); + if (state.State != AudioSource::States::Stopped) + source->SetTime(state.Time); + } + } + } + + void RebuildContext(bool isChangingDevice) + { + Array states; + if (!isChangingDevice) + { + states.EnsureCapacity(Audio::Sources.Count()); + for (AudioSource* source : Audio::Sources) + { + states.Add({ source->GetState(), source->GetTime() }); + source->Stop(); + } + } + + RebuildContext(states); } } @@ -269,74 +296,76 @@ void AudioBackendOAL::Listener_ReinitializeAll() ALC::RebuildContext(false); } -void AudioBackendOAL::Source_OnAdd(AudioSource* source) +uint32 AudioBackendOAL::Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) { - ALC::Source::Rebuild(source); + uint32 sourceID = 0; + ALC::Source::Rebuild(sourceID, position, orientation, volume, pitch, pan, loop, spatial, attenuation, minDistance, doppler); + + // Cache audio data format assigned on source (used in Source_GetCurrentBufferTime) + ALC::Locker.Lock(); + ALC::SourceIDtoFormat[sourceID] = format; + ALC::Locker.Unlock(); + + return sourceID; } -void 
AudioBackendOAL::Source_OnRemove(AudioSource* source) +void AudioBackendOAL::Source_Remove(uint32 sourceID) { - source->Cleanup(); + alSourcei(sourceID, AL_BUFFER, 0); + ALC_CHECK_ERROR(alSourcei); + alDeleteSources(1, &sourceID); + ALC_CHECK_ERROR(alDeleteSources); + + ALC::Locker.Lock(); + ALC::SourceIDtoFormat.Remove(sourceID); + ALC::Locker.Unlock(); } -void AudioBackendOAL::Source_VelocityChanged(AudioSource* source) +void AudioBackendOAL::Source_VelocityChanged(uint32 sourceID, const Vector3& velocity) { - if (!source->Is3D()) - return; - const uint32 sourceID = source->SourceID; - alSource3f(sourceID, AL_VELOCITY, FLAX_VEL_TO_OAL(source->GetVelocity())); + alSource3f(sourceID, AL_VELOCITY, FLAX_VEL_TO_OAL(velocity)); } -void AudioBackendOAL::Source_TransformChanged(AudioSource* source) +void AudioBackendOAL::Source_TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) { - if (!source->Is3D()) - return; - const uint32 sourceID = source->SourceID; - alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(source->GetPosition())); + alSource3f(sourceID, AL_POSITION, FLAX_POS_TO_OAL(position)); } -void AudioBackendOAL::Source_VolumeChanged(AudioSource* source) +void AudioBackendOAL::Source_VolumeChanged(uint32 sourceID, float volume) { - const uint32 sourceID = source->SourceID; - alSourcef(sourceID, AL_GAIN, source->GetVolume()); + alSourcef(sourceID, AL_GAIN, volume); } -void AudioBackendOAL::Source_PitchChanged(AudioSource* source) +void AudioBackendOAL::Source_PitchChanged(uint32 sourceID, float pitch) { - const uint32 sourceID = source->SourceID; - alSourcef(sourceID, AL_PITCH, source->GetPitch()); + alSourcef(sourceID, AL_PITCH, pitch); } -void AudioBackendOAL::Source_PanChanged(AudioSource* source) +void AudioBackendOAL::Source_PanChanged(uint32 sourceID, float pan) { #ifdef AL_EXT_STEREO_ANGLES - const float panAngle = source->GetPan() * PI_HALF; + const float panAngle = pan * PI_HALF; const ALfloat panAngles[2] = { 
(ALfloat)(PI / 6.0 - panAngle), (ALfloat)(-PI / 6.0 - panAngle) }; // Angles are specified counter-clockwise in radians - const uint32 sourceID = source->SourceID; alSourcefv(sourceID, AL_STEREO_ANGLES, panAngles); #endif } -void AudioBackendOAL::Source_IsLoopingChanged(AudioSource* source) +void AudioBackendOAL::Source_IsLoopingChanged(uint32 sourceID, bool loop) { - const bool loop = source->GetIsLooping() && !source->UseStreaming(); - const uint32 sourceID = source->SourceID; alSourcei(sourceID, AL_LOOPING, loop); } -void AudioBackendOAL::Source_SpatialSetupChanged(AudioSource* source) +void AudioBackendOAL::Source_SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) { - const bool is3D = source->Is3D(); - const uint32 sourceID = source->SourceID; - alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); - if (is3D) + alSourcei(sourceID, AL_SOURCE_RELATIVE, !spatial); + if (spatial) { #ifdef AL_SOFT_source_spatialize alSourcei(sourceID, AL_SOURCE_SPATIALIZE_SOFT, AL_TRUE); #endif - alSourcef(sourceID, AL_ROLLOFF_FACTOR, source->GetAttenuation()); - alSourcef(sourceID, AL_DOPPLER_FACTOR, source->GetDopplerFactor()); - alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(source->GetMinDistance())); + alSourcef(sourceID, AL_ROLLOFF_FACTOR, attenuation); + alSourcef(sourceID, AL_DOPPLER_FACTOR, doppler); + alSourcef(sourceID, AL_REFERENCE_DISTANCE, FLAX_DST_TO_OAL(minDistance)); } else { @@ -346,46 +375,20 @@ void AudioBackendOAL::Source_SpatialSetupChanged(AudioSource* source) } } -void AudioBackendOAL::Source_ClipLoaded(AudioSource* source) +void AudioBackendOAL::Source_Play(uint32 sourceID) { - if (source->SourceID == 0) - return; - const auto clip = source->Clip.Get(); - const bool is3D = source->Is3D(); - const bool loop = source->GetIsLooping() && !clip->IsStreamable(); - - const uint32 sourceID = source->SourceID; - alSourcei(sourceID, AL_SOURCE_RELATIVE, !is3D); - alSourcei(sourceID, AL_LOOPING, loop); -} - 
-void AudioBackendOAL::Source_Cleanup(AudioSource* source) -{ - const uint32 sourceID = source->SourceID; - alSourcei(sourceID, AL_BUFFER, 0); - ALC_CHECK_ERROR(alSourcei); - alDeleteSources(1, &sourceID); - ALC_CHECK_ERROR(alDeleteSources); -} - -void AudioBackendOAL::Source_Play(AudioSource* source) -{ - const uint32 sourceID = source->SourceID; alSourcePlay(sourceID); ALC_CHECK_ERROR(alSourcePlay); } -void AudioBackendOAL::Source_Pause(AudioSource* source) +void AudioBackendOAL::Source_Pause(uint32 sourceID) { - const uint32 sourceID = source->SourceID; alSourcePause(sourceID); ALC_CHECK_ERROR(alSourcePause); } -void AudioBackendOAL::Source_Stop(AudioSource* source) +void AudioBackendOAL::Source_Stop(uint32 sourceID) { - const uint32 sourceID = source->SourceID; - // Stop and rewind alSourceRewind(sourceID); ALC_CHECK_ERROR(alSourceRewind); @@ -396,67 +399,61 @@ void AudioBackendOAL::Source_Stop(AudioSource* source) ALC_CHECK_ERROR(alSourcei); } -void AudioBackendOAL::Source_SetCurrentBufferTime(AudioSource* source, float value) +void AudioBackendOAL::Source_SetCurrentBufferTime(uint32 sourceID, float value) { - const uint32 sourceID = source->SourceID; alSourcef(sourceID, AL_SEC_OFFSET, value); } -float AudioBackendOAL::Source_GetCurrentBufferTime(const AudioSource* source) +float AudioBackendOAL::Source_GetCurrentBufferTime(uint32 sourceID) { #if 0 float time; - alGetSourcef(source->SourceID, AL_SEC_OFFSET, &time); + alGetSourcef(sourceID, AL_SEC_OFFSET, &time); #else - ASSERT(source->Clip && source->Clip->IsLoaded()); - const AudioDataInfo& clipInfo = source->Clip->AudioHeader.Info; + ALC::Locker.Lock(); + AudioDataInfo clipInfo = ALC::SourceIDtoFormat[sourceID]; + ALC::Locker.Unlock(); ALint samplesPlayed; - alGetSourcei(source->SourceID, AL_SAMPLE_OFFSET, &samplesPlayed); + alGetSourcei(sourceID, AL_SAMPLE_OFFSET, &samplesPlayed); const uint32 totalSamples = clipInfo.NumSamples / clipInfo.NumChannels; - const float time = (samplesPlayed % totalSamples) / 
static_cast(Math::Max(1U, clipInfo.SampleRate)); + if (totalSamples > 0) + samplesPlayed %= totalSamples; + const float time = samplesPlayed / static_cast(Math::Max(1U, clipInfo.SampleRate)); #endif - return time; } -void AudioBackendOAL::Source_SetNonStreamingBuffer(AudioSource* source) +void AudioBackendOAL::Source_SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) { - const uint32 bufferId = source->Clip->Buffers[0]; - const uint32 sourceID = source->SourceID; - alSourcei(sourceID, AL_BUFFER, bufferId); + alSourcei(sourceID, AL_BUFFER, bufferID); ALC_CHECK_ERROR(alSourcei); } -void AudioBackendOAL::Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) +void AudioBackendOAL::Source_GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) { // Check the first context only - const uint32 sourceID = source->SourceID; alGetSourcei(sourceID, AL_BUFFERS_PROCESSED, &processedBuffersCount); ALC_CHECK_ERROR(alGetSourcei); } -void AudioBackendOAL::Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) +void AudioBackendOAL::Source_GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) { // Check the first context only - const uint32 sourceID = source->SourceID; alGetSourcei(sourceID, AL_BUFFERS_QUEUED, &queuedBuffersCount); ALC_CHECK_ERROR(alGetSourcei); } -void AudioBackendOAL::Source_QueueBuffer(AudioSource* source, uint32 bufferId) +void AudioBackendOAL::Source_QueueBuffer(uint32 sourceID, uint32 bufferID) { - const uint32 sourceID = source->SourceID; - // Queue new buffer - alSourceQueueBuffers(sourceID, 1, &bufferId); + alSourceQueueBuffers(sourceID, 1, &bufferID); ALC_CHECK_ERROR(alSourceQueueBuffers); } -void AudioBackendOAL::Source_DequeueProcessedBuffers(AudioSource* source) +void AudioBackendOAL::Source_DequeueProcessedBuffers(uint32 sourceID) { - ALuint buffers[AUDIO_MAX_SOURCE_BUFFERS]; - const uint32 sourceID = source->SourceID; int32 numProcessedBuffers; + ALuint 
buffers[AUDIO_MAX_SOURCE_BUFFERS]; alGetSourcei(sourceID, AL_BUFFERS_PROCESSED, &numProcessedBuffers); alSourceUnqueueBuffers(sourceID, numProcessedBuffers, buffers); ALC_CHECK_ERROR(alSourceUnqueueBuffers); @@ -464,19 +461,19 @@ void AudioBackendOAL::Source_DequeueProcessedBuffers(AudioSource* source) uint32 AudioBackendOAL::Buffer_Create() { - uint32 bufferId; - alGenBuffers(1, &bufferId); + uint32 bufferID; + alGenBuffers(1, &bufferID); ALC_CHECK_ERROR(alGenBuffers); - return bufferId; + return bufferID; } -void AudioBackendOAL::Buffer_Delete(uint32 bufferId) +void AudioBackendOAL::Buffer_Delete(uint32 bufferID) { - alDeleteBuffers(1, &bufferId); + alDeleteBuffers(1, &bufferID); ALC_CHECK_ERROR(alDeleteBuffers); } -void AudioBackendOAL::Buffer_Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) +void AudioBackendOAL::Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) { PROFILE_CPU(); @@ -495,7 +492,7 @@ void AudioBackendOAL::Buffer_Write(uint32 bufferId, byte* samples, const AudioDa AudioTool::ConvertToFloat(samples, info.BitDepth, sampleBufferFloat, info.NumSamples); format = GetOpenALBufferFormat(info.NumChannels, 32); - alBufferData(bufferId, format, sampleBufferFloat, bufferSize, info.SampleRate); + alBufferData(bufferID, format, sampleBufferFloat, bufferSize, info.SampleRate); ALC_CHECK_ERROR(alBufferData); Allocator::Free(sampleBufferFloat); } @@ -507,7 +504,7 @@ void AudioBackendOAL::Buffer_Write(uint32 bufferId, byte* samples, const AudioDa AudioTool::ConvertBitDepth(samples, info.BitDepth, sampleBuffer16, 16, info.NumSamples); format = GetOpenALBufferFormat(info.NumChannels, 16); - alBufferData(bufferId, format, sampleBuffer16, bufferSize, info.SampleRate); + alBufferData(bufferID, format, sampleBuffer16, bufferSize, info.SampleRate); ALC_CHECK_ERROR(alBufferData); Allocator::Free(sampleBuffer16); } @@ -520,13 +517,13 @@ void AudioBackendOAL::Buffer_Write(uint32 bufferId, byte* samples, const AudioDa for (uint32 i = 0; 
i < info.NumSamples; i++) sampleBuffer[i] = ((int8*)samples)[i] + 128; - alBufferData(bufferId, format, sampleBuffer, bufferSize, info.SampleRate); + alBufferData(bufferID, format, sampleBuffer, bufferSize, info.SampleRate); ALC_CHECK_ERROR(alBufferData); Allocator::Free(sampleBuffer); } else if (format) { - alBufferData(bufferId, format, samples, info.NumSamples * (info.BitDepth / 8), info.SampleRate); + alBufferData(bufferID, format, samples, info.NumSamples * (info.BitDepth / 8), info.SampleRate); ALC_CHECK_ERROR(alBufferData); } } @@ -543,7 +540,7 @@ void AudioBackendOAL::Buffer_Write(uint32 bufferId, byte* samples, const AudioDa AudioTool::ConvertBitDepth(samples, info.BitDepth, sampleBuffer32, 32, info.NumSamples); format = GetOpenALBufferFormat(info.NumChannels, 32); - alBufferData(bufferId, format, sampleBuffer32, bufferSize, info.SampleRate); + alBufferData(bufferID, format, sampleBuffer32, bufferSize, info.SampleRate); ALC_CHECK_ERROR(alBufferData); Allocator::Free(sampleBuffer32); @@ -558,14 +555,14 @@ void AudioBackendOAL::Buffer_Write(uint32 bufferId, byte* samples, const AudioDa sampleBuffer[i] = ((int8*)samples)[i] + 128; format = GetOpenALBufferFormat(info.NumChannels, 16); - alBufferData(bufferId, format, sampleBuffer, bufferSize, info.SampleRate); + alBufferData(bufferID, format, sampleBuffer, bufferSize, info.SampleRate); ALC_CHECK_ERROR(alBufferData); Allocator::Free(sampleBuffer); } else if (format) { - alBufferData(bufferId, format, samples, info.NumSamples * (info.BitDepth / 8), info.SampleRate); + alBufferData(bufferID, format, samples, info.NumSamples * (info.BitDepth / 8), info.SampleRate); ALC_CHECK_ERROR(alBufferData); } } @@ -589,8 +586,18 @@ AudioBackend::FeatureFlags AudioBackendOAL::Base_Features() void AudioBackendOAL::Base_OnActiveDeviceChanged() { // Cleanup + Array states; + states.EnsureCapacity(Audio::Sources.Count()); for (AudioSource* source : Audio::Sources) - source->Cleanup(); + { + states.Add({ source->GetState(), 
source->GetTime() }); + source->Stop(); + if (source->SourceID) + { + Source_Remove(source->SourceID); + source->SourceID = 0; + } + } ALC::ClearContext(); if (ALC::Device != nullptr) { @@ -608,7 +615,7 @@ void AudioBackendOAL::Base_OnActiveDeviceChanged() } // Setup - ALC::RebuildContext(true); + ALC::RebuildContext(states); } void AudioBackendOAL::Base_SetDopplerFactor(float value) diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.h b/Source/Engine/Audio/OpenAL/AudioBackendOAL.h index 3375251a4..258f5e967 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.h +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.h @@ -17,30 +17,28 @@ public: void Listener_VelocityChanged(const Vector3& velocity) override; void Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) override; void Listener_ReinitializeAll() override; - void Source_OnAdd(AudioSource* source) override; - void Source_OnRemove(AudioSource* source) override; - void Source_VelocityChanged(AudioSource* source) override; - void Source_TransformChanged(AudioSource* source) override; - void Source_VolumeChanged(AudioSource* source) override; - void Source_PitchChanged(AudioSource* source) override; - void Source_PanChanged(AudioSource* source) override; - void Source_IsLoopingChanged(AudioSource* source) override; - void Source_SpatialSetupChanged(AudioSource* source) override; - void Source_ClipLoaded(AudioSource* source) override; - void Source_Cleanup(AudioSource* source) override; - void Source_Play(AudioSource* source) override; - void Source_Pause(AudioSource* source) override; - void Source_Stop(AudioSource* source) override; - void Source_SetCurrentBufferTime(AudioSource* source, float value) override; - float Source_GetCurrentBufferTime(const AudioSource* source) override; - void Source_SetNonStreamingBuffer(AudioSource* source) override; - void Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) override; - void 
Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) override; - void Source_QueueBuffer(AudioSource* source, uint32 bufferId) override; - void Source_DequeueProcessedBuffers(AudioSource* source) override; + uint32 Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) override; + void Source_Remove(uint32 sourceID) override; + void Source_VelocityChanged(uint32 sourceID, const Vector3& velocity) override; + void Source_TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) override; + void Source_VolumeChanged(uint32 sourceID, float volume) override; + void Source_PitchChanged(uint32 sourceID, float pitch) override; + void Source_PanChanged(uint32 sourceID, float pan) override; + void Source_IsLoopingChanged(uint32 sourceID, bool loop) override; + void Source_SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) override; + void Source_Play(uint32 sourceID) override; + void Source_Pause(uint32 sourceID) override; + void Source_Stop(uint32 sourceID) override; + void Source_SetCurrentBufferTime(uint32 sourceID, float value) override; + float Source_GetCurrentBufferTime(uint32 sourceID) override; + void Source_SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) override; + void Source_GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) override; + void Source_GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) override; + void Source_QueueBuffer(uint32 sourceID, uint32 bufferID) override; + void Source_DequeueProcessedBuffers(uint32 sourceID) override; uint32 Buffer_Create() override; - void Buffer_Delete(uint32 bufferId) override; - void Buffer_Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) override; + void Buffer_Delete(uint32 bufferID) override; + 
void Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) override; const Char* Base_Name() override; FeatureFlags Base_Features() override; void Base_OnActiveDeviceChanged() override; diff --git a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp index 4f24013e1..4c6f93947 100644 --- a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp +++ b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp @@ -8,7 +8,6 @@ #include "Engine/Core/Collections/ChunkedArray.h" #include "Engine/Core/Log.h" #include "Engine/Audio/Audio.h" -#include "Engine/Audio/AudioSource.h" #include "Engine/Threading/Threading.h" #if PLATFORM_WINDOWS @@ -76,7 +75,7 @@ namespace XAudio2 } public: - AudioSource* Source; + uint32 SourceID; void PeekSamples(); }; @@ -85,6 +84,7 @@ namespace XAudio2 { IXAudio2SourceVoice* Voice; WAVEFORMATEX Format; + AudioDataInfo Info; XAUDIO2_SEND_DESCRIPTOR Destination; float StartTimeForQueueBuffer; float LastBufferStartTime; @@ -93,6 +93,8 @@ namespace XAudio2 int32 Channels; bool IsDirty; bool IsPlaying; + bool IsLoop; + uint32 LastBufferID; VoiceCallback Callback; Source() @@ -112,6 +114,8 @@ namespace XAudio2 IsDirty = false; Is3D = false; IsPlaying = false; + IsLoop = false; + LastBufferID = 0; LastBufferStartSamplesPlayed = 0; BuffersProcessed = 0; } @@ -120,17 +124,6 @@ namespace XAudio2 { return Voice == nullptr; } - - void UpdateTransform(const AudioSource* source) - { - Position = source->GetPosition(); - Orientation = source->GetOrientation(); - } - - void UpdateVelocity(const AudioSource* source) - { - Velocity = source->GetVelocity(); - } }; struct Buffer @@ -166,11 +159,11 @@ namespace XAudio2 ChunkedArray Buffers; // TODO: use ChunkedArray for better performance or use buffers pool? 
EngineCallback Callback; - Source* GetSource(const AudioSource* source) + Source* GetSource(uint32 sourceID) { - if (source->SourceID == 0) + if (sourceID == 0) return nullptr; - return &Sources[source->SourceID - 1]; // 0 is invalid ID so shift them + return &Sources[sourceID - 1]; // 0 is invalid ID so shift them } void MarkAllDirty() @@ -178,9 +171,9 @@ namespace XAudio2 ForceDirty = true; } - void QueueBuffer(Source* aSource, const AudioSource* source, const int32 bufferId, XAUDIO2_BUFFER& buffer) + void QueueBuffer(Source* aSource, const int32 bufferID, XAUDIO2_BUFFER& buffer) { - Buffer* aBuffer = Buffers[bufferId - 1]; + Buffer* aBuffer = Buffers[bufferID - 1]; buffer.pAudioData = aBuffer->Data.Get(); buffer.AudioBytes = aBuffer->Data.Count(); @@ -200,14 +193,14 @@ namespace XAudio2 void VoiceCallback::OnBufferEnd(void* pBufferContext) { - auto aSource = GetSource(Source); + auto aSource = GetSource(SourceID); if (aSource->IsPlaying) aSource->BuffersProcessed++; } void VoiceCallback::PeekSamples() { - auto aSource = GetSource(Source); + auto aSource = GetSource(SourceID); XAUDIO2_VOICE_STATE state; aSource->Voice->GetState(&state); aSource->LastBufferStartSamplesPlayed = state.SamplesPlayed; @@ -216,7 +209,7 @@ namespace XAudio2 void AudioBackendXAudio2::Listener_Reset() { - XAudio2::Listener->Reset(); + XAudio2::Listener.Reset(); XAudio2::MarkAllDirty(); } @@ -238,17 +231,13 @@ void AudioBackendXAudio2::Listener_ReinitializeAll() // TODO: Implement XAudio2 reinitialization; read HRTF audio value from Audio class } -void AudioBackendXAudio2::Source_OnAdd(AudioSource* source) +uint32 AudioBackendXAudio2::Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) { - // Skip if has no clip (needs audio data to create a source - needs data format information) - if (source->Clip == nullptr || 
!source->Clip->IsLoaded()) - return; - auto clip = source->Clip.Get(); ScopeLock lock(XAudio2::Locker); // Get first free source XAudio2::Source* aSource = nullptr; - uint32 sourceID; + uint32 sourceID = 0; for (int32 i = 0; i < XAudio2::Sources.Count(); i++) { if (XAudio2::Sources[i].IsFree()) @@ -266,115 +255,124 @@ void AudioBackendXAudio2::Source_OnAdd(AudioSource* source) XAudio2::Sources.Add(src); aSource = &XAudio2::Sources[sourceID]; } + sourceID++; // 0 is invalid ID so shift them // Initialize audio data format information (from clip) - const auto& header = clip->AudioHeader; - auto& format = aSource->Format; - format.wFormatTag = WAVE_FORMAT_PCM; - format.nChannels = clip->Is3D() ? 1 : header.Info.NumChannels; // 3d audio is always mono (AudioClip auto-converts before buffer write if FeatureFlags::SpatialMultiChannel is unset) - format.nSamplesPerSec = header.Info.SampleRate; - format.wBitsPerSample = header.Info.BitDepth; - format.nBlockAlign = (WORD)(format.nChannels * (format.wBitsPerSample / 8)); - format.nAvgBytesPerSec = format.nSamplesPerSec * format.nBlockAlign; - format.cbSize = 0; + aSource->Info = format; + auto& aFormat = aSource->Format; + aFormat.wFormatTag = WAVE_FORMAT_PCM; + aFormat.nChannels = spatial ? 
1 : format.NumChannels; // 3d audio is always mono (AudioClip auto-converts before buffer write if FeatureFlags::SpatialMultiChannel is unset) + aFormat.nSamplesPerSec = format.SampleRate; + aFormat.wBitsPerSample = format.BitDepth; + aFormat.nBlockAlign = (WORD)(aFormat.nChannels * (aFormat.wBitsPerSample / 8)); + aFormat.nAvgBytesPerSec = aFormat.nSamplesPerSec * aFormat.nBlockAlign; + aFormat.cbSize = 0; // Setup dry effect aSource->Destination.pOutputVoice = XAudio2::MasteringVoice; // Create voice - const XAUDIO2_VOICE_SENDS sendList = - { - 1, - &aSource->Destination - }; + const XAUDIO2_VOICE_SENDS sendList = { 1, &aSource->Destination }; HRESULT hr = XAudio2::Instance->CreateSourceVoice(&aSource->Voice, &aSource->Format, 0, 2.0f, &aSource->Callback, &sendList); XAUDIO2_CHECK_ERROR(CreateSourceVoice); if (FAILED(hr)) - return; - - source->SourceID = sourceID + 1; // 0 is invalid ID so shift them + return 0; // Prepare source state - aSource->Callback.Source = source; + aSource->Callback.SourceID = sourceID; aSource->IsDirty = true; - aSource->Is3D = source->Is3D(); - aSource->Pitch = source->GetPitch(); - aSource->Pan = source->GetPan(); - aSource->DopplerFactor = source->GetDopplerFactor(); - aSource->Volume = source->GetVolume(); - aSource->MinDistance = source->GetMinDistance(); - aSource->Attenuation = source->GetAttenuation(); - aSource->Channels = format.nChannels; - aSource->UpdateTransform(source); - aSource->UpdateVelocity(source); - hr = aSource->Voice->SetVolume(source->GetVolume()); + aSource->IsLoop = loop; + aSource->Is3D = spatial; + aSource->Pitch = pitch; + aSource->Pan = pan; + aSource->DopplerFactor = doppler; + aSource->Volume = volume; + aSource->MinDistance = minDistance; + aSource->Attenuation = attenuation; + aSource->Channels = aFormat.nChannels; + aSource->Position = position; + aSource->Orientation = orientation; + aSource->Velocity = Vector3::Zero; + hr = aSource->Voice->SetVolume(volume); XAUDIO2_CHECK_ERROR(SetVolume); - 
source->Restore(); + return sourceID; } -void AudioBackendXAudio2::Source_OnRemove(AudioSource* source) +void AudioBackendXAudio2::Source_Remove(uint32 sourceID) { ScopeLock lock(XAudio2::Locker); - source->Cleanup(); + auto aSource = XAudio2::GetSource(sourceID); + if (!aSource) + return; + + // Free source + if (aSource->Voice) + { + aSource->Voice->DestroyVoice(); + } + aSource->Init(); } -void AudioBackendXAudio2::Source_VelocityChanged(AudioSource* source) +void AudioBackendXAudio2::Source_VelocityChanged(uint32 sourceID, const Vector3& velocity) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource) { - aSource->UpdateVelocity(source); + aSource->Velocity = velocity; aSource->IsDirty = true; } } -void AudioBackendXAudio2::Source_TransformChanged(AudioSource* source) +void AudioBackendXAudio2::Source_TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource) { - aSource->UpdateTransform(source); + aSource->Position = position; + aSource->Orientation = orientation; aSource->IsDirty = true; } } -void AudioBackendXAudio2::Source_VolumeChanged(AudioSource* source) +void AudioBackendXAudio2::Source_VolumeChanged(uint32 sourceID, float volume) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource && aSource->Voice) { - aSource->Volume = source->GetVolume(); - const HRESULT hr = aSource->Voice->SetVolume(source->GetVolume()); + aSource->Volume = volume; + const HRESULT hr = aSource->Voice->SetVolume(volume); XAUDIO2_CHECK_ERROR(SetVolume); } } -void AudioBackendXAudio2::Source_PitchChanged(AudioSource* source) +void AudioBackendXAudio2::Source_PitchChanged(uint32 sourceID, float pitch) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource) { - aSource->Pitch = 
source->GetPitch(); + aSource->Pitch = pitch; aSource->IsDirty = true; } } -void AudioBackendXAudio2::Source_PanChanged(AudioSource* source) +void AudioBackendXAudio2::Source_PanChanged(uint32 sourceID, float pan) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource) { - aSource->Pan = source->GetPan(); + aSource->Pan = pan; aSource->IsDirty = true; } } -void AudioBackendXAudio2::Source_IsLoopingChanged(AudioSource* source) +void AudioBackendXAudio2::Source_IsLoopingChanged(uint32 sourceID, bool loop) { - auto aSource = XAudio2::GetSource(source); + ScopeLock lock(XAudio2::Locker); + auto aSource = XAudio2::GetSource(sourceID); if (!aSource || !aSource->Voice) return; + aSource->IsLoop = loop; // Skip if has no buffers (waiting for data or sth) XAUDIO2_VOICE_STATE state; @@ -382,15 +380,12 @@ void AudioBackendXAudio2::Source_IsLoopingChanged(AudioSource* source) if (state.BuffersQueued == 0) return; - // Looping is defined during buffer submission so reset source buffer (this is called only for non-streamable sources that ue single buffer) - - XAudio2::Locker.Lock(); - const uint32 bufferId = source->Clip->Buffers[0]; - XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferId - 1]; - XAudio2::Locker.Unlock(); + // Looping is defined during buffer submission so reset source buffer (this is called only for non-streamable sources that use a single buffer) + const uint32 bufferID = aSource->LastBufferID; + XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferID - 1]; HRESULT hr; - const bool isPlaying = source->IsActuallyPlayingSth(); + const bool isPlaying = aSource->IsPlaying; if (isPlaying) { hr = aSource->Voice->Stop(); @@ -406,7 +401,7 @@ void AudioBackendXAudio2::Source_IsLoopingChanged(AudioSource* source) XAUDIO2_BUFFER buffer = { 0 }; buffer.pContext = aBuffer; buffer.Flags = XAUDIO2_END_OF_STREAM; - if (source->GetIsLooping()) + if (loop) buffer.LoopCount = XAUDIO2_LOOP_INFINITE; // Restore play position @@ -415,7 
+410,7 @@ void AudioBackendXAudio2::Source_IsLoopingChanged(AudioSource* source) buffer.PlayLength = totalSamples - buffer.PlayBegin; aSource->StartTimeForQueueBuffer = 0; - XAudio2::QueueBuffer(aSource, source, bufferId, buffer); + XAudio2::QueueBuffer(aSource, bufferID, buffer); if (isPlaying) { @@ -424,48 +419,22 @@ void AudioBackendXAudio2::Source_IsLoopingChanged(AudioSource* source) } } -void AudioBackendXAudio2::Source_SpatialSetupChanged(AudioSource* source) +void AudioBackendXAudio2::Source_SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource) { - aSource->Is3D = source->Is3D(); - aSource->MinDistance = source->GetMinDistance(); - aSource->Attenuation = source->GetAttenuation(); - aSource->DopplerFactor = source->GetDopplerFactor(); + aSource->Is3D = spatial; + aSource->MinDistance = minDistance; + aSource->Attenuation = attenuation; + aSource->DopplerFactor = doppler; aSource->IsDirty = true; } } -void AudioBackendXAudio2::Source_ClipLoaded(AudioSource* source) +void AudioBackendXAudio2::Source_Play(uint32 sourceID) { - ScopeLock lock(XAudio2::Locker); - auto aSource = XAudio2::GetSource(source); - if (!aSource) - { - // Register source if clip was missing - Source_OnAdd(source); - } -} - -void AudioBackendXAudio2::Source_Cleanup(AudioSource* source) -{ - ScopeLock lock(XAudio2::Locker); - auto aSource = XAudio2::GetSource(source); - if (!aSource) - return; - - // Free source - if (aSource->Voice) - { - aSource->Voice->DestroyVoice(); - } - aSource->Init(); -} - -void AudioBackendXAudio2::Source_Play(AudioSource* source) -{ - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource && aSource->Voice && !aSource->IsPlaying) { // Play @@ -475,9 +444,9 @@ void AudioBackendXAudio2::Source_Play(AudioSource* source) } } -void 
AudioBackendXAudio2::Source_Pause(AudioSource* source) +void AudioBackendXAudio2::Source_Pause(uint32 sourceID) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource && aSource->Voice && aSource->IsPlaying) { // Pause @@ -487,9 +456,9 @@ void AudioBackendXAudio2::Source_Pause(AudioSource* source) } } -void AudioBackendXAudio2::Source_Stop(AudioSource* source) +void AudioBackendXAudio2::Source_Stop(uint32 sourceID) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource && aSource->Voice) { aSource->StartTimeForQueueBuffer = 0.0f; @@ -509,9 +478,9 @@ void AudioBackendXAudio2::Source_Stop(AudioSource* source) } } -void AudioBackendXAudio2::Source_SetCurrentBufferTime(AudioSource* source, float value) +void AudioBackendXAudio2::Source_SetCurrentBufferTime(uint32 sourceID, float value) { - const auto aSource = XAudio2::GetSource(source); + const auto aSource = XAudio2::GetSource(sourceID); if (aSource) { // Store start time so next buffer submitted will start from here (assumes audio is stopped) @@ -519,60 +488,63 @@ void AudioBackendXAudio2::Source_SetCurrentBufferTime(AudioSource* source, float } } -float AudioBackendXAudio2::Source_GetCurrentBufferTime(const AudioSource* source) +float AudioBackendXAudio2::Source_GetCurrentBufferTime(uint32 sourceID) { float time = 0; - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource) { - ASSERT(source->Clip && source->Clip->IsLoaded()); - const auto& clipInfo = source->Clip->AudioHeader.Info; + const auto& clipInfo = aSource->Info; XAUDIO2_VOICE_STATE state; aSource->Voice->GetState(&state); - const uint32 numChannels = clipInfo.NumChannels; - const uint32 totalSamples = clipInfo.NumSamples / numChannels; + const uint32 totalSamples = clipInfo.NumSamples / clipInfo.NumChannels; const uint32 sampleRate = clipInfo.SampleRate; // / clipInfo.NumChannels; - state.SamplesPlayed -= 
aSource->LastBufferStartSamplesPlayed % totalSamples; // Offset by the last buffer start to get time relative to its begin - time = aSource->LastBufferStartTime + (state.SamplesPlayed % totalSamples) / static_cast(Math::Max(1U, sampleRate)); + uint64 lastBufferStartSamplesPlayed = aSource->LastBufferStartSamplesPlayed; + if (totalSamples > 0) + lastBufferStartSamplesPlayed %= totalSamples; + state.SamplesPlayed -= lastBufferStartSamplesPlayed; // Offset by the last buffer start (already wrapped above when totalSamples > 0, so no unguarded modulo by zero here) to get time relative to its begin + if (totalSamples > 0) + state.SamplesPlayed %= totalSamples; + time = aSource->LastBufferStartTime + state.SamplesPlayed / static_cast(Math::Max(1U, sampleRate)); } return time; } -void AudioBackendXAudio2::Source_SetNonStreamingBuffer(AudioSource* source) +void AudioBackendXAudio2::Source_SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (!aSource) return; + aSource->LastBufferID = bufferID; // Use for looping change XAudio2::Locker.Lock(); - const uint32 bufferId = source->Clip->Buffers[0]; - XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferId - 1]; + XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferID - 1]; XAudio2::Locker.Unlock(); XAUDIO2_BUFFER buffer = { 0 }; buffer.pContext = aBuffer; buffer.Flags = XAUDIO2_END_OF_STREAM; - if (source->GetIsLooping()) + if (aSource->IsLoop) buffer.LoopCount = XAUDIO2_LOOP_INFINITE; // Queue single buffer - XAudio2::QueueBuffer(aSource, source, bufferId, buffer); + XAudio2::QueueBuffer(aSource, bufferID, buffer); } -void AudioBackendXAudio2::Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) +void AudioBackendXAudio2::Source_GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) { processedBuffersCount = 0; - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource && aSource->Voice) {
processedBuffersCount = aSource->BuffersProcessed; } } -void AudioBackendXAudio2::Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) +void AudioBackendXAudio2::Source_GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) { queuedBuffersCount = 0; - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource && aSource->Voice) { XAUDIO2_VOICE_STATE state; @@ -581,23 +553,24 @@ void AudioBackendXAudio2::Source_GetQueuedBuffersCount(AudioSource* source, int3 } } -void AudioBackendXAudio2::Source_QueueBuffer(AudioSource* source, uint32 bufferId) +void AudioBackendXAudio2::Source_QueueBuffer(uint32 sourceID, uint32 bufferID) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (!aSource) return; + aSource->LastBufferID = bufferID; // Use for looping change - XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferId - 1]; + XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferID - 1]; XAUDIO2_BUFFER buffer = { 0 }; buffer.pContext = aBuffer; - XAudio2::QueueBuffer(aSource, source, bufferId, buffer); + XAudio2::QueueBuffer(aSource, bufferID, buffer); } -void AudioBackendXAudio2::Source_DequeueProcessedBuffers(AudioSource* source) +void AudioBackendXAudio2::Source_DequeueProcessedBuffers(uint32 sourceID) { - auto aSource = XAudio2::GetSource(source); + auto aSource = XAudio2::GetSource(sourceID); if (aSource && aSource->Voice) { const HRESULT hr = aSource->Voice->FlushSourceBuffers(); @@ -608,7 +581,7 @@ void AudioBackendXAudio2::Source_DequeueProcessedBuffers(AudioSource* source) uint32 AudioBackendXAudio2::Buffer_Create() { - uint32 bufferId; + uint32 bufferID; ScopeLock lock(XAudio2::Locker); // Get first free buffer slot @@ -619,7 +592,7 @@ uint32 AudioBackendXAudio2::Buffer_Create() { aBuffer = New(); XAudio2::Buffers[i] = aBuffer; - bufferId = i + 1; + bufferID = i + 1; break; } } @@ -628,28 +601,28 @@ uint32 AudioBackendXAudio2::Buffer_Create() // Add 
new slot aBuffer = New(); XAudio2::Buffers.Add(aBuffer); - bufferId = XAudio2::Buffers.Count(); + bufferID = XAudio2::Buffers.Count(); } aBuffer->Data.Resize(0); - return bufferId; + return bufferID; } -void AudioBackendXAudio2::Buffer_Delete(uint32 bufferId) +void AudioBackendXAudio2::Buffer_Delete(uint32 bufferID) { ScopeLock lock(XAudio2::Locker); - XAudio2::Buffer*& aBuffer = XAudio2::Buffers[bufferId - 1]; + XAudio2::Buffer*& aBuffer = XAudio2::Buffers[bufferID - 1]; aBuffer->Data.Resize(0); Delete(aBuffer); aBuffer = nullptr; } -void AudioBackendXAudio2::Buffer_Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) +void AudioBackendXAudio2::Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) { CHECK(info.NumChannels <= MAX_INPUT_CHANNELS); XAudio2::Locker.Lock(); - XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferId - 1]; + XAudio2::Buffer* aBuffer = XAudio2::Buffers[bufferID - 1]; XAudio2::Locker.Unlock(); const uint32 samplesLength = info.NumSamples * info.BitDepth / 8; @@ -735,7 +708,6 @@ bool AudioBackendXAudio2::Base_Init() void AudioBackendXAudio2::Base_Update() { // Update dirty voices - const auto listener = XAudio2::GetListener(); float outputMatrix[MAX_CHANNELS_MATRIX_SIZE]; for (int32 i = 0; i < XAudio2::Sources.Count(); i++) { @@ -743,7 +715,7 @@ void AudioBackendXAudio2::Base_Update() if (source.IsFree() || !(source.IsDirty || XAudio2::ForceDirty)) continue; - auto mix = AudioBackendTools::CalculateSoundMix(XAudio2::Settings, *listener, source, XAudio2::Channels); + auto mix = AudioBackendTools::CalculateSoundMix(XAudio2::Settings, XAudio2::Listener, source, XAudio2::Channels); mix.VolumeIntoChannels(); AudioBackendTools::MapChannels(source.Channels, XAudio2::Channels, mix.Channels, outputMatrix); diff --git a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h index 073dbfdc3..56c79bb69 100644 --- a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h +++ 
b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.h @@ -17,30 +17,28 @@ public: void Listener_VelocityChanged(const Vector3& velocity) override; void Listener_TransformChanged(const Vector3& position, const Quaternion& orientation) override; void Listener_ReinitializeAll() override; - void Source_OnAdd(AudioSource* source) override; - void Source_OnRemove(AudioSource* source) override; - void Source_VelocityChanged(AudioSource* source) override; - void Source_TransformChanged(AudioSource* source) override; - void Source_VolumeChanged(AudioSource* source) override; - void Source_PitchChanged(AudioSource* source) override; - void Source_PanChanged(AudioSource* source) override; - void Source_IsLoopingChanged(AudioSource* source) override; - void Source_SpatialSetupChanged(AudioSource* source) override; - void Source_ClipLoaded(AudioSource* source) override; - void Source_Cleanup(AudioSource* source) override; - void Source_Play(AudioSource* source) override; - void Source_Pause(AudioSource* source) override; - void Source_Stop(AudioSource* source) override; - void Source_SetCurrentBufferTime(AudioSource* source, float value) override; - float Source_GetCurrentBufferTime(const AudioSource* source) override; - void Source_SetNonStreamingBuffer(AudioSource* source) override; - void Source_GetProcessedBuffersCount(AudioSource* source, int32& processedBuffersCount) override; - void Source_GetQueuedBuffersCount(AudioSource* source, int32& queuedBuffersCount) override; - void Source_QueueBuffer(AudioSource* source, uint32 bufferId) override; - void Source_DequeueProcessedBuffers(AudioSource* source) override; + uint32 Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) override; + void Source_Remove(uint32 sourceID) override; + void Source_VelocityChanged(uint32 sourceID, const Vector3& velocity) override; + void 
Source_TransformChanged(uint32 sourceID, const Vector3& position, const Quaternion& orientation) override; + void Source_VolumeChanged(uint32 sourceID, float volume) override; + void Source_PitchChanged(uint32 sourceID, float pitch) override; + void Source_PanChanged(uint32 sourceID, float pan) override; + void Source_IsLoopingChanged(uint32 sourceID, bool loop) override; + void Source_SpatialSetupChanged(uint32 sourceID, bool spatial, float attenuation, float minDistance, float doppler) override; + void Source_Play(uint32 sourceID) override; + void Source_Pause(uint32 sourceID) override; + void Source_Stop(uint32 sourceID) override; + void Source_SetCurrentBufferTime(uint32 sourceID, float value) override; + float Source_GetCurrentBufferTime(uint32 sourceID) override; + void Source_SetNonStreamingBuffer(uint32 sourceID, uint32 bufferID) override; + void Source_GetProcessedBuffersCount(uint32 sourceID, int32& processedBuffersCount) override; + void Source_GetQueuedBuffersCount(uint32 sourceID, int32& queuedBuffersCount) override; + void Source_QueueBuffer(uint32 sourceID, uint32 bufferID) override; + void Source_DequeueProcessedBuffers(uint32 sourceID) override; uint32 Buffer_Create() override; - void Buffer_Delete(uint32 bufferId) override; - void Buffer_Write(uint32 bufferId, byte* samples, const AudioDataInfo& info) override; + void Buffer_Delete(uint32 bufferID) override; + void Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) override; const Char* Base_Name() override; FeatureFlags Base_Features() override; void Base_OnActiveDeviceChanged() override; From 4b8970f67400af1b53fa891a8e1bde282ed9bcb8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 6 May 2024 10:41:07 +0200 Subject: [PATCH 052/292] Update assets --- Content/Editor/Camera/M_Camera.flax | 4 ++-- Content/Editor/CubeTexturePreviewMaterial.flax | 4 ++-- Content/Editor/DebugMaterials/DDGIDebugProbes.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Decal.flax | 2 +- 
Content/Editor/DebugMaterials/SingleColor/Particle.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Surface.flax | 4 ++-- .../Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Terrain.flax | 2 +- Content/Editor/DefaultFontMaterial.flax | 4 ++-- Content/Editor/Gizmo/FoliageBrushMaterial.flax | 4 ++-- Content/Editor/Gizmo/Material.flax | 4 ++-- Content/Editor/Gizmo/MaterialWire.flax | 4 ++-- Content/Editor/Gizmo/SelectionOutlineMaterial.flax | 2 +- Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax | 4 ++-- Content/Editor/Highlight Material.flax | 4 ++-- Content/Editor/Icons/IconsMaterial.flax | 4 ++-- Content/Editor/IesProfilePreviewMaterial.flax | 2 +- Content/Editor/Particles/Particle Material Color.flax | 2 +- Content/Editor/Particles/Smoke Material.flax | 2 +- Content/Editor/SpriteMaterial.flax | 4 ++-- Content/Editor/Terrain/Circle Brush Material.flax | 2 +- Content/Editor/Terrain/Highlight Terrain Material.flax | 2 +- Content/Editor/TexturePreviewMaterial.flax | 2 +- Content/Editor/Wires Debug Material.flax | 4 ++-- Content/Engine/DefaultDeformableMaterial.flax | 2 +- Content/Engine/DefaultMaterial.flax | 4 ++-- Content/Engine/DefaultTerrainMaterial.flax | 2 +- Content/Engine/SingleColorMaterial.flax | 4 ++-- Content/Engine/SkyboxMaterial.flax | 4 ++-- Content/Shaders/Quad.flax | 4 ++-- 30 files changed, 48 insertions(+), 48 deletions(-) diff --git a/Content/Editor/Camera/M_Camera.flax b/Content/Editor/Camera/M_Camera.flax index e5c62741b..7a1d035cc 100644 --- a/Content/Editor/Camera/M_Camera.flax +++ b/Content/Editor/Camera/M_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96c714dacb46f6e26f5a50b61bc1cf774422cea8b87eefb971e50ea4040b215c -size 30159 +oid sha256:ec04224ecf3c58c749c902f0ca5419c54896ac17ca48de1e9075afbca23e6a6e +size 30521 diff --git a/Content/Editor/CubeTexturePreviewMaterial.flax b/Content/Editor/CubeTexturePreviewMaterial.flax index 
719f0f126..c5cda3152 100644 --- a/Content/Editor/CubeTexturePreviewMaterial.flax +++ b/Content/Editor/CubeTexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0854958ac0b11cb0af8641828ecaf7dbe82b5dd21d592d9211d87074c83fba39 -size 31874 +oid sha256:80a2b852e2d85fa098e1e402669254b11430acb3ad4a235633582c6ba2ac0914 +size 32236 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index 41a7b07df..24f908bbc 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f1bb84b9ac03d93f2144e44d76f6964290368438ec4e4a4cdf6f42f0698f8f2 -size 41107 +oid sha256:7972dcbec033481c0ace42eb0dff24154f0cdb32b4d039200b54d8be932b9806 +size 41469 diff --git a/Content/Editor/DebugMaterials/SingleColor/Decal.flax b/Content/Editor/DebugMaterials/SingleColor/Decal.flax index 9fdcca698..fd259c950 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Decal.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Decal.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d4d4ca84c767ba0bb1b54f08058820dc08e0070109e758a31069426ba38cbe5 +oid sha256:1e4cd9ca7d54553b805d87d1908d780205b28b52ae6e00dcf70a5aa27d2236be size 7489 diff --git a/Content/Editor/DebugMaterials/SingleColor/Particle.flax b/Content/Editor/DebugMaterials/SingleColor/Particle.flax index e25dfafa3..55d544e59 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Particle.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Particle.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22279958c1a48fc8ffeff65399b48a2370bcbd89218ba5338f0f1a6fbf837800 +oid sha256:6644f415267c6ccfd4277ef633b165cf21d8081833f8ccd456e8c8ae7defe33e size 31681 diff --git a/Content/Editor/DebugMaterials/SingleColor/Surface.flax 
b/Content/Editor/DebugMaterials/SingleColor/Surface.flax index 363a4bd97..1a290abf9 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Surface.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Surface.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3c76781b1c31a1018710e212e0d0f578d4f683ca46f2f4b063fa631f9643651 -size 30055 +oid sha256:3cea6b86ff521ecd437c328888c3d8aa06782705fc5608ca5d1f94b5cb727dc5 +size 30417 diff --git a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax index 93358deef..ced4d8700 100644 --- a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax +++ b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d085a000fa4d2e16992e3dc6beafd68f7b1c31eeb683f5bb9f6a28fa2c469a8 -size 31772 +oid sha256:e9f1d68b6af48224eb2ba618b0770234c98cf902821e9aed9bb2a3308a66190d +size 32134 diff --git a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax index b5940f5d0..95abeb2e7 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4be04d090787f83bd2fe2d6fe932fe3b308831d51aa16bbda832779a87249431 +oid sha256:a044ea32b8bbb570481e4913ab479378805b21d5e7cf215525820041ca9778e4 size 20826 diff --git a/Content/Editor/DefaultFontMaterial.flax b/Content/Editor/DefaultFontMaterial.flax index 4535445cd..dd7dd4187 100644 --- a/Content/Editor/DefaultFontMaterial.flax +++ b/Content/Editor/DefaultFontMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61f4cc3f4d2076ecac4f51a681428366b51eeec9d97b500c4f0e1dd4cd6b2536 -size 30234 +oid sha256:64a5880d4a39025682a5be5b24890d70c2adb6f0a7cb91c1f605003cef7b826c +size 30596 diff --git 
a/Content/Editor/Gizmo/FoliageBrushMaterial.flax b/Content/Editor/Gizmo/FoliageBrushMaterial.flax index 3ce357477..c7b992e69 100644 --- a/Content/Editor/Gizmo/FoliageBrushMaterial.flax +++ b/Content/Editor/Gizmo/FoliageBrushMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2cfd83a91f02c66e7fc3e566a4628d7888b146a14ee7536ad4555c9d2c78c254 -size 37799 +oid sha256:a7bde5eabf7c13d54ca3728c41aab08e77d47b06c5a8e855c7c1b1b7f6ef1649 +size 38161 diff --git a/Content/Editor/Gizmo/Material.flax b/Content/Editor/Gizmo/Material.flax index 6af56e65e..37330d22d 100644 --- a/Content/Editor/Gizmo/Material.flax +++ b/Content/Editor/Gizmo/Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15b45d14202358b368e33274961c9970663a90597ad7c5b4f37f31d925f0d5f1 -size 32376 +oid sha256:a8824519ee184e8ec041f2e3898cb0cbe0a5771e1bc87945fb12426231070bbd +size 32738 diff --git a/Content/Editor/Gizmo/MaterialWire.flax b/Content/Editor/Gizmo/MaterialWire.flax index d56bd8597..d459fefb7 100644 --- a/Content/Editor/Gizmo/MaterialWire.flax +++ b/Content/Editor/Gizmo/MaterialWire.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5572a4037617c2e88bea953c0985e2f0d90a4df3a1a5c02543876c972d118988 -size 31514 +oid sha256:729f0d8845e8fb3feac4cb85ba0eded1da5dbe8b9727d69f94f06a8f7181399b +size 31876 diff --git a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax index 159fa8933..14cafb11c 100644 --- a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax +++ b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e6f1350af69d3d6d0b36b033dd35c22dccd484744cc187961769e2ec6df807b +oid sha256:be0bbc49bc0ff36a7043c84e2c046fccc9e9e07fb82557f64df2155c731ad735 size 16166 diff --git a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax index 
76fb8804e..210f95d9c 100644 --- a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax +++ b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f59c3bf09d03d65ac2a165948830627b8be532a41e049f1a56f49e7f578bb352 -size 31168 +oid sha256:4ff9de800c9936d91b929d702e132f84260f1f4d852927745b46f5b6470b484f +size 31530 diff --git a/Content/Editor/Highlight Material.flax b/Content/Editor/Highlight Material.flax index 8b715b86e..0119b246b 100644 --- a/Content/Editor/Highlight Material.flax +++ b/Content/Editor/Highlight Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95d9c7d46086c09963581d752d595ad99464d4db80e6769de0a574c30e6ee0e8 -size 30161 +oid sha256:b00eb326111e276edc04f82ac55e79aaa763b1db6335582cc2b626d02fc7c86f +size 30523 diff --git a/Content/Editor/Icons/IconsMaterial.flax b/Content/Editor/Icons/IconsMaterial.flax index 713c85bb4..2e26c8088 100644 --- a/Content/Editor/Icons/IconsMaterial.flax +++ b/Content/Editor/Icons/IconsMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa7e2cf4c9a7d0dc8f7d6465ca221468c689b5dbd0399e9b0150a9785286701b -size 30089 +oid sha256:557850506c6354ec27dde4cb747303ae413c4cda607675936d78fb04691abfb7 +size 30451 diff --git a/Content/Editor/IesProfilePreviewMaterial.flax b/Content/Editor/IesProfilePreviewMaterial.flax index d17183c07..ef0b72c07 100644 --- a/Content/Editor/IesProfilePreviewMaterial.flax +++ b/Content/Editor/IesProfilePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ade8f09bce5c137cb9cda74b8bef44eae8b912413dfa1cc32306ecf720fefc6e +oid sha256:6886c10b0e81d55e2c355bb67fc5454d9528ffe3e5b104aa58607e89708f31ba size 18205 diff --git a/Content/Editor/Particles/Particle Material Color.flax b/Content/Editor/Particles/Particle Material Color.flax index 91dcb336c..3d3b6733a 100644 --- a/Content/Editor/Particles/Particle Material Color.flax +++ 
b/Content/Editor/Particles/Particle Material Color.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e323ea10f886fe27064d827e2fabf3ce6cbc2a176080326bd0e03f271d9098c +oid sha256:9364ba9a7dd268b15d94caca9b05eb9552ed3113542b9bd4ec6f556d982c6c81 size 29912 diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index 621ebb304..1175c0659 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e72b5cbd7ca6b968986ac4e40a3a031ca5bf23d7e59d5b9369fcb6a3a481e657 +oid sha256:efb898179f3ce9e2ba6ab7a24194777fd0849003e534e7fd5d0c50447a1608b3 size 37107 diff --git a/Content/Editor/SpriteMaterial.flax b/Content/Editor/SpriteMaterial.flax index c73a6d096..c4dd0be82 100644 --- a/Content/Editor/SpriteMaterial.flax +++ b/Content/Editor/SpriteMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4ae3d583979e95557ebe303ec447e4d4808704b86ec39fa9f7f6692bc8502be -size 31247 +oid sha256:69e13c6df67c185083c69367977ba37b274eaef9d08d3202f036a01d313b670f +size 31609 diff --git a/Content/Editor/Terrain/Circle Brush Material.flax b/Content/Editor/Terrain/Circle Brush Material.flax index c26badef3..0a49faf4d 100644 --- a/Content/Editor/Terrain/Circle Brush Material.flax +++ b/Content/Editor/Terrain/Circle Brush Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ecac8c9ea886e3196312daa5cd9b7302640f8aaafbd7867db4045d3ad7e4a33a +oid sha256:03e69fdfba63e931c034c46f28e882d5bd03318264110974975521bcdaf5bb80 size 27498 diff --git a/Content/Editor/Terrain/Highlight Terrain Material.flax b/Content/Editor/Terrain/Highlight Terrain Material.flax index cc5b03f54..58225f4df 100644 --- a/Content/Editor/Terrain/Highlight Terrain Material.flax +++ b/Content/Editor/Terrain/Highlight Terrain Material.flax @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:65f5766f9f820cf73d91281f71b81b6a1e87d3d2f05941014815b0f172f57cd6 +oid sha256:93e4b8b33d344ebd56762e6ccb75eb66a95635caeabd4465f53a273de7becebd size 20879 diff --git a/Content/Editor/TexturePreviewMaterial.flax b/Content/Editor/TexturePreviewMaterial.flax index c2f9e30e7..8f2a99bba 100644 --- a/Content/Editor/TexturePreviewMaterial.flax +++ b/Content/Editor/TexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec352914d63c8a2df600f4232f54f8c1b603dfbaba2e7b3a409bed1db8075393 +oid sha256:79e0a71beb547e03deb7ef2081e3f25d7ab10109ef64cff5882e1f6240603f17 size 10570 diff --git a/Content/Editor/Wires Debug Material.flax b/Content/Editor/Wires Debug Material.flax index 1112b3e6a..e733d25be 100644 --- a/Content/Editor/Wires Debug Material.flax +++ b/Content/Editor/Wires Debug Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1508dde475cb3119373ef35798445ddbe3d8889c4a4cc9f1a2a475ef3fc3530d -size 30161 +oid sha256:d05de5d7645b414807673b006a93805192c07a50a50a6be365fe522ea4f69304 +size 30523 diff --git a/Content/Engine/DefaultDeformableMaterial.flax b/Content/Engine/DefaultDeformableMaterial.flax index a14bb5b04..056de43fe 100644 --- a/Content/Engine/DefaultDeformableMaterial.flax +++ b/Content/Engine/DefaultDeformableMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2a822ec9b556e3e9083067b45aea2c41d5ad52843437c1b675a3b41029001f4 +oid sha256:a6598cfc4b823c593c4d7bb803a0919e3bd1d0da39f251e17e099afc67686573 size 18524 diff --git a/Content/Engine/DefaultMaterial.flax b/Content/Engine/DefaultMaterial.flax index 744686ab8..8e9b8a19d 100644 --- a/Content/Engine/DefaultMaterial.flax +++ b/Content/Engine/DefaultMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27327918e9de3efed1709d8631d83e50512379b87d3085536c0102a476bfbf3d -size 32080 +oid 
sha256:316320b701074fbc005840741365da69f3fbe0b4e3035b301ed2c1b9178ee940 +size 32442 diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index bb8ba3384..910a9de77 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:352410f2c8ee23a040618101b3f023b82d204fafe690aa83b5991ba95096a46e +oid sha256:39b75d36a61f2060d556271a60ccd6426011797ae91fce109dce4ac360cf7e64 size 22963 diff --git a/Content/Engine/SingleColorMaterial.flax b/Content/Engine/SingleColorMaterial.flax index 503871025..4f09e5897 100644 --- a/Content/Engine/SingleColorMaterial.flax +++ b/Content/Engine/SingleColorMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1172d0f1d41899aa585d3a2ff1d7ef55ba50f9e42692f0eb648dad4643aea0a2 -size 30256 +oid sha256:64f1141ed962a6582a320e5fa7e83305d7271593f80cf760dfb8a03b957152d2 +size 30618 diff --git a/Content/Engine/SkyboxMaterial.flax b/Content/Engine/SkyboxMaterial.flax index 45b32d9ac..0af37967f 100644 --- a/Content/Engine/SkyboxMaterial.flax +++ b/Content/Engine/SkyboxMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d021de8170348e1b7fb73106fd244db949382bf9433d2cfcac9283145d35d6cd -size 31454 +oid sha256:b5aa76eac8fdb989a2f1821b0ee815ec13dc75cda64e0950c3885a00b7bb713b +size 31816 diff --git a/Content/Shaders/Quad.flax b/Content/Shaders/Quad.flax index 1d72311b3..65ce310fe 100644 --- a/Content/Shaders/Quad.flax +++ b/Content/Shaders/Quad.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:baeb87ed9d4228f410a7e1228e16d13589e56aa34cbba4233c53a147442aa395 -size 2647 +oid sha256:94f68ef9c2d7bc7d453fdd432f85f4643ae3be1bcf839bc1d5d81803d6b2ce7c +size 3505 From deb2319190e1b1b857c2037212b455171e27bd23 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 10:19:08 +0200 Subject: [PATCH 053/292] Add audio 
playback support to video player --- Source/Engine/Video/MF/VideoBackendMF.cpp | 68 ++++++++++++++++---- Source/Engine/Video/Types.h | 10 ++- Source/Engine/Video/Video.cpp | 76 ++++++++++++++++++++--- 3 files changed, 131 insertions(+), 23 deletions(-) diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 6396c7379..6f0225041 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -162,22 +162,58 @@ namespace bool ReadStream(VideoBackendPlayer& player, VideoPlayerMF& playerMF, DWORD streamIndex, TimeSpan dt) { + PROFILE_CPU_NAMED("ReadStream"); + ZoneText(player.DebugUrl, player.DebugUrlLen); const bool isVideo = streamIndex == MF_SOURCE_READER_FIRST_VIDEO_STREAM; const bool isAudio = streamIndex == MF_SOURCE_READER_FIRST_AUDIO_STREAM; - const TimeSpan lastFrameTime = isVideo ? player.VideoFrameTime : player.AudioBufferTime; - const TimeSpan lastFrameDuration = isVideo ? player.VideoFrameDuration : player.AudioBufferDuration; - - // Check if the current frame is valid (eg. 
when playing 24fps video at 60fps) - if (lastFrameDuration.Ticks > 0 && - Math::IsInRange(playerMF.Time, lastFrameTime, lastFrameTime + lastFrameDuration)) + int32 goodSamples = 1; + TimeSpan validTimeRangeStart(0), validTimeRangeEnd(0); + if (isAudio) { - return false; + constexpr int32 AudioFramesQueue = 10; // How many frames to read into the audio buffers queue in advance (to improve audio playback smoothness) + if (player.AudioBufferDuration.Ticks == 0) + { + // Read more samples for audio to enqueue multiple audio buffers for smoother playback + goodSamples = AudioFramesQueue; + } + else + { + // Skip reading if the last sample was already over this range (we've got enough in a queue) + validTimeRangeStart = player.AudioBufferTime - player.AudioBufferDuration * AudioFramesQueue; + validTimeRangeEnd = validTimeRangeStart + player.AudioBufferDuration; + if (Math::IsInRange(playerMF.Time, validTimeRangeStart, validTimeRangeEnd)) + { + return false; + } + + // Allow to read future samples within queue range + validTimeRangeStart = player.AudioBufferTime - player.AudioBufferDuration; + validTimeRangeEnd = player.AudioBufferTime + player.AudioBufferDuration * AudioFramesQueue; + + // Read more samples to keep queue at capacity + TimeSpan targetQueueEnd = playerMF.Time + player.AudioBufferDuration * AudioFramesQueue; + TimeSpan activeBufferEnd = player.AudioBufferTime + player.AudioBufferDuration; + TimeSpan missingQueueDuration = targetQueueEnd - activeBufferEnd; + goodSamples = (int32)Math::DivideAndRoundUp(missingQueueDuration.Ticks, player.AudioBufferDuration.Ticks); + if (goodSamples < 1) + goodSamples = 1; + } + } + else if (isVideo) + { + // Check if the current frame is valid (eg. 
when playing 24fps video at 60fps) + if (player.VideoFrameDuration.Ticks > 0 && + Math::IsInRange(playerMF.Time, player.VideoFrameTime, player.VideoFrameTime + player.VideoFrameDuration)) + { + return false; + } } // Read samples until frame is matching the current time int32 samplesLeft = 500; + int32 goodSamplesLeft = goodSamples; HRESULT hr; - for (; samplesLeft > 0; samplesLeft--) + for (; samplesLeft > 0 && goodSamplesLeft > 0; samplesLeft--) { // Read sample DWORD flags = 0; @@ -199,7 +235,11 @@ namespace franeDuration.Ticks = sampleDuration; } //const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / franeDuration.Ticks; - const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration); + bool isGoodSample = goodSamples != goodSamplesLeft; // If we've reached good frame, then use following frames too + if (validTimeRangeStart.Ticks != 0) + isGoodSample |= Math::IsInRange(frameTime, validTimeRangeStart, validTimeRangeEnd); // Ensure frame hits the valid range + else + isGoodSample |= Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration); // Ensure current time hits this frame range // Process sample if (sample && isGoodSample) @@ -288,6 +328,8 @@ namespace } if (sample) sample->Release(); + if (isGoodSample) + goodSamplesLeft--; if (flags & MF_SOURCE_READERF_ENDOFSTREAM) { @@ -299,10 +341,6 @@ namespace // Format/metadata might have changed so update the stream Configure(player, playerMF, streamIndex); } - - // End loop if got good sample or need to seek back - if (isGoodSample) - break; } // True if run out of samples and failed to get frame for the current time @@ -354,6 +392,7 @@ bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBack player.Backend = this; playerMF.Loop = info.Loop; playerMF.FirstFrame = 1; + player.Created(info); Players.Add(&player); return false; @@ -493,6 +532,9 @@ void VideoBackendMF::Base_Update() SEEK_START: if (playerMF.Seek) { + // Reset cached 
frames timings + player.VideoFrameDuration = player.AudioBufferDuration = TimeSpan::Zero(); + seeks++; playerMF.Seek = 0; PROPVARIANT var; diff --git a/Source/Engine/Video/Types.h b/Source/Engine/Video/Types.h index 40ccb8e0f..20b065493 100644 --- a/Source/Engine/Video/Types.h +++ b/Source/Engine/Video/Types.h @@ -26,6 +26,11 @@ struct VideoBackendPlayer VideoBackend* Backend; GPUTexture* Frame; GPUBuffer* FrameUpload; + class GPUUploadVideoFrameTask* UploadVideoFrameTask; +#ifdef TRACY_ENABLE + Char* DebugUrl; + int32 DebugUrlLen; +#endif int32 Width, Height, AvgVideoBitRate, FramesCount; int32 VideoFrameWidth, VideoFrameHeight; PixelFormat Format; @@ -35,9 +40,9 @@ struct VideoBackendPlayer TimeSpan AudioBufferTime, AudioBufferDuration; AudioDataInfo AudioInfo; BytesContainer VideoFrameMemory; - uint32 AudioBuffer; uint32 AudioSource; - class GPUUploadVideoFrameTask* UploadVideoFrameTask; + uint32 NextAudioBuffer; + uint32 AudioBuffers[30]; uintptr BackendState[8]; VideoBackendPlayer() @@ -61,6 +66,7 @@ struct VideoBackendPlayer return *(const T*)BackendState; } + void Created(const VideoBackendPlayerInfo& info); void InitVideoFrame(); void UpdateVideoFrame(Span data, TimeSpan time, TimeSpan duration); void UpdateAudioBuffer(Span data, TimeSpan time, TimeSpan duration); diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 814713c3b..185b96739 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -4,6 +4,7 @@ #include "VideoBackend.h" #include "Engine/Audio/AudioBackend.h" #include "Engine/Core/Log.h" +#include "Engine/Core/Math/Quaternion.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Engine/EngineService.h" #include "Engine/Graphics/GPUDevice.h" @@ -50,6 +51,8 @@ protected: GPUTexture* frame = _player->Frame; if (!frame->IsAllocated()) return Result::MissingResources; + PROFILE_CPU(); + ZoneText(_player->DebugUrl, _player->DebugUrlLen); if (PixelFormatExtensions::IsVideo(_player->Format)) { @@ 
-178,6 +181,15 @@ bool Video::CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackend return true; } +void VideoBackendPlayer::Created(const VideoBackendPlayerInfo& info) +{ +#ifdef TRACY_ENABLE + DebugUrlLen = info.Url.Length(); + DebugUrl = (Char*)Allocator::Allocate(DebugUrlLen * sizeof(Char) + 2); + Platform::MemoryCopy(DebugUrl, *info.Url, DebugUrlLen * 2 + 2); +#endif +} + void VideoBackendPlayer::InitVideoFrame() { if (!GPUDevice::Instance) @@ -189,6 +201,7 @@ void VideoBackendPlayer::InitVideoFrame() void VideoBackendPlayer::UpdateVideoFrame(Span data, TimeSpan time, TimeSpan duration) { PROFILE_CPU(); + ZoneText(DebugUrl, DebugUrlLen); VideoFrameTime = time; VideoFrameDuration = duration; if (!GPUDevice::Instance || GPUDevice::Instance->GetRendererType() == RendererType::Null) @@ -238,32 +251,79 @@ void VideoBackendPlayer::UpdateVideoFrame(Span data, TimeSpan time, TimeSp void VideoBackendPlayer::UpdateAudioBuffer(Span data, TimeSpan time, TimeSpan duration) { PROFILE_CPU(); + ZoneText(DebugUrl, DebugUrlLen); AudioBufferTime = time; AudioBufferDuration = duration; - auto start = time.GetTotalMilliseconds(); - auto dur = duration.GetTotalMilliseconds(); - auto end = (time + duration).GetTotalMilliseconds(); if (!AudioBackend::Instance) return; + // Setup audio source + bool newSource = AudioSource == 0; + if (newSource) + { + // TODO: spatial video player + // TODO: video player volume/pan control + AudioSource = AudioBackend::Source::Add(AudioInfo, Vector3::Zero, Quaternion::Identity, 1.0f, 1.0f, 0.0f, false, false, 1.0f, 1000.0f, 1.0f); + } + else + { + // Get the processed buffers count + int32 numProcessedBuffers = 0; + AudioBackend::Source::GetProcessedBuffersCount(AudioSource, numProcessedBuffers); + if (numProcessedBuffers > 0) + { + // Unbind processed buffers from the source + AudioBackend::Source::DequeueProcessedBuffers(AudioSource); + } + } + + // Get audio buffer + uint32 bufferId = AudioBuffers[NextAudioBuffer]; + if (bufferId == 
0) + { + bufferId = AudioBackend::Buffer::Create(); + AudioBuffers[NextAudioBuffer] = bufferId; + } + NextAudioBuffer = (NextAudioBuffer + 1) % ARRAY_COUNT(AudioBuffers); + // Update audio buffer - if (!AudioBuffer) - AudioBuffer = AudioBackend::Buffer::Create(); AudioDataInfo dataInfo = AudioInfo; const uint32 samplesPerSecond = dataInfo.SampleRate * dataInfo.NumChannels; const uint32 maxSamplesInData = (uint32)data.Length() * 8 / dataInfo.BitDepth; const uint32 maxSamplesInDuration = (uint32)Math::CeilToInt(samplesPerSecond * duration.GetTotalSeconds()); dataInfo.NumSamples = Math::Min(maxSamplesInData, maxSamplesInDuration); - AudioBackend::Buffer::Write(AudioBuffer, data.Get(), dataInfo); + AudioBackend::Buffer::Write(bufferId, data.Get(), dataInfo); + + // Append audio buffer + AudioBackend::Source::QueueBuffer(AudioSource, bufferId); + if (newSource) + { + AudioBackend::Source::Play(AudioSource); + } } void VideoBackendPlayer::ReleaseResources() { - if (AudioBuffer) - AudioBackend::Buffer::Delete(AudioBuffer); + if (AudioSource) + { + AudioBackend::Source::Stop(AudioSource); + AudioBackend::Source::Remove(AudioSource); + AudioSource = 0; + } + for (uint32& bufferId : AudioBuffers) + { + if (bufferId) + { + AudioBackend::Buffer::Delete(bufferId); + bufferId = 0; + } + } if (UploadVideoFrameTask) UploadVideoFrameTask->Cancel(); VideoFrameMemory.Release(); SAFE_DELETE_GPU_RESOURCE(Frame); SAFE_DELETE_GPU_RESOURCE(FrameUpload); +#ifdef TRACY_ENABLE + Allocator::Free(DebugUrl); +#endif } From 47f3ecbde26ba8fd753ed71f50c19a33114f4dd8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 10:19:22 +0200 Subject: [PATCH 054/292] Fix video player shutdown on editor window close --- Source/Editor/Windows/Assets/VideoWindow.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Editor/Windows/Assets/VideoWindow.cs b/Source/Editor/Windows/Assets/VideoWindow.cs index 7bd6c4adc..958608914 100644 --- a/Source/Editor/Windows/Assets/VideoWindow.cs +++ 
b/Source/Editor/Windows/Assets/VideoWindow.cs @@ -199,6 +199,8 @@ namespace FlaxEditor.Windows.Assets { if (IsDisposing) return; + _videoPlayer.Stop(); + Object.Destroy(ref _videoPlayer); _item.RemoveReference(this); _item = null; From a11fa46ee2f3c88f87fe8bf19a423ed2ecb575cd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 10:19:39 +0200 Subject: [PATCH 055/292] Add cpu profile events to GPU tasks executor queue --- Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp index ea090e3c9..97f180ca2 100644 --- a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp +++ b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp @@ -5,6 +5,7 @@ #include "GPUTask.h" #include "GPUTasksManager.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Profiler/ProfilerCPU.h" DefaultGPUTasksExecutor::DefaultGPUTasksExecutor() : _context(nullptr) @@ -18,6 +19,8 @@ String DefaultGPUTasksExecutor::ToString() const void DefaultGPUTasksExecutor::FrameBegin() { + PROFILE_CPU(); + // Ensure to have valid async context if (_context == nullptr) _context = createContext(); @@ -35,6 +38,7 @@ void DefaultGPUTasksExecutor::FrameBegin() void DefaultGPUTasksExecutor::FrameEnd() { + PROFILE_CPU(); ASSERT(_context != nullptr); _context->OnFrameEnd(); } From e51d2dda00da486e6784c90c0978ce0c4343b487 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 10:20:04 +0200 Subject: [PATCH 056/292] Fix playing 6-channel audio with XAudio2 backend --- Source/Engine/Audio/AudioBackendTools.h | 14 ++------------ Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp | 2 +- .../Engine/Audio/XAudio2/AudioBackendXAudio2.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/Source/Engine/Audio/AudioBackendTools.h b/Source/Engine/Audio/AudioBackendTools.h index 8ef709144..ea3b7c10b 
100644 --- a/Source/Engine/Audio/AudioBackendTools.h +++ b/Source/Engine/Audio/AudioBackendTools.h @@ -160,18 +160,8 @@ public: break; case 2: default: // TODO: implement multi-channel support (eg. 5.1, 7.1) - if (sourceChannels == 1) - { - outputMatrix[0] = channels[FrontLeft]; - outputMatrix[1] = channels[FrontRight]; - } - else if (sourceChannels == 2) - { - outputMatrix[0] = channels[FrontLeft]; - outputMatrix[1] = 0.0f; - outputMatrix[2] = 0.0f; - outputMatrix[3] = channels[FrontRight]; - } + outputMatrix[0] = channels[FrontLeft]; + outputMatrix[sourceChannels + 1] = channels[FrontRight]; break; } } diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp index d29e3a31b..879061d61 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp @@ -498,7 +498,7 @@ void AudioBackendOAL::Buffer_Write(uint32 bufferID, byte* samples, const AudioDa } else { - LOG(Warning, "OpenAL doesn't support bit depth larger than 16. Your audio data will be truncated."); + LOG(Warning, "OpenAL doesn't support bit depth larger than 16. Audio data will be truncated."); const uint32 bufferSize = info.NumSamples * 2; byte* sampleBuffer16 = (byte*)Allocator::Allocate(bufferSize); AudioTool::ConvertBitDepth(samples, info.BitDepth, sampleBuffer16, 16, info.NumSamples); diff --git a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp index 4c6f93947..b692da8f4 100644 --- a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp +++ b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp @@ -19,11 +19,9 @@ // Include XAudio library // Documentation: https://docs.microsoft.com/en-us/windows/desktop/xaudio2/xaudio2-apis-portal #include -//#include -//#include // TODO: implement multi-channel support (eg. 
5.1, 7.1) -#define MAX_INPUT_CHANNELS 2 +#define MAX_INPUT_CHANNELS 6 #define MAX_OUTPUT_CHANNELS 2 #define MAX_CHANNELS_MATRIX_SIZE (MAX_INPUT_CHANNELS*MAX_OUTPUT_CHANNELS) #if ENABLE_ASSERTION @@ -151,6 +149,7 @@ namespace XAudio2 IXAudio2* Instance = nullptr; IXAudio2MasteringVoice* MasteringVoice = nullptr; int32 Channels; + DWORD ChannelMask; bool ForceDirty = true; AudioBackendTools::Settings Settings; Listener Listener; @@ -683,7 +682,7 @@ bool AudioBackendXAudio2::Base_Init() } XAUDIO2_VOICE_DETAILS details; XAudio2::MasteringVoice->GetVoiceDetails(&details); -#if 0 +#if MAX_OUTPUT_CHANNELS > 2 // TODO: implement multi-channel support (eg. 5.1, 7.1) XAudio2::Channels = details.InputChannels; hr = XAudio2::MasteringVoice->GetChannelMask(&XAudio2::ChannelMask); @@ -694,6 +693,7 @@ bool AudioBackendXAudio2::Base_Init() } #else XAudio2::Channels = 2; + XAudio2::ChannelMask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT; #endif LOG(Info, "XAudio2: {0} channels at {1} kHz", XAudio2::Channels, details.InputSampleRate / 1000.0f); From b91f51fb465137ad6615155ba2e9c7767a8b91f8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 12:35:18 +0200 Subject: [PATCH 057/292] Add async videos update --- Source/Engine/Video/MF/VideoBackendMF.cpp | 170 +++++++++++----------- Source/Engine/Video/MF/VideoBackendMF.h | 2 +- Source/Engine/Video/Video.cpp | 28 +++- Source/Engine/Video/Video.h | 1 + Source/Engine/Video/VideoBackend.h | 2 +- 5 files changed, 116 insertions(+), 87 deletions(-) diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 6f0225041..207f9b72c 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -4,6 +4,7 @@ #include "VideoBackendMF.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Threading/TaskGraph.h" #include "Engine/Core/Log.h" #include "Engine/Engine/Time.h" #include "Engine/Audio/Types.h" @@ -26,7 +27,7 @@ struct VideoPlayerMF 
TimeSpan Time; }; -namespace +namespace MF { Array Players; @@ -346,6 +347,82 @@ namespace // True if run out of samples and failed to get frame for the current time return samplesLeft == 0; } + + void UpdatePlayer(int32 index) + { + PROFILE_CPU(); + auto& player = *Players[index]; + ZoneText(player.DebugUrl, player.DebugUrlLen); + auto& playerMF = player.GetBackendState(); + + // Skip paused player + if (!playerMF.Playing && !playerMF.Seek) + return; + + bool useTimeScale = true; +#if USE_EDITOR + if (!Editor::IsPlayMode) + useTimeScale = false; +#endif + TimeSpan dt = useTimeScale ? Time::Update.DeltaTime : Time::Update.UnscaledDeltaTime; + + // Update playback time + if (playerMF.FirstFrame) + { + playerMF.FirstFrame = 0; + playerMF.Seek = 1; + } + else if (playerMF.Playing) + { + playerMF.Time += dt; + } + if (playerMF.Time > player.Duration) + { + if (playerMF.Loop) + { + // Loop + playerMF.Time.Ticks %= player.Duration.Ticks; + playerMF.Seek = 1; + } + else + { + // End + playerMF.Time = player.Duration; + } + } + + // Update current position + SEEK_START: + if (playerMF.Seek) + { + // Reset cached frames timings + player.VideoFrameDuration = player.AudioBufferDuration = TimeSpan::Zero(); + + playerMF.Seek = 0; + PROPVARIANT var; + PropVariantInit(&var); + var.vt = VT_I8; + var.hVal.QuadPart = playerMF.Time.Ticks; + PROFILE_CPU_NAMED("SetCurrentPosition"); + playerMF.SourceReader->SetCurrentPosition(GUID_NULL, var); + + // Note: + // SetCurrentPosition method does not guarantee exact seeking. + // The accuracy of the seek depends on the media content. + // If the media content contains a video stream, the SetCurrentPosition method typically seeks to the nearest key frame before the desired position. + // After seeking, the application should call ReadSample and advance to the desired position. 
+ } + + // Update streams + if (ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM, dt)) + { + // Failed to pick a valid sample so try again with seeking + playerMF.Seek = 1; + goto SEEK_START; + } + if (player.AudioInfo.BitDepth != 0) + ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt); + } } bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) @@ -377,8 +454,8 @@ bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBack playerMF.SourceReader = sourceReader; // Read media info - if (Configure(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM) || - Configure(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM)) + if (MF::Configure(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM) || + MF::Configure(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM)) return true; PROPVARIANT var; hr = sourceReader->GetPresentationAttribute(MF_SOURCE_READER_MEDIASOURCE, MF_PD_DURATION, &var); @@ -393,7 +470,7 @@ bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBack playerMF.Loop = info.Loop; playerMF.FirstFrame = 1; player.Created(info); - Players.Add(&player); + MF::Players.Add(&player); return false; } @@ -404,7 +481,7 @@ void VideoBackendMF::Player_Destroy(VideoBackendPlayer& player) player.ReleaseResources(); auto& playerMF = player.GetBackendState(); playerMF.SourceReader->Release(); - Players.Remove(&player); + MF::Players.Remove(&player); player = VideoBackendPlayer(); } @@ -482,85 +559,12 @@ bool VideoBackendMF::Base_Init() return false; } -void VideoBackendMF::Base_Update() +void VideoBackendMF::Base_Update(TaskGraph* graph) { - PROFILE_CPU(); - // TODO: use async Task Graph to update videos - for (auto* e : Players) - { - auto& player = *e; - auto& playerMF = player.GetBackendState(); - - // Skip paused player - if (!playerMF.Playing && !playerMF.Seek) - continue; - - bool useTimeScale = true; -#if USE_EDITOR - if 
(!Editor::IsPlayMode) - useTimeScale = false; -#endif - TimeSpan dt = useTimeScale ? Time::Update.DeltaTime : Time::Update.UnscaledDeltaTime; - - // Update playback time - if (playerMF.FirstFrame) - { - playerMF.FirstFrame = 0; - playerMF.Seek = 1; - } - else if (playerMF.Playing) - { - playerMF.Time += dt; - } - if (playerMF.Time > player.Duration) - { - if (playerMF.Loop) - { - // Loop - playerMF.Time.Ticks %= player.Duration.Ticks; - playerMF.Seek = 1; - } - else - { - // End - playerMF.Time = player.Duration; - } - } - - // Update current position - int32 seeks = 0; - SEEK_START: - if (playerMF.Seek) - { - // Reset cached frames timings - player.VideoFrameDuration = player.AudioBufferDuration = TimeSpan::Zero(); - - seeks++; - playerMF.Seek = 0; - PROPVARIANT var; - PropVariantInit(&var); - var.vt = VT_I8; - var.hVal.QuadPart = playerMF.Time.Ticks; - PROFILE_CPU_NAMED("SetCurrentPosition"); - playerMF.SourceReader->SetCurrentPosition(GUID_NULL, var); - - // Note: - // SetCurrentPosition method does not guarantee exact seeking. - // The accuracy of the seek depends on the media content. - // If the media content contains a video stream, the SetCurrentPosition method typically seeks to the nearest key frame before the desired position. - // After seeking, the application should call ReadSample and advance to the desired position. 
- } - - // Update streams - if (ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM, dt)) - { - // Failed to pick a valid sample so try again with seeking - playerMF.Seek = 1; - goto SEEK_START; - } - if (player.AudioInfo.BitDepth != 0) - ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt); - } + // Schedule work to update all videos models in async + Function job; + job.Bind(MF::UpdatePlayer); + graph->DispatchJob(job, MF::Players.Count()); } void VideoBackendMF::Base_Dispose() diff --git a/Source/Engine/Video/MF/VideoBackendMF.h b/Source/Engine/Video/MF/VideoBackendMF.h index 7b97008de..da3f6044e 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.h +++ b/Source/Engine/Video/MF/VideoBackendMF.h @@ -23,7 +23,7 @@ public: TimeSpan Player_GetTime(const VideoBackendPlayer& player) override; const Char* Base_Name() override; bool Base_Init() override; - void Base_Update() override; + void Base_Update(TaskGraph* graph) override; void Base_Dispose() override; }; diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 185b96739..2da3d2b4d 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -6,6 +6,7 @@ #include "Engine/Core/Log.h" #include "Engine/Core/Math/Quaternion.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Engine/Engine.h" #include "Engine/Engine/EngineService.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUBuffer.h" @@ -16,6 +17,7 @@ #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Graphics/Textures/GPUTexture.h" #include "Engine/Scripting/Enums.h" +#include "Engine/Threading/TaskGraph.h" #if VIDEO_API_MF #include "MF/VideoBackendMF.h" #endif @@ -108,6 +110,12 @@ protected: } }; +class VideoSystem : public TaskGraphSystem +{ +public: + void Execute(TaskGraph* graph) override; +}; + class VideoService : public EngineService { public: @@ -128,13 +136,15 @@ public: Backends[index] = backend; } + bool Init() override; void 
Update() override; void Dispose() override; }; VideoService VideoServiceInstance; +TaskGraphSystem* Video::System = nullptr; -void VideoService::Update() +void VideoSystem::Execute(TaskGraph* graph) { PROFILE_CPU_NAMED("Video.Update"); @@ -142,10 +152,22 @@ void VideoService::Update() for (VideoBackend*& backend : VideoServiceInstance.Backends) { if (backend) - backend->Base_Update(); + backend->Base_Update(graph); } } +bool VideoService::Init() +{ + Video::System = New(); + Engine::UpdateGraph->AddSystem(Video::System); + return false; +} + +void VideoService::Update() +{ + PROFILE_CPU_NAMED("Video.Update"); +} + void VideoService::Dispose() { PROFILE_CPU_NAMED("Video.Dispose"); @@ -159,6 +181,8 @@ void VideoService::Dispose() backend = nullptr; } } + + SAFE_DELETE(Video::System); } bool Video::CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) diff --git a/Source/Engine/Video/Video.h b/Source/Engine/Video/Video.h index 08c0edf8b..46c757015 100644 --- a/Source/Engine/Video/Video.h +++ b/Source/Engine/Video/Video.h @@ -10,5 +10,6 @@ class Video { public: + static class TaskGraphSystem* System; static bool CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player); }; diff --git a/Source/Engine/Video/VideoBackend.h b/Source/Engine/Video/VideoBackend.h index 74d50affb..9829483b6 100644 --- a/Source/Engine/Video/VideoBackend.h +++ b/Source/Engine/Video/VideoBackend.h @@ -38,6 +38,6 @@ public: // Base virtual const Char* Base_Name() = 0; virtual bool Base_Init() = 0; - virtual void Base_Update() = 0; + virtual void Base_Update(class TaskGraph* graph) = 0; virtual void Base_Dispose() = 0; }; From 66b828ae9208767ea3e0bcf26bb387c7d3412af6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 15:54:37 +0200 Subject: [PATCH 058/292] Refactor `Asset::GetReferences` to support file path references --- .../Editor/Cooker/Steps/CollectAssetsStep.cpp | 57 ++++++++----------- 
.../Editor/Cooker/Steps/CollectAssetsStep.h | 8 --- Source/Engine/AI/BehaviorTree.cpp | 8 +-- Source/Engine/AI/BehaviorTree.h | 2 +- .../SceneAnimations/SceneAnimation.cpp | 6 +- .../SceneAnimations/SceneAnimation.h | 2 +- Source/Engine/Content/Asset.cpp | 11 +++- Source/Engine/Content/Asset.h | 11 ++-- .../Engine/Content/Assets/AnimationGraph.cpp | 8 +-- Source/Engine/Content/Assets/AnimationGraph.h | 2 +- Source/Engine/Content/Assets/MaterialBase.cpp | 10 ++++ Source/Engine/Content/Assets/MaterialBase.h | 10 +--- .../Content/Assets/MaterialInstance.cpp | 8 +-- .../Engine/Content/Assets/MaterialInstance.h | 2 +- Source/Engine/Content/Assets/Model.cpp | 8 +-- Source/Engine/Content/Assets/Model.h | 2 +- Source/Engine/Content/Assets/SkeletonMask.h | 8 +-- Source/Engine/Content/Assets/SkinnedModel.cpp | 8 +-- Source/Engine/Content/Assets/SkinnedModel.h | 2 +- Source/Engine/Content/Assets/VisualScript.h | 8 +-- Source/Engine/Content/JsonAsset.cpp | 15 ++--- Source/Engine/Content/JsonAsset.h | 6 +- .../Graphics/Materials/MaterialParams.cpp | 4 +- .../Graphics/Materials/MaterialParams.h | 6 +- Source/Engine/Level/Scene/Scene.cpp | 3 +- Source/Engine/Particles/ParticleEmitter.h | 8 +-- Source/Engine/Particles/ParticleSystem.cpp | 8 +-- Source/Engine/Particles/ParticleSystem.h | 17 +++--- Source/Engine/Visject/Graph.h | 8 +-- 29 files changed, 121 insertions(+), 135 deletions(-) diff --git a/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp b/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp index 960be87ad..4787339e6 100644 --- a/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp +++ b/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp @@ -10,47 +10,26 @@ #include "Engine/Content/Assets/Shader.h" #include "Engine/Content/Cache/AssetsCache.h" -bool CollectAssetsStep::Process(CookingData& data, Asset* asset) -{ - // Skip virtual/temporary assets - if (asset->IsVirtual()) - return false; - - // Keep reference to the asset - AssetReference ref(asset); - - // Asset should have 
loaded data - if (asset->WaitForLoaded()) - return false; - - // Gather asset references - _references.Clear(); - asset->Locker.Lock(); - asset->GetReferences(_references); - asset->Locker.Unlock(); - _assetsQueue.Add(_references); - - return false; -} - bool CollectAssetsStep::Perform(CookingData& data) { LOG(Info, "Searching for assets to include in a build. Using {0} root assets.", data.RootAssets.Count()); data.StepProgress(TEXT("Collecting assets"), 0); // Initialize assets queue - _assetsQueue.Clear(); - _assetsQueue.EnsureCapacity(1024); + Array assetsQueue; + assetsQueue.Clear(); + assetsQueue.EnsureCapacity(1024); for (auto i = data.RootAssets.Begin(); i.IsNotEnd(); ++i) - _assetsQueue.Add(i->Item); + assetsQueue.Add(i->Item); // Iterate through the assets graph AssetInfo assetInfo; - while (_assetsQueue.HasItems()) + Array references; + Array files; + while (assetsQueue.HasItems()) { BUILD_STEP_CANCEL_CHECK; - - const auto assetId = _assetsQueue.Dequeue(); + const auto assetId = assetsQueue.Dequeue(); // Skip already processed or invalid assets if (!assetId.IsValid() @@ -69,14 +48,26 @@ bool CollectAssetsStep::Perform(CookingData& data) } // Load asset - const auto asset = Content::LoadAsync(assetId); + AssetReference asset = Content::LoadAsync(assetId); if (asset == nullptr) continue; - - // Process that asset LOG_STR(Info, asset->GetPath()); data.Assets.Add(assetId); - Process(data, asset); + + // Skip virtual/temporary assets + if (asset->IsVirtual()) + continue; + + // Asset should have loaded data + if (asset->WaitForLoaded()) + continue; + + // Gather asset references + references.Clear(); + asset->Locker.Lock(); + asset->GetReferences(references, files); + asset->Locker.Unlock(); + assetsQueue.Add(references); } data.Stats.TotalAssets = data.Assets.Count(); diff --git a/Source/Editor/Cooker/Steps/CollectAssetsStep.h b/Source/Editor/Cooker/Steps/CollectAssetsStep.h index 15e7f04e9..03df52528 100644 --- 
a/Source/Editor/Cooker/Steps/CollectAssetsStep.h +++ b/Source/Editor/Cooker/Steps/CollectAssetsStep.h @@ -12,15 +12,7 @@ class Asset; /// class CollectAssetsStep : public GameCooker::BuildStep { -private: - - Array _assetsQueue; - Array _references; - - bool Process(CookingData& data, Asset* asset); - public: - // [BuildStep] bool Perform(CookingData& data) override; }; diff --git a/Source/Engine/AI/BehaviorTree.cpp b/Source/Engine/AI/BehaviorTree.cpp index e0407ede9..8cbb9f608 100644 --- a/Source/Engine/AI/BehaviorTree.cpp +++ b/Source/Engine/AI/BehaviorTree.cpp @@ -232,12 +232,12 @@ void BehaviorTree::OnScriptsReloadEnd() Graph.Setup(this); } -void BehaviorTree::GetReferences(Array& output) const +void BehaviorTree::GetReferences(Array& assets, Array& files) const { // Base - BinaryAsset::GetReferences(output); + BinaryAsset::GetReferences(assets, files); - Graph.GetReferences(output); + Graph.GetReferences(assets); // Extract refs from serialized nodes data for (const BehaviorTreeGraphNode& n : Graph.Nodes) @@ -246,7 +246,7 @@ void BehaviorTree::GetReferences(Array& output) const continue; const Variant& data = n.Values[1]; if (data.Type == VariantType::Blob) - JsonAssetBase::GetReferences(StringAnsiView((char*)data.AsBlob.Data, data.AsBlob.Length), output); + JsonAssetBase::GetReferences(StringAnsiView((char*)data.AsBlob.Data, data.AsBlob.Length), assets); } } diff --git a/Source/Engine/AI/BehaviorTree.h b/Source/Engine/AI/BehaviorTree.h index a31b58da8..1463cd9a5 100644 --- a/Source/Engine/AI/BehaviorTree.h +++ b/Source/Engine/AI/BehaviorTree.h @@ -98,7 +98,7 @@ public: // [BinaryAsset] void OnScriptingDispose() override; #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif protected: diff --git a/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp b/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp index b3e7a6e27..71cc8f1f4 100644 --- 
a/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp +++ b/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp @@ -75,16 +75,16 @@ bool SceneAnimation::SaveTimeline(const BytesContainer& data) #if USE_EDITOR -void SceneAnimation::GetReferences(Array& output) const +void SceneAnimation::GetReferences(Array& assets, Array& files) const { // Base - BinaryAsset::GetReferences(output); + BinaryAsset::GetReferences(assets, files); for (int32 i = 0; i < Tracks.Count(); i++) { const auto& track = Tracks[i]; if (track.Asset) - output.Add(track.Asset->GetID()); + assets.Add(track.Asset->GetID()); } } diff --git a/Source/Engine/Animations/SceneAnimations/SceneAnimation.h b/Source/Engine/Animations/SceneAnimations/SceneAnimation.h index e08f0c38c..9232589b5 100644 --- a/Source/Engine/Animations/SceneAnimations/SceneAnimation.h +++ b/Source/Engine/Animations/SceneAnimations/SceneAnimation.h @@ -464,7 +464,7 @@ public: public: // [BinaryAsset] #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif protected: diff --git a/Source/Engine/Content/Asset.cpp b/Source/Engine/Content/Asset.cpp index ddff7fc8a..b63abcc24 100644 --- a/Source/Engine/Content/Asset.cpp +++ b/Source/Engine/Content/Asset.cpp @@ -536,6 +536,14 @@ void Asset::CancelStreaming() #if USE_EDITOR +void Asset::GetReferences(Array& assets, Array& files) const +{ + // Fallback to the old API +PRAGMA_DISABLE_DEPRECATION_WARNINGS; + GetReferences(assets); +PRAGMA_ENABLE_DEPRECATION_WARNINGS; +} + void Asset::GetReferences(Array& output) const { // No refs by default @@ -544,7 +552,8 @@ void Asset::GetReferences(Array& output) const Array Asset::GetReferences() const { Array result; - GetReferences(result); + Array files; + GetReferences(result, files); return result; } diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index d1256b7c7..15c5cae6f 100644 --- a/Source/Engine/Content/Asset.h +++ 
b/Source/Engine/Content/Asset.h @@ -179,10 +179,13 @@ public: /// For some asset types (e.g. scene or prefab) it may contain invalid asset ids due to not perfect gather method, /// which is optimized to perform scan very quickly. Before using those ids perform simple validation via Content cache API. /// The result collection contains only 1-level-deep references (only direct ones) and is invalid if asset is not loaded. - /// Also the output data may have duplicated asset ids or even invalid ids (Guid::Empty). + /// Also, the output data may have duplicated asset ids or even invalid ids (Guid::Empty). /// - /// The output collection of the asset ids referenced by this asset. - virtual void GetReferences(Array& output) const; + /// The output collection of the asset ids referenced by this asset. + /// The output list of file paths referenced by this asset. Files might come from project Content folder (relative path is preserved in cooked game), or external location (copied into Content root folder of cooked game). + virtual void GetReferences(Array& assets, Array& files) const; + // [Deprecated in v1.9] + DEPRECATED virtual void GetReferences(Array& output) const; /// /// Gets the asset references. Supported only in Editor. @@ -191,7 +194,7 @@ public: /// For some asset types (e.g. scene or prefab) it may contain invalid asset ids due to not perfect gather method, /// which is optimized to perform scan very quickly. Before using those ids perform simple validation via Content cache API. /// The result collection contains only 1-level-deep references (only direct ones) and is invalid if asset is not loaded. - /// Also the output data may have duplicated asset ids or even invalid ids (Guid::Empty). + /// Also, the output data may have duplicated asset ids or even invalid ids (Guid::Empty). /// /// The collection of the asset ids referenced by this asset. 
API_FUNCTION() Array GetReferences() const; diff --git a/Source/Engine/Content/Assets/AnimationGraph.cpp b/Source/Engine/Content/Assets/AnimationGraph.cpp index d33aa6125..f5d3a8073 100644 --- a/Source/Engine/Content/Assets/AnimationGraph.cpp +++ b/Source/Engine/Content/Assets/AnimationGraph.cpp @@ -222,12 +222,10 @@ void AnimationGraph::FindDependencies(AnimGraphBase* graph) } } -void AnimationGraph::GetReferences(Array& output) const +void AnimationGraph::GetReferences(Array& assets, Array& files) const { - // Base - BinaryAsset::GetReferences(output); - - Graph.GetReferences(output); + BinaryAsset::GetReferences(assets, files); + Graph.GetReferences(assets); } #endif diff --git a/Source/Engine/Content/Assets/AnimationGraph.h b/Source/Engine/Content/Assets/AnimationGraph.h index fd208aa7f..77468e448 100644 --- a/Source/Engine/Content/Assets/AnimationGraph.h +++ b/Source/Engine/Content/Assets/AnimationGraph.h @@ -64,7 +64,7 @@ private: public: // [BinaryAsset] #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif protected: diff --git a/Source/Engine/Content/Assets/MaterialBase.cpp b/Source/Engine/Content/Assets/MaterialBase.cpp index 433463059..d537a5746 100644 --- a/Source/Engine/Content/Assets/MaterialBase.cpp +++ b/Source/Engine/Content/Assets/MaterialBase.cpp @@ -45,3 +45,13 @@ MaterialInstance* MaterialBase::CreateVirtualInstance() instance->SetBaseMaterial(this); return instance; } + +#if USE_EDITOR + +void MaterialBase::GetReferences(Array& assets, Array& files) const +{ + BinaryAsset::GetReferences(assets, files); + Params.GetReferences(assets); +} + +#endif diff --git a/Source/Engine/Content/Assets/MaterialBase.h b/Source/Engine/Content/Assets/MaterialBase.h index 3d1a624a4..81316b106 100644 --- a/Source/Engine/Content/Assets/MaterialBase.h +++ b/Source/Engine/Content/Assets/MaterialBase.h @@ -25,7 +25,7 @@ public: Action ParamsChanged; /// - /// Returns true if 
material is an material instance. + /// Returns true if material is a material instance. /// virtual bool IsMaterialInstance() const = 0; @@ -77,12 +77,6 @@ public: public: // [BinaryAsset] #if USE_EDITOR - void GetReferences(Array& output) const override - { - // Base - BinaryAsset::GetReferences(output); - - Params.GetReferences(output); - } + void GetReferences(Array& assets, Array& files) const override; #endif }; diff --git a/Source/Engine/Content/Assets/MaterialInstance.cpp b/Source/Engine/Content/Assets/MaterialInstance.cpp index 2c436f396..4afcde7b9 100644 --- a/Source/Engine/Content/Assets/MaterialInstance.cpp +++ b/Source/Engine/Content/Assets/MaterialInstance.cpp @@ -123,13 +123,11 @@ bool MaterialInstance::IsMaterialInstance() const #if USE_EDITOR -void MaterialInstance::GetReferences(Array& output) const +void MaterialInstance::GetReferences(Array& assets, Array& files) const { - // Base - MaterialBase::GetReferences(output); - + MaterialBase::GetReferences(assets, files); if (_baseMaterial) - output.Add(_baseMaterial->GetID()); + assets.Add(_baseMaterial->GetID()); } #endif diff --git a/Source/Engine/Content/Assets/MaterialInstance.h b/Source/Engine/Content/Assets/MaterialInstance.h index 5a99cdb41..3e9f2ec2a 100644 --- a/Source/Engine/Content/Assets/MaterialInstance.h +++ b/Source/Engine/Content/Assets/MaterialInstance.h @@ -50,7 +50,7 @@ public: // [MaterialBase] bool IsMaterialInstance() const override; #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif // [IMaterial] diff --git a/Source/Engine/Content/Assets/Model.cpp b/Source/Engine/Content/Assets/Model.cpp index 9143e3247..3bcaf4330 100644 --- a/Source/Engine/Content/Assets/Model.cpp +++ b/Source/Engine/Content/Assets/Model.cpp @@ -788,15 +788,13 @@ void Model::CancelStreaming() #if USE_EDITOR -void Model::GetReferences(Array& output) const +void Model::GetReferences(Array& assets, Array& files) const { // 
Base - BinaryAsset::GetReferences(output); + BinaryAsset::GetReferences(assets, files); for (int32 i = 0; i < MaterialSlots.Count(); i++) - { - output.Add(MaterialSlots[i].Material.GetID()); - } + assets.Add(MaterialSlots[i].Material.GetID()); } #endif diff --git a/Source/Engine/Content/Assets/Model.h b/Source/Engine/Content/Assets/Model.h index 16c4d45f4..c02a3bdcf 100644 --- a/Source/Engine/Content/Assets/Model.h +++ b/Source/Engine/Content/Assets/Model.h @@ -251,7 +251,7 @@ public: void InitAsVirtual() override; void CancelStreaming() override; #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif // [StreamableResource] diff --git a/Source/Engine/Content/Assets/SkeletonMask.h b/Source/Engine/Content/Assets/SkeletonMask.h index 5ecc0e384..f14975b37 100644 --- a/Source/Engine/Content/Assets/SkeletonMask.h +++ b/Source/Engine/Content/Assets/SkeletonMask.h @@ -68,12 +68,10 @@ private: public: // [BinaryAsset] #if USE_EDITOR - void GetReferences(Array& output) const override + void GetReferences(Array& assets, Array& files) const override { - // Base - BinaryAsset::GetReferences(output); - - output.Add(Skeleton.GetID()); + BinaryAsset::GetReferences(assets, files); + assets.Add(Skeleton.GetID()); } #endif diff --git a/Source/Engine/Content/Assets/SkinnedModel.cpp b/Source/Engine/Content/Assets/SkinnedModel.cpp index 15be02616..d70c9fb35 100644 --- a/Source/Engine/Content/Assets/SkinnedModel.cpp +++ b/Source/Engine/Content/Assets/SkinnedModel.cpp @@ -974,15 +974,13 @@ void SkinnedModel::CancelStreaming() #if USE_EDITOR -void SkinnedModel::GetReferences(Array& output) const +void SkinnedModel::GetReferences(Array& assets, Array& files) const { // Base - BinaryAsset::GetReferences(output); + BinaryAsset::GetReferences(assets, files); for (int32 i = 0; i < MaterialSlots.Count(); i++) - { - output.Add(MaterialSlots[i].Material.GetID()); - } + 
assets.Add(MaterialSlots[i].Material.GetID()); } #endif diff --git a/Source/Engine/Content/Assets/SkinnedModel.h b/Source/Engine/Content/Assets/SkinnedModel.h index 53feb4080..1461844a1 100644 --- a/Source/Engine/Content/Assets/SkinnedModel.h +++ b/Source/Engine/Content/Assets/SkinnedModel.h @@ -323,7 +323,7 @@ public: void InitAsVirtual() override; void CancelStreaming() override; #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif // [StreamableResource] diff --git a/Source/Engine/Content/Assets/VisualScript.h b/Source/Engine/Content/Assets/VisualScript.h index dc63609a6..b6bda5ffb 100644 --- a/Source/Engine/Content/Assets/VisualScript.h +++ b/Source/Engine/Content/Assets/VisualScript.h @@ -277,12 +277,10 @@ public: public: // [BinaryAsset] #if USE_EDITOR - void GetReferences(Array& output) const override + void GetReferences(Array& assets, Array& files) const override { - // Base - BinaryAsset::GetReferences(output); - - Graph.GetReferences(output); + BinaryAsset::GetReferences(assets, files); + Graph.GetReferences(assets); } #endif diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index 854c06001..e9960d424 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -109,20 +109,20 @@ uint64 JsonAssetBase::GetMemoryUsage() const #if USE_EDITOR -void FindIds(ISerializable::DeserializeStream& node, Array& output) +void FindIds(ISerializable::DeserializeStream& node, Array& output, Array& files) { if (node.IsObject()) { for (auto i = node.MemberBegin(); i != node.MemberEnd(); ++i) { - FindIds(i->value, output); + FindIds(i->value, output, files); } } else if (node.IsArray()) { for (rapidjson::SizeType i = 0; i < node.Size(); i++) { - FindIds(node[i], output); + FindIds(node[i], output, files); } } else if (node.IsString()) @@ -137,13 +137,14 @@ void FindIds(ISerializable::DeserializeStream& node, Array& 
output) } } -void JsonAssetBase::GetReferences(const StringAnsiView& json, Array& output) +void JsonAssetBase::GetReferences(const StringAnsiView& json, Array& assets) { ISerializable::SerializeDocument document; document.Parse(json.Get(), json.Length()); if (document.HasParseError()) return; - FindIds(document, output); + Array files; + FindIds(document, assets, files); } bool JsonAssetBase::Save(const StringView& path) const @@ -207,7 +208,7 @@ bool JsonAssetBase::Save(JsonWriter& writer) const return false; } -void JsonAssetBase::GetReferences(Array& output) const +void JsonAssetBase::GetReferences(Array& assets, Array& files) const { if (Data == nullptr) return; @@ -219,7 +220,7 @@ void JsonAssetBase::GetReferences(Array& output) const // It produces many invalid ids (like refs to scene objects). // But it's super fast, super low-memory and doesn't involve any advanced systems integration. - FindIds(*Data, output); + FindIds(*Data, assets, files); } #endif diff --git a/Source/Engine/Content/JsonAsset.h b/Source/Engine/Content/JsonAsset.h index 8eca7f610..b974f3087 100644 --- a/Source/Engine/Content/JsonAsset.h +++ b/Source/Engine/Content/JsonAsset.h @@ -70,8 +70,8 @@ public: /// Parses Json string to find any object references inside it. It can produce list of references to assets and/or scene objects. Supported only in Editor. /// /// The Json string. - /// The output list of object IDs references by the asset (appended, not cleared). - API_FUNCTION() static void GetReferences(const StringAnsiView& json, API_PARAM(Out) Array& output); + /// The output list of object IDs references by the asset (appended, not cleared). + API_FUNCTION() static void GetReferences(const StringAnsiView& json, API_PARAM(Out) Array& assets); /// /// Saves this asset to the file. Supported only in Editor. 
@@ -97,7 +97,7 @@ public: const String& GetPath() const override; uint64 GetMemoryUsage() const override; #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif protected: diff --git a/Source/Engine/Graphics/Materials/MaterialParams.cpp b/Source/Engine/Graphics/Materials/MaterialParams.cpp index 1904f9565..f54d90601 100644 --- a/Source/Engine/Graphics/Materials/MaterialParams.cpp +++ b/Source/Engine/Graphics/Materials/MaterialParams.cpp @@ -1042,12 +1042,12 @@ void MaterialParams::Save(BytesContainer& data, const Array& output) const +void MaterialParams::GetReferences(Array& assets) const { for (int32 i = 0; i < Count(); i++) { if (At(i)._asAsset) - output.Add(At(i)._asAsset->GetID()); + assets.Add(At(i)._asAsset->GetID()); } } diff --git a/Source/Engine/Graphics/Materials/MaterialParams.h b/Source/Engine/Graphics/Materials/MaterialParams.h index 68326d544..2092c5c49 100644 --- a/Source/Engine/Graphics/Materials/MaterialParams.h +++ b/Source/Engine/Graphics/Materials/MaterialParams.h @@ -417,13 +417,11 @@ public: public: #if USE_EDITOR - /// /// Gets the asset references (see Asset.GetReferences for more info). /// - /// The output. - void GetReferences(Array& output) const; - + /// The output assets. 
+ void GetReferences(Array& assets) const; #endif bool HasContentLoaded() const; diff --git a/Source/Engine/Level/Scene/Scene.cpp b/Source/Engine/Level/Scene/Scene.cpp index 0a2bdd387..eae239a8c 100644 --- a/Source/Engine/Level/Scene/Scene.cpp +++ b/Source/Engine/Level/Scene/Scene.cpp @@ -127,7 +127,8 @@ Array Scene::GetAssetReferences() const const auto asset = Content::Load(GetID()); if (asset) { - asset->GetReferences(result); + Array files; + asset->GetReferences(result, files); } else { diff --git a/Source/Engine/Particles/ParticleEmitter.h b/Source/Engine/Particles/ParticleEmitter.h index 082396ba9..d07ffc108 100644 --- a/Source/Engine/Particles/ParticleEmitter.h +++ b/Source/Engine/Particles/ParticleEmitter.h @@ -172,12 +172,10 @@ public: public: // [BinaryAsset] #if USE_EDITOR - void GetReferences(Array& output) const override + void GetReferences(Array& assets, Array& files) const override { - // Base - BinaryAsset::GetReferences(output); - - Graph.GetReferences(output); + BinaryAsset::GetReferences(assets, files); + Graph.GetReferences(assets); } #endif diff --git a/Source/Engine/Particles/ParticleSystem.cpp b/Source/Engine/Particles/ParticleSystem.cpp index 419e8da74..4c0fcc076 100644 --- a/Source/Engine/Particles/ParticleSystem.cpp +++ b/Source/Engine/Particles/ParticleSystem.cpp @@ -181,19 +181,19 @@ void ParticleSystem::InitAsVirtual() #if USE_EDITOR -void ParticleSystem::GetReferences(Array& output) const +void ParticleSystem::GetReferences(Array& assets, Array& files) const { // Base - BinaryAsset::GetReferences(output); + BinaryAsset::GetReferences(assets, files); for (int32 i = 0; i < Emitters.Count(); i++) - output.Add(Emitters[i].GetID()); + assets.Add(Emitters[i].GetID()); for (auto i = EmittersParametersOverrides.Begin(); i.IsNotEnd(); ++i) { const auto id = (Guid)i->Value; if (id.IsValid()) - output.Add(id); + assets.Add(id); } } diff --git a/Source/Engine/Particles/ParticleSystem.h b/Source/Engine/Particles/ParticleSystem.h index 
82365790f..cd472b49f 100644 --- a/Source/Engine/Particles/ParticleSystem.h +++ b/Source/Engine/Particles/ParticleSystem.h @@ -13,6 +13,7 @@ API_CLASS(NoSpawn) class FLAXENGINE_API ParticleSystem : public BinaryAsset { DECLARE_BINARY_ASSET_HEADER(ParticleSystem, 1); + public: /// /// The particle system timeline track data. @@ -173,7 +174,7 @@ public: /// Spawns the particles at the given location. /// /// The spawn position. - /// If set to true effect be be auto-destroyed after duration. + /// If set to true effect be auto-destroyed after duration. /// The spawned effect. API_FUNCTION() ParticleEffect* Spawn(const Vector3& position, bool autoDestroy = false) { @@ -185,7 +186,7 @@ public: /// /// The spawn position. /// The spawn rotation. - /// If set to true effect be be auto-destroyed after duration. + /// If set to true effect be auto-destroyed after duration. /// The spawned effect. API_FUNCTION() ParticleEffect* Spawn(const Vector3& position, const Quaternion& rotation, bool autoDestroy = false) { @@ -196,9 +197,9 @@ public: /// Spawns the particles at the given location. /// /// The spawn transform. - /// If set to true effect be be auto-destroyed after duration. + /// If set to true effect be auto-destroyed after duration. /// The spawned effect. - API_FUNCTION() ParticleEffect* Spawn(Transform transform, bool autoDestroy = false) + API_FUNCTION() ParticleEffect* Spawn(const Transform& transform, bool autoDestroy = false) { return Spawn(nullptr, transform, autoDestroy); } @@ -208,7 +209,7 @@ public: /// /// The parent actor (can be null to link it to the first loaded scene). /// The spawn position. - /// If set to true effect be be auto-destroyed after duration. + /// If set to true effect be auto-destroyed after duration. /// The spawned effect. API_FUNCTION() ParticleEffect* Spawn(Actor* parent, const Vector3& position, bool autoDestroy = false) { @@ -221,7 +222,7 @@ public: /// The parent actor (can be null to link it to the first loaded scene). 
/// The spawn position. /// The spawn rotation. - /// If set to true effect be be auto-destroyed after duration. + /// If set to true effect be auto-destroyed after duration. /// The spawned effect. API_FUNCTION() ParticleEffect* Spawn(Actor* parent, Vector3 position, Quaternion rotation, bool autoDestroy = false) { @@ -233,7 +234,7 @@ public: ///
/// The parent actor (can be null to link it to the first loaded scene). /// The spawn transform. - /// If set to true effect be be auto-destroyed after duration. + /// If set to true effect be auto-destroyed after duration. /// The spawned effect. API_FUNCTION() ParticleEffect* Spawn(Actor* parent, const Transform& transform, bool autoDestroy = false); @@ -241,7 +242,7 @@ public: // [BinaryAsset] void InitAsVirtual() override; #if USE_EDITOR - void GetReferences(Array& output) const override; + void GetReferences(Array& assets, Array& files) const override; #endif protected: diff --git a/Source/Engine/Visject/Graph.h b/Source/Engine/Visject/Graph.h index f2a475567..c0ef06455 100644 --- a/Source/Engine/Visject/Graph.h +++ b/Source/Engine/Visject/Graph.h @@ -529,15 +529,15 @@ public: /// Gets the asset references. ///
/// - /// The output collection of the asset ids referenced by this object. - virtual void GetReferences(Array& output) const + /// The output collection of the asset ids referenced by this object. + virtual void GetReferences(Array& assets) const { for (int32 i = 0; i < Parameters.Count(); i++) { const auto& p = Parameters[i]; const Guid id = (Guid)p.Value; if (id.IsValid()) - output.Add(id); + assets.Add(id); } for (int32 i = 0; i < Nodes.Count(); i++) @@ -547,7 +547,7 @@ public: { const Guid id = (Guid)n.Values[j]; if (id.IsValid()) - output.Add(id); + assets.Add(id); } } } From 481a6de8215a2abe1208c9afb3e8d9041a444a36 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 17:27:46 +0200 Subject: [PATCH 059/292] Add support for cooking raw files referenced by assets --- Source/Editor/Cooker/CookingData.h | 15 +++-- Source/Editor/Cooker/GameCooker.cpp | 7 --- .../Editor/Cooker/Steps/CollectAssetsStep.cpp | 7 ++- Source/Editor/Cooker/Steps/CookAssetsStep.cpp | 56 ++++++++++++++----- Source/Engine/Content/JsonAsset.cpp | 14 ++++- 5 files changed, 71 insertions(+), 28 deletions(-) diff --git a/Source/Editor/Cooker/CookingData.h b/Source/Editor/Cooker/CookingData.h index 779b472a0..ef12d204e 100644 --- a/Source/Editor/Cooker/CookingData.h +++ b/Source/Editor/Cooker/CookingData.h @@ -285,24 +285,22 @@ public: /// /// The total assets amount in the build. /// - int32 TotalAssets; + int32 TotalAssets = 0; /// /// The cooked assets (TotalAssets - CookedAssets is amount of reused cached assets). /// - int32 CookedAssets; + int32 CookedAssets = 0; /// - /// The final output content size in MB. + /// The final output content size (in bytes). /// - int32 ContentSizeMB; + uint64 ContentSize = 0; /// /// The asset type stats. Key is the asset typename, value is the stats container. 
/// Dictionary AssetStats; - - Statistics(); }; /// @@ -328,6 +326,11 @@ public: /// HashSet Assets; + /// + /// The final files collection to include in build (valid only after CollectAssetsStep). + /// + HashSet Files; + struct BinaryModuleInfo { String Name; diff --git a/Source/Editor/Cooker/GameCooker.cpp b/Source/Editor/Cooker/GameCooker.cpp index ba6dd012c..118dd397f 100644 --- a/Source/Editor/Cooker/GameCooker.cpp +++ b/Source/Editor/Cooker/GameCooker.cpp @@ -202,13 +202,6 @@ bool CookingData::AssetTypeStatistics::operator<(const AssetTypeStatistics& othe return Count > other.Count; } -CookingData::Statistics::Statistics() -{ - TotalAssets = 0; - CookedAssets = 0; - ContentSizeMB = 0; -} - CookingData::CookingData(const SpawnParams& params) : ScriptingObject(params) { diff --git a/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp b/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp index 4787339e6..55e8b5037 100644 --- a/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp +++ b/Source/Editor/Cooker/Steps/CollectAssetsStep.cpp @@ -29,7 +29,7 @@ bool CollectAssetsStep::Perform(CookingData& data) while (assetsQueue.HasItems()) { BUILD_STEP_CANCEL_CHECK; - const auto assetId = assetsQueue.Dequeue(); + const Guid assetId = assetsQueue.Dequeue(); // Skip already processed or invalid assets if (!assetId.IsValid() @@ -68,6 +68,11 @@ bool CollectAssetsStep::Perform(CookingData& data) asset->GetReferences(references, files); asset->Locker.Unlock(); assetsQueue.Add(references); + for (String& file : files) + { + if (file.HasChars()) + data.Files.Add(MoveTemp(file)); + } } data.Stats.TotalAssets = data.Assets.Count(); diff --git a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp index 2558181aa..52050708e 100644 --- a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp +++ b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp @@ -891,7 +891,6 @@ bool CookAssetsStep::Process(CookingData& data, CacheData& cache, JsonAssetBase* class 
PackageBuilder : public NonCopyable { private: - int32 _packageIndex; int32 MaxAssetsPerPackage; int32 MaxPackageSize; @@ -904,7 +903,6 @@ private: uint64 packagesSizeTotal; public: - /// /// Initializes a new instance of the class. /// @@ -933,7 +931,6 @@ public: } public: - uint64 GetPackagesSizeTotal() const { return packagesSizeTotal; @@ -1042,8 +1039,11 @@ bool CookAssetsStep::Perform(CookingData& data) float Step1ProgressEnd = 0.6f; String Step1Info = TEXT("Cooking assets"); float Step2ProgressStart = Step1ProgressEnd; - float Step2ProgressEnd = 0.9f; - String Step2Info = TEXT("Packaging assets"); + float Step2ProgressEnd = 0.8f; + String Step2Info = TEXT("Cooking files"); + float Step3ProgressStart = Step2ProgressStart; + float Step3ProgressEnd = 0.9f; + String Step3Info = TEXT("Packaging assets"); data.StepProgress(TEXT("Loading build cache"), 0); @@ -1100,11 +1100,14 @@ bool CookAssetsStep::Perform(CookingData& data) #endif int32 subStepIndex = 0; AssetReference assetRef; - assetRef.Unload.Bind([]() { LOG(Error, "Asset gets unloaded while cooking it!"); Platform::Sleep(100); }); + assetRef.Unload.Bind([] + { + LOG(Error, "Asset got unloaded while cooking it!"); + Platform::Sleep(100); + }); for (auto i = data.Assets.Begin(); i.IsNotEnd(); ++i) { BUILD_STEP_CANCEL_CHECK; - data.StepProgress(Step1Info, Math::Lerp(Step1ProgressStart, Step1ProgressEnd, static_cast(subStepIndex++) / data.Assets.Count())); const Guid assetId = i->Item; @@ -1184,6 +1187,35 @@ bool CookAssetsStep::Perform(CookingData& data) // Save build cache header cache.Save(data); + // Process all files + for (auto i = data.Files.Begin(); i.IsNotEnd(); ++i) + { + BUILD_STEP_CANCEL_CHECK; + data.StepProgress(Step2Info, Math::Lerp(Step2ProgressStart, Step2ProgressEnd, (float)subStepIndex++ / data.Files.Count())); + const String& filePath = i->Item; + + // Calculate destination path + String cookedPath = data.DataOutputPath; + if (FileSystem::IsRelative(filePath)) + cookedPath /= filePath; + else 
+ cookedPath /= String(TEXT("Content")) / StringUtils::GetFileName(filePath); + + // Copy file + if (!FileSystem::FileExists(cookedPath) || FileSystem::GetFileLastEditTime(cookedPath) >= FileSystem::GetFileLastEditTime(filePath)) + { + if (FileSystem::CreateDirectory(StringUtils::GetDirectoryName(cookedPath))) + return true; + if (FileSystem::CopyFile(cookedPath, filePath)) + return true; + } + + // Count stats of file extension + auto& assetStats = data.Stats.AssetStats[FileSystem::GetExtension(cookedPath)]; + assetStats.Count++; + assetStats.ContentSize += FileSystem::GetFileSize(cookedPath); + } + // Create build game header { GameHeaderFlags gameFlags = GameHeaderFlags::None; @@ -1229,13 +1261,11 @@ bool CookAssetsStep::Perform(CookingData& data) for (auto i = AssetsRegistry.Begin(); i.IsNotEnd(); ++i) { BUILD_STEP_CANCEL_CHECK; - - data.StepProgress(Step2Info, Math::Lerp(Step2ProgressStart, Step2ProgressEnd, static_cast(subStepIndex++) / AssetsRegistry.Count())); + data.StepProgress(Step3Info, Math::Lerp(Step3ProgressStart, Step3ProgressEnd, (float)subStepIndex++ / AssetsRegistry.Count())); const auto assetId = i->Key; String cookedFilePath; cache.GetFilePath(assetId, cookedFilePath); - if (!FileSystem::FileExists(cookedFilePath)) { LOG(Warning, "Missing cooked file for asset \'{0}\'", assetId); @@ -1253,12 +1283,12 @@ bool CookAssetsStep::Perform(CookingData& data) return true; for (auto& e : data.Stats.AssetStats) e.Value.TypeName = e.Key; - data.Stats.ContentSizeMB = static_cast(packageBuilder.GetPackagesSizeTotal() / (1024 * 1024)); + data.Stats.ContentSize += packageBuilder.GetPackagesSizeTotal(); } BUILD_STEP_CANCEL_CHECK; - data.StepProgress(TEXT("Creating assets cache"), Step2ProgressEnd); + data.StepProgress(TEXT("Creating assets cache"), Step3ProgressEnd); // Create asset paths mapping for the assets. // Assets mapping is use to convert paths used in Content::Load(path) into the asset id. 
@@ -1291,7 +1321,7 @@ bool CookAssetsStep::Perform(CookingData& data) } // Print stats - LOG(Info, "Cooked {0} assets, total assets: {1}, total content packages size: {2} MB", data.Stats.CookedAssets, AssetsRegistry.Count(), data.Stats.ContentSizeMB); + LOG(Info, "Cooked {0} assets, total assets: {1}, total content packages size: {2} MB", data.Stats.CookedAssets, AssetsRegistry.Count(), (int32)(data.Stats.ContentSize / (1024 * 1024))); { Array assetTypes; data.Stats.AssetStats.GetValues(assetTypes); diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index e9960d424..a06e53350 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -125,14 +125,26 @@ void FindIds(ISerializable::DeserializeStream& node, Array& output, Array< FindIds(node[i], output, files); } } - else if (node.IsString()) + else if (node.IsString() && node.GetStringLength() != 0) { if (node.GetStringLength() == 32) { // Try parse as Guid in format `N` (32 hex chars) Guid id; if (!Guid::Parse(node.GetStringAnsiView(), id)) + { output.Add(id); + return; + } + } + if (node.GetStringLength() < 512) + { + // Try to detect file paths + String path = node.GetText(); + if (FileSystem::FileExists(path)) + { + files.Add(MoveTemp(path)); + } } } } From 40652a0ebc42ca7c985048ea21fb4b669a999e6e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 17:28:14 +0200 Subject: [PATCH 060/292] Fix `HashSet` adding item via move operation --- Source/Engine/Core/Collections/HashSet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Core/Collections/HashSet.h b/Source/Engine/Core/Collections/HashSet.h index b76244860..12378f2ea 100644 --- a/Source/Engine/Core/Collections/HashSet.h +++ b/Source/Engine/Core/Collections/HashSet.h @@ -58,7 +58,7 @@ public: } template - FORCE_INLINE void Occupy(ItemType& item) + FORCE_INLINE void Occupy(ItemType&& item) { Memory::MoveItems(&Item, &item, 1); _state = Occupied; 
From 571ba6773d1ab7d04a7cbdc4dd3a896ed03f0420 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 17:28:46 +0200 Subject: [PATCH 061/292] Optimize win32 filesystem access via static buffer or unlimited path len --- .../Engine/Platform/Win32/Win32FileSystem.cpp | 8 ++--- Source/Engine/Utilities/StringConverter.h | 33 +++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Platform/Win32/Win32FileSystem.cpp b/Source/Engine/Platform/Win32/Win32FileSystem.cpp index 486b1c603..ed11e0f90 100644 --- a/Source/Engine/Platform/Win32/Win32FileSystem.cpp +++ b/Source/Engine/Platform/Win32/Win32FileSystem.cpp @@ -8,16 +8,12 @@ #include "Engine/Core/Types/StringView.h" #include "Engine/Core/Math/Math.h" #include "Engine/Core/Collections/Array.h" +#include "Engine/Utilities/StringConverter.h" #include "IncludeWindowsHeaders.h" const DateTime WindowsEpoch(1970, 1, 1); -#define WIN32_INIT_BUFFER(path, buffer) \ - Char buffer[MAX_PATH]; \ - if (path.Length() > MAX_PATH) \ - return true; \ - Platform::MemoryCopy(buffer, path.Get(), path.Length() * sizeof(Char)); \ - buffer[path.Length()] = 0 +#define WIN32_INIT_BUFFER(path, buffer) StringAsTerminated<> buffer(path.Get(), path.Length()) bool Win32FileSystem::CreateDirectory(const StringView& path) { diff --git a/Source/Engine/Utilities/StringConverter.h b/Source/Engine/Utilities/StringConverter.h index 2dbccca52..fd56a40b9 100644 --- a/Source/Engine/Utilities/StringConverter.h +++ b/Source/Engine/Utilities/StringConverter.h @@ -132,3 +132,36 @@ public: this->_static = text; } }; + +template +class StringAsTerminated +{ +protected: + const CharType* _static = nullptr; + CharType* _dynamic = nullptr; + +public: + StringAsTerminated(const CharType* str, int32 length) + { + if (length != 0 && str[length] == 0) // Unsafe to access out of bounds... 
+ { + _static = str; + } + else + { + _dynamic = (CharType*)Allocator::Allocate((length + 1) * sizeof(CharType)); + Platform::MemoryCopy(_dynamic, str, length * sizeof(CharType)); + _dynamic[length] = 0; + } + } + + ~StringAsTerminated() + { + Allocator::Free(_dynamic); + } + + operator const CharType*() const + { + return _static ? _static : _dynamic; + } +}; From 33202a74b06dfbf8be7b4cf1e71d41d5a04b7ba8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 8 May 2024 19:15:25 +0200 Subject: [PATCH 062/292] Fix --- Source/Editor/Cooker/Platform/Android/AndroidPlatformTools.cpp | 2 -- Source/Engine/Content/JsonAsset.cpp | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Editor/Cooker/Platform/Android/AndroidPlatformTools.cpp b/Source/Editor/Cooker/Platform/Android/AndroidPlatformTools.cpp index d55ad01e3..bd6e7a25c 100644 --- a/Source/Editor/Cooker/Platform/Android/AndroidPlatformTools.cpp +++ b/Source/Editor/Cooker/Platform/Android/AndroidPlatformTools.cpp @@ -301,9 +301,7 @@ bool AndroidPlatformTools::OnPostProcess(CookingData& data) const auto buildSettings = BuildSettings::Get(); if (buildSettings->SkipPackaging) - { return false; - } GameCooker::PackageFiles(); // Validate environment variables diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index a06e53350..0dda52592 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -3,6 +3,7 @@ #include "JsonAsset.h" #if USE_EDITOR #include "Engine/Platform/File.h" +#include "Engine/Platform/FileSystem.h" #include "Engine/Core/Types/DataContainer.h" #include "Engine/Level/Level.h" #else From 4acaa62a07b8d9000110ca9ed009ea6b3ba5b72b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 10 May 2024 12:52:07 +0200 Subject: [PATCH 063/292] Fix crash in `AudioBackendOAL::Source_DequeueProcessedBuffers` when buffer count is large --- Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp index 879061d61..9730b6ddf 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp @@ -453,9 +453,10 @@ void AudioBackendOAL::Source_QueueBuffer(uint32 sourceID, uint32 bufferID) void AudioBackendOAL::Source_DequeueProcessedBuffers(uint32 sourceID) { int32 numProcessedBuffers; - ALuint buffers[AUDIO_MAX_SOURCE_BUFFERS]; alGetSourcei(sourceID, AL_BUFFERS_PROCESSED, &numProcessedBuffers); - alSourceUnqueueBuffers(sourceID, numProcessedBuffers, buffers); + Array> buffers; + buffers.Resize(numProcessedBuffers); + alSourceUnqueueBuffers(sourceID, numProcessedBuffers, buffers.Get()); ALC_CHECK_ERROR(alSourceUnqueueBuffers); } From ffe510560241a2f085c5dfb20f5a0590129c4c28 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 10 May 2024 13:12:07 +0200 Subject: [PATCH 064/292] Fixes for audio playback in videos --- Source/Engine/Video/MF/VideoBackendMF.cpp | 23 +++++++++++++-- Source/Engine/Video/Types.h | 4 +++ Source/Engine/Video/Video.cpp | 34 +++++++++++++++++++++-- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 207f9b72c..08929fe5d 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -330,7 +330,11 @@ namespace MF if (sample) sample->Release(); if (isGoodSample) + { goodSamplesLeft--; + if (!sample) + return true; // Got good sample but without data so seek + } if (flags & MF_SOURCE_READERF_ENDOFSTREAM) { @@ -392,6 +396,7 @@ namespace MF } // Update current position + bool canSeek = true; SEEK_START: if (playerMF.Seek) { @@ -418,7 +423,12 @@ namespace MF { // Failed to pick a valid sample so try again with seeking playerMF.Seek = 1; - goto SEEK_START; + if (canSeek) + { + // Prevent deadlock on sync + canSeek = false; + goto 
SEEK_START; + } } if (player.AudioInfo.BitDepth != 0) ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt); @@ -497,6 +507,7 @@ void VideoBackendMF::Player_Play(VideoBackendPlayer& player) PROFILE_CPU(); auto& playerMF = player.GetBackendState(); playerMF.Playing = 1; + player.PlayAudio(); } void VideoBackendMF::Player_Pause(VideoBackendPlayer& player) @@ -504,6 +515,7 @@ void VideoBackendMF::Player_Pause(VideoBackendPlayer& player) PROFILE_CPU(); auto& playerMF = player.GetBackendState(); playerMF.Playing = 0; + player.PauseAudio(); } void VideoBackendMF::Player_Stop(VideoBackendPlayer& player) @@ -514,14 +526,19 @@ void VideoBackendMF::Player_Stop(VideoBackendPlayer& player) playerMF.Playing = 0; playerMF.FirstFrame = 1; playerMF.Seek = 1; + player.StopAudio(); } void VideoBackendMF::Player_Seek(VideoBackendPlayer& player, TimeSpan time) { PROFILE_CPU(); auto& playerMF = player.GetBackendState(); - playerMF.Time = time; - playerMF.Seek = 1; + if (playerMF.Time != time) + { + playerMF.Time = time; + playerMF.Seek = 1; + player.StopAudio(); + } } TimeSpan VideoBackendMF::Player_GetTime(const VideoBackendPlayer& player) diff --git a/Source/Engine/Video/Types.h b/Source/Engine/Video/Types.h index 20b065493..bbf920b5c 100644 --- a/Source/Engine/Video/Types.h +++ b/Source/Engine/Video/Types.h @@ -35,6 +35,7 @@ struct VideoBackendPlayer int32 VideoFrameWidth, VideoFrameHeight; PixelFormat Format; float FrameRate; + uint8 IsAudioPlayPending : 1; TimeSpan Duration; TimeSpan VideoFrameTime, VideoFrameDuration; TimeSpan AudioBufferTime, AudioBufferDuration; @@ -67,6 +68,9 @@ struct VideoBackendPlayer } void Created(const VideoBackendPlayerInfo& info); + void PlayAudio(); + void PauseAudio(); + void StopAudio(); void InitVideoFrame(); void UpdateVideoFrame(Span data, TimeSpan time, TimeSpan duration); void UpdateAudioBuffer(Span data, TimeSpan time, TimeSpan duration); diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 
2da3d2b4d..705517e20 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -214,6 +214,33 @@ void VideoBackendPlayer::Created(const VideoBackendPlayerInfo& info) #endif } +void VideoBackendPlayer::PlayAudio() +{ + if (AudioSource) + { + IsAudioPlayPending = 0; + AudioBackend::Source::Play(AudioSource); + } +} + +void VideoBackendPlayer::PauseAudio() +{ + if (AudioSource) + { + IsAudioPlayPending = 0; + AudioBackend::Source::Pause(AudioSource); + } +} + +void VideoBackendPlayer::StopAudio() +{ + if (AudioSource) + { + AudioBackend::Source::Stop(AudioSource); + IsAudioPlayPending = 1; + } +} + void VideoBackendPlayer::InitVideoFrame() { if (!GPUDevice::Instance) @@ -282,12 +309,12 @@ void VideoBackendPlayer::UpdateAudioBuffer(Span data, TimeSpan time, TimeS return; // Setup audio source - bool newSource = AudioSource == 0; - if (newSource) + if (AudioSource == 0) { // TODO: spatial video player // TODO: video player volume/pan control AudioSource = AudioBackend::Source::Add(AudioInfo, Vector3::Zero, Quaternion::Identity, 1.0f, 1.0f, 0.0f, false, false, 1.0f, 1000.0f, 1.0f); + IsAudioPlayPending = 1; } else { @@ -320,8 +347,9 @@ void VideoBackendPlayer::UpdateAudioBuffer(Span data, TimeSpan time, TimeS // Append audio buffer AudioBackend::Source::QueueBuffer(AudioSource, bufferId); - if (newSource) + if (IsAudioPlayPending) { + IsAudioPlayPending = 0; AudioBackend::Source::Play(AudioSource); } } From 196aa020fde8b626367dcf8b12d22159a9c32317 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 10 May 2024 13:16:07 +0200 Subject: [PATCH 065/292] Fix video playback if fie has no audio track --- Source/Engine/Video/MF/VideoBackendMF.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 08929fe5d..264022893 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -460,12 +460,12 @@ 
bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBack return true; } sourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 1); - sourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, 1); + bool hasAudio = sourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, 1) == S_OK; playerMF.SourceReader = sourceReader; // Read media info if (MF::Configure(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM) || - MF::Configure(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM)) + hasAudio && MF::Configure(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM)) return true; PROPVARIANT var; hr = sourceReader->GetPresentationAttribute(MF_SOURCE_READER_MEDIASOURCE, MF_PD_DURATION, &var); From f0d143ecaa7011a9ba1e527ae9cdd71beab33362 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 10 May 2024 13:29:08 +0200 Subject: [PATCH 066/292] Fix decoding 480p videos --- Source/Engine/Video/Video.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 705517e20..5b14d6005 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -75,7 +75,7 @@ protected: // Decompress data into RGBA texture auto cb = GPUDevice::Instance->QuadShader->GetCB(0); QuadShaderData cbData; - cbData.Color = Float4((float)_player->Width, (float)_player->Height, 0, 0); + cbData.Color = Float4((float)_player->VideoFrameWidth, (float)_player->VideoFrameHeight, 0, 0); context->GPU->UpdateCB(cb, &cbData); context->GPU->BindCB(0, cb); context->GPU->SetViewportAndScissors((float)_player->Width, (float)_player->Height); From 6b31d51e318e3d67b9c4ef3d638519ea23d6c448 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 10 May 2024 13:54:52 +0200 Subject: [PATCH 067/292] Add volume, pan and spatial audio options for video playback --- Source/Engine/Audio/AudioSource.h | 7 +-- Source/Engine/Video/MF/VideoBackendMF.cpp | 5 +- 
Source/Engine/Video/Types.h | 8 +++ Source/Engine/Video/Video.cpp | 30 ++++++++- Source/Engine/Video/VideoBackend.h | 6 ++ Source/Engine/Video/VideoPlayer.cpp | 50 +++++++++++++++ Source/Engine/Video/VideoPlayer.h | 76 ++++++++++++++++++++++- 7 files changed, 171 insertions(+), 11 deletions(-) diff --git a/Source/Engine/Audio/AudioSource.h b/Source/Engine/Audio/AudioSource.h index 682c07563..b90bb4a73 100644 --- a/Source/Engine/Audio/AudioSource.h +++ b/Source/Engine/Audio/AudioSource.h @@ -5,7 +5,6 @@ #include "Engine/Level/Actor.h" #include "Engine/Content/AssetReference.h" #include "AudioClip.h" -#include "Config.h" /// /// Represents a source for emitting audio. Audio can be played spatially (gun shot), or normally (music). Each audio source must have an AudioClip to play - back, and it can also have a position in the case of spatial (3D) audio. @@ -141,7 +140,7 @@ public: API_PROPERTY() void SetIsLooping(bool value); /// - /// Determines whether the audio clip should auto play on level start. + /// Determines whether the audio clip should autoplay on level start. /// API_PROPERTY(Attributes="EditorOrder(50), DefaultValue(false), EditorDisplay(\"Audio Source\", \"Play On Start\")") FORCE_INLINE bool GetPlayOnStart() const @@ -159,7 +158,7 @@ public: } /// - /// Determines whether the audio clip should auto play on game start. + /// Determines whether the audio clip should autoplay on game start. /// API_PROPERTY() void SetPlayOnStart(bool value); @@ -211,7 +210,7 @@ public: API_PROPERTY() void SetDopplerFactor(float value); /// - /// If checked, source can play spatial 3d audio (when audio clip supports it), otherwise will always play as 2d sound. At 0, no distance attenuation ever occurs. + /// If checked, source can play spatial 3d audio (when audio clip supports it), otherwise will always play as 2d sound. 
/// API_PROPERTY(Attributes="EditorOrder(80), DefaultValue(true), EditorDisplay(\"Audio Source\")") FORCE_INLINE bool GetAllowSpatialization() const diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 264022893..1aa623502 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -432,6 +432,8 @@ namespace MF } if (player.AudioInfo.BitDepth != 0) ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt); + + player.Tick(); } } @@ -499,7 +501,8 @@ void VideoBackendMF::Player_UpdateInfo(VideoBackendPlayer& player, const VideoBa { PROFILE_CPU(); auto& playerMF = player.GetBackendState(); - playerMF.Loop = true; + playerMF.Loop = info.Loop; + player.Updated(info); } void VideoBackendMF::Player_Play(VideoBackendPlayer& player) diff --git a/Source/Engine/Video/Types.h b/Source/Engine/Video/Types.h index bbf920b5c..5113b00d8 100644 --- a/Source/Engine/Video/Types.h +++ b/Source/Engine/Video/Types.h @@ -27,6 +27,7 @@ struct VideoBackendPlayer GPUTexture* Frame; GPUBuffer* FrameUpload; class GPUUploadVideoFrameTask* UploadVideoFrameTask; + const Transform* Transform; #ifdef TRACY_ENABLE Char* DebugUrl; int32 DebugUrlLen; @@ -35,6 +36,11 @@ struct VideoBackendPlayer int32 VideoFrameWidth, VideoFrameHeight; PixelFormat Format; float FrameRate; + float AudioVolume; + float AudioPan; + float AudioMinDistance; + float AudioAttenuation; + uint8 IsAudioSpatial : 1; uint8 IsAudioPlayPending : 1; TimeSpan Duration; TimeSpan VideoFrameTime, VideoFrameDuration; @@ -68,11 +74,13 @@ struct VideoBackendPlayer } void Created(const VideoBackendPlayerInfo& info); + void Updated(const VideoBackendPlayerInfo& info); void PlayAudio(); void PauseAudio(); void StopAudio(); void InitVideoFrame(); void UpdateVideoFrame(Span data, TimeSpan time, TimeSpan duration); void UpdateAudioBuffer(Span data, TimeSpan time, TimeSpan duration); + void Tick(); void ReleaseResources(); }; diff --git 
a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 5b14d6005..7af4caedf 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -5,6 +5,7 @@ #include "Engine/Audio/AudioBackend.h" #include "Engine/Core/Log.h" #include "Engine/Core/Math/Quaternion.h" +#include "Engine/Core/Math/Transform.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Engine/Engine.h" #include "Engine/Engine/EngineService.h" @@ -212,6 +213,23 @@ void VideoBackendPlayer::Created(const VideoBackendPlayerInfo& info) DebugUrl = (Char*)Allocator::Allocate(DebugUrlLen * sizeof(Char) + 2); Platform::MemoryCopy(DebugUrl, *info.Url, DebugUrlLen * 2 + 2); #endif + Updated(info); +} + +void VideoBackendPlayer::Updated(const VideoBackendPlayerInfo& info) +{ + IsAudioSpatial = info.Spatial; + AudioVolume = info.Volume; + AudioPan = info.Pan; + AudioMinDistance = info.MinDistance; + AudioAttenuation = info.Attenuation; + Transform = info.Transform; + if (AudioSource) + { + AudioBackend::Source::VolumeChanged(AudioSource, AudioVolume); + AudioBackend::Source::PanChanged(AudioSource, AudioPan); + AudioBackend::Source::SpatialSetupChanged(AudioSource, IsAudioSpatial, AudioAttenuation, AudioMinDistance, 1.0f); + } } void VideoBackendPlayer::PlayAudio() @@ -311,9 +329,7 @@ void VideoBackendPlayer::UpdateAudioBuffer(Span data, TimeSpan time, TimeS // Setup audio source if (AudioSource == 0) { - // TODO: spatial video player - // TODO: video player volume/pan control - AudioSource = AudioBackend::Source::Add(AudioInfo, Vector3::Zero, Quaternion::Identity, 1.0f, 1.0f, 0.0f, false, false, 1.0f, 1000.0f, 1.0f); + AudioSource = AudioBackend::Source::Add(AudioInfo, Vector3::Zero, Quaternion::Identity, AudioVolume, 1.0f, AudioPan, false, IsAudioSpatial, AudioAttenuation, AudioMinDistance, 1.0f); IsAudioPlayPending = 1; } else @@ -354,6 +370,14 @@ void VideoBackendPlayer::UpdateAudioBuffer(Span data, TimeSpan time, TimeS } } +void VideoBackendPlayer::Tick() +{ + if 
(AudioSource && IsAudioSpatial && Transform) + { + AudioBackend::Source::TransformChanged(AudioSource, Transform->Translation, Transform->Orientation); + } +} + void VideoBackendPlayer::ReleaseResources() { if (AudioSource) diff --git a/Source/Engine/Video/VideoBackend.h b/Source/Engine/Video/VideoBackend.h index 9829483b6..008268c55 100644 --- a/Source/Engine/Video/VideoBackend.h +++ b/Source/Engine/Video/VideoBackend.h @@ -13,6 +13,12 @@ struct VideoBackendPlayerInfo { StringView Url; bool Loop; + bool Spatial; + float Volume; + float Pan; + float MinDistance; + float Attenuation; + const Transform* Transform; }; /// diff --git a/Source/Engine/Video/VideoPlayer.cpp b/Source/Engine/Video/VideoPlayer.cpp index baf8e617e..5bdcad7bf 100644 --- a/Source/Engine/Video/VideoPlayer.cpp +++ b/Source/Engine/Video/VideoPlayer.cpp @@ -31,6 +31,50 @@ void VideoPlayer::SetIsLooping(bool value) UpdateInfo(); } +void VideoPlayer::SetIsAudioSpatial(bool value) +{ + if (_isSpatial == value) + return; + _isSpatial = value; + UpdateInfo(); +} + +void VideoPlayer::SetAudioVolume(float value) +{ + value = Math::Saturate(value); + if (Math::NearEqual(_volume, value)) + return; + _volume = value; + UpdateInfo(); +} + +void VideoPlayer::SetAudioPan(float value) +{ + value = Math::Clamp(value, -1.0f, 1.0f); + if (Math::NearEqual(_pan, value)) + return; + _pan = value; + UpdateInfo(); +} + +void VideoPlayer::SetAudioMinDistance(float value) +{ + value = Math::Max(0.0f, value); + if (Math::NearEqual(_minDistance, value)) + return; + _minDistance = value; + UpdateInfo(); +} + +void VideoPlayer::SetAudioAttenuation(float value) +{ + value = Math::Max(0.0f, value); + if (Math::NearEqual(_attenuation, value)) + return; + _attenuation = value; + UpdateInfo(); +} + void VideoPlayer::Play() { auto state = _state; @@ -127,6 +171,12 @@ void VideoPlayer::GetInfo(VideoBackendPlayerInfo& info) const { info.Url = Url; info.Loop = _loop; + info.Spatial = _isSpatial; + info.Volume = _volume; + info.Pan = 
_pan; + info.MinDistance = _minDistance; + info.Attenuation = _attenuation; + info.Transform = &_transform; } void VideoPlayer::UpdateInfo() diff --git a/Source/Engine/Video/VideoPlayer.h b/Source/Engine/Video/VideoPlayer.h index 75416e36b..24ee2ebce 100644 --- a/Source/Engine/Video/VideoPlayer.h +++ b/Source/Engine/Video/VideoPlayer.h @@ -11,7 +11,6 @@ /// Video playback utility. Video content can be presented in UI (via VideoBrush), used in materials (via texture parameter bind) or used manually in shaders. /// API_CLASS(Attributes="ActorContextMenu(\"New/Visuals/Video Player\"), ActorToolbox(\"Visuals\")") - class FLAXENGINE_API VideoPlayer : public Actor { DECLARE_SCENE_OBJECT(VideoPlayer); @@ -42,7 +41,8 @@ public: private: VideoBackendPlayer _player; States _state = States::Stopped; - bool _loop = false; + bool _loop = false, _isSpatial = false; + float _volume = 1.0f, _pan = 0.0f, _minDistance = 1000.0f, _attenuation = 1.0f; public: ~VideoPlayer(); @@ -68,7 +68,7 @@ public: API_PROPERTY() void SetIsLooping(bool value); /// - /// Determines whether the video clip should auto play on level start. + /// Determines whether the video clip should autoplay on level start. /// API_FIELD(Attributes="EditorOrder(30), DefaultValue(false), EditorDisplay(\"Video Player\", \"Play On Start\")") bool PlayOnStart = false; @@ -79,6 +79,76 @@ public: API_FIELD(Attributes = "EditorOrder(35), DefaultValue(0.0f), Limit(0, float.MaxValue, 0.01f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(PlayOnStart))") float StartTime = 0.0f; + /// + /// If checked, video player is using spatialization to play 3d audio, otherwise will always play as 2d sound. + /// + API_PROPERTY(Attributes="EditorOrder(50), DefaultValue(false), EditorDisplay(\"Video Player\")") + FORCE_INLINE bool GetIsAudioSpatial() const + { + return _isSpatial; + } + + /// + /// If checked, source can play spatial 3d audio (when audio clip supports it), otherwise will always play as 2d sound.
+ /// + API_PROPERTY() void SetIsAudioSpatial(bool value); + + /// + /// Gets the volume of the audio played from this video, in [0, 1] range. + /// + API_PROPERTY(Attributes="EditorOrder(100), DefaultValue(1.0f), Limit(0, 1, 0.01f), EditorDisplay(\"Video Player\")") + FORCE_INLINE float GetAudioVolume() const + { + return _volume; + } + + /// + /// Sets the volume of the audio played from this video, in [0, 1] range. + /// + API_PROPERTY() void SetAudioVolume(float value); + + /// + /// Gets the stereo pan of the played audio (-1 is left speaker, 1 is right speaker, 0 is balanced). The default is 0. Used by non-spatial audio only. + /// + API_PROPERTY(Attributes="EditorOrder(110), DefaultValue(0.0f), Limit(-1.0f, 1.0f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(IsAudioSpatial), true)") + FORCE_INLINE float GetAudioPan() const + { + return _pan; + } + + /// + /// Sets the stereo pan of the played audio (-1 is left speaker, 1 is right speaker, 0 is balanced). The default is 0. Used by non-spatial audio only. + /// + API_PROPERTY() void SetAudioPan(float value); + + /// + /// Gets the minimum distance at which audio attenuation starts. When the listener is closer to the video player than this value, audio is heard at full volume. Once farther away the audio starts attenuating. + /// + API_PROPERTY(Attributes="EditorOrder(120), DefaultValue(1000.0f), Limit(0, float.MaxValue, 0.1f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(IsAudioSpatial))") + FORCE_INLINE float GetAudioMinDistance() const + { + return _minDistance; + } + + /// + /// Sets the minimum distance at which audio attenuation starts. When the listener is closer to the video player than this value, audio is heard at full volume. Once farther away the audio starts attenuating.
+ /// + API_PROPERTY() void SetAudioMinDistance(float value); + + /// + /// Gets the attenuation that controls how quickly does audio volume drop off as the listener moves further from the video player. + /// + API_PROPERTY(Attributes="EditorOrder(130), DefaultValue(1.0f), Limit(0, float.MaxValue, 0.1f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(IsAudioSpatial))") + FORCE_INLINE float GetAudioAttenuation() const + { + return _attenuation; + } + + /// + /// Sets the attenuation that controls how quickly does audio volume drop off as the listener moves further from the video player. At 0, no distance attenuation ever occurs. + /// + API_PROPERTY() void SetAudioAttenuation(float value); + public: /// /// Starts playing the currently assigned video Url. From df086f3b3b6c32a7d61f5ced88f2dd74aa53177e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 13 May 2024 15:03:44 +0200 Subject: [PATCH 068/292] Add more backends --- Source/Engine/Video/Video.Build.cs | 12 ++++++++++++ Source/Engine/Video/Video.cpp | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/Source/Engine/Video/Video.Build.cs b/Source/Engine/Video/Video.Build.cs index f2368329c..a30105f8c 100644 --- a/Source/Engine/Video/Video.Build.cs +++ b/Source/Engine/Video/Video.Build.cs @@ -34,6 +34,18 @@ public class Video : EngineModule options.OutputFiles.Add("mfreadwrite.lib"); options.OutputFiles.Add("mfuuid.lib"); break; + case TargetPlatform.PS4: + options.SourcePaths.Add(Path.Combine(Globals.EngineRoot, "Source", "Platforms", "PS4", "Engine", "Video")); + options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_PS4"); + break; + case TargetPlatform.PS5: + options.SourcePaths.Add(Path.Combine(Globals.EngineRoot, "Source", "Platforms", "PS5", "Engine", "Video")); + options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_PS5"); + break; + case TargetPlatform.Switch: + options.SourcePaths.Add(Path.Combine(Globals.EngineRoot, "Source", "Platforms", "Switch", "Engine", "Video")); + 
options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_SWITCH"); + break; } } diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 7af4caedf..6b98941ff 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -22,6 +22,15 @@ #if VIDEO_API_MF #include "MF/VideoBackendMF.h" #endif +#if VIDEO_API_PS4 +#include "Platforms/PS4/Engine/Video/VideoBackendPS4.h" +#endif +#if VIDEO_API_PS5 +#include "Platforms/PS5/Engine/Video/VideoBackendPS5.h" +#endif +#if VIDEO_API_SWITCH +#include "Platforms/Switch/Engine/Video/VideoBackendSwitch.h" +#endif /// /// Video frame upload task to the GPU. @@ -200,6 +209,15 @@ bool Video::CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackend #if VIDEO_API_MF TRY_USE_BACKEND(VideoBackendMF); #endif +#if VIDEO_API_PS4 + TRY_USE_BACKEND(VideoBackendPS4); +#endif +#if VIDEO_API_PS5 + TRY_USE_BACKEND(VideoBackendPS5); +#endif +#if VIDEO_API_SWITCH + TRY_USE_BACKEND(VideoBackendSwitch); +#endif #undef TRY_USE_BACKEND LOG(Error, "Failed to setup Video playback backend for '{}'", info.Url); From 3593f835cdf7438e14665791843bb1f850133637 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 13 May 2024 15:03:55 +0200 Subject: [PATCH 069/292] Remove unused property of video player --- Source/Engine/Video/MF/VideoBackendMF.cpp | 1 - Source/Engine/Video/Types.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 1aa623502..0fb52b765 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -82,7 +82,6 @@ namespace MF player.Width = videoArea.Area.cx; player.Height = videoArea.Area.cy; } - player.AvgVideoBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0); uint64_t fpsValue; hr = mediaType->GetUINT64(MF_MT_FRAME_RATE, &fpsValue); if (SUCCEEDED(hr)) diff --git a/Source/Engine/Video/Types.h b/Source/Engine/Video/Types.h 
index 5113b00d8..1f5c36cce 100644 --- a/Source/Engine/Video/Types.h +++ b/Source/Engine/Video/Types.h @@ -32,7 +32,7 @@ struct VideoBackendPlayer Char* DebugUrl; int32 DebugUrlLen; #endif - int32 Width, Height, AvgVideoBitRate, FramesCount; + int32 Width, Height, FramesCount; int32 VideoFrameWidth, VideoFrameHeight; PixelFormat Format; float FrameRate; From a742ce1d321de204f0f70c61f83bc08af56cf4f1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 13 May 2024 22:40:27 +0200 Subject: [PATCH 070/292] Optimize `FileReadStream` seeking if new position is within the cached buffer --- .../Engine/Serialization/FileReadStream.cpp | 41 ++++++++++++++++++- Source/Engine/Serialization/FileReadStream.h | 1 + 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Serialization/FileReadStream.cpp b/Source/Engine/Serialization/FileReadStream.cpp index c542c9490..4aaee55cc 100644 --- a/Source/Engine/Serialization/FileReadStream.cpp +++ b/Source/Engine/Serialization/FileReadStream.cpp @@ -4,6 +4,8 @@ #include "Engine/Core/Log.h" #include "Engine/Platform/File.h" +#define USE_FILE_POS (1) + FileReadStream* FileReadStream::Open(const StringView& path) { const auto file = File::Open(path, FileMode::OpenExisting, FileAccess::Read, FileShare::Read); @@ -24,8 +26,12 @@ FileReadStream::FileReadStream(File* file) : _file(file) , _virtualPosInBuffer(0) , _bufferSize(0) +#if USE_FILE_POS + , _filePosition(file->GetPosition()) +#else + , _filePosition(0) +#endif { - ASSERT_LOW_LAYER(_file); } FileReadStream::~FileReadStream() @@ -62,16 +68,40 @@ uint32 FileReadStream::GetLength() uint32 FileReadStream::GetPosition() { +#if USE_FILE_POS + return _filePosition - _bufferSize + _virtualPosInBuffer; +#else return _file->GetPosition() - _bufferSize + _virtualPosInBuffer; +#endif } void FileReadStream::SetPosition(uint32 seek) { +#if USE_FILE_POS + // Skip if position won't change + if (GetPosition() == seek) + { + return; + } + + // Try to seek with virtual position + 
uint32 bufferStartPos = _filePosition - _bufferSize; + if (seek >= GetPosition() && seek < _filePosition) + { + _virtualPosInBuffer = seek - bufferStartPos; + return; + } +#endif + // Seek _file->SetPosition(seek); + _filePosition = _file->GetPosition(); // Update buffer _hasError |= _file->Read(_buffer, FILESTREAM_BUFFER_SIZE, &_bufferSize) != 0; +#if USE_FILE_POS + _filePosition += _bufferSize; +#endif _virtualPosInBuffer = 0; } @@ -88,6 +118,9 @@ void FileReadStream::ReadBytes(void* data, uint32 bytes) { CHECK(_virtualPosInBuffer == 0); _hasError |= _file->Read(_buffer, FILESTREAM_BUFFER_SIZE, &_bufferSize) != 0; +#if USE_FILE_POS + _filePosition += _bufferSize; +#endif } // Check if buffer has enough data for this read @@ -107,6 +140,9 @@ void FileReadStream::ReadBytes(void* data, uint32 bytes) bytes -= bufferBytesLeft; _virtualPosInBuffer = 0; _hasError |= _file->Read(_buffer, FILESTREAM_BUFFER_SIZE, &_bufferSize) != 0; +#if USE_FILE_POS + _filePosition += _bufferSize; +#endif } // Read as much as can using whole buffer @@ -116,6 +152,9 @@ void FileReadStream::ReadBytes(void* data, uint32 bytes) data = (byte*)data + FILESTREAM_BUFFER_SIZE; bytes -= FILESTREAM_BUFFER_SIZE; _hasError |= _file->Read(_buffer, FILESTREAM_BUFFER_SIZE, &_bufferSize) != 0; +#if USE_FILE_POS + _filePosition += _bufferSize; +#endif } // Read the rest of the buffer but without flushing its data diff --git a/Source/Engine/Serialization/FileReadStream.h b/Source/Engine/Serialization/FileReadStream.h index c5a9c22c7..ff88efcc1 100644 --- a/Source/Engine/Serialization/FileReadStream.h +++ b/Source/Engine/Serialization/FileReadStream.h @@ -15,6 +15,7 @@ private: File* _file; uint32 _virtualPosInBuffer; // Current position in the buffer (index) uint32 _bufferSize; // Amount of loaded bytes from the file to the buffer + uint32 _filePosition; // Cached position in the file (native) byte _buffer[FILESTREAM_BUFFER_SIZE]; public: From 3ae30a59b32d368dd47094550c15b0cb71f6c94e Mon Sep 17 00:00:00 
2001 From: Wojtek Figat Date: Tue, 14 May 2024 13:13:37 +0200 Subject: [PATCH 071/292] Fix engine with c# scripting disabled --- Source/Engine/Scripting/Runtime/None.cpp | 35 +++++++++++++++++++++ Source/Engine/Scripting/ScriptingObject.cpp | 10 ++++++ Source/Engine/Scripting/ScriptingObject.h | 10 ++++++ 3 files changed, 55 insertions(+) diff --git a/Source/Engine/Scripting/Runtime/None.cpp b/Source/Engine/Scripting/Runtime/None.cpp index bb83fd27d..f4cdd92e2 100644 --- a/Source/Engine/Scripting/Runtime/None.cpp +++ b/Source/Engine/Scripting/Runtime/None.cpp @@ -253,6 +253,41 @@ MObject* MCore::Exception::GetNotSupported(const char* msg) return nullptr; } +::String MCore::Type::ToString(MType* type) +{ + return ::String::Empty; +} + +MClass* MCore::Type::GetClass(MType* type) +{ + return nullptr; +} + +MType* MCore::Type::GetElementType(MType* type) +{ + return nullptr; +} + +int32 MCore::Type::GetSize(MType* type) +{ + return 0; +} + +MTypes MCore::Type::GetType(MType* type) +{ + return MTypes::End; +} + +bool MCore::Type::IsPointer(MType* type) +{ + return false; +} + +bool MCore::Type::IsReference(MType* type) +{ + return false; +} + const MAssembly::ClassesDictionary& MAssembly::GetClasses() const { _hasCachedClasses = true; diff --git a/Source/Engine/Scripting/ScriptingObject.cpp b/Source/Engine/Scripting/ScriptingObject.cpp index 0eafa2451..0efb817d2 100644 --- a/Source/Engine/Scripting/ScriptingObject.cpp +++ b/Source/Engine/Scripting/ScriptingObject.cpp @@ -260,7 +260,11 @@ ScriptingObject* ScriptingObject::ToNative(MObject* obj) bool ScriptingObject::Is(const ScriptingTypeHandle& type) const { CHECK_RETURN(type, false); +#if SCRIPTING_OBJECT_CAST_WITH_CSHARP return _type == type || CanCast(GetClass(), type.GetType().ManagedClass); +#else + return CanCast(GetTypeHandle(), type); +#endif } void ScriptingObject::ChangeID(const Guid& newId) @@ -421,10 +425,16 @@ void ScriptingObject::UnregisterObject() bool ScriptingObject::CanCast(const ScriptingTypeHandle& 
from, const ScriptingTypeHandle& to) { + if (from == to) + return true; if (!from && !to) return true; CHECK_RETURN(from && to, false); +#if SCRIPTING_OBJECT_CAST_WITH_CSHARP return CanCast(from.GetType().ManagedClass, to.GetType().ManagedClass); +#else + return to.IsAssignableFrom(from); +#endif } bool ScriptingObject::CanCast(const MClass* from, const MClass* to) diff --git a/Source/Engine/Scripting/ScriptingObject.h b/Source/Engine/Scripting/ScriptingObject.h index 9de79109f..d893d2700 100644 --- a/Source/Engine/Scripting/ScriptingObject.h +++ b/Source/Engine/Scripting/ScriptingObject.h @@ -7,6 +7,8 @@ #include "Engine/Core/Delegate.h" #include "ManagedCLR/MTypes.h" +#define SCRIPTING_OBJECT_CAST_WITH_CSHARP (USE_CSHARP) + /// /// Represents object from unmanaged memory that can use accessed via scripting. /// @@ -156,7 +158,11 @@ public: template static T* Cast(ScriptingObject* obj) { +#if SCRIPTING_OBJECT_CAST_WITH_CSHARP return obj && CanCast(obj->GetClass(), T::GetStaticClass()) ? static_cast(obj) : nullptr; +#else + return obj && CanCast(obj->GetTypeHandle(), T::TypeInitializer) ? 
static_cast(obj) : nullptr; +#endif } bool Is(const ScriptingTypeHandle& type) const; @@ -169,7 +175,11 @@ public: template bool Is() const { +#if SCRIPTING_OBJECT_CAST_WITH_CSHARP return CanCast(GetClass(), T::GetStaticClass()); +#else + return CanCast(GetTypeHandle(), T::TypeInitializer); +#endif } public: From 9d2dc919200aee32e531df7ce8041fbf74c9c378 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 15 May 2024 11:14:16 +0200 Subject: [PATCH 072/292] Add `PixelFormat::NV12` --- Source/Engine/Graphics/PixelFormat.h | 5 +++++ Source/Engine/Graphics/PixelFormatExtensions.cpp | 3 +++ Source/Engine/Graphics/RenderTools.cpp | 4 ++++ Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp | 3 ++- Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp | 3 ++- Source/Engine/Video/MF/VideoBackendMF.cpp | 2 ++ 6 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/PixelFormat.h b/Source/Engine/Graphics/PixelFormat.h index 9335b10c6..0326eb36f 100644 --- a/Source/Engine/Graphics/PixelFormat.h +++ b/Source/Engine/Graphics/PixelFormat.h @@ -558,6 +558,11 @@ API_ENUM() enum class PixelFormat : uint32 /// YUY2 = 108, + /// + /// Packed YUV 4:2:0 video texture format. Texture uses separate views to access the luma (Y) plane separately from the chroma (UV) planes. For luminance data view, the mapping to the view channel is Y->R8. For chrominance data view, the mapping to the view channel is U->R8 and V->G8. + /// + NV12 = 109, + /// /// The maximum format value (for internal use only). 
/// diff --git a/Source/Engine/Graphics/PixelFormatExtensions.cpp b/Source/Engine/Graphics/PixelFormatExtensions.cpp index 2b1f68457..2a341eec4 100644 --- a/Source/Engine/Graphics/PixelFormatExtensions.cpp +++ b/Source/Engine/Graphics/PixelFormatExtensions.cpp @@ -150,6 +150,8 @@ void PixelFormatExtensions::Init() PixelFormat::BC4_UNorm, }; InitFormat(formats8, 4); + + sizeOfInBits[(int32)PixelFormat::NV12] = 12; } int32 PixelFormatExtensions::SizeInBits(PixelFormat format) @@ -379,6 +381,7 @@ bool PixelFormatExtensions::IsVideo(const PixelFormat format) switch (format) { case PixelFormat::YUY2: + case PixelFormat::NV12: return true; default: return false; diff --git a/Source/Engine/Graphics/RenderTools.cpp b/Source/Engine/Graphics/RenderTools.cpp index e27cacf9c..3e47d974c 100644 --- a/Source/Engine/Graphics/RenderTools.cpp +++ b/Source/Engine/Graphics/RenderTools.cpp @@ -349,6 +349,10 @@ void RenderTools::ComputePitch(PixelFormat format, int32 width, int32 height, ui rowPitch = ((width + 1) >> 1) * 4; slicePitch = rowPitch * height; break; + case PixelFormat::NV12: + rowPitch = width; + slicePitch = width * height * 3 / 2; + break; default: // Default byte alignment rowPitch = (width * PixelFormatExtensions::SizeInBits(format) + 7) / 8; diff --git a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp index 43e751395..b82126702 100644 --- a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp @@ -9,7 +9,7 @@ // @formatter:off -DXGI_FORMAT PixelFormatToDXGIFormat[109] = +DXGI_FORMAT PixelFormatToDXGIFormat[110] = { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32A32_TYPELESS, @@ -120,6 +120,7 @@ DXGI_FORMAT PixelFormatToDXGIFormat[109] = DXGI_FORMAT_UNKNOWN, // ASTC_10x10_UNorm DXGI_FORMAT_UNKNOWN, // ASTC_10x10_UNorm_sRGB DXGI_FORMAT_YUY2, + DXGI_FORMAT_NV12, }; // @formatter:on diff --git a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp 
b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp index d1fe90ee2..5c0a66d75 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp @@ -8,7 +8,7 @@ // @formatter:off -VkFormat RenderToolsVulkan::PixelFormatToVkFormat[109] = +VkFormat RenderToolsVulkan::PixelFormatToVkFormat[110] = { VK_FORMAT_UNDEFINED, VK_FORMAT_R32G32B32A32_SFLOAT, @@ -119,6 +119,7 @@ VkFormat RenderToolsVulkan::PixelFormatToVkFormat[109] = VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, VK_FORMAT_G8B8G8R8_422_UNORM, // YUY2 + VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, // NV12 }; VkBlendFactor RenderToolsVulkan::BlendToVkBlendFactor[20] = diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 0fb52b765..b1f57bd6a 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -96,6 +96,8 @@ namespace MF player.Format = PixelFormat::B5G6R5_UNorm; else if (subtype == MFVideoFormat_RGB555) player.Format = PixelFormat::B5G5R5A1_UNorm; + else if (subtype == MFVideoFormat_NV12) + player.Format = PixelFormat::NV12; else if (subtype == MFVideoFormat_YUY2) player.Format = PixelFormat::YUY2; #if (WDK_NTDDI_VERSION >= NTDDI_WIN10) From 82bf4238df29cfe1a2c5002b8ab924523cd4b6b2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 15 May 2024 11:15:19 +0200 Subject: [PATCH 073/292] Add support for decoding `NV12` into RGB image --- Content/Shaders/Quad.flax | 4 +- Source/Engine/Graphics/GPUDevice.cpp | 13 ++++++ Source/Engine/Graphics/GPUDevice.h | 5 +++ Source/Engine/Video/MF/VideoBackendMF.cpp | 2 +- Source/Engine/Video/Video.cpp | 15 ++++++- Source/Shaders/Quad.shader | 51 ++++++++++++++++++----- 6 files changed, 75 insertions(+), 15 deletions(-) diff --git a/Content/Shaders/Quad.flax b/Content/Shaders/Quad.flax index 65ce310fe..6bdf6ce8e 100644 --- a/Content/Shaders/Quad.flax +++ b/Content/Shaders/Quad.flax @@ 
-1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94f68ef9c2d7bc7d453fdd432f85f4643ae3be1bcf839bc1d5d81803d6b2ce7c -size 3505 +oid sha256:60680a4ce8deee4de81d8aafed454fb5aace3a6d18a11eda3b96e81ccaf801a9 +size 4443 diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index c2aa0c82d..bdacb9766 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -297,6 +297,7 @@ struct GPUDevice::PrivateData GPUPipelineState* PS_CopyLinear = nullptr; GPUPipelineState* PS_Clear = nullptr; GPUPipelineState* PS_DecodeYUY2 = nullptr; + GPUPipelineState* PS_DecodeNV12 = nullptr; GPUBuffer* FullscreenTriangleVB = nullptr; AssetReference DefaultMaterial; SoftAssetReference DefaultDeformableMaterial; @@ -715,6 +716,18 @@ GPUPipelineState* GPUDevice::GetDecodeYUY2PS() const return _res->PS_DecodeYUY2; } +GPUPipelineState* GPUDevice::GetDecodeNV12PS() const +{ + if (_res->PS_DecodeNV12 == nullptr) + { + auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.PS = QuadShader->GetPS("PS_DecodeNV12"); + _res->PS_DecodeNV12 = const_cast(this)->CreatePipelineState(); + _res->PS_DecodeNV12->Init(psDesc); + } + return _res->PS_DecodeNV12; +} + GPUBuffer* GPUDevice::GetFullscreenTriangleVB() const { return _res->FullscreenTriangleVB; diff --git a/Source/Engine/Graphics/GPUDevice.h b/Source/Engine/Graphics/GPUDevice.h index f1dfecf60..081f22d03 100644 --- a/Source/Engine/Graphics/GPUDevice.h +++ b/Source/Engine/Graphics/GPUDevice.h @@ -275,6 +275,11 @@ public: /// GPUPipelineState* GetDecodeYUY2PS() const; + /// + /// Gets the shader pipeline state object for NV12 frame decoding to RGBA. + /// + GPUPipelineState* GetDecodeNV12PS() const; + /// /// Gets the fullscreen-triangle vertex buffer. 
/// diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index b1f57bd6a..ef020b858 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -253,7 +253,7 @@ namespace MF IMF2DBuffer* buffer2D = nullptr; BYTE* bufferData = nullptr; LONG bufferStride = 0; - if (isVideo && sample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK) + if (isVideo && player.Format != PixelFormat::NV12 && sample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK) { LONG bufferPitch = 0; hr = buffer2D->Lock2D(&bufferData, &bufferPitch); diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 6b98941ff..20bf55e0d 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -91,8 +91,19 @@ protected: context->GPU->SetViewportAndScissors((float)_player->Width, (float)_player->Height); context->GPU->SetRenderTarget(frame->View()); context->GPU->BindSR(0, _player->FrameUpload->View()); - ASSERT_LOW_LAYER(_player->Format == PixelFormat::YUY2); - context->GPU->SetState(GPUDevice::Instance->GetDecodeYUY2PS()); + GPUPipelineState* pso; + switch (_player->Format) + { + case PixelFormat::YUY2: + pso = GPUDevice::Instance->GetDecodeYUY2PS(); + break; + case PixelFormat::NV12: + pso = GPUDevice::Instance->GetDecodeNV12PS(); + break; + default: + return Result::Failed; + } + context->GPU->SetState(pso); context->GPU->DrawFullscreenTriangle(); } else diff --git a/Source/Shaders/Quad.shader b/Source/Shaders/Quad.shader index 117a21d1a..0a6c2ba80 100644 --- a/Source/Shaders/Quad.shader +++ b/Source/Shaders/Quad.shader @@ -83,12 +83,21 @@ float PS_DepthCopy(Quad_VS2PS input) : SV_Depth #endif +float4 yuv2rgb(int y, int u, int v) +{ + u -= 128; + v -= 128; + float r = y + 1.402 * v; + float g = y - 0.34414 * u - 0.71414 * v; + float b = y + 1.772 * u; + return float4(r, g, b, 
256.0f) / 256.0f; +} + #ifdef _PS_DecodeYUY2 // Raw memory with texture of format YUY2 and size passed in Color.xy Buffer SourceYUY2 : register(t0); -// Pixel Shader for copying depth buffer META_PS(true, FEATURE_LEVEL_ES2) float4 PS_DecodeYUY2(Quad_VS2PS input) : SV_Target { @@ -97,17 +106,39 @@ float4 PS_DecodeYUY2(Quad_VS2PS input) : SV_Target uint data = SourceYUY2[p / 2]; // Unpack YUY components - uint v = (data & 0xff000000) >> 24; - uint y1 = (data & 0xff0000) >> 16; - uint u = (data & 0xff00) >> 8; - uint y0 = data & 0x000000FF; - uint y = p % 2 == 0 ? y0: y1; + int v = ((data & 0xff000000) >> 24); + int y1 = (data & 0xff0000) >> 16; + int u = ((data & 0xff00) >> 8); + int y0 = data & 0xff; + int y = p % 2 == 0 ? y0: y1; // Convert yuv to rgb - float r = (y + 1.402 * (v - 128.0)); - float g = (y - 0.344 * (u - 128.0) - 0.714 * (v - 128.0)); - float b = (y + 1.772 * (u - 128.0)); - return float4(r, g, b, 256.0f) / 256.0f; + return yuv2rgb(y, u, v); +} + +#endif + +#ifdef _PS_DecodeNV12 + +// Raw memory with texture of format NV12 and size passed in Color.xy +Buffer SourceNV12 : register(t0); + +META_PS(true, FEATURE_LEVEL_ES2) +float4 PS_DecodeNV12(Quad_VS2PS input) : SV_Target +{ + // Read NV12 pixel (Y plane of size w*h, followed by interleaved UV plane is of size w*h/2) + uint size = (uint)(Color.x * Color.y); + uint p = (uint)input.Position.y * (uint)Color.x + (uint)input.Position.x; + uint y = (SourceNV12[p / 4] >> ((p % 4) * 8)) & 0xff; + p = (uint)(input.Position.y * 0.5f) * (uint)Color.x + (uint)input.Position.x; + p = (p / 2) * 2; + uint u = (SourceNV12[size / 4 + p / 4] >> ((p % 4) * 8)) & 0xff; + p = (uint)(input.Position.y * 0.5f) * (uint)Color.x + (uint)input.Position.x; + p = (p / 2) * 2 + 1; + uint v = (SourceNV12[size / 4 + p / 4] >> ((p % 4) * 8)) & 0xff; + + // Convert yuv to rgb + return yuv2rgb(y, u, v); } #endif From 1d6e8c4b7cb56f3274bbbc5b089e66150ff8597a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 15 May 2024 23:39:10 
+0200 Subject: [PATCH 074/292] Add video support on Android --- .../Shaders/Cache/ShaderAssetBase.cpp | 4 +- .../Video/Android/VideoBackendAndroid.cpp | 587 ++++++++++++++++++ .../Video/Android/VideoBackendAndroid.h | 30 + Source/Engine/Video/Video.Build.cs | 4 + Source/Engine/Video/Video.cpp | 6 + .../Platforms/Android/AndroidToolchain.cs | 1 + 6 files changed, 631 insertions(+), 1 deletion(-) create mode 100644 Source/Engine/Video/Android/VideoBackendAndroid.cpp create mode 100644 Source/Engine/Video/Android/VideoBackendAndroid.h diff --git a/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp b/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp index 033d50466..8a88e2d25 100644 --- a/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp +++ b/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp @@ -4,10 +4,12 @@ #include "ShaderStorage.h" #include "ShaderCacheManager.h" #include "Engine/Core/Log.h" -#include "Engine/Engine/CommandLine.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/Shaders/GPUShader.h" +#if COMPILE_WITH_SHADER_COMPILER +#include "Engine/Engine/CommandLine.h" #include "Engine/Serialization/MemoryReadStream.h" +#endif #include "Engine/ShadowsOfMordor/AtlasChartsPacker.h" ShaderStorage::CachingMode ShaderStorage::CurrentCachingMode = diff --git a/Source/Engine/Video/Android/VideoBackendAndroid.cpp b/Source/Engine/Video/Android/VideoBackendAndroid.cpp new file mode 100644 index 000000000..46d04f062 --- /dev/null +++ b/Source/Engine/Video/Android/VideoBackendAndroid.cpp @@ -0,0 +1,587 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+ +#if VIDEO_API_ANDROID + +#include "VideoBackendAndroid.h" +#include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Serialization/FileReadStream.h" +#include "Engine/Threading/TaskGraph.h" +#include "Engine/Core/Log.h" +#include "Engine/Engine/Time.h" +#include "Engine/Engine/Globals.h" +#include "Engine/Audio/Types.h" +#include "Engine/Utilities/StringConverter.h" +#include +#include +#include + +#define VIDEO_API_ANDROID_DEBUG (0) +#define VIDEO_API_ANDROID_ERROR(api, err) LOG(Warning, "[VideoBackendAndroid] {} failed with error {}", TEXT(#api), (int64)err) + +struct VideoPlayerAndroid +{ + AMediaExtractor* Extractor; + AMediaCodec* VideoCodec; + AMediaCodec* AudioCodec; + AMediaFormat* VideoFormat; + AMediaFormat* AudioFormat; + uint8 Loop : 1; + uint8 Playing : 1; + uint8 InputEnded : 1; + uint8 OutputEnded : 1; + uint16 VideoStride; + uint16 VideoTrackIndex; + uint16 AudioTrackIndex; +}; + +namespace Android +{ + Array Players; + + // Reference: http://developer.android.com/reference/android/media/MediaCodecInfo.CodecCapabilities.html + enum ColourFormat + { + COLOR_Format12bitRGB444 = 3, + COLOR_Format16bitARGB1555 = 5, + COLOR_Format16bitARGB4444 = 4, + COLOR_Format16bitBGR565 = 7, + COLOR_Format16bitRGB565 = 6, + COLOR_Format18BitBGR666 = 41, + COLOR_Format18bitARGB1665 = 9, + COLOR_Format18bitRGB666 = 8, + COLOR_Format19bitARGB1666 = 10, + COLOR_Format24BitABGR6666 = 43, + COLOR_Format24BitARGB6666 = 42, + COLOR_Format24bitARGB1887 = 13, + COLOR_Format24bitBGR888 = 12, + COLOR_Format24bitRGB888 = 11, + COLOR_Format32bitABGR8888 = 0x7f00a000, + COLOR_Format32bitARGB8888 = 16, + COLOR_Format32bitBGRA8888 = 15, + COLOR_Format8bitRGB332 = 2, + COLOR_FormatCbYCrY = 27, + COLOR_FormatCrYCbY = 28, + COLOR_FormatL16 = 36, + COLOR_FormatL2 = 33, + COLOR_FormatL32 = 38, + COLOR_FormatL4 = 34, + COLOR_FormatL8 = 35, + COLOR_FormatMonochrome = 1, + COLOR_FormatRGBAFlexible = 0x7f36a888, + COLOR_FormatRGBFlexible = 0x7f36b888, + COLOR_FormatRawBayer10bit = 31, + 
COLOR_FormatRawBayer8bit = 30, + COLOR_FormatRawBayer8bitcompressed = 32, + COLOR_FormatSurface = 0x7f000789, + COLOR_FormatYCbYCr = 25, + COLOR_FormatYCrYCb = 26, + COLOR_FormatYUV411PackedPlanar = 18, + COLOR_FormatYUV411Planar = 17, + COLOR_FormatYUV420Flexible = 0x7f420888, + COLOR_FormatYUV420PackedPlanar = 20, + COLOR_FormatYUV420PackedSemiPlanar = 39, + COLOR_FormatYUV420Planar = 19, + COLOR_FormatYUV420SemiPlanar = 21, + COLOR_FormatYUV422Flexible = 0x7f422888, + COLOR_FormatYUV422PackedPlanar = 23, + COLOR_FormatYUV422PackedSemiPlanar = 40, + COLOR_FormatYUV422Planar = 22, + COLOR_FormatYUV422SemiPlanar = 24, + COLOR_FormatYUV444Flexible = 0x7f444888, + COLOR_FormatYUV444Interleaved = 29, + COLOR_QCOM_FormatYUV420SemiPlanar = 0x7fa30c00, + COLOR_TI_FormatYUV420PackedSemiPlanar = 0x7f000100, + }; + + // Data source read callback: copies up to 'size' bytes located at 'offset' in the file stream (passed via 'userdata') into 'buffer'. Returns the amount of bytes copied, 0 for an empty request or -1 when the offset is at (or past) the end of the file. + ssize_t AMediaDataSourceReadAt(void* userdata, off64_t offset, void* buffer, size_t size) + { + if (size == 0) + return 0; + auto* stream = (FileReadStream*)userdata; + // Report the end of the stream instead of pretending that the data was read + const uint32 length = stream->GetLength(); + if (offset < 0 || offset >= (off64_t)length) + return -1; + // Clamp the request to the data that is actually available so the returned size matches the bytes written to the buffer + const uint32 bytesLeft = length - (uint32)offset; + if (size > bytesLeft) + size = bytesLeft; + stream->SetPosition((uint32)offset); + stream->ReadBytes(buffer, (uint32)size); + return (ssize_t)size; + } + + ssize_t AMediaDataSourceGetSize(void* userdata) + { + auto* stream = (FileReadStream*)userdata; + return (ssize_t)stream->GetLength(); + } + + void AMediaDataSourceClose(void* userdata) + { + auto* stream = (FileReadStream*)userdata; + Delete(stream); + } + + void UpdateFormat(VideoBackendPlayer& player, VideoPlayerAndroid& playerAndroid, AMediaCodec* codec, AMediaFormat* format) + { + const bool isVideo = codec == playerAndroid.VideoCodec; + const bool isAudio = codec == playerAndroid.AudioCodec; + if (isVideo) + { + int32_t frameWidth = 0, frameHeight = 0, frameRate = 0, colorFormat = 0, stride = 0; + float frameRateF = 0.0f; + AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_WIDTH, &frameWidth); + AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_HEIGHT, &frameHeight); + if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_FRAME_RATE, &frameRate) && frameRate > 0) + player.FrameRate =
(float)frameRate; + else if (AMediaFormat_getFloat(format, AMEDIAFORMAT_KEY_FRAME_RATE, &frameRateF) && frameRateF > 0) + player.FrameRate = frameRateF; + else + player.FrameRate = 60; + AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_STRIDE, &stride); + playerAndroid.VideoStride = stride; + player.Width = player.VideoFrameWidth = frameWidth; + player.Height = player.VideoFrameHeight = frameHeight; + AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_COLOR_FORMAT, &colorFormat); + switch (colorFormat) + { + case COLOR_Format32bitABGR8888: + player.Format = PixelFormat::R8G8B8A8_UNorm; + break; + case COLOR_Format32bitBGRA8888: + player.Format = PixelFormat::B8G8R8A8_UNorm; + break; + case COLOR_FormatYUV420SemiPlanar: + player.Format = PixelFormat::NV12; + break; + case COLOR_FormatYUV422SemiPlanar: + player.Format = PixelFormat::YUY2; + break; + default: + player.Format = PixelFormat::Unknown; + LOG(Error, "[VideoBackendAndroid] Unsupported video color format {}", colorFormat); + break; + } +#if VIDEO_API_ANDROID_DEBUG + LOG(Info, "[VideoBackendAndroid] Video track: {}x{}, {}fps", player.Width, player.Height, player.FrameRate); +#endif + } + else if (isAudio) + { + int32_t sampleRate = 0, channelCount = 0, bitsPerSample = 0; + AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_SAMPLE_RATE, &sampleRate); + AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &channelCount); + player.AudioInfo.SampleRate = sampleRate; + player.AudioInfo.NumChannels = channelCount; + if (AMediaFormat_getInt32(format, "bits-per-sample", &bitsPerSample) && bitsPerSample > 0) + player.AudioInfo.BitDepth = bitsPerSample; + else + player.AudioInfo.BitDepth = 16; +#if VIDEO_API_ANDROID_DEBUG + LOG(Info, "[VideoBackendAndroid] Audio track: {} channels, {} bits, {} kHz sample rate", player.AudioInfo.NumChannels, player.AudioInfo.BitDepth, player.AudioInfo.SampleRate / 1000); +#endif + } + } + + void ReadCodecOutput(VideoBackendPlayer& player, VideoPlayerAndroid& playerAndroid, AMediaCodec* 
codec, AMediaFormat*& format) + { + // Note: format is taken by reference so the output format created on a format change replaces the one stored in the player state and gets released together with it (a by-value pointer would leak it) + if (!codec) + return; + PROFILE_CPU(); + AMediaCodecBufferInfo bufferInfo; + ssize_t bufferIndex = AMediaCodec_dequeueOutputBuffer(codec, &bufferInfo, 0); + if (bufferIndex >= 0) + { + if (bufferInfo.size > 0) + { + TimeSpan frameTime(bufferInfo.presentationTimeUs * 10), frameDuration = TimeSpan::FromSeconds(1.0f / player.FrameRate); + size_t bufferSize = 0; + uint8_t* buffer = AMediaCodec_getOutputBuffer(codec, bufferIndex, &bufferSize); + ASSERT(buffer && bufferSize); + if (codec == playerAndroid.VideoCodec) + { + // Depending on the format video frame might have different dimensions (eg. h264 block padding) so deduct this from buffer size (Android doesn't report correct frame cropping) + switch (player.Format) + { + case PixelFormat::YUY2: + case PixelFormat::NV12: + //player.VideoFrameHeight = bufferSize / playerAndroid.VideoStride / 3 * 2; + bufferSize = player.VideoFrameHeight * playerAndroid.VideoStride * 3 / 2; + break; + } + + // TODO: use VideoStride and repack texture if stride is different from RenderTools::ComputePitch (UpdateVideoFrame can handle pitch convert directly into frame buffer) + player.UpdateVideoFrame(Span(buffer, bufferSize), frameTime, frameDuration); + } + else if (codec == playerAndroid.AudioCodec) + { + player.UpdateAudioBuffer(Span(buffer + bufferInfo.offset, bufferInfo.size), frameTime, frameDuration); + } + } + media_status_t status = AMediaCodec_releaseOutputBuffer(codec, bufferIndex, false); + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaCodec_releaseOutputBuffer, status); + } + } + else if (bufferIndex == AMEDIACODEC_INFO_TRY_AGAIN_LATER) + { + // Skip + } + else if (bufferIndex == AMEDIACODEC_INFO_OUTPUT_BUFFERS_CHANGED) + { + // Ignore + } + else if (bufferIndex == AMEDIACODEC_INFO_OUTPUT_FORMAT_CHANGED) + { + if (format) + { + AMediaFormat_delete(format); + format = nullptr; + } + format = AMediaCodec_getOutputFormat(codec); + ASSERT_LOW_LAYER(format); + UpdateFormat(player,
playerAndroid, codec, format); + } + else + { + VIDEO_API_ANDROID_ERROR(AMediaCodec_dequeueOutputBuffer, bufferIndex); + } + } + + // Job that updates a single player (by index in Players): restarts or ends the playback when the extractor runs out of samples, otherwise feeds the current sample into the matching codec. + void UpdatePlayer(int32 index) + { + PROFILE_CPU(); + auto& player = *Players[index]; + ZoneText(player.DebugUrl, player.DebugUrlLen); + auto& playerAndroid = player.GetBackendState(); + + // Skip paused player + if (!playerAndroid.Playing || (playerAndroid.InputEnded && playerAndroid.OutputEnded)) + return; + media_status_t status; + ssize_t bufferIndex; + + // Get current sample info + int64_t presentationTimeUs = AMediaExtractor_getSampleTime(playerAndroid.Extractor); + int trackIndex = AMediaExtractor_getSampleTrackIndex(playerAndroid.Extractor); + if (trackIndex < 0) + { +#if VIDEO_API_ANDROID_DEBUG + LOG(Info, "[VideoBackendAndroid] Samples track ended"); +#endif + if (playerAndroid.Loop) + { + // Loop (rewind the extractor and drop any data pending in both codecs) + status = AMediaExtractor_seekTo(playerAndroid.Extractor, 0, AMEDIAEXTRACTOR_SEEK_CLOSEST_SYNC); + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaExtractor_seekTo, status); + } + if (playerAndroid.VideoCodec) + AMediaCodec_flush(playerAndroid.VideoCodec); + if (playerAndroid.AudioCodec) + AMediaCodec_flush(playerAndroid.AudioCodec); + } + else + { + // End + playerAndroid.InputEnded = playerAndroid.OutputEnded = 1; + } + } + else if (trackIndex == playerAndroid.VideoTrackIndex || trackIndex == playerAndroid.AudioTrackIndex) + { + auto codec = trackIndex == playerAndroid.VideoTrackIndex ?
playerAndroid.VideoCodec : playerAndroid.AudioCodec; + + // Process input buffer + bufferIndex = AMediaCodec_dequeueInputBuffer(codec, 2000); + if (bufferIndex >= 0) + { + size_t bufferSize; + uint8_t* buffer = AMediaCodec_getInputBuffer(codec, bufferIndex, &bufferSize); + ssize_t sampleSize = AMediaExtractor_readSampleData(playerAndroid.Extractor, buffer, bufferSize); + uint32_t queueInputFlags = 0; + if (sampleSize < 0) + { + queueInputFlags |= AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM; + sampleSize = 0; + } + status = AMediaCodec_queueInputBuffer(codec, bufferIndex, 0, sampleSize, presentationTimeUs, queueInputFlags); + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaCodec_queueInputBuffer, status); + } + AMediaExtractor_advance(playerAndroid.Extractor); + } + else if (bufferIndex == AMEDIACODEC_INFO_TRY_AGAIN_LATER) + { + // Skip + } + else + { + VIDEO_API_ANDROID_ERROR(AMediaCodec_dequeueInputBuffer, bufferIndex); + } + } + + if (!playerAndroid.OutputEnded) + { + // Process output buffers + ReadCodecOutput(player, playerAndroid, playerAndroid.VideoCodec, playerAndroid.VideoFormat); + ReadCodecOutput(player, playerAndroid, playerAndroid.AudioCodec, playerAndroid.AudioFormat); + } + + player.Tick(); + } +} + +bool VideoBackendAndroid::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) +{ + PROFILE_CPU(); + player = VideoBackendPlayer(); + auto& playerAndroid = player.GetBackendState(); + media_status_t status; + + // Load media + playerAndroid.Extractor = AMediaExtractor_new(); + if (!playerAndroid.Extractor) + { + VIDEO_API_ANDROID_ERROR(AMediaExtractor_new, 0); + return true; + } + FileReadStream* fileStream = nullptr; + if (!info.Url.StartsWith(TEXT("http"), StringSearchCase::IgnoreCase)) + { + if (info.Url.StartsWith(TEXT("Content/"), StringSearchCase::CaseSensitive)) + fileStream = FileReadStream::Open(Globals::ProjectFolder / info.Url); + else + fileStream = FileReadStream::Open(info.Url); + } + if (fileStream) + { + // 
File (AAsset* or Unix handle) +#if VIDEO_API_ANDROID_DEBUG + LOG(Info, "[VideoBackendAndroid] Loading local file"); +#endif + auto* mediaSource = AMediaDataSource_new(); + AMediaDataSource_setUserdata(mediaSource, fileStream); + AMediaDataSource_setReadAt(mediaSource, Android::AMediaDataSourceReadAt); + AMediaDataSource_setGetSize(mediaSource, Android::AMediaDataSourceGetSize); + AMediaDataSource_setClose(mediaSource, Android::AMediaDataSourceClose); + status = AMediaExtractor_setDataSourceCustom(playerAndroid.Extractor, mediaSource); + } + else + { + // Url +#if VIDEO_API_ANDROID_DEBUG + LOG(Info, "[VideoBackendAndroid] Loading url"); +#endif + const StringAsANSI<> url(info.Url.Get(), info.Url.Length()); + status = AMediaExtractor_setDataSource(playerAndroid.Extractor, url.Get()); + } + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaExtractor_setDataSource, status); + AMediaExtractor_delete(playerAndroid.Extractor); + return true; + } + + // Load tracks + playerAndroid.VideoTrackIndex = playerAndroid.AudioTrackIndex = MAX_uint16; + player.FrameRate = 24; + size_t tracks = AMediaExtractor_getTrackCount(playerAndroid.Extractor); + for (size_t trackIndex = 0; trackIndex < tracks; trackIndex++) + { + AMediaFormat* trackFormat = AMediaExtractor_getTrackFormat(playerAndroid.Extractor, trackIndex); +#if VIDEO_API_ANDROID_DEBUG + const char* trackFormatName = AMediaFormat_toString(trackFormat); + LOG(Info, "[VideoBackendAndroid] Track [{}]: {}", trackIndex, String(trackFormatName)); +#endif + const char* mime; + if (AMediaFormat_getString(trackFormat, AMEDIAFORMAT_KEY_MIME, &mime)) + { + if (playerAndroid.VideoCodec == nullptr && !strncmp(mime, "video/", 6)) + { + // Video track (decoder creation can fail so configure the codec only when it's valid) + playerAndroid.VideoCodec = AMediaCodec_createDecoderByType(mime); + status = playerAndroid.VideoCodec ? AMediaCodec_configure(playerAndroid.VideoCodec, trackFormat, nullptr, nullptr, 0) : AMEDIA_ERROR_UNSUPPORTED; + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaCodec_configure, status); + if (playerAndroid.VideoCodec) +
AMediaCodec_delete(playerAndroid.VideoCodec); + playerAndroid.VideoCodec = nullptr; + } + else + { + status = AMediaExtractor_selectTrack(playerAndroid.Extractor, trackIndex); + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaExtractor_selectTrack, status); + AMediaCodec_delete(playerAndroid.VideoCodec); + playerAndroid.VideoCodec = nullptr; + } + else + { + playerAndroid.VideoTrackIndex = trackIndex; + Android::UpdateFormat(player, playerAndroid, playerAndroid.VideoCodec, trackFormat); + } + } + } + else if (playerAndroid.AudioCodec == nullptr && !strncmp(mime, "audio/", 6)) + { + // Audio track (decoder creation can fail so configure the codec only when it's valid) + playerAndroid.AudioCodec = AMediaCodec_createDecoderByType(mime); + status = playerAndroid.AudioCodec ? AMediaCodec_configure(playerAndroid.AudioCodec, trackFormat, nullptr, nullptr, 0) : AMEDIA_ERROR_UNSUPPORTED; + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaCodec_configure, status); + if (playerAndroid.AudioCodec) + AMediaCodec_delete(playerAndroid.AudioCodec); + playerAndroid.AudioCodec = nullptr; + } + else + { + status = AMediaExtractor_selectTrack(playerAndroid.Extractor, trackIndex); + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaExtractor_selectTrack, status); + AMediaCodec_delete(playerAndroid.AudioCodec); + playerAndroid.AudioCodec = nullptr; + } + else + { + playerAndroid.AudioTrackIndex = trackIndex; + Android::UpdateFormat(player, playerAndroid, playerAndroid.AudioCodec, trackFormat); + } + } + } + } + AMediaFormat_delete(trackFormat); + } + + // Setup player data + player.Backend = this; + playerAndroid.Loop = info.Loop; + player.Created(info); + Android::Players.Add(&player); + + return false; +} + +void VideoBackendAndroid::Player_Destroy(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + player.ReleaseResources(); + auto& playerAndroid = player.GetBackendState(); + if (playerAndroid.VideoFormat) + AMediaFormat_delete(playerAndroid.VideoFormat); + if (playerAndroid.VideoCodec) + AMediaCodec_delete(playerAndroid.VideoCodec); + if (playerAndroid.AudioFormat) +
AMediaFormat_delete(playerAndroid.AudioFormat); + if (playerAndroid.AudioCodec) + AMediaCodec_delete(playerAndroid.AudioCodec); + AMediaExtractor_delete(playerAndroid.Extractor); + Android::Players.Remove(&player); + player = VideoBackendPlayer(); +} + +void VideoBackendAndroid::Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) +{ + PROFILE_CPU(); + auto& playerAndroid = player.GetBackendState(); + playerAndroid.Loop = info.Loop; + player.Updated(info); +} + +void VideoBackendAndroid::Player_Play(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAndroid = player.GetBackendState(); + playerAndroid.Playing = 1; + playerAndroid.InputEnded = playerAndroid.OutputEnded = 0; + if (playerAndroid.VideoCodec) + AMediaCodec_start(playerAndroid.VideoCodec); + if (playerAndroid.AudioCodec) + AMediaCodec_start(playerAndroid.AudioCodec); + player.PlayAudio(); +} + +void VideoBackendAndroid::Player_Pause(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAndroid = player.GetBackendState(); + playerAndroid.Playing = 0; + if (playerAndroid.VideoCodec) + AMediaCodec_stop(playerAndroid.VideoCodec); + if (playerAndroid.AudioCodec) + AMediaCodec_stop(playerAndroid.AudioCodec); + player.PauseAudio(); +} + +// Stops the playback: resets the playback state, rewinds the extractor to the start, stops and flushes every created codec (video and audio separately) and stops the audio. +void VideoBackendAndroid::Player_Stop(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAndroid = player.GetBackendState(); + player.VideoFrameDuration = player.AudioBufferDuration = TimeSpan::Zero(); + playerAndroid.Playing = 0; + playerAndroid.InputEnded = playerAndroid.OutputEnded = 0; + media_status_t status = AMediaExtractor_seekTo(playerAndroid.Extractor, 0, AMEDIAEXTRACTOR_SEEK_CLOSEST_SYNC); + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaExtractor_seekTo, status); + } + // NOTE(review): flush is issued right after stop - presumably the codec accepts it in that state, confirm against the NDK docs + if (playerAndroid.VideoCodec) + { + AMediaCodec_stop(playerAndroid.VideoCodec); + AMediaCodec_flush(playerAndroid.VideoCodec); + } + if (playerAndroid.AudioCodec) + { + AMediaCodec_stop(playerAndroid.AudioCodec); +
AMediaCodec_flush(playerAndroid.AudioCodec); + } + player.StopAudio(); +} + +void VideoBackendAndroid::Player_Seek(VideoBackendPlayer& player, TimeSpan time) +{ + PROFILE_CPU(); + auto& playerAndroid = player.GetBackendState(); + player.VideoFrameDuration = player.AudioBufferDuration = TimeSpan::Zero(); + media_status_t status = AMediaExtractor_seekTo(playerAndroid.Extractor, time.Ticks / 10, AMEDIAEXTRACTOR_SEEK_PREVIOUS_SYNC); + if (status != AMEDIA_OK) + { + VIDEO_API_ANDROID_ERROR(AMediaExtractor_seekTo, status); + } + if (playerAndroid.VideoCodec) + AMediaCodec_flush(playerAndroid.VideoCodec); + if (playerAndroid.AudioCodec) + AMediaCodec_flush(playerAndroid.AudioCodec); + player.StopAudio(); +} + +TimeSpan VideoBackendAndroid::Player_GetTime(const VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAndroid = player.GetBackendState(); + int64 time = AMediaExtractor_getSampleTime(playerAndroid.Extractor); + if (time < 0) + return TimeSpan::Zero(); + return TimeSpan(time * 10); +} + +const Char* VideoBackendAndroid::Base_Name() +{ + return TEXT("Android NDK Media"); +} + +bool VideoBackendAndroid::Base_Init() +{ + return false; +} + +void VideoBackendAndroid::Base_Update(TaskGraph* graph) +{ + // Schedule work to update all videos models in async + Function job; + job.Bind(Android::UpdatePlayer); + graph->DispatchJob(job, Android::Players.Count()); +} + +void VideoBackendAndroid::Base_Dispose() +{ +} + +#endif diff --git a/Source/Engine/Video/Android/VideoBackendAndroid.h b/Source/Engine/Video/Android/VideoBackendAndroid.h new file mode 100644 index 000000000..240ed0a7a --- /dev/null +++ b/Source/Engine/Video/Android/VideoBackendAndroid.h @@ -0,0 +1,30 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +#pragma once + +#if VIDEO_API_ANDROID + +#include "../VideoBackend.h" + +/// +/// The Android NDK Media Codec video backend.
+/// +class VideoBackendAndroid : public VideoBackend +{ +public: + // [VideoBackend] + bool Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) override; + void Player_Destroy(VideoBackendPlayer& player) override; + void Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) override; + void Player_Play(VideoBackendPlayer& player) override; + void Player_Pause(VideoBackendPlayer& player) override; + void Player_Stop(VideoBackendPlayer& player) override; + void Player_Seek(VideoBackendPlayer& player, TimeSpan time) override; + TimeSpan Player_GetTime(const VideoBackendPlayer& player) override; + const Char* Base_Name() override; + bool Base_Init() override; + void Base_Update(TaskGraph* graph) override; + void Base_Dispose() override; +}; + +#endif diff --git a/Source/Engine/Video/Video.Build.cs b/Source/Engine/Video/Video.Build.cs index a30105f8c..ee0fef30f 100644 --- a/Source/Engine/Video/Video.Build.cs +++ b/Source/Engine/Video/Video.Build.cs @@ -46,6 +46,10 @@ public class Video : EngineModule options.SourcePaths.Add(Path.Combine(Globals.EngineRoot, "Source", "Platforms", "Switch", "Engine", "Video")); options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_SWITCH"); break; + case TargetPlatform.Android: + options.SourcePaths.Add(Path.Combine(FolderPath, "Android")); + options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_ANDROID"); + break; } } diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 20bf55e0d..1dbef13a0 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -22,6 +22,9 @@ #if VIDEO_API_MF #include "MF/VideoBackendMF.h" #endif +#if VIDEO_API_ANDROID +#include "Android/VideoBackendAndroid.h" +#endif #if VIDEO_API_PS4 #include "Platforms/PS4/Engine/Video/VideoBackendPS4.h" #endif @@ -220,6 +223,9 @@ bool Video::CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackend #if VIDEO_API_MF TRY_USE_BACKEND(VideoBackendMF); #endif 
+#if VIDEO_API_ANDROID + TRY_USE_BACKEND(VideoBackendAndroid); +#endif #if VIDEO_API_PS4 TRY_USE_BACKEND(VideoBackendPS4); #endif diff --git a/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs b/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs index 111f22028..1537f014f 100644 --- a/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs @@ -71,6 +71,7 @@ namespace Flax.Build.Platforms options.LinkEnv.InputLibraries.Add("c"); options.LinkEnv.InputLibraries.Add("z"); options.LinkEnv.InputLibraries.Add("log"); + options.LinkEnv.InputLibraries.Add("mediandk"); options.LinkEnv.InputLibraries.Add("android"); } From 2af4e8fe1055f81a1c4658e9ab0df1278248a16d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 22 May 2024 11:53:46 +0200 Subject: [PATCH 075/292] Add AV video backend for macOS and iOS --- Source/Engine/Video/AV/VideoBackendAV.cpp | 289 ++++++++++++++++++ Source/Engine/Video/AV/VideoBackendAV.h | 30 ++ .../Video/Android/VideoBackendAndroid.cpp | 2 +- Source/Engine/Video/MF/VideoBackendMF.cpp | 2 +- Source/Engine/Video/Video.Build.cs | 6 + Source/Engine/Video/Video.cpp | 20 +- .../Flax.Build/Platforms/Mac/MacToolchain.cs | 3 + .../Flax.Build/Platforms/iOS/iOSToolchain.cs | 3 + 8 files changed, 346 insertions(+), 9 deletions(-) create mode 100644 Source/Engine/Video/AV/VideoBackendAV.cpp create mode 100644 Source/Engine/Video/AV/VideoBackendAV.h diff --git a/Source/Engine/Video/AV/VideoBackendAV.cpp b/Source/Engine/Video/AV/VideoBackendAV.cpp new file mode 100644 index 000000000..f0c2a71b0 --- /dev/null +++ b/Source/Engine/Video/AV/VideoBackendAV.cpp @@ -0,0 +1,289 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+ +#if VIDEO_API_AV + +#include "VideoBackendAV.h" +#include "Engine/Platform/Apple/AppleUtils.h" +#include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Threading/TaskGraph.h" +#include "Engine/Core/Log.h" +#include "Engine/Engine/Globals.h" +#include + +#define VIDEO_API_AV_ERROR(api, err) LOG(Warning, "[VideoBackendAV] {} failed with error 0x{:x}", TEXT(#api), (uint64)err) + +struct VideoPlayerAV +{ + AVPlayer* Player; + AVPlayerItemVideoOutput* Output; + int8 PendingPlay : 1; + int8 PendingPause : 1; + int8 PendingSeek : 1; + TimeSpan SeekTime; +}; + +namespace AV +{ + Array Players; + + TimeSpan ConvertTime(const CMTime& t) + { + return TimeSpan::FromSeconds(t.timescale != 0 ? (t.value / (double)t.timescale) : 0.0); + } + + CMTime ConvertTime(const TimeSpan& t) + { + return CMTime{(CMTimeValue)(100000.0 * t.GetTotalSeconds()), (CMTimeScale)100000, kCMTimeFlags_Valid, {}}; + } + + void UpdatePlayer(int32 index) + { + PROFILE_CPU(); + auto& player = *Players[index]; + ZoneText(player.DebugUrl, player.DebugUrlLen); + auto& playerAV = player.GetBackendState(); + + // Update format + AVPlayerItem* playerItem = [playerAV.Player currentItem]; + if (!playerItem) + return; + if (player.Width == 0) + { + CGSize size = [playerItem presentationSize]; + player.Width = player.VideoFrameWidth = size.width; + player.Height = player.VideoFrameHeight = size.height; + NSArray* tracks = [playerItem tracks]; + for (NSUInteger i = 0; i < [tracks count]; i++) + { + AVPlayerItemTrack* track = (AVPlayerItemTrack*)[tracks objectAtIndex:i]; + AVAssetTrack* assetTrack = track.assetTrack; + NSString* mediaType = assetTrack.mediaType; + if ([mediaType isEqualToString:AVMediaTypeVideo] && playerAV.Output == nullptr) + { + player.FrameRate = assetTrack.nominalFrameRate; + if (player.FrameRate <= 0.0f) + { + CMTime frameDuration = assetTrack.minFrameDuration; + if ((frameDuration.flags & kCMTimeFlags_Valid) != 0) + player.FrameRate = (float)frameDuration.timescale / 
(float)frameDuration.value; + else + player.FrameRate = 25; + } + CGSize frameSize = assetTrack.naturalSize; + player.Width = player.VideoFrameWidth = frameSize.width; + player.Height = player.VideoFrameHeight = frameSize.height; + + CMFormatDescriptionRef desc = (CMFormatDescriptionRef)[assetTrack.formatDescriptions objectAtIndex:0]; + CMVideoCodecType codec = CMFormatDescriptionGetMediaSubType(desc); + int32 pixelFormat = kCVPixelFormatType_32BGRA; // TODO: use packed vieo format + player.Format = PixelFormat::B8G8R8A8_UNorm; + + NSMutableDictionary* attributes = [NSMutableDictionary dictionary]; + [attributes setObject:[NSNumber numberWithInt: pixelFormat] forKey:(NSString*)kCVPixelBufferPixelFormatTypeKey]; + [attributes setObject:[NSNumber numberWithInteger:1] forKey:(NSString*)kCVPixelBufferBytesPerRowAlignmentKey]; + + playerAV.Output = [[AVPlayerItemVideoOutput alloc] initWithPixelBufferAttributes:attributes]; + [playerItem addOutput: playerAV.Output]; + } + else if ([mediaType isEqualToString:AVMediaTypeAudio]) + { + CMFormatDescriptionRef desc = (CMFormatDescriptionRef)[assetTrack.formatDescriptions objectAtIndex:0]; + const AudioStreamBasicDescription* audioDesc = CMAudioFormatDescriptionGetStreamBasicDescription(desc); + player.AudioInfo.SampleRate = audioDesc->mSampleRate; + player.AudioInfo.NumChannels = audioDesc->mChannelsPerFrame; + player.AudioInfo.BitDepth = audioDesc->mBitsPerChannel > 0 ? 
audioDesc->mBitsPerChannel : 16; + } + } + } + + // Wait for the video to be ready + //AVPlayerStatus status = [playerAV.Player status]; + //AVPlayerTimeControlStatus timeControlStatus = [playerAV.Player timeControlStatus]; + if (playerAV.Output == nullptr) + return; + + // Control playback + if (playerAV.PendingPlay) + { + playerAV.PendingPlay = 0; + [playerAV.Player play]; + } + else if (playerAV.PendingPause) + { + playerAV.PendingPause = 0; + [playerAV.Player pause]; + } + if (playerAV.PendingSeek) + { + playerAV.PendingSeek = 0; + [playerAV.Player seekToTime:AV::ConvertTime(playerAV.SeekTime)]; + //[playerAV.Player seekToTime:time toleranceBefore:time toleranceAfter:time]; + } + + // Check if there is a new video frame to process + CMTime currentTime = [playerAV.Player currentTime]; + if (playerAV.Output && [playerAV.Output hasNewPixelBufferForItemTime: currentTime]) + { + CVPixelBufferRef buffer = [playerAV.Output copyPixelBufferForItemTime:currentTime itemTimeForDisplay:nullptr]; + if (buffer) + { + const int32 bufferWidth = CVPixelBufferGetWidth(buffer); + const int32 bufferHeight = CVPixelBufferGetHeight(buffer); + const int32 bufferStride = CVPixelBufferGetBytesPerRow(buffer); + const int32 bufferSize = bufferStride * bufferHeight; + + // TODO: use Metal Texture Cache for faster GPU-based video processing + + if (CVPixelBufferLockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly) == kCVReturnSuccess) + { + uint8* bufferData = (uint8*)CVPixelBufferGetBaseAddress(buffer); + player.UpdateVideoFrame(Span(bufferData, bufferSize), ConvertTime(currentTime), TimeSpan::FromSeconds(1.0f / player.FrameRate)); + CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); + } + + CVPixelBufferRelease(buffer); + } + } + + player.Tick(); + } +} + +bool VideoBackendAV::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) +{ + PROFILE_CPU(); + player = VideoBackendPlayer(); + auto& playerAV = player.GetBackendState(); + + // Load media + 
NSURL* url; + if (info.Url.StartsWith(TEXT("http"), StringSearchCase::IgnoreCase)) + { + url = [NSURL URLWithString:(NSString*)AppleUtils::ToString(info.Url)]; + + } + else + { +#if PLATFORM_MAC + if (info.Url.StartsWith(TEXT("Content/"), StringSearchCase::CaseSensitive)) + url = [NSURL fileURLWithPath:(NSString*)AppleUtils::ToString(Globals::ProjectFolder / info.Url) isDirectory:NO]; + else + url = [NSURL fileURLWithPath:(NSString*)AppleUtils::ToString(info.Url) isDirectory:NO]; +#else + url = [NSURL fileURLWithPath:(NSString*)AppleUtils::ToString(StringUtils::GetFileName(info.Url)) isDirectory:NO]; +#endif + } + playerAV.Player = [AVPlayer playerWithURL:url]; + if (playerAV.Player == nullptr) + { + return true; + } + [playerAV.Player retain]; + + // Configure player + //[playerAV.Player addObserver:playerStatusObserver.get() forKeyPath:"status" options:NSKeyValueObservingOptionNew context:&player]; + playerAV.Player.actionAtItemEnd = info.Loop ? AVPlayerActionAtItemEndNone : AVPlayerActionAtItemEndPause; + [playerAV.Player setVolume: info.Volume]; + + // Setup player data + player.Backend = this; + player.Created(info); + AV::Players.Add(&player); + + return false; +} + +void VideoBackendAV::Player_Destroy(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + player.ReleaseResources(); + auto& playerAV = player.GetBackendState(); + if (playerAV.PendingPause) + [playerAV.Player pause]; + if (playerAV.Output) + [playerAV.Output release]; + [playerAV.Player release]; + AV::Players.Remove(&player); + player = VideoBackendPlayer(); +} + +void VideoBackendAV::Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) +{ + PROFILE_CPU(); + auto& playerAV = player.GetBackendState(); + playerAV.Player.actionAtItemEnd = info.Loop ? 
AVPlayerActionAtItemEndNone : AVPlayerActionAtItemEndPause; + // TODO: spatial audio + // TODO: audio pan + [playerAV.Player setVolume: info.Volume]; + player.Updated(info); +} + +void VideoBackendAV::Player_Play(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAV = player.GetBackendState(); + playerAV.PendingPlay = true; + playerAV.PendingPause = false; + player.PlayAudio(); +} + +void VideoBackendAV::Player_Pause(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAV = player.GetBackendState(); + playerAV.PendingPlay = false; + playerAV.PendingPause = true; + player.PauseAudio(); +} + +void VideoBackendAV::Player_Stop(VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAV = player.GetBackendState(); + playerAV.PendingPlay = false; + playerAV.PendingPause = true; + playerAV.PendingSeek = true; + playerAV.SeekTime = TimeSpan::Zero(); + player.StopAudio(); +} + +void VideoBackendAV::Player_Seek(VideoBackendPlayer& player, TimeSpan time) +{ + PROFILE_CPU(); + auto& playerAV = player.GetBackendState(); + playerAV.PendingSeek = true; + playerAV.SeekTime = time; +} + +TimeSpan VideoBackendAV::Player_GetTime(const VideoBackendPlayer& player) +{ + PROFILE_CPU(); + auto& playerAV = player.GetBackendState(); + if (playerAV.PendingSeek) + return playerAV.SeekTime; + return AV::ConvertTime([playerAV.Player currentTime]); +} + +const Char* VideoBackendAV::Base_Name() +{ + return TEXT("AVFoundation"); +} + +bool VideoBackendAV::Base_Init() +{ + return false; +} + +void VideoBackendAV::Base_Update(TaskGraph* graph) +{ + // Schedule work to update all videos in async + Function job; + job.Bind(AV::UpdatePlayer); + graph->DispatchJob(job, AV::Players.Count()); +} + +void VideoBackendAV::Base_Dispose() +{ +} + +#endif diff --git a/Source/Engine/Video/AV/VideoBackendAV.h b/Source/Engine/Video/AV/VideoBackendAV.h new file mode 100644 index 000000000..85f7456e4 --- /dev/null +++ b/Source/Engine/Video/AV/VideoBackendAV.h @@ -0,0 +1,30 @@ +// Copyright 
(c) 2012-2024 Wojciech Figat. All rights reserved. + +#pragma once + +#if VIDEO_API_AV + +#include "../VideoBackend.h" + +/// +/// The AVFoundation video backend. +/// +class VideoBackendAV : public VideoBackend +{ +public: + // [VideoBackend] + bool Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) override; + void Player_Destroy(VideoBackendPlayer& player) override; + void Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) override; + void Player_Play(VideoBackendPlayer& player) override; + void Player_Pause(VideoBackendPlayer& player) override; + void Player_Stop(VideoBackendPlayer& player) override; + void Player_Seek(VideoBackendPlayer& player, TimeSpan time) override; + TimeSpan Player_GetTime(const VideoBackendPlayer& player) override; + const Char* Base_Name() override; + bool Base_Init() override; + void Base_Update(TaskGraph* graph) override; + void Base_Dispose() override; +}; + +#endif diff --git a/Source/Engine/Video/Android/VideoBackendAndroid.cpp b/Source/Engine/Video/Android/VideoBackendAndroid.cpp index 46d04f062..bfa1296a3 100644 --- a/Source/Engine/Video/Android/VideoBackendAndroid.cpp +++ b/Source/Engine/Video/Android/VideoBackendAndroid.cpp @@ -574,7 +574,7 @@ bool VideoBackendAndroid::Base_Init() void VideoBackendAndroid::Base_Update(TaskGraph* graph) { - // Schedule work to update all videos models in async + // Schedule work to update all videos in async Function job; job.Bind(Android::UpdatePlayer); graph->DispatchJob(job, Android::Players.Count()); diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index ef020b858..29911575b 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -582,7 +582,7 @@ bool VideoBackendMF::Base_Init() void VideoBackendMF::Base_Update(TaskGraph* graph) { - // Schedule work to update all videos models in async + // Schedule work to update all videos in async 
Function job; job.Bind(MF::UpdatePlayer); graph->DispatchJob(job, MF::Players.Count()); diff --git a/Source/Engine/Video/Video.Build.cs b/Source/Engine/Video/Video.Build.cs index ee0fef30f..d9e15ca4b 100644 --- a/Source/Engine/Video/Video.Build.cs +++ b/Source/Engine/Video/Video.Build.cs @@ -34,6 +34,12 @@ public class Video : EngineModule options.OutputFiles.Add("mfreadwrite.lib"); options.OutputFiles.Add("mfuuid.lib"); break; + case TargetPlatform.Mac: + case TargetPlatform.iOS: + // AVFoundation + options.SourcePaths.Add(Path.Combine(FolderPath, "AV")); + options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_AV"); + break; case TargetPlatform.PS4: options.SourcePaths.Add(Path.Combine(Globals.EngineRoot, "Source", "Platforms", "PS4", "Engine", "Video")); options.CompileEnv.PreprocessorDefinitions.Add("VIDEO_API_PS4"); diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 1dbef13a0..cb2881923 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -22,6 +22,9 @@ #if VIDEO_API_MF #include "MF/VideoBackendMF.h" #endif +#if VIDEO_API_AV +#include "AV/VideoBackendAV.h" +#endif #if VIDEO_API_ANDROID #include "Android/VideoBackendAndroid.h" #endif @@ -109,13 +112,17 @@ protected: context->GPU->SetState(pso); context->GPU->DrawFullscreenTriangle(); } - else + else if (frame->Format() == _player->Format) { // Raw texture data upload uint32 rowPitch, slicePitch; frame->ComputePitch(0, rowPitch, slicePitch); context->GPU->UpdateTexture(frame, 0, 0, _player->VideoFrameMemory.Get(), rowPitch, slicePitch); } + else + { + LOG(Warning, "Incorrect video player data format {} for player texture format {}", ScriptingEnum::ToString(_player->Format), ScriptingEnum::ToString(_player->Frame->Format())); + } // Frame has been updated _player->FramesCount++; @@ -161,7 +168,6 @@ public: } bool Init() override; - void Update() override; void Dispose() override; }; @@ -187,11 +193,6 @@ bool VideoService::Init() return false; } -void 
VideoService::Update() -{ - PROFILE_CPU_NAMED("Video.Update"); -} - void VideoService::Dispose() { PROFILE_CPU_NAMED("Video.Dispose"); @@ -223,6 +224,9 @@ bool Video::CreatePlayerBackend(const VideoBackendPlayerInfo& info, VideoBackend #if VIDEO_API_MF TRY_USE_BACKEND(VideoBackendMF); #endif +#if VIDEO_API_AV + TRY_USE_BACKEND(VideoBackendAV); +#endif #if VIDEO_API_ANDROID TRY_USE_BACKEND(VideoBackendAndroid); #endif @@ -335,6 +339,8 @@ void VideoBackendPlayer::UpdateVideoFrame(Span data, TimeSpan time, TimeSp // Update output frame texture InitVideoFrame(); auto desc = GPUTextureDescription::New2D(Width, Height, PixelFormat::R8G8B8A8_UNorm, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget); + if (!PixelFormatExtensions::IsVideo(Format)) + desc.Format = Format; // Use raw format reported by the backend (eg. BGRA) if (Frame->GetDescription() != desc) { if (Frame->Init(desc)) diff --git a/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs b/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs index 99854a1a1..00d7a4a07 100644 --- a/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs @@ -45,10 +45,13 @@ namespace Flax.Build.Platforms options.LinkEnv.InputLibraries.Add("bz2"); options.LinkEnv.InputLibraries.Add("CoreFoundation.framework"); options.LinkEnv.InputLibraries.Add("CoreGraphics.framework"); + options.LinkEnv.InputLibraries.Add("CoreMedia.framework"); + options.LinkEnv.InputLibraries.Add("CoreVideo.framework"); options.LinkEnv.InputLibraries.Add("SystemConfiguration.framework"); options.LinkEnv.InputLibraries.Add("IOKit.framework"); options.LinkEnv.InputLibraries.Add("Cocoa.framework"); options.LinkEnv.InputLibraries.Add("QuartzCore.framework"); + options.LinkEnv.InputLibraries.Add("AVFoundation.framework"); } protected override void AddArgsCommon(BuildOptions options, List args) diff --git a/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs 
b/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs index 273382d7c..9addc55a0 100644 --- a/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs @@ -47,10 +47,13 @@ namespace Flax.Build.Platforms options.LinkEnv.InputLibraries.Add("Foundation.framework"); options.LinkEnv.InputLibraries.Add("CoreFoundation.framework"); options.LinkEnv.InputLibraries.Add("CoreGraphics.framework"); + options.LinkEnv.InputLibraries.Add("CoreMedia.framework"); + options.LinkEnv.InputLibraries.Add("CoreVideo.framework"); options.LinkEnv.InputLibraries.Add("SystemConfiguration.framework"); options.LinkEnv.InputLibraries.Add("IOKit.framework"); options.LinkEnv.InputLibraries.Add("UIKit.framework"); options.LinkEnv.InputLibraries.Add("QuartzCore.framework"); + options.LinkEnv.InputLibraries.Add("AVFoundation.framework"); } protected override void AddArgsCommon(BuildOptions options, List args) From 9653fc672585fd9b22d43e5601fe2d6ac8c2b677 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 22 May 2024 13:01:24 +0200 Subject: [PATCH 076/292] Optimize stack memory in volumetric fog lights culling --- Source/Engine/Renderer/VolumetricFogPass.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index fe3b2c2e8..5a6fe1d02 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -467,21 +467,21 @@ void VolumetricFogPass::Render(RenderContext& renderContext) GPUTextureView* localShadowedLightScattering = nullptr; { // Get lights to render - Array> pointLights; - Array> spotLights; + Array> pointLights; + Array> spotLights; for (int32 i = 0; i < renderContext.List->PointLights.Count(); i++) { const auto& light = renderContext.List->PointLights.Get()[i]; if (light.VolumetricScatteringIntensity > ZeroTolerance && (view.Position - light.Position).LengthSquared() 
< Math::Square(options.Distance + light.Radius)) - pointLights.Add(&light); + pointLights.Add(i); } for (int32 i = 0; i < renderContext.List->SpotLights.Count(); i++) { const auto& light = renderContext.List->SpotLights.Get()[i]; if (light.VolumetricScatteringIntensity > ZeroTolerance && (view.Position - light.Position).LengthSquared() < Math::Square(options.Distance + light.Radius)) - spotLights.Add(&light); + spotLights.Add(i); } // Skip if no lights to render @@ -506,9 +506,9 @@ void VolumetricFogPass::Render(RenderContext& renderContext) context->BindSR(0, shadowMap); context->BindSR(1, shadowsBuffer); for (int32 i = 0; i < pointLights.Count(); i++) - RenderRadialLight(renderContext, context, view, options, *pointLights[i], perLight, cb1); + RenderRadialLight(renderContext, context, view, options, renderContext.List->PointLights[pointLights[i]], perLight, cb1); for (int32 i = 0; i < spotLights.Count(); i++) - RenderRadialLight(renderContext, context, view, options, *spotLights[i], perLight, cb1); + RenderRadialLight(renderContext, context, view, options, renderContext.List->SpotLights[spotLights[i]], perLight, cb1); // Cleanup context->UnBindCB(1); From 91aa5a0fb9e3db9f642496c11b08fd786899502d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 23 May 2024 13:15:29 +0200 Subject: [PATCH 077/292] Fix video player docs --- Source/Engine/Video/VideoPlayer.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Source/Engine/Video/VideoPlayer.h b/Source/Engine/Video/VideoPlayer.h index 24ee2ebce..87f75327b 100644 --- a/Source/Engine/Video/VideoPlayer.h +++ b/Source/Engine/Video/VideoPlayer.h @@ -108,7 +108,7 @@ public: API_PROPERTY() void SetAudioVolume(float value); /// - /// Gets the stereo pan of the played audio (-1 is left speaker, 1 is right speaker, 0 is balanced). The default is 1. Used by non-spatial audio only. + /// Gets the stereo pan of the played audio (-1 is left speaker, 1 is right speaker, 0 is balanced). 
The default is 0. Used by non-spatial audio only. /// API_PROPERTY(Attributes="EditorOrder(110), DefaultValue(0.0f), Limit(-1.0f, 1.0f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(IsAudioSpatial), true)") FORCE_INLINE float GetAudioPan() const @@ -122,7 +122,7 @@ public: API_PROPERTY() void SetAudioPan(float value); /// - /// Gets the minimum distance at which audio attenuation starts. When the listener is closer to the video player than this value, audio is heard at full volume. Once farther away the audio starts attenuating. + /// Gets the minimum distance at which audio attenuation starts. When the listener is closer to the video player than this value, audio is heard at full volume. Once farther away the audio starts attenuating. Used by spatial audio only. /// API_PROPERTY(Attributes="EditorOrder(120), DefaultValue(1000.0f), Limit(0, float.MaxValue, 0.1f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(IsAudioSpatial))") FORCE_INLINE float GetAudioMinDistance() const @@ -131,12 +131,12 @@ public: } /// - /// Sets the minimum distance at which audio attenuation starts. When the listener is closer to the video player than this value, audio is heard at full volume. Once farther away the audio starts attenuating. + /// Sets the minimum distance at which audio attenuation starts. When the listener is closer to the video player than this value, audio is heard at full volume. Once farther away the audio starts attenuating. Used by spatial audio only. /// API_PROPERTY() void SetAudioMinDistance(float value); /// - /// Gets the attenuation that controls how quickly does audio volume drop off as the listener moves further from the video player. + /// Gets the attenuation that controls how quickly does audio volume drop off as the listener moves further from the video player. Used by spatial audio only. 
/// API_PROPERTY(Attributes="EditorOrder(130), DefaultValue(1.0f), Limit(0, float.MaxValue, 0.1f), EditorDisplay(\"Video Player\"), VisibleIf(nameof(IsAudioSpatial))") FORCE_INLINE float GetAudioAttenuation() const @@ -145,7 +145,7 @@ public: } /// - /// Sets the attenuation that controls how quickly does audio volume drop off as the listener moves further from the video player. At 0, no distance attenuation ever occurs. + /// Sets the attenuation that controls how quickly does audio volume drop off as the listener moves further from the video player. At 0, no distance attenuation ever occurs. Used by spatial audio only. /// API_PROPERTY() void SetAudioAttenuation(float value); @@ -174,33 +174,33 @@ public: } /// - /// Gets the current time of playback. If playback has not yet started, it specifies the time at which playback will start at. The time is in seconds, in range [0, Duration]. + /// Gets the current time of playback. The time is in seconds, in range [0, Duration]. /// API_PROPERTY(Attributes="HideInEditor, NoSerialize") float GetTime() const; /// - /// Sets the current time of playback. If playback has not yet started, it specifies the time at which playback will start at. The time is in seconds, in range [0, Duration]. + /// Sets the current time of playback. The time is in seconds, in range [0, Duration]. /// /// The time. API_PROPERTY() void SetTime(float time); /// - /// Gets the media duration of playback (in seconds). + /// Gets the media duration of playback (in seconds). Valid only when media was loaded by the video backend. /// API_PROPERTY() float GetDuration() const; /// - /// Gets the media frame rate of playback (amount of frames to be played per second). + /// Gets the media frame rate of playback (amount of frames to be played per second). Valid only when media was loaded by the video backend. /// API_PROPERTY() float GetFrameRate() const; /// - /// Gets the amount of video frames decoded and send to GPU during playback. 
Can be used to detect if video has started playback with any visible changes (for video frame texture contents). + /// Gets the amount of video frames decoded and send to GPU during playback. Can be used to detect if video has started playback with any visible changes (for video frame texture contents). Valid only when media was loaded by the video backend. /// API_PROPERTY() int32 GetFramesCount() const; /// - /// Gets the video frame dimensions (in pixels). + /// Gets the video frame dimensions (in pixels). Valid only when media was loaded by the video backend. /// API_PROPERTY() Int2 GetSize() const; From 206ff89587da773bd6642f8db1ead7551353c019 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 23 May 2024 13:58:46 +0200 Subject: [PATCH 078/292] Add constant buffers alignment for data structures (16-byte boundaries) --- Source/Engine/Debug/DebugDraw.cpp | 2 +- Source/Engine/Graphics/Config.h | 3 +++ Source/Engine/Graphics/RenderTools.h | 3 ++- Source/Engine/Level/Actors/ExponentialHeightFog.cpp | 2 +- Source/Engine/Level/Actors/Sky.cpp | 2 +- Source/Engine/Particles/Graph/GPU/GPUParticles.cpp | 2 +- Source/Engine/Particles/Particles.cpp | 2 +- Source/Engine/Render2D/Render2D.cpp | 4 ++-- Source/Engine/Renderer/AmbientOcclusionPass.h | 2 +- Source/Engine/Renderer/AntiAliasing/FXAA.cpp | 2 +- Source/Engine/Renderer/AntiAliasing/SMAA.h | 2 +- Source/Engine/Renderer/AntiAliasing/TAA.cpp | 2 +- Source/Engine/Renderer/AtmospherePreCompute.cpp | 2 +- Source/Engine/Renderer/ColorGradingPass.cpp | 2 +- Source/Engine/Renderer/Config.h | 11 ++++++----- Source/Engine/Renderer/DepthOfFieldPass.cpp | 2 +- Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp | 2 +- Source/Engine/Renderer/Editor/VertexColors.cpp | 2 +- Source/Engine/Renderer/EyeAdaptationPass.cpp | 2 +- Source/Engine/Renderer/GBufferPass.cpp | 2 +- .../Renderer/GI/DynamicDiffuseGlobalIllumination.cpp | 4 ++-- .../Renderer/GI/DynamicDiffuseGlobalIllumination.h | 2 +- 
Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 2 +- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h | 2 +- .../Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 6 +++--- Source/Engine/Renderer/GlobalSignDistanceFieldPass.h | 2 +- Source/Engine/Renderer/HistogramPass.cpp | 2 +- Source/Engine/Renderer/LightPass.cpp | 4 ++-- Source/Engine/Renderer/MotionBlurPass.cpp | 2 +- Source/Engine/Renderer/PostProcessingPass.h | 4 ++-- Source/Engine/Renderer/ProbesRenderer.cpp | 2 +- Source/Engine/Renderer/ReflectionsPass.cpp | 2 +- Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp | 2 +- Source/Engine/Renderer/ShadowsPass.cpp | 2 +- Source/Engine/Renderer/Utils/BitonicSort.cpp | 2 +- Source/Engine/Renderer/Utils/MultiScaler.cpp | 2 +- Source/Engine/Renderer/VolumetricFogPass.h | 6 +++--- Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp | 2 +- 38 files changed, 54 insertions(+), 49 deletions(-) diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp index 942069130..a3a9c21a8 100644 --- a/Source/Engine/Debug/DebugDraw.cpp +++ b/Source/Engine/Debug/DebugDraw.cpp @@ -125,7 +125,7 @@ PACK_STRUCT(struct Vertex { Color32 Color; }); -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Matrix ViewProjection; Float2 Padding; float ClipPosZBias; diff --git a/Source/Engine/Graphics/Config.h b/Source/Engine/Graphics/Config.h index 6bb5496b9..c37594bdb 100644 --- a/Source/Engine/Graphics/Config.h +++ b/Source/Engine/Graphics/Config.h @@ -28,6 +28,9 @@ // Maximum amount of thread groups per dimension for compute dispatch #define GPU_MAX_CS_DISPATCH_THREAD_GROUPS 65535 +// Alignment of the shader data structures (16-byte boundaries) to improve memory copies efficiency. 
+#define GPU_SHADER_DATA_ALIGNMENT 16 + // Enable/disable assertion for graphics layers #define GPU_ENABLE_ASSERTION 1 diff --git a/Source/Engine/Graphics/RenderTools.h b/Source/Engine/Graphics/RenderTools.h index 00d3398cd..8a19718f1 100644 --- a/Source/Engine/Graphics/RenderTools.h +++ b/Source/Engine/Graphics/RenderTools.h @@ -2,6 +2,7 @@ #pragma once +#include "Config.h" #include "PixelFormat.h" #include "RenderView.h" #include "Engine/Scripting/ScriptingType.h" @@ -11,7 +12,7 @@ class SkinnedModel; struct RenderContext; struct FloatR10G10B10A2; -PACK_STRUCT(struct QuadShaderData +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) QuadShaderData { Float4 Color; }); diff --git a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp index d2ebb69b2..10be7bdf0 100644 --- a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp +++ b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp @@ -179,7 +179,7 @@ void ExponentialHeightFog::GetExponentialHeightFogData(const RenderView& view, S result.VolumetricFogMaxDistance = VolumetricFogDistance; } -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { ShaderGBufferData GBuffer; ShaderExponentialHeightFogData ExponentialHeightFog; }); diff --git a/Source/Engine/Level/Actors/Sky.cpp b/Source/Engine/Level/Actors/Sky.cpp index 103d37316..f0a7c354d 100644 --- a/Source/Engine/Level/Actors/Sky.cpp +++ b/Source/Engine/Level/Actors/Sky.cpp @@ -17,7 +17,7 @@ #include "Engine/Serialization/Serialization.h" #include "Engine/Level/Scene/SceneRendering.h" -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Matrix WVP; Float3 ViewOffset; float Padding; diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp index fae5e807a..e4de0ad89 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp @@ 
-13,7 +13,7 @@ #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Graphics/Shaders/GPUConstantBuffer.h" -PACK_STRUCT(struct GPUParticlesData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GPUParticlesData { Matrix ViewProjectionMatrix; Matrix InvViewProjectionMatrix; Matrix InvViewMatrix; diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 66fc91cce..0e468b66f 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -569,7 +569,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa #if COMPILE_WITH_GPU_PARTICLES -PACK_STRUCT(struct GPUParticlesSortingData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GPUParticlesSortingData { Float3 ViewPosition; uint32 ParticleCounterOffset; uint32 ParticleStride; diff --git a/Source/Engine/Render2D/Render2D.cpp b/Source/Engine/Render2D/Render2D.cpp index c65a9f401..41a136314 100644 --- a/Source/Engine/Render2D/Render2D.cpp +++ b/Source/Engine/Render2D/Render2D.cpp @@ -52,11 +52,11 @@ // True if enable downscaling when rendering blur const bool DownsampleForBlur = false; -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Matrix ViewProjection; }); -PACK_STRUCT(struct BlurData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) BlurData { Float2 InvBufferSize; uint32 SampleCount; float Dummy0; diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.h b/Source/Engine/Renderer/AmbientOcclusionPass.h index 9dacae18b..d89ad38b4 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.h +++ b/Source/Engine/Renderer/AmbientOcclusionPass.h @@ -19,7 +19,7 @@ class AmbientOcclusionPass : public RendererPass private: // Packed shader constant buffer structure (this MUST match shader code) - PACK_STRUCT(struct ASSAOConstants { + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ASSAOConstants { ShaderGBufferData GBuffer; Float2 ViewportPixelSize; 
diff --git a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp index 1f9b215e5..032230d64 100644 --- a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp @@ -7,7 +7,7 @@ #include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" -PACK_STRUCT(struct Data +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Float4 ScreenSize; }); diff --git a/Source/Engine/Renderer/AntiAliasing/SMAA.h b/Source/Engine/Renderer/AntiAliasing/SMAA.h index 3b18727cd..7a6690f5d 100644 --- a/Source/Engine/Renderer/AntiAliasing/SMAA.h +++ b/Source/Engine/Renderer/AntiAliasing/SMAA.h @@ -15,7 +15,7 @@ class SMAA : public RendererPass { private: - PACK_STRUCT(struct Data + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Float4 RtSize; }); diff --git a/Source/Engine/Renderer/AntiAliasing/TAA.cpp b/Source/Engine/Renderer/AntiAliasing/TAA.cpp index 4c6c899eb..168e15ed9 100644 --- a/Source/Engine/Renderer/AntiAliasing/TAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/TAA.cpp @@ -11,7 +11,7 @@ #include "Engine/Renderer/GBufferPass.h" #include "Engine/Engine/Engine.h" -PACK_STRUCT(struct Data +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Float2 ScreenSizeInv; Float2 JitterInv; diff --git a/Source/Engine/Renderer/AtmospherePreCompute.cpp b/Source/Engine/Renderer/AtmospherePreCompute.cpp index c761268b8..6d17e7613 100644 --- a/Source/Engine/Renderer/AtmospherePreCompute.cpp +++ b/Source/Engine/Renderer/AtmospherePreCompute.cpp @@ -61,7 +61,7 @@ protected: bool Run() override; }; -PACK_STRUCT(struct Data +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { float First; float AtmosphereR; diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp b/Source/Engine/Renderer/ColorGradingPass.cpp index 7ac0da588..9edc83969 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -8,7 +8,7 @@ #include 
"Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderTask.h" -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Float4 ColorSaturationShadows; Float4 ColorContrastShadows; Float4 ColorGammaShadows; diff --git a/Source/Engine/Renderer/Config.h b/Source/Engine/Renderer/Config.h index c62941b67..a861eb127 100644 --- a/Source/Engine/Renderer/Config.h +++ b/Source/Engine/Renderer/Config.h @@ -6,11 +6,12 @@ #include "Engine/Core/Math/Vector2.h" #include "Engine/Core/Math/Vector3.h" #include "Engine/Core/Math/Vector4.h" +#include "Engine/Graphics/Config.h" /// /// Structure that contains information about GBuffer for shaders. /// -PACK_STRUCT(struct ShaderGBufferData +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderGBufferData { Float4 ViewInfo; Float4 ScreenSize; @@ -23,7 +24,7 @@ PACK_STRUCT(struct ShaderGBufferData /// /// Structure that contains information about exponential height fog for shaders. /// -PACK_STRUCT(struct ShaderExponentialHeightFogData +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderExponentialHeightFogData { Float3 FogInscatteringColor; float FogMinOpacity; @@ -48,7 +49,7 @@ PACK_STRUCT(struct ShaderExponentialHeightFogData /// /// Structure that contains information about atmosphere fog for shaders. /// -PACK_STRUCT(struct ShaderAtmosphericFogData +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderAtmosphericFogData { float AtmosphericFogDensityScale; float AtmosphericFogSunDiscScale; @@ -70,7 +71,7 @@ PACK_STRUCT(struct ShaderAtmosphericFogData /// /// Structure that contains information about light for shaders. 
/// -PACK_STRUCT(struct ShaderLightData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderLightData { Float2 SpotAngles; float SourceRadius; float SourceLength; @@ -89,7 +90,7 @@ PACK_STRUCT(struct ShaderLightData { /// /// Packed env probe data /// -PACK_STRUCT(struct ShaderEnvProbeData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderEnvProbeData { Float4 Data0; // x - Position.x, y - Position.y, z - Position.z, w - unused Float4 Data1; // x - Radius , y - 1 / Radius, z - Brightness, w - unused }); diff --git a/Source/Engine/Renderer/DepthOfFieldPass.cpp b/Source/Engine/Renderer/DepthOfFieldPass.cpp index 59a21094d..23c626e9c 100644 --- a/Source/Engine/Renderer/DepthOfFieldPass.cpp +++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp @@ -18,7 +18,7 @@ #define DOF_GRID_SIZE 450 #define DOF_DEPTH_BLUR_FORMAT PixelFormat::R16G16_Float -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Float2 ProjectionAB; float BokehDepthCullThreshold; float BokehDepthCutoff; diff --git a/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp b/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp index 113d997f2..e51117434 100644 --- a/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp +++ b/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp @@ -19,7 +19,7 @@ #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/Actors/StaticModel.h" -PACK_STRUCT(struct LightmapUVsDensityMaterialShaderData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) LightmapUVsDensityMaterialShaderData { Matrix ViewProjectionMatrix; Matrix WorldMatrix; Rectangle LightmapArea; diff --git a/Source/Engine/Renderer/Editor/VertexColors.cpp b/Source/Engine/Renderer/Editor/VertexColors.cpp index 8b26e13e5..26513f1c8 100644 --- a/Source/Engine/Renderer/Editor/VertexColors.cpp +++ b/Source/Engine/Renderer/Editor/VertexColors.cpp @@ -12,7 +12,7 @@ #include "Engine/Graphics/RenderTask.h" #include "Engine/Renderer/DrawCall.h" -PACK_STRUCT(struct 
VertexColorsMaterialShaderData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) VertexColorsMaterialShaderData { Matrix ViewProjectionMatrix; Matrix WorldMatrix; }); diff --git a/Source/Engine/Renderer/EyeAdaptationPass.cpp b/Source/Engine/Renderer/EyeAdaptationPass.cpp index 9b664278c..79e0c30a8 100644 --- a/Source/Engine/Renderer/EyeAdaptationPass.cpp +++ b/Source/Engine/Renderer/EyeAdaptationPass.cpp @@ -16,7 +16,7 @@ #include "Engine/Engine/Engine.h" #include "Engine/Engine/Time.h" -PACK_STRUCT(struct EyeAdaptationData { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) EyeAdaptationData { float MinBrightness; float MaxBrightness; float SpeedUp; diff --git a/Source/Engine/Renderer/GBufferPass.cpp b/Source/Engine/Renderer/GBufferPass.cpp index 589401892..4addcb6c6 100644 --- a/Source/Engine/Renderer/GBufferPass.cpp +++ b/Source/Engine/Renderer/GBufferPass.cpp @@ -21,7 +21,7 @@ #include "Engine/Level/Actors/Decal.h" #include "Engine/Engine/Engine.h" -PACK_STRUCT(struct GBufferPassData{ +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GBufferPassData{ ShaderGBufferData GBuffer; Float3 Dummy0; int32 ViewMode; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index e488bab81..10a40473e 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -41,7 +41,7 @@ #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 -PACK_STRUCT(struct Data0 +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data0 { DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; @@ -54,7 +54,7 @@ PACK_STRUCT(struct Data0 Int4 ProbeScrollClears[4]; }); -PACK_STRUCT(struct Data1 +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data1 { // TODO: use push constants on Vulkan or root signature data on DX12 
to reduce overhead of changing single DWORD Float2 Padding1; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index 09986636d..ed2642153 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -13,7 +13,7 @@ class FLAXENGINE_API DynamicDiffuseGlobalIlluminationPass : public RendererPass< { public: // Constant buffer data for DDGI access on a GPU. - PACK_STRUCT(struct ConstantsData + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ConstantsData { Float4 ProbesOriginAndSpacing[4]; Int4 ProbesScrollOffsets[4]; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index d4e212222..f48bd627e 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -42,7 +42,7 @@ #include "Engine/Debug/DebugDraw.h" #endif -PACK_STRUCT(struct Data0 +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data0 { Float3 ViewWorldPos; float ViewNearPlane; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h index 443b3a401..7961eb8b3 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h @@ -11,7 +11,7 @@ class FLAXENGINE_API GlobalSurfaceAtlasPass : public RendererPass { private: - PACK_STRUCT(struct Data { + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { float BloomLimit; float BloomThreshold; float BloomMagnitude; @@ -56,7 +56,7 @@ private: Matrix LensFlareStarMat; }); - PACK_STRUCT(struct GaussianBlurData { + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GaussianBlurData { Float2 Size; float Dummy3; float Dummy4; diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index 
dd88edc8c..da4e56f31 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -69,7 +69,7 @@ public: } }; -PACK_STRUCT(struct Data +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Float2 Dummy0; int32 CubeFace; diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index 4f362392f..f37f711fb 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -13,7 +13,7 @@ #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Level/Actors/EnvironmentProbe.h" -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { ShaderEnvProbeData PData; Matrix WVP; ShaderGBufferData GBuffer; diff --git a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp index 5e758d148..82873b81c 100644 --- a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp +++ b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp @@ -24,7 +24,7 @@ #define TEXTURE1 5 #define TEXTURE2 6 -PACK_STRUCT(struct Data +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { ShaderGBufferData GBuffer; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 2809c1a6f..59d9bf45a 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -28,7 +28,7 @@ #define NormalOffsetScaleTweak METERS_TO_UNITS(1) #define LocalLightNearPlane METERS_TO_UNITS(0.1f) -PACK_STRUCT(struct Data{ +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data{ ShaderGBufferData GBuffer; ShaderLightData Light; Matrix WVP; diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index 78f6e89b9..8255afdf1 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -15,7 +15,7 @@ struct Item uint32 Value; }; 
-PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Item NullItem; uint32 CounterOffset; uint32 MaxIterations; diff --git a/Source/Engine/Renderer/Utils/MultiScaler.cpp b/Source/Engine/Renderer/Utils/MultiScaler.cpp index 1edb81fa8..f6a7c6693 100644 --- a/Source/Engine/Renderer/Utils/MultiScaler.cpp +++ b/Source/Engine/Renderer/Utils/MultiScaler.cpp @@ -5,7 +5,7 @@ #include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" -PACK_STRUCT(struct Data { +PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { Float2 TexelSize; Float2 Padding; }); diff --git a/Source/Engine/Renderer/VolumetricFogPass.h b/Source/Engine/Renderer/VolumetricFogPass.h index 47757cf47..d39c7638f 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.h +++ b/Source/Engine/Renderer/VolumetricFogPass.h @@ -28,14 +28,14 @@ public: private: - PACK_STRUCT(struct SkyLightData { + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) SkyLightData { Float3 MultiplyColor; float VolumetricScatteringIntensity; Float3 AdditiveColor; float Dummy0; }); - PACK_STRUCT(struct Data { + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { ShaderGBufferData GBuffer; Float3 GlobalAlbedo; @@ -67,7 +67,7 @@ private: DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; }); - PACK_STRUCT(struct PerLight { + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) PerLight { Float2 SliceToDepth; int32 MinZ; float LocalLightScatteringIntensity; diff --git a/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp b/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp index b3f299524..b846ab0a9 100644 --- a/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp +++ b/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp @@ -23,7 +23,7 @@ namespace ShadowsOfMordor { - PACK_STRUCT(struct ShaderData { + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderData { Rectangle LightmapArea; Matrix WorldMatrix; Matrix ToTangentSpace; From 681c473e7a0449eb03a87569a07103f9de8ff2f3 Mon Sep 
17 00:00:00 2001 From: Wojtek Figat Date: Thu, 23 May 2024 14:34:26 +0200 Subject: [PATCH 079/292] Re-enable CSM cascades filter width adjustment --- Source/Engine/Renderer/ShadowsPass.h | 2 ++ Source/Shaders/ShadowsSampling.hlsl | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.h b/Source/Engine/Renderer/ShadowsPass.h index 551b3a504..be52f4acc 100644 --- a/Source/Engine/Renderer/ShadowsPass.h +++ b/Source/Engine/Renderer/ShadowsPass.h @@ -66,7 +66,9 @@ private: { _psShadowDir.Release(); _psShadowPoint.Release(); + _psShadowPointInside.Release(); _psShadowSpot.Release(); + _psShadowSpotInside.Release(); invalidateResources(); } #endif diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index 354e5065e..c56904e6b 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -266,8 +266,8 @@ ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadow result.SurfaceShadow = SampleShadowMapOptimizedPCF(shadowMap, shadowMapUV, shadowPosition.z); // Increase the sharpness for higher cascades to match the filter radius - //const float SharpnessScale[MaxNumCascades] = { 1.0f, 1.5f, 3.0f, 3.5f }; - //shadow.Sharpness *= SharpnessScale[cascadeIndex]; + const float SharpnessScale[MaxNumCascades] = { 1.0f, 1.5f, 3.0f, 3.5f }; + shadow.Sharpness *= SharpnessScale[cascadeIndex]; #if defined(USE_GBUFFER_CUSTOM_DATA) // Subsurface shadowing From 1481d1a1e99aff547bc9c480f80ac96ccd7bbfd4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 23 May 2024 14:47:16 +0200 Subject: [PATCH 080/292] Add `ShadowsResolution` for manually specified shadowmap resolution for lights --- .../Engine/Level/Actors/DirectionalLight.cpp | 1 + Source/Engine/Level/Actors/Light.h | 30 +++++++++++++++++-- Source/Engine/Level/Actors/PointLight.cpp | 1 + Source/Engine/Level/Actors/SpotLight.cpp | 1 + Source/Engine/Renderer/RenderList.h | 1 + 
Source/Engine/Renderer/ShadowsPass.cpp | 12 ++++++-- 6 files changed, 41 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Level/Actors/DirectionalLight.cpp b/Source/Engine/Level/Actors/DirectionalLight.cpp index cef1eba97..97f9c3b58 100644 --- a/Source/Engine/Level/Actors/DirectionalLight.cpp +++ b/Source/Engine/Level/Actors/DirectionalLight.cpp @@ -40,6 +40,7 @@ void DirectionalLight::Draw(RenderContext& renderContext) data.CastVolumetricShadow = CastVolumetricShadow; data.ShadowsUpdateRate = ShadowsUpdateRate; data.ShadowFrame = _invalidateShadowFrame; + data.ShadowsResolution = (int32)ShadowsResolution; data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; data.ShadowsMode = ShadowsMode; data.CascadeCount = CascadeCount; diff --git a/Source/Engine/Level/Actors/Light.h b/Source/Engine/Level/Actors/Light.h index 8fd40c530..9c3d6ff73 100644 --- a/Source/Engine/Level/Actors/Light.h +++ b/Source/Engine/Level/Actors/Light.h @@ -66,7 +66,7 @@ public: const Vector3 size(50); return BoundingBox(_transform.Translation - size, _transform.Translation + size); } - + virtual void DrawLightsDebug(RenderView& view); #endif void Serialize(SerializeStream& stream, const void* otherObj) override; @@ -79,6 +79,26 @@ public: API_CLASS(Abstract) class FLAXENGINE_API LightWithShadow : public Light { DECLARE_SCENE_OBJECT_ABSTRACT(LightWithShadow); + + /// + /// List of fixed resolutions for light shadow map. + /// + API_ENUM() enum class ShadowMapResolution + { + // Use automatic dynamic resolution based on distance to view. + Dynamic = 0, + // Shadow map of size 128x128. + _128 = 128, + // Shadow map of size 256x256. + _256 = 256, + // Shadow map of size 512x512. + _512 = 512, + // Shadow map of size 1024x1024. + _1024 = 1024, + // Shadow map of size 2048x2048. 
+ _2048 = 2048, + }; + protected: uint32 _invalidateShadowFrame = 0; @@ -143,12 +163,18 @@ public: API_FIELD(Attributes="EditorOrder(105), EditorDisplay(\"Shadow\", \"Update Rate At Distance\"), Limit(0.0f, 1.0f)") float ShadowsUpdateRateAtDistance = 0.5f; + /// + /// Defines the resolution of the shadow map texture used to draw objects projection from light-point-of-view. Higher values increase shadow quality at cost of performance. + /// + API_FIELD(Attributes="EditorOrder(105), EditorDisplay(\"Shadow\", \"Resolution\")") + ShadowMapResolution ShadowsResolution = ShadowMapResolution::Dynamic; + /// /// Describes how a visual element casts shadows. /// API_FIELD(Attributes="EditorOrder(60), EditorDisplay(\"Shadow\", \"Mode\")") ShadowsCastingMode ShadowsMode = ShadowsCastingMode::All; - + /// /// Marks the light shadow to be refreshes during next drawing. Invalidates any cached shadow map and redraws static shadows of the object (if any in use). /// diff --git a/Source/Engine/Level/Actors/PointLight.cpp b/Source/Engine/Level/Actors/PointLight.cpp index 5f2be645b..3c85be612 100644 --- a/Source/Engine/Level/Actors/PointLight.cpp +++ b/Source/Engine/Level/Actors/PointLight.cpp @@ -106,6 +106,7 @@ void PointLight::Draw(RenderContext& renderContext) data.ShadowsUpdateRate = ShadowsUpdateRate; data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; data.ShadowFrame = _invalidateShadowFrame; + data.ShadowsResolution = (int32)ShadowsResolution; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index 892bdf02e..38457c94e 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -156,6 +156,7 @@ void SpotLight::Draw(RenderContext& renderContext) data.ShadowsUpdateRate = ShadowsUpdateRate; data.ShadowsUpdateRateAtDistance = ShadowsUpdateRateAtDistance; data.ShadowFrame = 
_invalidateShadowFrame; + data.ShadowsResolution = (int32)ShadowsResolution; data.ShadowsMode = ShadowsMode; data.Radius = radius; data.FallOffExponent = FallOffExponent; diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 8f15175be..8f8e86973 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -59,6 +59,7 @@ struct RenderLightData float ShadowsUpdateRate; float ShadowsUpdateRateAtDistance; uint32 ShadowFrame; + int32 ShadowsResolution; bool CanRenderShadow(const RenderView& view) const; }; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 59d9bf45a..a9920ffb2 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -142,6 +142,7 @@ struct ShadowAtlasLightCache float Distance; Float4 CascadeSplits; Float3 ViewDirection; + int32 ShadowsResolution; void Set(const RenderView& view, const RenderLightData& light, const Float4& cascadeSplits = Float4::Zero) { @@ -152,6 +153,7 @@ struct ShadowAtlasLightCache ShadowsUpdateRateAtDistance = light.ShadowsUpdateRateAtDistance; Direction = light.Direction; ShadowFrame = light.ShadowFrame; + ShadowsResolution = light.ShadowsResolution; if (light.IsDirectionalLight) { // Sun @@ -247,6 +249,7 @@ struct ShadowAtlasLight !Math::NearEqual(Cache.ShadowsUpdateRate, light.ShadowsUpdateRate) || !Math::NearEqual(Cache.ShadowsUpdateRateAtDistance, light.ShadowsUpdateRateAtDistance) || Cache.ShadowFrame != light.ShadowFrame || + Cache.ShadowsResolution != light.ShadowsResolution || Float3::Dot(Cache.Direction, light.Direction) < SHADOWS_ROTATION_ERROR) { // Invalidate @@ -1119,9 +1122,12 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& auto& atlasLight = shadows.Lights[light->ID]; // Calculate resolution for this light - // TODO: add support for fixed shadow map resolution assigned per-light - float lightResolutionFloat = 
baseLightResolution * light->ScreenSize; - atlasLight.Resolution = QuantizeResolution(lightResolutionFloat); + atlasLight.Resolution = light->ShadowsResolution; + if (atlasLight.Resolution == 0) + { + // ScreenSize-based automatic shadowmap resolution + atlasLight.Resolution = QuantizeResolution(baseLightResolution * light->ScreenSize); + } // Cull too small lights if (atlasLight.Resolution < SHADOWS_MIN_RESOLUTION) From b086de1c1261c8c9f4ee6cfccd79ad0d07d2d207 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 23 May 2024 16:21:38 +0200 Subject: [PATCH 081/292] Add `WindowsMinVer` config for minimum target Windows version switch --- .../Platform/Win32/IncludeWindowsHeaders.h | 13 ++------ Source/Engine/Video/MF/VideoBackendMF.cpp | 9 ++++++ .../Flax.Build/Platforms/UWP/UWPToolchain.cs | 1 + .../Platforms/Windows/WindowsToolchain.cs | 32 +++++++++++++++++++ 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h b/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h index 9f8a49dcf..fd1a84bab 100644 --- a/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h +++ b/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h @@ -9,20 +9,13 @@ #define WINVER 0x0601 #endif #ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 +#define _WIN32_WINNT WINVER #endif #ifndef _WIN32_WINDOWS -#define _WIN32_WINDOWS 0x0601 +#define _WIN32_WINDOWS WINVER #endif -// Override Win API for UWP -#if PLATFORM_UWP -#define WINVER 0x0A00 -#define _WIN32_WINNT 0x0A00 -#define _WIN32_WINDOWS 0x0A00 -#endif - -// Override for Xbox Scarlett +// Override for Xbox #if PLATFORM_XBOX_SCARLETT || PLATFORM_XBOX_ONE #define NOBITMAP #define NOMCX diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 29911575b..08676e6d8 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -11,6 +11,15 @@ #if USE_EDITOR #include "Editor/Editor.h" #endif 
+#include +#if WINVER >= _WIN32_WINNT_WINBLUE && WINVER < _WIN32_WINNT_WIN10 +// Fix compilation for Windows 8.1 on the latest Windows SDK +typedef enum _MFVideoSphericalFormat { } MFVideoSphericalFormat; +#endif +#ifndef MF_SOURCE_READER_CURRENT_TYPE_INDEX +// Fix compilation for Windows 7 on the latest Windows SDK +#define MF_SOURCE_READER_CURRENT_TYPE_INDEX 0xFFFFFFFF +#endif #include #include #include diff --git a/Source/Tools/Flax.Build/Platforms/UWP/UWPToolchain.cs b/Source/Tools/Flax.Build/Platforms/UWP/UWPToolchain.cs index 6cef48f52..964a3679c 100644 --- a/Source/Tools/Flax.Build/Platforms/UWP/UWPToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/UWP/UWPToolchain.cs @@ -77,6 +77,7 @@ namespace Flax.Build.Platforms options.CompileEnv.PreprocessorDefinitions.Add("_WINRT_DLL"); options.CompileEnv.PreprocessorDefinitions.Add("_WINDLL"); options.CompileEnv.PreprocessorDefinitions.Add("__WRL_NO_DEFAULT_LIB__"); + options.CompileEnv.PreprocessorDefinitions.Add("WINVER=0x0A00"); options.LinkEnv.InputLibraries.Add("WindowsApp.lib"); } diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs index 66c39f853..e7354442b 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs @@ -1,10 +1,23 @@ // Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. +using System; using System.Collections.Generic; using System.IO; using Flax.Build.Graph; using Flax.Build.NativeCpp; +namespace Flax.Build +{ + partial class Configuration + { + /// + /// Specifies the minimum Windows version to use (eg. 10). + /// + [CommandLine("winMinVer", "", "Specifies the minimum Windows version to use (eg. 
10).")] + public static string WindowsMinVer = "7"; + } +} + namespace Flax.Build.Platforms { /// @@ -31,6 +44,25 @@ namespace Flax.Build.Platforms options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_WINDOWS"); + // Select minimum Windows version + if (!Version.TryParse(Configuration.WindowsMinVer, out var winMinVer)) + { + if (int.TryParse(Configuration.WindowsMinVer, out var winMinVerMajor)) + winMinVer = new Version(winMinVerMajor, 0); + else + winMinVer = new Version(7, 0); + } + int winVer; + if (winMinVer.Major >= 10) + winVer = 0x0A00; // Windows 10 + else if (winMinVer.Major == 8 && winMinVer.Minor >= 1) + winVer = 0x0603; // Windows 8.1 + else if (winMinVer.Major == 8) + winVer = 0x0602; // Windows 8 + else + winVer = 0x0601; // Windows 7 + options.CompileEnv.PreprocessorDefinitions.Add($"WINVER=0x{winVer:X4}"); + options.LinkEnv.InputLibraries.Add("dwmapi.lib"); options.LinkEnv.InputLibraries.Add("kernel32.lib"); options.LinkEnv.InputLibraries.Add("user32.lib"); From 82ee84ad39c67f7bdf3cad1547871f02fa6bce5b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 23 May 2024 17:17:21 +0200 Subject: [PATCH 082/292] Optimize Development builds to use SSE2 by default on Windows and use SSE4.1 when targeting Windows 11 --- Source/Tools/Flax.Build/Build/Target.cs | 4 +-- .../Platforms/Windows/WindowsToolchain.cs | 36 +++++++++++++------ .../Platforms/Windows/WindowsToolchainBase.cs | 7 ++-- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/Source/Tools/Flax.Build/Build/Target.cs b/Source/Tools/Flax.Build/Build/Target.cs index 90523b5ef..bc56254b6 100644 --- a/Source/Tools/Flax.Build/Build/Target.cs +++ b/Source/Tools/Flax.Build/Build/Target.cs @@ -281,10 +281,10 @@ namespace Flax.Build options.CompileEnv.IntrinsicFunctions = true; options.CompileEnv.BufferSecurityCheck = true; options.CompileEnv.Inlining = true; - options.CompileEnv.WholeProgramOptimization = false; + options.CompileEnv.WholeProgramOptimization = true; 
options.LinkEnv.DebugInformation = true; - options.LinkEnv.LinkTimeCodeGeneration = false; + options.LinkEnv.LinkTimeCodeGeneration = true; options.LinkEnv.UseIncrementalLinking = true; options.LinkEnv.Optimization = true; break; diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs index e7354442b..121a7b367 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs @@ -27,6 +27,8 @@ namespace Flax.Build.Platforms /// public sealed class WindowsToolchain : WindowsToolchainBase { + private Version _minVersion; + /// /// Initializes a new instance of the class. /// @@ -35,6 +37,14 @@ namespace Flax.Build.Platforms public WindowsToolchain(WindowsPlatform platform, TargetArchitecture architecture) : base(platform, architecture, WindowsPlatformToolset.Latest, WindowsPlatformSDK.Latest) { + // Select minimum Windows version + if (!Version.TryParse(Configuration.WindowsMinVer, out _minVersion)) + { + if (int.TryParse(Configuration.WindowsMinVer, out var winMinVerMajor)) + _minVersion = new Version(winMinVerMajor, 0); + else + _minVersion = new Version(7, 0); + } } /// @@ -44,20 +54,12 @@ namespace Flax.Build.Platforms options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_WINDOWS"); - // Select minimum Windows version - if (!Version.TryParse(Configuration.WindowsMinVer, out var winMinVer)) - { - if (int.TryParse(Configuration.WindowsMinVer, out var winMinVerMajor)) - winMinVer = new Version(winMinVerMajor, 0); - else - winMinVer = new Version(7, 0); - } int winVer; - if (winMinVer.Major >= 10) + if (_minVersion.Major >= 10) winVer = 0x0A00; // Windows 10 - else if (winMinVer.Major == 8 && winMinVer.Minor >= 1) + else if (_minVersion.Major == 8 && _minVersion.Minor >= 1) winVer = 0x0603; // Windows 8.1 - else if (winMinVer.Major == 8) + else if (_minVersion.Major == 8) winVer = 0x0602; // Windows 8 else 
winVer = 0x0601; // Windows 7 @@ -74,6 +76,18 @@ namespace Flax.Build.Platforms options.LinkEnv.InputLibraries.Add("delayimp.lib"); } + /// + protected override void SetupCompileCppFilesArgs(TaskGraph graph, BuildOptions options, List args) + { + base.SetupCompileCppFilesArgs(graph, options, args); + + if (Toolset >= WindowsPlatformToolset.v142 && _minVersion.Major >= 11) + { + // Windows 11 requires SSE4.2 + args.Add("/d2archSSE42"); + } + } + /// public override void LinkFiles(TaskGraph graph, BuildOptions options, string outputFilePath) { diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs index c1cdcb0ee..3693595ea 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs @@ -511,16 +511,13 @@ namespace Flax.Build.Platforms commonArgs.Add("/Os"); if (compileEnvironment.Optimization) { - // Enable Most Speed Optimizations - // Commented out due to /Og causing slow build times without /GL in development builds - //commonArgs.Add("/Ox"); - // Generate Intrinsic Functions commonArgs.Add("/Oi"); // Frame-Pointer Omission commonArgs.Add("/Oy"); + // Only use /Ox with /GL to prevent too long build times if (compileEnvironment.WholeProgramOptimization) { // Enable Most Speed Optimizations @@ -889,7 +886,7 @@ namespace Flax.Build.Platforms } // Link Incrementally - if (linkEnvironment.UseIncrementalLinking) + if (linkEnvironment.UseIncrementalLinking && !linkEnvironment.LinkTimeCodeGeneration) { args.Add("/INCREMENTAL"); } From 182d6d86020cb8880ae7597daa638c7f7378d2d9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 23 May 2024 21:51:57 +0200 Subject: [PATCH 083/292] Fix regression in missing shadows inside Global Surface Atals --- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 4 +++- Source/Shaders/GI/GlobalSurfaceAtlas.shader | 3 --- 2 files changed, 3 insertions(+), 4 
deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index f48bd627e..858825145 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -946,8 +946,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Draw light PROFILE_GPU_CPU_NAMED("Directional Light"); const bool useShadow = light.CanRenderShadow(renderContext.View); - // TODO: test perf/quality when using Shadow Map for directional light (ShadowsPass::Instance()->LastDirLightShadowMap) instead of Global SDF trace light.SetShaderData(data.Light, useShadow); + data.Light.ShadowsBufferAddress = useShadow; // Use this to indicate if trace shadow (SDF trace) data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; context->UpdateCB(_cb0, &data); @@ -981,6 +981,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co PROFILE_GPU_CPU_NAMED("Point Light"); const bool useShadow = light.CanRenderShadow(renderContext.View); light.SetShaderData(data.Light, useShadow); + data.Light.ShadowsBufferAddress = useShadow; // Use this to indicate if trace shadow (SDF trace) data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; context->UpdateCB(_cb0, &data); @@ -1014,6 +1015,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co PROFILE_GPU_CPU_NAMED("Spot Light"); const bool useShadow = light.CanRenderShadow(renderContext.View); light.SetShaderData(data.Light, useShadow); + data.Light.ShadowsBufferAddress = useShadow; // Use this to indicate if trace shadow (SDF trace) data.Light.Color *= light.IndirectLightingIntensity; data.LightShadowsStrength = 1.0f - light.ShadowsStrength; context->UpdateCB(_cb0, &data); diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader 
b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index 1af8880bc..23ebddda8 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -167,9 +167,6 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target BRANCH if (NoL > 0) { - // TODO: try using shadow map for on-screen pixels - // TODO: try using cone trace with Global SDF for smoother shadow (eg. for sun shadows or for area lights) - // Shot a ray from texel into the light to see if there is any occluder GlobalSDFTrace trace; trace.Init(gBuffer.WorldPos + gBuffer.Normal * shadowBias, L, bias, toLightDst - bias); From 7c83481d6fb5b1d5a28926ed8eb8f72087341ba8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 May 2024 11:23:19 +0200 Subject: [PATCH 084/292] Bump up version number after compilation setup change --- Flax.flaxproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index 46f3f93e4..edb7c200c 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -4,7 +4,7 @@ "Major": 1, "Minor": 9, "Revision": 0, - "Build": 6601 + "Build": 6602 }, "Company": "Flax", "Copyright": "Copyright (c) 2012-2024 Wojciech Figat. 
All rights reserved.", From f326fa611f8d4f188544332e0cde60bbc691a9cd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 May 2024 12:41:39 +0200 Subject: [PATCH 085/292] Minor sdf imporvements --- Source/Engine/Tools/ModelTool/ModelTool.cpp | 29 ++++++++++----------- Source/Shaders/GI/DDGI.hlsl | 2 +- Source/Shaders/GlobalSignDistanceField.hlsl | 5 ++-- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index bfbd9421b..0e217b82b 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -18,6 +18,7 @@ #include "Engine/Content/Assets/Model.h" #include "Engine/Content/Content.h" #include "Engine/Serialization/MemoryWriteStream.h" +#include "Engine/Engine/Units.h" #if USE_EDITOR #include "Engine/Core/Utilities.h" #include "Engine/Core/Types/StringView.h" @@ -85,7 +86,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float return true; Float3 size = bounds.GetSize(); ModelBase::SDFData sdf; - sdf.WorldUnitsPerVoxel = 10 / Math::Max(resolutionScale, 0.0001f); + sdf.WorldUnitsPerVoxel = METERS_TO_UNITS(0.1f) / Math::Max(resolutionScale, 0.0001f); // 1 voxel per 10 centimeters Int3 resolution(Float3::Ceil(Float3::Clamp(size / sdf.WorldUnitsPerVoxel, 4, 256))); Float3 uvwToLocalMul = size; Float3 uvwToLocalAdd = bounds.Minimum; @@ -96,7 +97,6 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float sdf.LocalBoundsMax = bounds.Maximum; sdf.ResolutionScale = resolutionScale; sdf.LOD = lodIndex; - // TODO: maybe apply 1 voxel margin around the geometry? 
const int32 maxMips = 3; const int32 mipCount = Math::Min(MipLevelsCount(resolution.X, resolution.Y, resolution.Z, true), maxMips); PixelFormat format = PixelFormat::R16_UNorm; @@ -169,21 +169,20 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float // https://www.cse.chalmers.se/~uffe/HighResolutionSparseVoxelDAGs.pdf // Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel - const int32 sampleCount = 12; - Array sampleDirections; - sampleDirections.Resize(sampleCount); + constexpr int32 sampleCount = 12; + Float3 sampleDirections[sampleCount]; { RandomStream rand; - sampleDirections.Get()[0] = Float3::Up; - sampleDirections.Get()[1] = Float3::Down; - sampleDirections.Get()[2] = Float3::Left; - sampleDirections.Get()[3] = Float3::Right; - sampleDirections.Get()[4] = Float3::Forward; - sampleDirections.Get()[5] = Float3::Backward; + sampleDirections[0] = Float3::Up; + sampleDirections[1] = Float3::Down; + sampleDirections[2] = Float3::Left; + sampleDirections[3] = Float3::Right; + sampleDirections[4] = Float3::Forward; + sampleDirections[5] = Float3::Backward; for (int32 i = 6; i < sampleCount; i++) - sampleDirections.Get()[i] = rand.GetUnitVector(); + sampleDirections[i] = rand.GetUnitVector(); } - Function sdfJob = [&sdf, &resolution, &backfacesThreshold, &sampleDirections, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z) + Function sdfJob = [&sdf, &resolution, &backfacesThreshold, sampleDirections, &sampleCount, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z) { PROFILE_CPU_NAMED("Model SDF Job"); Real hitDistance; @@ -203,7 +202,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float // Raycast samples around voxel to count triangle backfaces hit int32 hitBackCount = 0, hitCount = 0; - for (int32 sample = 0; sample < 
sampleDirections.Count(); sample++) + for (int32 sample = 0; sample < sampleCount; sample++) { Ray sampleRay(voxelPos, sampleDirections[sample]); if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle)) @@ -218,7 +217,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float float distance = (float)minDistance; // TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry // if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0) - if ((float)hitBackCount > (float)sampleDirections.Count() * backfacesThreshold && hitCount != 0) + if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0) { // Voxel is inside the geometry so turn it into negative distance to the surface distance *= -1; diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 6d7c71936..e304b4ffc 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -213,11 +213,11 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T float2 uv = GetDDGIProbeUV(data, cascadeIndex, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE); float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f; float probeDistanceMean = probeDistance.x; - float probeDistanceMean2 = probeDistance.y; // Visibility weight (Chebyshev) if (biasedPosToProbeDist > probeDistanceMean) { + float probeDistanceMean2 = probeDistance.y; float probeDistanceVariance = abs(Square(probeDistanceMean) - probeDistanceMean2); float chebyshevWeight = probeDistanceVariance / (probeDistanceVariance + Square(biasedPosToProbeDist - probeDistanceMean)); weight *= max(chebyshevWeight * chebyshevWeight * chebyshevWeight, 0.05f); diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 1681460eb..93bdee22b 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ 
b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -223,7 +223,6 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T float4 cascadePosDistance = data.CascadePosDistance[cascade]; float voxelSize = data.CascadeVoxelSize[cascade]; float voxelExtent = voxelSize * 0.5f; - float cascadeMinStep = voxelSize; float3 worldPosition = trace.WorldPosition + trace.WorldDirection * (voxelSize * cascadeTraceStartBias); // Hit the cascade bounds to find the intersection points @@ -270,7 +269,7 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T stepDistance *= cascadeMaxDistance; // Detect surface hit - float minSurfaceThickness = voxelExtent * saturate(stepTime / (voxelExtent * 2.0f)); + float minSurfaceThickness = voxelExtent * saturate(stepTime / voxelSize); if (stepDistance < minSurfaceThickness) { // Surface hit @@ -293,7 +292,7 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T } // Move forward - stepTime += max(stepDistance * trace.StepScale, cascadeMinStep); + stepTime += max(stepDistance * trace.StepScale, voxelSize); } hit.StepsCount += step; } From 03b52f148ce3d70e0abf87df36661aa44c0daf7a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 May 2024 14:15:52 +0200 Subject: [PATCH 086/292] Improve 82ee84ad39c67f7bdf3cad1547871f02fa6bce5b to support incremental linking --- Source/Tools/Flax.Build/Build/Target.cs | 4 ++-- .../Flax.Build/Platforms/Windows/WindowsToolchainBase.cs | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Source/Tools/Flax.Build/Build/Target.cs b/Source/Tools/Flax.Build/Build/Target.cs index bc56254b6..90523b5ef 100644 --- a/Source/Tools/Flax.Build/Build/Target.cs +++ b/Source/Tools/Flax.Build/Build/Target.cs @@ -281,10 +281,10 @@ namespace Flax.Build options.CompileEnv.IntrinsicFunctions = true; options.CompileEnv.BufferSecurityCheck = true; options.CompileEnv.Inlining = true; - options.CompileEnv.WholeProgramOptimization = true; + 
options.CompileEnv.WholeProgramOptimization = false; options.LinkEnv.DebugInformation = true; - options.LinkEnv.LinkTimeCodeGeneration = true; + options.LinkEnv.LinkTimeCodeGeneration = false; options.LinkEnv.UseIncrementalLinking = true; options.LinkEnv.Optimization = true; break; diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs index 3693595ea..252d9134d 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs @@ -526,6 +526,12 @@ namespace Flax.Build.Platforms // Whole Program Optimization commonArgs.Add("/GL"); } + else + { + // Enable SEE2 and other code optimizations but without breaking incremental linking + //commonArgs.Add("/Og"); // Results in D9035 warning + commonArgs.Add("/O2"); + } } else { From 37d2b80a3df21059d75c7383d50028c22033be95 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 May 2024 14:16:14 +0200 Subject: [PATCH 087/292] Enable IntrinsicFunctions in Debug builds --- Source/Tools/Flax.Build/Build/Target.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Tools/Flax.Build/Build/Target.cs b/Source/Tools/Flax.Build/Build/Target.cs index 90523b5ef..09d817cca 100644 --- a/Source/Tools/Flax.Build/Build/Target.cs +++ b/Source/Tools/Flax.Build/Build/Target.cs @@ -260,7 +260,7 @@ namespace Flax.Build options.CompileEnv.DebugInformation = true; options.CompileEnv.RuntimeChecks = true; options.CompileEnv.StringPooling = false; - options.CompileEnv.IntrinsicFunctions = false; + options.CompileEnv.IntrinsicFunctions = true; options.CompileEnv.BufferSecurityCheck = true; options.CompileEnv.Inlining = false; options.CompileEnv.WholeProgramOptimization = false; From 9acb3a54826f3b65a4245d0140714da4046eac85 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 May 2024 14:21:30 +0200 Subject: [PATCH 088/292] Fix model thumbnail for 
very large assets --- Source/Editor/Content/Proxy/ModelProxy.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Content/Proxy/ModelProxy.cs b/Source/Editor/Content/Proxy/ModelProxy.cs index bf84703ad..15fab4681 100644 --- a/Source/Editor/Content/Proxy/ModelProxy.cs +++ b/Source/Editor/Content/Proxy/ModelProxy.cs @@ -94,7 +94,10 @@ namespace FlaxEditor.Content _preview.Model = (Model)request.Asset; _preview.Parent = guiRoot; _preview.SyncBackbufferSize(); - _preview.ViewportCamera.SetArcBallView(_preview.Model.GetBox()); + var bounds = _preview.Model.GetBox(); + var maxSize = Math.Max(0.001f, (float)bounds.Size.MaxValue); + _preview.ViewportCamera.SetArcBallView(bounds); + _preview.FarPlane = Mathf.Max(1000.0f, maxSize * 2 + 100.0f); _preview.Task.OnDraw(); } From 2c80f4402d335a1d0d57756d376099f9ed891494 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 May 2024 14:52:01 +0200 Subject: [PATCH 089/292] Add improved Model SDF generation with sampling offset and min distance accumulation --- Source/Engine/Tools/ModelTool/ModelTool.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index 0e217b82b..cbf080aaa 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -205,8 +205,11 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float for (int32 sample = 0; sample < sampleCount; sample++) { Ray sampleRay(voxelPos, sampleDirections[sample]); + sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle)) { + if (hitDistance < minDistance) + minDistance = hitDistance; hitCount++; const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0; if (backHit) From 23efaed29ed2d11e47449d56a96cd43e013231ae Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 
May 2024 14:58:57 +0200 Subject: [PATCH 090/292] Fix Model SDF texture streaming to properly mark highest loaded mip as resident --- .../Engine/Graphics/Async/Tasks/GPUUploadTextureMipTask.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/Async/Tasks/GPUUploadTextureMipTask.h b/Source/Engine/Graphics/Async/Tasks/GPUUploadTextureMipTask.h index 2fa98da52..9b87fb636 100644 --- a/Source/Engine/Graphics/Async/Tasks/GPUUploadTextureMipTask.h +++ b/Source/Engine/Graphics/Async/Tasks/GPUUploadTextureMipTask.h @@ -83,12 +83,16 @@ protected: auto texture = _texture.Get(); if (texture) { - // Check if the new mips has been just uploaded if (_mipIndex == texture->HighestResidentMipIndex() - 1) { - // Mark as mip loaded + // Mark the new mip as loaded texture->SetResidentMipLevels(texture->ResidentMipLevels() + 1); } + else + { + // Mark the new mip and all lower ones as loaded (eg. when loading Model SDF texture mips at once but out of order) + texture->SetResidentMipLevels(Math::Max(texture->ResidentMipLevels(), texture->MipLevels() - _mipIndex)); + } } // Base From 513afd603b1cfabce34ca7bd99ea285332ced417 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 24 May 2024 22:19:54 +0200 Subject: [PATCH 091/292] Add increased by 1 mip sdf texture resolution for GlobalSDF building --- Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 8e16a8dbf..b9830c6ee 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -660,8 +660,9 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex // Pick the SDF mip for the cascade int32 mipLevelIndex = 1; - float worldUnitsPerVoxel = object.SDF->WorldUnitsPerVoxel * object.LocalToWorld.Scale.MaxValue() * 
2; - while (_voxelSize > worldUnitsPerVoxel && mipLevelIndex < object.SDF->Texture->MipLevels()) + float worldUnitsPerVoxel = object.SDF->WorldUnitsPerVoxel * object.LocalToWorld.Scale.MaxValue() * 4; + const int32 mipLevels = object.SDF->Texture->MipLevels(); + while (_voxelSize > worldUnitsPerVoxel && mipLevelIndex < mipLevels) { mipLevelIndex++; worldUnitsPerVoxel *= 2.0f; From 0cee4ac973cd7305db2c26cdfc3e5a16021983ad Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 25 May 2024 09:52:09 +0200 Subject: [PATCH 092/292] Add SpawnParticles utility to Particle Effect --- .../Graph/CPU/ParticleEmitterGraph.CPU.cpp | 6 ++-- Source/Engine/Particles/ParticleEffect.cpp | 28 +++++++++++++++++++ Source/Engine/Particles/ParticleEffect.h | 7 +++++ .../Engine/Particles/ParticlesSimulation.cpp | 1 + Source/Engine/Particles/ParticlesSimulation.h | 5 ++++ 5 files changed, 45 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp index f0cf76ce4..8dae724ab 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp @@ -505,7 +505,8 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE } // Spawn particles - int32 spawnCount = 0; + int32 spawnCount = data.CustomSpawnCount; + data.CustomSpawnCount = 0; if (canSpawn) { PROFILE_CPU_NAMED("Spawn"); @@ -573,7 +574,8 @@ int32 ParticleEmitterGraphCPUExecutor::UpdateSpawn(ParticleEmitter* emitter, Par Init(emitter, effect, data, dt); // Spawn particles - int32 spawnCount = 0; + int32 spawnCount = data.CustomSpawnCount; + data.CustomSpawnCount = 0; for (int32 i = 0; i < _graph.SpawnModules.Count(); i++) { spawnCount += ProcessSpawnModule(i); diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index ab3066372..9b033641c 100644 --- 
a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -281,6 +281,34 @@ void ParticleEffect::UpdateSimulation(bool singleFrame) Particles::UpdateEffect(this); } +void ParticleEffect::SpawnParticles(int32 count, const StringView& emitterTrackName) +{ + auto system = ParticleSystem.Get(); + if (!system) + return; + if (emitterTrackName.IsEmpty()) + { + for (auto& e : Instance.Emitters) + e.CustomSpawnCount += count; + } + else + { + for (int32 i = 0; i < system->Tracks.Count(); i++) + { + auto& track = system->Tracks[i]; + if (track.Type == ParticleSystem::Track::Types::Emitter && track.Name == emitterTrackName) + { + const int32 emitterIndex = track.AsEmitter.Index; + if (Instance.Emitters.IsValidIndex(emitterIndex)) + { + Instance.Emitters.Get()[emitterIndex].CustomSpawnCount += count; + break; + } + } + } + } +} + void ParticleEffect::Play() { _isPlaying = true; diff --git a/Source/Engine/Particles/ParticleEffect.h b/Source/Engine/Particles/ParticleEffect.h index 5e2770f89..2b32b3ca2 100644 --- a/Source/Engine/Particles/ParticleEffect.h +++ b/Source/Engine/Particles/ParticleEffect.h @@ -350,6 +350,13 @@ public: /// True if update animation by a single frame only (time time since last engine update), otherwise will update simulation with delta time since last update. API_FUNCTION() void UpdateSimulation(bool singleFrame = false); + /// + /// Manually spawns additional particles into the simulation. + /// + /// Amount of particles to spawn. + /// Name of the emitter track to spawn particles in. Empty if spawn particles into all tracks. + API_FUNCTION() void SpawnParticles(int32 count, const StringView& emitterTrackName = String::Empty); + /// /// Plays the simulation. 
/// diff --git a/Source/Engine/Particles/ParticlesSimulation.cpp b/Source/Engine/Particles/ParticlesSimulation.cpp index 880f6389d..3fafc8ad6 100644 --- a/Source/Engine/Particles/ParticlesSimulation.cpp +++ b/Source/Engine/Particles/ParticlesSimulation.cpp @@ -26,6 +26,7 @@ void ParticleEmitterInstance::ClearState() Time = 0; SpawnModulesData.Clear(); CustomData.Clear(); + CustomSpawnCount = 0; #if COMPILE_WITH_GPU_PARTICLES GPU.DeltaTime = 0.0f; GPU.SpawnCount = 0; diff --git a/Source/Engine/Particles/ParticlesSimulation.h b/Source/Engine/Particles/ParticlesSimulation.h index 19303b971..ec2bd5e05 100644 --- a/Source/Engine/Particles/ParticlesSimulation.h +++ b/Source/Engine/Particles/ParticlesSimulation.h @@ -84,6 +84,11 @@ public: /// Array CustomData; + /// + /// The external amount of the particles to spawn. + /// + int32 CustomSpawnCount = 0; + struct { /// From 553b6f1ee8759446bd925270a3d68c725e026cf8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 27 May 2024 10:37:33 +0200 Subject: [PATCH 093/292] Add async execution to model window sdf generation button --- Source/Editor/Windows/Assets/ModelWindow.cs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Source/Editor/Windows/Assets/ModelWindow.cs b/Source/Editor/Windows/Assets/ModelWindow.cs index 45a3e61ea..51a2fa612 100644 --- a/Source/Editor/Windows/Assets/ModelWindow.cs +++ b/Source/Editor/Windows/Assets/ModelWindow.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Reflection; +using System.Threading.Tasks; using FlaxEditor.Content; using FlaxEditor.Content.Import; using FlaxEditor.CustomEditors; @@ -294,9 +295,17 @@ namespace FlaxEditor.Windows.Assets private void OnRebuildSDF() { var proxy = (MeshesPropertiesProxy)Values[0]; - proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold); - proxy.Window.MarkAsEdited(); - Presenter.BuildLayoutOnUpdate(); + 
proxy.Window.Enabled = false; + Task.Run(() => + { + proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold); + FlaxEngine.Scripting.InvokeOnUpdate(() => + { + proxy.Window.Enabled = true; + proxy.Window.MarkAsEdited(); + Presenter.BuildLayoutOnUpdate(); + }); + }); } private void OnRemoveSDF() From 054e77be426031c08a4d6b256467fabcbac04206 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 27 May 2024 10:37:51 +0200 Subject: [PATCH 094/292] Fix crash on exit when GPU async task failed --- Source/Engine/Graphics/Async/GPUTasksContext.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/Async/GPUTasksContext.cpp b/Source/Engine/Graphics/Async/GPUTasksContext.cpp index f4f58f4d0..11582fe78 100644 --- a/Source/Engine/Graphics/Async/GPUTasksContext.cpp +++ b/Source/Engine/Graphics/Async/GPUTasksContext.cpp @@ -76,12 +76,18 @@ void GPUTasksContext::OnFrameBegin() for (int32 i = 0; i < _tasksDone.Count(); i++) { auto task = _tasksDone[i]; - if (task->GetSyncPoint() <= _currentSyncPoint && task->GetState() != TaskState::Finished) + auto state = task->GetState(); + if (task->GetSyncPoint() <= _currentSyncPoint && state != TaskState::Finished) { // TODO: add stats counter and count performed jobs, print to log on exit. 
task->Sync(); } - if (task->GetState() == TaskState::Finished) + if (state == TaskState::Failed || state == TaskState::Canceled) + { + _tasksDone.RemoveAt(i); + i--; + } + if (state == TaskState::Finished) { _tasksDone.RemoveAt(i); i--; From 8a2acd360d5f9b60f1b7f7a9bef02f01d94b3844 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 28 May 2024 14:56:04 +0200 Subject: [PATCH 095/292] Fix `GPUTexture::GetData` to properly handle volume textures --- Source/Engine/Graphics/Textures/GPUTexture.h | 4 +-- .../Engine/Graphics/Textures/TextureBase.cpp | 32 +++++++++++++++++++ Source/Engine/Graphics/Textures/TextureData.h | 1 + .../DirectX/DX11/GPUTextureDX11.cpp | 31 ++---------------- .../DirectX/DX11/GPUTextureDX11.h | 16 +--------- .../DirectX/DX12/GPUTextureDX12.cpp | 30 ++--------------- .../DirectX/DX12/GPUTextureDX12.h | 2 +- .../GraphicsDevice/Null/GPUTextureNull.h | 11 +------ .../Vulkan/GPUTextureVulkan.cpp | 32 +++---------------- .../GraphicsDevice/Vulkan/GPUTextureVulkan.h | 8 +---- 10 files changed, 49 insertions(+), 118 deletions(-) diff --git a/Source/Engine/Graphics/Textures/GPUTexture.h b/Source/Engine/Graphics/Textures/GPUTexture.h index f39ed0b0f..685e59985 100644 --- a/Source/Engine/Graphics/Textures/GPUTexture.h +++ b/Source/Engine/Graphics/Textures/GPUTexture.h @@ -545,12 +545,12 @@ public: /// /// Gets texture mipmap data (raw bytes). Can be used only with textures created with Staging flag. /// - /// Array or depth slice index. + /// Array slice index. /// Mip map index. /// Output mip data. /// Output mip data row pitch to use. Use 0 to use the pitch from the internal GPU storage. /// True if failed, otherwise false. - virtual bool GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch = 0) = 0; + virtual bool GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch = 0) = 0; /// /// Sets the number of resident mipmap levels in the texture (already uploaded to the GPU). 
diff --git a/Source/Engine/Graphics/Textures/TextureBase.cpp b/Source/Engine/Graphics/Textures/TextureBase.cpp index 49a7ec2e4..f396abb79 100644 --- a/Source/Engine/Graphics/Textures/TextureBase.cpp +++ b/Source/Engine/Graphics/Textures/TextureBase.cpp @@ -169,6 +169,38 @@ bool TextureMipData::GetPixels(Array& pixels, int32 width, int32 height, return false; } +void TextureMipData::Copy(void* data, uint32 dataRowPitch, uint32 dataDepthPitch, uint32 dataDepthSlices, uint32 targetRowPitch) +{ + // Check if target row pitch is the same + if (targetRowPitch == dataRowPitch || targetRowPitch == 0) + { + Lines = dataDepthPitch / dataRowPitch; + DepthPitch = dataDepthPitch; + RowPitch = dataRowPitch; + + // Single memory copy + Data.Copy((byte*)data, dataDepthPitch * dataDepthSlices); + } + else + { + Lines = dataDepthPitch / dataRowPitch; + DepthPitch = targetRowPitch * Lines; + RowPitch = targetRowPitch; + + // Convert row by row + Data.Allocate(DepthPitch * dataDepthSlices); + for (uint32 depth = 0; depth < dataDepthSlices; depth++) + { + byte* src = (byte*)data + depth * dataDepthPitch; + byte* dst = Data.Get() + depth * DepthPitch; + for (uint32 row = 0; row < Lines; row++) + { + Platform::MemoryCopy(dst + row * RowPitch, src + row * dataRowPitch, RowPitch); + } + } + } +} + REGISTER_BINARY_ASSET_ABSTRACT(TextureBase, "FlaxEngine.TextureBase"); TextureBase::TextureBase(const SpawnParams& params, const AssetInfo* info) diff --git a/Source/Engine/Graphics/Textures/TextureData.h b/Source/Engine/Graphics/Textures/TextureData.h index 4c5da250a..a27dbf2b4 100644 --- a/Source/Engine/Graphics/Textures/TextureData.h +++ b/Source/Engine/Graphics/Textures/TextureData.h @@ -26,6 +26,7 @@ public: bool GetPixels(Array& pixels, int32 width, int32 height, PixelFormat format) const; bool GetPixels(Array& pixels, int32 width, int32 height, PixelFormat format) const; + void Copy(void* data, uint32 dataRowPitch, uint32 dataDepthPitch, uint32 dataDepthSlices, uint32 targetRowPitch); 
template T& Get(int32 x, int32 y) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.cpp index 30460835d..70c37004f 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.cpp @@ -549,18 +549,17 @@ void GPUTextureDX11::initHandles() } } -bool GPUTextureDX11::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) +bool GPUTextureDX11::GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) { if (!IsStaging()) { LOG(Warning, "Texture::GetData is valid only for staging resources."); return true; } - GPUDeviceLock lock(_device); // Map the staging resource mip map for reading - const uint32 subresource = RenderToolsDX::CalcSubresourceIndex(mipMapIndex, arrayOrDepthSliceIndex, MipLevels()); + const uint32 subresource = RenderToolsDX::CalcSubresourceIndex(mipMapIndex, arrayIndex, MipLevels()); D3D11_MAPPED_SUBRESOURCE mapped; const HRESULT mapResult = _device->GetIM()->Map(_resource, subresource, D3D11_MAP_READ, 0, &mapped); if (FAILED(mapResult)) @@ -569,31 +568,7 @@ bool GPUTextureDX11::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, Te return true; } - // Check if target row pitch is the same - if (mipRowPitch == mapped.RowPitch || mipRowPitch == 0) - { - // Init mip info - data.Lines = mapped.DepthPitch / mapped.RowPitch; - data.DepthPitch = mapped.DepthPitch; - data.RowPitch = mapped.RowPitch; - - // Copy data - data.Data.Copy((byte*)mapped.pData, mapped.DepthPitch); - } - else - { - // Init mip info - data.Lines = mapped.DepthPitch / mapped.RowPitch; - data.DepthPitch = mipRowPitch * data.Lines; - data.RowPitch = mipRowPitch; - - // Copy data - data.Data.Allocate(data.DepthPitch); - for (uint32 i = 0; i < data.Lines; i++) - { - Platform::MemoryCopy(data.Data.Get() + data.RowPitch * i, ((byte*)mapped.pData) + 
mapped.RowPitch * i, data.RowPitch); - } - } + data.Copy(mapped.pData, mapped.RowPitch, mapped.DepthPitch, Depth(), mipRowPitch); // Unmap texture _device->GetIM()->Unmap(_resource, subresource); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.h index 3a1df5035..6cc2124f9 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUTextureDX11.h @@ -176,7 +176,6 @@ public: class GPUTextureDX11 : public GPUResourceDX11 { private: - ID3D11Resource* _resource = nullptr; GPUTextureViewDX11 _handleArray; @@ -191,7 +190,6 @@ private: DXGI_FORMAT _dxgiFormatUAV; public: - /// /// Initializes a new instance of the class. /// @@ -203,18 +201,15 @@ public: } public: - /// /// Gets DX11 texture resource. /// - /// DX11 texture resource. FORCE_INLINE ID3D11Resource* GetResource() const { return _resource; } private: - void initHandles(); ID3D11Texture2D* GetTexture2D() const @@ -222,7 +217,6 @@ private: ASSERT(_desc.Dimensions == TextureDimensions::Texture || _desc.Dimensions == TextureDimensions::CubeTexture); return (ID3D11Texture2D*)_resource; } - ID3D11Texture3D* GetTexture3D() const { ASSERT(_desc.Dimensions == TextureDimensions::VolumeTexture); @@ -230,42 +224,35 @@ private: } public: - // [GPUTexture] GPUTextureView* View(int32 arrayOrDepthIndex) const override { return (GPUTextureView*)&_handlesPerSlice[arrayOrDepthIndex]; } - GPUTextureView* View(int32 arrayOrDepthIndex, int32 mipMapIndex) const override { return (GPUTextureView*)&_handlesPerMip[arrayOrDepthIndex][mipMapIndex]; } - GPUTextureView* ViewArray() const override { ASSERT(ArraySize() > 1); return (GPUTextureView*)&_handleArray; } - GPUTextureView* ViewVolume() const override { ASSERT(IsVolume()); return (GPUTextureView*)&_handleVolume; } - GPUTextureView* ViewReadOnlyDepth() const override { ASSERT(_desc.Flags & GPUTextureFlags::ReadOnlyDepthView); return 
(GPUTextureView*)&_handleReadOnlyDepth; } - void* GetNativePtr() const override { return static_cast(_resource); } - - bool GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override; + bool GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override; // [GPUResourceDX11] ID3D11Resource* GetResource() override @@ -274,7 +261,6 @@ public: } protected: - // [GPUTexture] bool OnInit() override; void OnResidentMipsChanged() override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp index ea5b11728..e6239f406 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp @@ -7,7 +7,7 @@ #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Graphics/Textures/TextureData.h" -bool GPUTextureDX12::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) +bool GPUTextureDX12::GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) { if (!IsStaging()) { @@ -18,7 +18,7 @@ bool GPUTextureDX12::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, Te GPUDeviceLock lock(_device); // Internally it's a buffer, so adapt resource index and offset - const uint32 subresource = RenderToolsDX::CalcSubresourceIndex(mipMapIndex, arrayOrDepthSliceIndex, MipLevels()); + const uint32 subresource = RenderToolsDX::CalcSubresourceIndex(mipMapIndex, arrayIndex, MipLevels()); const int32 offsetInBytes = ComputeBufferOffset(subresource, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); const int32 lengthInBytes = ComputeSubresourceSize(subresource, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); const int32 rowPitch = ComputeRowPitch(mipMapIndex, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); @@ -37,31 +37,7 @@ bool 
GPUTextureDX12::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, Te } mapped = (byte*)mapped + offsetInBytes; - // Check if target row pitch is the same - if (mipRowPitch == rowPitch || mipRowPitch == 0) - { - // Init mip info - data.Lines = depthPitch / rowPitch; - data.DepthPitch = depthPitch; - data.RowPitch = rowPitch; - - // Copy data - data.Data.Copy((byte*)mapped, depthPitch); - } - else - { - // Init mip info - data.Lines = depthPitch / rowPitch; - data.DepthPitch = mipRowPitch * data.Lines; - data.RowPitch = mipRowPitch; - - // Copy data - data.Data.Allocate(data.DepthPitch); - for (uint32 i = 0; i < data.Lines; i++) - { - Platform::MemoryCopy(data.Data.Get() + data.RowPitch * i, ((byte*)mapped) + rowPitch * i, data.RowPitch); - } - } + data.Copy(mapped, rowPitch, depthPitch, Depth(), mipRowPitch); // Unmap buffer _resource->Unmap(0, nullptr); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.h index 0c34c1674..c89ad2cb8 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.h @@ -186,7 +186,7 @@ public: { return (void*)_resource; } - bool GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override; + bool GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override; // [ResourceOwnerDX12] GPUResource* AsGPUResource() const override diff --git a/Source/Engine/GraphicsDevice/Null/GPUTextureNull.h b/Source/Engine/GraphicsDevice/Null/GPUTextureNull.h index 858fcc11e..d73c0d37c 100644 --- a/Source/Engine/GraphicsDevice/Null/GPUTextureNull.h +++ b/Source/Engine/GraphicsDevice/Null/GPUTextureNull.h @@ -12,51 +12,42 @@ class GPUTextureNull : public GPUTexture { public: - // [GPUTexture] GPUTextureView* View(int32 arrayOrDepthIndex) const override { return nullptr; } - GPUTextureView* View(int32 arrayOrDepthIndex, 
int32 mipMapIndex) const override { return nullptr; } - GPUTextureView* ViewArray() const override { return nullptr; } - GPUTextureView* ViewVolume() const override { return nullptr; } - GPUTextureView* ViewReadOnlyDepth() const override { return nullptr; } - void* GetNativePtr() const override { return nullptr; } - - bool GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override + bool GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override { return true; } protected: - // [GPUTexture] bool OnInit() override { return false; } - void OnResidentMipsChanged() override { } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp index 3ef9aa9c3..ad7b35c9e 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp @@ -179,7 +179,7 @@ void GPUTextureViewVulkan::DescriptorAsStorageImage(GPUContextVulkan* context, V context->AddImageBarrier(this, VK_IMAGE_LAYOUT_GENERAL); } -bool GPUTextureVulkan::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) +bool GPUTextureVulkan::GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) { if (!IsStaging()) { @@ -189,12 +189,12 @@ bool GPUTextureVulkan::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, GPUDeviceLock lock(_device); // Internally it's a buffer, so adapt resource index and offset - const uint32 subresource = mipMapIndex + arrayOrDepthSliceIndex * MipLevels(); + const uint32 subresource = mipMapIndex + arrayIndex * MipLevels(); // TODO: rowAlign/sliceAlign on Vulkan texture ??? 
int32 offsetInBytes = ComputeBufferOffset(subresource, 1, 1); int32 lengthInBytes = ComputeSubresourceSize(subresource, 1, 1); int32 rowPitch = ComputeRowPitch(mipMapIndex, 1); - int32 depthPicth = ComputeSlicePitch(mipMapIndex, 1); + int32 depthPitch = ComputeSlicePitch(mipMapIndex, 1); // Map the staging resource mip map for reading auto allocation = StagingBuffer->GetAllocation(); @@ -205,31 +205,7 @@ bool GPUTextureVulkan::GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, // Shift mapped buffer to the beginning of the mip data start mapped = (void*)((byte*)mapped + offsetInBytes); - // Check if target row pitch is the same - if (mipRowPitch == rowPitch || mipRowPitch == 0) - { - // Init mip info - data.Lines = depthPicth / rowPitch; - data.DepthPitch = depthPicth; - data.RowPitch = rowPitch; - - // Copy data - data.Data.Copy((byte*)mapped, depthPicth); - } - else - { - // Init mip info - data.Lines = depthPicth / rowPitch; - data.DepthPitch = mipRowPitch * data.Lines; - data.RowPitch = mipRowPitch; - - // Copy data - data.Data.Allocate(data.DepthPitch); - for (uint32 i = 0; i < data.Lines; i++) - { - Platform::MemoryCopy(data.Data.Get() + data.RowPitch * i, ((byte*)mapped) + rowPitch * i, data.RowPitch); - } - } + data.Copy(mapped, rowPitch, depthPitch, Depth(), mipRowPitch); // Unmap resource vmaUnmapMemory(_device->Allocator, allocation); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h index 0c77a73e2..46da296d6 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h @@ -133,36 +133,30 @@ public: { return (GPUTextureView*)&_handlesPerSlice[arrayOrDepthIndex]; } - GPUTextureView* View(int32 arrayOrDepthIndex, int32 mipMapIndex) const override { return (GPUTextureView*)&_handlesPerMip[arrayOrDepthIndex][mipMapIndex]; } - GPUTextureView* ViewArray() const override { ASSERT(ArraySize() > 1); return 
(GPUTextureView*)&_handleArray; } - GPUTextureView* ViewVolume() const override { ASSERT(IsVolume()); return (GPUTextureView*)&_handleVolume; } - GPUTextureView* ViewReadOnlyDepth() const override { ASSERT(_desc.Flags & GPUTextureFlags::ReadOnlyDepthView); return (GPUTextureView*)&_handleReadOnlyDepth; } - void* GetNativePtr() const override { return (void*)_image; } - - bool GetData(int32 arrayOrDepthSliceIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override; + bool GetData(int32 arrayIndex, int32 mipMapIndex, TextureMipData& data, uint32 mipRowPitch) override; // [ResourceOwnerVulkan] GPUResource* AsGPUResource() const override From 5f4c57d3eb9c377e491dd5f940946a0966df978c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 28 May 2024 14:56:38 +0200 Subject: [PATCH 096/292] Fix texture copy into staging one on DX12 and Vulkan --- .../DirectX/DX12/GPUContextDX12.cpp | 27 +++-- .../Vulkan/GPUContextVulkan.cpp | 100 ++++++++++++------ .../Vulkan/GPUTextureVulkan.cpp | 1 + 3 files changed, 90 insertions(+), 38 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 2eb143857..970e55ce5 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -42,18 +42,18 @@ inline bool operator!=(const D3D12_INDEX_BUFFER_VIEW& l, const D3D12_INDEX_BUFFE // Ensure to match the indirect commands arguments layout static_assert(sizeof(GPUDispatchIndirectArgs) == sizeof(D3D12_DISPATCH_ARGUMENTS), "Wrong size of GPUDrawIndirectArgs."); static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountX) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountX), "Wrong offset for GPUDrawIndirectArgs::ThreadGroupCountX"); -static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountY) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountY),"Wrong offset for 
GPUDrawIndirectArgs::ThreadGroupCountY"); +static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountY) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountY), "Wrong offset for GPUDrawIndirectArgs::ThreadGroupCountY"); static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountZ) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountZ), "Wrong offset for GPUDrawIndirectArgs::ThreadGroupCountZ"); // static_assert(sizeof(GPUDrawIndirectArgs) == sizeof(D3D12_DRAW_ARGUMENTS), "Wrong size of GPUDrawIndirectArgs."); static_assert(OFFSET_OF(GPUDrawIndirectArgs, VerticesCount) == OFFSET_OF(D3D12_DRAW_ARGUMENTS, VertexCountPerInstance), "Wrong offset for GPUDrawIndirectArgs::VerticesCount"); -static_assert(OFFSET_OF(GPUDrawIndirectArgs, InstanceCount) == OFFSET_OF(D3D12_DRAW_ARGUMENTS, InstanceCount),"Wrong offset for GPUDrawIndirectArgs::InstanceCount"); +static_assert(OFFSET_OF(GPUDrawIndirectArgs, InstanceCount) == OFFSET_OF(D3D12_DRAW_ARGUMENTS, InstanceCount), "Wrong offset for GPUDrawIndirectArgs::InstanceCount"); static_assert(OFFSET_OF(GPUDrawIndirectArgs, StartVertex) == OFFSET_OF(D3D12_DRAW_ARGUMENTS, StartVertexLocation), "Wrong offset for GPUDrawIndirectArgs::StartVertex"); static_assert(OFFSET_OF(GPUDrawIndirectArgs, StartInstance) == OFFSET_OF(D3D12_DRAW_ARGUMENTS, StartInstanceLocation), "Wrong offset for GPUDrawIndirectArgs::StartInstance"); // static_assert(sizeof(GPUDrawIndexedIndirectArgs) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS), "Wrong size of GPUDrawIndexedIndirectArgs."); static_assert(OFFSET_OF(GPUDrawIndexedIndirectArgs, IndicesCount) == OFFSET_OF(D3D12_DRAW_INDEXED_ARGUMENTS, IndexCountPerInstance), "Wrong offset for GPUDrawIndexedIndirectArgs::IndicesCount"); -static_assert(OFFSET_OF(GPUDrawIndexedIndirectArgs, InstanceCount) == OFFSET_OF(D3D12_DRAW_INDEXED_ARGUMENTS, InstanceCount),"Wrong offset for GPUDrawIndexedIndirectArgs::InstanceCount"); +static_assert(OFFSET_OF(GPUDrawIndexedIndirectArgs, InstanceCount) == 
OFFSET_OF(D3D12_DRAW_INDEXED_ARGUMENTS, InstanceCount), "Wrong offset for GPUDrawIndexedIndirectArgs::InstanceCount"); static_assert(OFFSET_OF(GPUDrawIndexedIndirectArgs, StartIndex) == OFFSET_OF(D3D12_DRAW_INDEXED_ARGUMENTS, StartIndexLocation), "Wrong offset for GPUDrawIndexedIndirectArgs::StartIndex"); static_assert(OFFSET_OF(GPUDrawIndexedIndirectArgs, StartVertex) == OFFSET_OF(D3D12_DRAW_INDEXED_ARGUMENTS, BaseVertexLocation), "Wrong offset for GPUDrawIndexedIndirectArgs::StartVertex"); static_assert(OFFSET_OF(GPUDrawIndexedIndirectArgs, StartInstance) == OFFSET_OF(D3D12_DRAW_INDEXED_ARGUMENTS, StartInstanceLocation), "Wrong offset for GPUDrawIndexedIndirectArgs::StartInstance"); @@ -1259,8 +1259,23 @@ void GPUContextDX12::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, // Get destination copy location D3D12_TEXTURE_COPY_LOCATION dst; dst.pResource = dstTextureDX12->GetResource(); - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.SubresourceIndex = dstSubresource; + if (dstTextureDX12->IsStaging()) + { + const int32 mipLevel = (int32)dstSubresource % dstTextureDX12->MipLevels(); + const int32 copyOffset = dstTextureDX12->ComputeBufferOffset((int32)dstSubresource, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst.PlacedFootprint.Offset = copyOffset; + dst.PlacedFootprint.Footprint.Width = dstTextureDX12->CalculateMipSize(dstTextureDX12->Width(), mipLevel); + dst.PlacedFootprint.Footprint.Height = dstTextureDX12->CalculateMipSize(dstTextureDX12->Height(), mipLevel); + dst.PlacedFootprint.Footprint.Depth = dstTextureDX12->CalculateMipSize(dstTextureDX12->Depth(), mipLevel); + dst.PlacedFootprint.Footprint.Format = RenderToolsDX::ToDxgiFormat(dstTextureDX12->Format()); + dst.PlacedFootprint.Footprint.RowPitch = dstTextureDX12->ComputeRowPitch(mipLevel, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + } + else + { + dst.Type = 
D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.SubresourceIndex = dstSubresource; + } // Get source copy location D3D12_TEXTURE_COPY_LOCATION src; @@ -1455,7 +1470,7 @@ void GPUContextDX12::ForceRebindDescriptors() _commandList->SetComputeRootSignature(_device->GetRootSignature()); // Bind heaps - ID3D12DescriptorHeap* ppHeaps[] = {_device->RingHeap_CBV_SRV_UAV.GetHeap(), _device->RingHeap_Sampler.GetHeap()}; + ID3D12DescriptorHeap* ppHeaps[] = { _device->RingHeap_CBV_SRV_UAV.GetHeap(), _device->RingHeap_Sampler.GetHeap() }; _commandList->SetDescriptorHeaps(ARRAY_COUNT(ppHeaps), ppHeaps); } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 38ef0eb68..5023bb21f 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -1311,10 +1311,6 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 // Use direct update for small buffers const uint32 alignedSize = Math::AlignUp(size, 4); - if (alignedSize > buffer->GetSize()) - { - int a= 1; - } if (size <= 16 * 1024 && alignedSize <= buffer->GetSize()) { //AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT); @@ -1413,37 +1409,77 @@ void GPUContextVulkan::CopyTexture(GPUTexture* dstResource, uint32 dstSubresourc const auto dstTextureVulkan = static_cast(dstResource); const auto srcTextureVulkan = static_cast(srcResource); - // Transition resources - AddImageBarrier(dstTextureVulkan, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - AddImageBarrier(srcTextureVulkan, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - FlushBarriers(); - - // Prepare - const int32 dstMipIndex = dstSubresource % dstTextureVulkan->MipLevels(); - const int32 dstArrayIndex = dstSubresource / dstTextureVulkan->MipLevels(); - const int32 srcMipIndex = srcSubresource % srcTextureVulkan->MipLevels(); - const int32 srcArrayIndex = srcSubresource / 
srcTextureVulkan->MipLevels(); + const int32 dstMipIndex = (int32)dstSubresource % dstTextureVulkan->MipLevels(); + const int32 dstArrayIndex = (int32)dstSubresource / dstTextureVulkan->MipLevels(); + const int32 srcMipIndex = (int32)srcSubresource % srcTextureVulkan->MipLevels(); + const int32 srcArrayIndex = (int32)srcSubresource / srcTextureVulkan->MipLevels(); int32 mipWidth, mipHeight, mipDepth; srcTextureVulkan->GetMipSize(srcMipIndex, mipWidth, mipHeight, mipDepth); - // Copy - VkImageCopy region; - Platform::MemoryClear(&region, sizeof(VkBufferImageCopy)); - region.extent.width = mipWidth; - region.extent.height = mipHeight; - region.extent.depth = mipDepth; - region.dstOffset.x = dstX; - region.dstOffset.y = dstY; - region.dstOffset.z = dstZ; - region.srcSubresource.baseArrayLayer = srcArrayIndex; - region.srcSubresource.layerCount = 1; - region.srcSubresource.mipLevel = srcMipIndex; - region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.dstSubresource.baseArrayLayer = dstArrayIndex; - region.dstSubresource.layerCount = 1; - region.dstSubresource.mipLevel = dstMipIndex; - region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - vkCmdCopyImage(cmdBuffer->GetHandle(), srcTextureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dstTextureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); + if (dstTextureVulkan->IsStaging()) + { + // Staging Texture -> Staging Texture + if (srcTextureVulkan->IsStaging()) + { + ASSERT(dstTextureVulkan->StagingBuffer && srcTextureVulkan->StagingBuffer); + CopyResource(dstTextureVulkan->StagingBuffer, srcTextureVulkan->StagingBuffer); + } + // Texture -> Staging Texture + else + { + // Transition resources + ASSERT(dstTextureVulkan->StagingBuffer); + AddBufferBarrier(dstTextureVulkan->StagingBuffer, VK_ACCESS_TRANSFER_WRITE_BIT); + AddImageBarrier(srcTextureVulkan, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + FlushBarriers(); + + // Copy + VkBufferImageCopy region; + 
Platform::MemoryClear(&region, sizeof(VkBufferImageCopy)); + region.bufferOffset = 0; // TODO: calculate it based on dstSubresource and dstX/dstY/dstZ + ASSERT(dstX == dstY == dstZ == 0); + ASSERT(dstSubresource == 0); + region.bufferRowLength = mipWidth; + region.bufferImageHeight = mipHeight; + region.imageOffset.x = 0; + region.imageOffset.y = 0; + region.imageOffset.z = 0; + region.imageExtent.width = mipWidth; + region.imageExtent.height = mipHeight; + region.imageExtent.depth = mipDepth; + region.imageSubresource.baseArrayLayer = srcArrayIndex; + region.imageSubresource.layerCount = 1; + region.imageSubresource.mipLevel = srcMipIndex; + region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkCmdCopyImageToBuffer(cmdBuffer->GetHandle(), srcTextureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dstTextureVulkan->StagingBuffer->GetHandle(), 1, &region); + } + } + else + { + // Transition resources + AddImageBarrier(dstTextureVulkan, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + AddImageBarrier(srcTextureVulkan, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + FlushBarriers(); + + // Copy + VkImageCopy region; + Platform::MemoryClear(&region, sizeof(VkBufferImageCopy)); + region.extent.width = mipWidth; + region.extent.height = mipHeight; + region.extent.depth = mipDepth; + region.dstOffset.x = (int32_t)dstX; + region.dstOffset.y = (int32_t)dstY; + region.dstOffset.z = (int32_t)dstZ; + region.srcSubresource.baseArrayLayer = srcArrayIndex; + region.srcSubresource.layerCount = 1; + region.srcSubresource.mipLevel = srcMipIndex; + region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.dstSubresource.baseArrayLayer = dstArrayIndex; + region.dstSubresource.layerCount = 1; + region.dstSubresource.mipLevel = dstMipIndex; + region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkCmdCopyImage(cmdBuffer->GetHandle(), srcTextureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dstTextureVulkan->GetHandle(), 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); + } } void GPUContextVulkan::ResetCounter(GPUBuffer* buffer) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp index ad7b35c9e..62c0418dc 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.cpp @@ -235,6 +235,7 @@ bool GPUTextureVulkan::OnInit() return true; } _memoryUsage = 1; + initResource(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, _desc.MipLevels, _desc.ArraySize, false); return false; } From 53d77d3421d99f7186907fe8f7fa3eb349b44a67 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 14:53:13 +0200 Subject: [PATCH 097/292] Add **Model SDF baking on GPU** via Compute Shader --- Content/Shaders/GI/GlobalSurfaceAtlas.flax | 4 +- Content/Shaders/SDF.flax | 3 + Source/Editor/Cooker/Steps/DeployDataStep.cpp | 1 + Source/Editor/Windows/Assets/ModelWindow.cs | 5 +- Source/Engine/Content/AssetReference.h | 4 +- Source/Engine/Content/Assets/Model.cpp | 7 +- Source/Engine/Content/Assets/Model.h | 3 +- Source/Engine/Tools/ModelTool/ModelTool.cpp | 463 ++++++++++++++---- Source/Engine/Tools/ModelTool/ModelTool.h | 2 +- Source/Shaders/SDF.shader | 272 ++++++++++ .../ThirdParty/TressFX/TressFXSDF.hlsl | 121 +++++ 11 files changed, 793 insertions(+), 92 deletions(-) create mode 100644 Content/Shaders/SDF.flax create mode 100644 Source/Shaders/SDF.shader create mode 100644 Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax index d3f164f56..7963c2a71 100644 --- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax +++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ef0f096465bb267138c7f10ec745e171a6fd642a22801f339eb6da260665f0b -size 12626 +oid 
sha256:5bccb119c58a4fcec267e452bdf6026b7e14531ffcf60680026ce964945457cb +size 12461 diff --git a/Content/Shaders/SDF.flax b/Content/Shaders/SDF.flax new file mode 100644 index 000000000..9f9a6a261 --- /dev/null +++ b/Content/Shaders/SDF.flax @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd485ffce3c1d002621d795968cfda9c68555600157332dde91618d75881207e +size 7903 diff --git a/Source/Editor/Cooker/Steps/DeployDataStep.cpp b/Source/Editor/Cooker/Steps/DeployDataStep.cpp index e9e41a8ea..0b4f5d939 100644 --- a/Source/Editor/Cooker/Steps/DeployDataStep.cpp +++ b/Source/Editor/Cooker/Steps/DeployDataStep.cpp @@ -344,6 +344,7 @@ bool DeployDataStep::Perform(CookingData& data) data.AddRootEngineAsset(TEXT("Shaders/Sky")); data.AddRootEngineAsset(TEXT("Shaders/SSAO")); data.AddRootEngineAsset(TEXT("Shaders/SSR")); + data.AddRootEngineAsset(TEXT("Shaders/SDF")); data.AddRootEngineAsset(TEXT("Shaders/VolumetricFog")); data.AddRootEngineAsset(TEXT("Engine/DefaultMaterial")); data.AddRootEngineAsset(TEXT("Engine/DefaultDeformableMaterial")); diff --git a/Source/Editor/Windows/Assets/ModelWindow.cs b/Source/Editor/Windows/Assets/ModelWindow.cs index 51a2fa612..876736c55 100644 --- a/Source/Editor/Windows/Assets/ModelWindow.cs +++ b/Source/Editor/Windows/Assets/ModelWindow.cs @@ -298,11 +298,12 @@ namespace FlaxEditor.Windows.Assets proxy.Window.Enabled = false; Task.Run(() => { - proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold); + bool failed = proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold); FlaxEngine.Scripting.InvokeOnUpdate(() => { proxy.Window.Enabled = true; - proxy.Window.MarkAsEdited(); + if (!failed) + proxy.Window.MarkAsEdited(); Presenter.BuildLayoutOnUpdate(); }); }); diff --git a/Source/Engine/Content/AssetReference.h b/Source/Engine/Content/AssetReference.h 
index dfa52c742..170c1b67c 100644 --- a/Source/Engine/Content/AssetReference.h +++ b/Source/Engine/Content/AssetReference.h @@ -93,7 +93,7 @@ public: /// The asset to set. AssetReference(T* asset) { - OnSet(asset); + OnSet((Asset*)asset); } /// @@ -215,7 +215,7 @@ public: /// The asset. void Set(T* asset) { - OnSet(asset); + OnSet((Asset*)asset); } }; diff --git a/Source/Engine/Content/Assets/Model.cpp b/Source/Engine/Content/Assets/Model.cpp index 3bcaf4330..33d244d0c 100644 --- a/Source/Engine/Content/Assets/Model.cpp +++ b/Source/Engine/Content/Assets/Model.cpp @@ -650,7 +650,7 @@ bool Model::Save(bool withMeshDataFromGpu, const StringView& path) #endif -bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, float backfacesThreshold) +bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, float backfacesThreshold, bool useGPU) { if (EnableModelSDF == 2) return true; // Not supported @@ -673,7 +673,10 @@ bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, f #else class MemoryWriteStream* outputStream = nullptr; #endif - if (ModelTool::GenerateModelSDF(this, nullptr, resolutionScale, lodIndex, &SDF, outputStream, GetPath(), backfacesThreshold)) + Locker.Unlock(); + const bool failed = ModelTool::GenerateModelSDF(this, nullptr, resolutionScale, lodIndex, &SDF, outputStream, GetPath(), backfacesThreshold, useGPU); + Locker.Lock(); + if (failed) return true; #if USE_EDITOR diff --git a/Source/Engine/Content/Assets/Model.h b/Source/Engine/Content/Assets/Model.h index c02a3bdcf..5c5310bb9 100644 --- a/Source/Engine/Content/Assets/Model.h +++ b/Source/Engine/Content/Assets/Model.h @@ -227,8 +227,9 @@ public: /// The index of the LOD to use for the SDF building. /// If true, the generated SDF texture data will be cached on CPU (in asset chunk storage) to allow saving it later, otherwise it will be runtime for GPU-only. Ignored for virtual assets or in build. 
/// Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. Use lower value for more dense mesh. + /// Enables using GPU for SDF generation, otherwise CPU will be used (async via Job System). /// True if failed, otherwise false. - API_FUNCTION() bool GenerateSDF(float resolutionScale = 1.0f, int32 lodIndex = 6, bool cacheData = true, float backfacesThreshold = 0.6f); + API_FUNCTION() bool GenerateSDF(float resolutionScale = 1.0f, int32 lodIndex = 6, bool cacheData = true, float backfacesThreshold = 0.6f, bool useGPU = true); /// /// Sets set SDF data (releases the current one). diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index cbf080aaa..99c98bb0e 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -8,15 +8,20 @@ #include "Engine/Core/RandomStream.h" #include "Engine/Core/Math/Vector3.h" #include "Engine/Core/Math/Ray.h" -#include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Platform/ConditionVariable.h" +#include "Engine/Profiler/Profiler.h" #include "Engine/Threading/JobSystem.h" +#include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUBuffer.h" #include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/Async/GPUTask.h" +#include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Graphics/Textures/GPUTexture.h" #include "Engine/Graphics/Textures/TextureData.h" #include "Engine/Graphics/Models/ModelData.h" #include "Engine/Content/Assets/Model.h" #include "Engine/Content/Content.h" +#include "Engine/Content/Assets/Shader.h" #include "Engine/Serialization/MemoryWriteStream.h" #include "Engine/Engine/Units.h" #if USE_EDITOR @@ -71,7 +76,261 @@ ModelSDFMip::ModelSDFMip(int32 mipIndex, const TextureMipData& mip) { } -bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* 
outputSDF, MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold) +class GPUModelSDFTask : public GPUTask +{ + ConditionVariable* _signal; + AssetReference _shader; + Model* _inputModel; + ModelData* _modelData; + int32 _lodIndex; + Int3 _resolution; + ModelBase::SDFData* _sdf; + GPUBuffer *_sdfSrc, *_sdfDst; + GPUTexture* _sdfResult; + Float3 _xyzToLocalMul, _xyzToLocalAdd; + + const uint32 ThreadGroupSize = 64; + PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data + { + Int3 Resolution; + uint32 ResolutionSize; + float MaxDistance; + uint32 VertexStride; + int32 Index16bit; + uint32 TriangleCount; + Float3 VoxelToPosMul; + float WorldUnitsPerVoxel; + Float3 VoxelToPosAdd; + uint32 ThreadGroupsX; + }); + +public: + GPUModelSDFTask(ConditionVariable& signal, Model* inputModel, ModelData* modelData, int32 lodIndex, const Int3& resolution, ModelBase::SDFData* sdf, GPUTexture* sdfResult, const Float3& xyzToLocalMul, const Float3& xyzToLocalAdd) + : GPUTask(Type::Custom) + , _signal(&signal) + , _shader(Content::LoadAsyncInternal(TEXT("Shaders/SDF"))) + , _inputModel(inputModel) + , _modelData(modelData) + , _lodIndex(lodIndex) + , _resolution(resolution) + , _sdf(sdf) + , _sdfSrc(GPUBuffer::New()) + , _sdfDst(GPUBuffer::New()) + , _sdfResult(sdfResult) + , _xyzToLocalMul(xyzToLocalMul) + , _xyzToLocalAdd(xyzToLocalAdd) + { + } + + ~GPUModelSDFTask() + { + SAFE_DELETE_GPU_RESOURCE(_sdfSrc); + SAFE_DELETE_GPU_RESOURCE(_sdfDst); + } + + Result run(GPUTasksContext* tasksContext) override + { + PROFILE_GPU_CPU("GPUModelSDFTask"); + GPUContext* context = tasksContext->GPU; + + // Allocate resources + if (_shader == nullptr || _shader->WaitForLoaded()) + return Result::Failed; + GPUShader* shader = _shader->GetShader(); + const uint32 resolutionSize = _resolution.X * _resolution.Y * _resolution.Z; + auto desc = GPUBufferDescription::Typed(resolutionSize, PixelFormat::R32_UInt, true); + // TODO: use transient texture (single frame) + if 
(_sdfSrc->Init(desc) || _sdfDst->Init(desc)) + return Result::Failed; + auto cb = shader->GetCB(0); + Data data; + data.Resolution = _resolution; + data.ResolutionSize = resolutionSize; + data.MaxDistance = _sdf->MaxDistance; + data.WorldUnitsPerVoxel = _sdf->WorldUnitsPerVoxel; + data.VoxelToPosMul = _xyzToLocalMul; + data.VoxelToPosAdd = _xyzToLocalAdd; + + // Dispatch in 1D and fallback to 2D when using large resolution + Int3 threadGroups(Math::CeilToInt((float)resolutionSize / ThreadGroupSize), 1, 1); + if (threadGroups.X > GPU_MAX_CS_DISPATCH_THREAD_GROUPS) + { + const uint32 groups = threadGroups.X; + threadGroups.X = Math::CeilToInt(Math::Sqrt((float)groups)); + threadGroups.Y = Math::CeilToInt((float)groups / threadGroups.X); + } + data.ThreadGroupsX = threadGroups.X; + + // Init SDF volume + context->BindCB(0, cb); + context->UpdateCB(cb, &data); + context->BindUA(0, _sdfSrc->View()); + context->Dispatch(shader->GetCS("CS_Init"), threadGroups.X, threadGroups.Y, threadGroups.Z); + + // Rendering input triangles into the SDF volume + if (_inputModel) + { + PROFILE_GPU_CPU_NAMED("Rasterize"); + const ModelLOD& lod = _inputModel->LODs[Math::Clamp(_lodIndex, _inputModel->HighestResidentLODIndex(), _inputModel->LODs.Count() - 1)]; + GPUBuffer *vbTemp = nullptr, *ibTemp = nullptr; + for (int32 i = 0; i < lod.Meshes.Count(); i++) + { + const Mesh& mesh = lod.Meshes[i]; + const MaterialSlot& materialSlot = _inputModel->MaterialSlots[mesh.GetMaterialSlotIndex()]; + if (materialSlot.Material && !materialSlot.Material->WaitForLoaded()) + { + // Skip transparent materials + if (materialSlot.Material->GetInfo().BlendMode != MaterialBlendMode::Opaque) + continue; + } + + GPUBuffer* vb = mesh.GetVertexBuffer(0); + GPUBuffer* ib = mesh.GetIndexBuffer(); + data.Index16bit = mesh.Use16BitIndexBuffer() ? 
1 : 0; + data.VertexStride = vb->GetStride(); + data.TriangleCount = mesh.GetTriangleCount(); + const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize); + if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS) + { + // TODO: support larger meshes via 2D dispatch + LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount); + continue; + } + context->UpdateCB(cb, &data); + if (!EnumHasAllFlags(vb->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource)) + { + desc = GPUBufferDescription::Raw(vb->GetSize(), GPUBufferFlags::ShaderResource); + // TODO: use transient buffer (single frame) + if (!vbTemp) + vbTemp = GPUBuffer::New(); + vbTemp->Init(desc); + context->CopyBuffer(vbTemp, vb, desc.Size); + vb = vbTemp; + } + if (!EnumHasAllFlags(ib->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource)) + { + desc = GPUBufferDescription::Raw(ib->GetSize(), GPUBufferFlags::ShaderResource); + // TODO: use transient buffer (single frame) + if (!ibTemp) + ibTemp = GPUBuffer::New(); + ibTemp->Init(desc); + context->CopyBuffer(ibTemp, ib, desc.Size); + ib = ibTemp; + } + context->BindSR(0, vb->View()); + context->BindSR(1, ib->View()); + context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1); + } + SAFE_DELETE_GPU_RESOURCE(vbTemp); + SAFE_DELETE_GPU_RESOURCE(ibTemp); + } + else if (_modelData) + { + PROFILE_GPU_CPU_NAMED("Rasterize"); + const ModelLodData& lod = _modelData->LODs[Math::Clamp(_lodIndex, 0, _modelData->LODs.Count() - 1)]; + auto vb = GPUBuffer::New(); + auto ib = GPUBuffer::New(); + for (int32 i = 0; i < lod.Meshes.Count(); i++) + { + const MeshData* mesh = lod.Meshes[i]; + const MaterialSlotEntry& materialSlot = _modelData->Materials[mesh->MaterialSlotIndex]; + auto material = Content::LoadAsync(materialSlot.AssetID); + if (material && !material->WaitForLoaded()) + { + // Skip transparent materials + if (material->GetInfo().BlendMode != 
MaterialBlendMode::Opaque) + continue; + } + + data.Index16bit = 0; + data.VertexStride = sizeof(Float3); + data.TriangleCount = mesh->Indices.Count() / 3; + const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize); + if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS) + { + // TODO: support larger meshes via 2D dispatch + LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount); + continue; + } + context->UpdateCB(cb, &data); + desc = GPUBufferDescription::Raw(mesh->Positions.Count() * sizeof(Float3), GPUBufferFlags::ShaderResource); + desc.InitData = mesh->Positions.Get(); + // TODO: use transient buffer (single frame) + vb->Init(desc); + desc = GPUBufferDescription::Raw(mesh->Indices.Count() * sizeof(uint32), GPUBufferFlags::ShaderResource); + desc.InitData = mesh->Indices.Get(); + // TODO: use transient buffer (single frame) + ib->Init(desc); + context->BindSR(0, vb->View()); + context->BindSR(1, ib->View()); + context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1); + } + SAFE_DELETE_GPU_RESOURCE(vb); + SAFE_DELETE_GPU_RESOURCE(ib); + } + + // Convert SDF volume data back to floats + context->Dispatch(shader->GetCS("CS_Resolve"), threadGroups.X, threadGroups.Y, threadGroups.Z); + + // Run linear flood-fill loop to populate all voxels with valid distances (spreads the initial values from triangles rasterization) + { + PROFILE_GPU_CPU_NAMED("FloodFill"); + auto csFloodFill = shader->GetCS("CS_FloodFill"); + const int32 floodFillIterations = Math::Max(_resolution.MaxValue() / 2 + 1, 8); + for (int32 floodFill = 0; floodFill < floodFillIterations; floodFill++) + { + context->ResetUA(); + context->BindUA(0, _sdfDst->View()); + context->BindSR(0, _sdfSrc->View()); + context->Dispatch(csFloodFill, threadGroups.X, threadGroups.Y, threadGroups.Z); + Swap(_sdfSrc, _sdfDst); + } + } + + // Encode SDF values into output storage + context->ResetUA(); + context->BindSR(0, _sdfSrc->View()); + // TODO: update GPU SDF texture 
within this task to skip additional CPU->GPU copy + auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess | GPUTextureFlags::RenderTarget); + // TODO: use transient texture (single frame) + auto sdfTexture = GPUTexture::New(); + sdfTexture->Init(sdfTextureDesc); + context->BindUA(1, sdfTexture->ViewVolume()); + context->Dispatch(shader->GetCS("CS_Encode"), threadGroups.X, threadGroups.Y, threadGroups.Z); + + // Copy result data into readback buffer + if (_sdfResult) + { + sdfTextureDesc = sdfTextureDesc.ToStagingReadback(); + _sdfResult->Init(sdfTextureDesc); + context->CopyTexture(_sdfResult, 0, 0, 0, 0, sdfTexture, 0); + } + + SAFE_DELETE_GPU_RESOURCE(sdfTexture); + + return Result::Ok; + } + + void OnSync() override + { + GPUTask::OnSync(); + _signal->NotifyOne(); + } + + void OnFail() override + { + GPUTask::OnFail(); + _signal->NotifyOne(); + } + + void OnCancel() override + { + GPUTask::OnCancel(); + _signal->NotifyOne(); + } +}; + +bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold, bool useGPU) { PROFILE_CPU(); auto startTime = Platform::GetTimeSeconds(); @@ -127,7 +386,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float *(uint8*)ptr = (uint8)v; }; } - GPUTextureDescription textureDesc = GPUTextureDescription::New3D(resolution.X, resolution.Y, resolution.Z, format, GPUTextureFlags::ShaderResource, mipCount); + auto textureDesc = GPUTextureDescription::New3D(resolution.X, resolution.Y, resolution.Z, format, GPUTextureFlags::ShaderResource, mipCount); if (outputSDF) { *outputSDF = sdf; @@ -143,19 +402,10 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float #endif } - // TODO: support GPU to generate model SDF on-the-fly (if 
called during rendering) - - // Setup acceleration structure for fast ray tracing the mesh triangles - MeshAccelerationStructure scene; - if (inputModel) - scene.Add(inputModel, lodIndex); - else if (modelData) - scene.Add(modelData, lodIndex); - scene.BuildBVH(); - // Allocate memory for the distant field const int32 voxelsSize = resolution.X * resolution.Y * resolution.Z * formatStride; - void* voxels = Allocator::Allocate(voxelsSize); + BytesContainer voxels; + voxels.Allocate(voxelsSize); Float3 xyzToLocalMul = uvwToLocalMul / Float3(resolution - 1); Float3 xyzToLocalAdd = uvwToLocalAdd; const Float2 encodeMAD(0.5f / sdf.MaxDistance * formatMaxValue, 0.5f * formatMaxValue); @@ -163,74 +413,125 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float int32 voxelSizeSum = voxelsSize; // TODO: use optimized sparse storage for SDF data as hierarchical bricks as in papers below: + // https://gpuopen.com/gdc-presentations/2023/GDC-2023-Sparse-Distance-Fields-For-Games.pdf + https://www.youtube.com/watch?v=iY15xhuuHPQ&ab_channel=AMD // https://graphics.pixar.com/library/IrradianceAtlas/paper.pdf // http://maverick.inria.fr/Membres/Cyril.Crassin/thesis/CCrassinThesis_EN_Web.pdf // http://ramakarl.com/pdfs/2016_Hoetzlein_GVDB.pdf // https://www.cse.chalmers.se/~uffe/HighResolutionSparseVoxelDAGs.pdf - // Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel - constexpr int32 sampleCount = 12; - Float3 sampleDirections[sampleCount]; + // Check if run SDF generation on a GPU via Compute Shader or on a Job System + useGPU &= GPUDevice::Instance + && GPUDevice::Instance->GetState() == GPUDevice::DeviceState::Ready + && GPUDevice::Instance->Limits.HasCompute + && format == PixelFormat::R16_UNorm + && !IsInMainThread() // TODO: support GPU to generate model SDF on-the-fly directly into virtual model (if called during rendering) + && resolution.MaxValue() > 8; + if 
(useGPU) { - RandomStream rand; - sampleDirections[0] = Float3::Up; - sampleDirections[1] = Float3::Down; - sampleDirections[2] = Float3::Left; - sampleDirections[3] = Float3::Right; - sampleDirections[4] = Float3::Forward; - sampleDirections[5] = Float3::Backward; - for (int32 i = 6; i < sampleCount; i++) - sampleDirections[i] = rand.GetUnitVector(); - } - Function sdfJob = [&sdf, &resolution, &backfacesThreshold, sampleDirections, &sampleCount, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z) - { - PROFILE_CPU_NAMED("Model SDF Job"); - Real hitDistance; - Vector3 hitNormal, hitPoint; - Triangle hitTriangle; - const int32 zAddress = resolution.Y * resolution.X * z; - for (int32 y = 0; y < resolution.Y; y++) + PROFILE_CPU_NAMED("GPU"); + + // TODO: skip using sdfResult and downloading SDF from GPU when updating virtual model + auto sdfResult = GPUTexture::New(); + + // Run SDF generation via GPU async task + ConditionVariable signal; + CriticalSection mutex; + Task* task = New(signal, inputModel, modelData, lodIndex, resolution, &sdf, sdfResult, xyzToLocalMul, xyzToLocalAdd); + task->Start(); + mutex.Lock(); + signal.Wait(mutex); + mutex.Unlock(); + bool failed = task->IsFailed(); + + // Gather result data from GPU to CPU + if (!failed && sdfResult) { - const int32 yAddress = resolution.X * y + zAddress; - for (int32 x = 0; x < resolution.X; x++) - { - Real minDistance = sdf.MaxDistance; - Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd; - - // Point query to find the distance to the closest surface - scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle); - - // Raycast samples around voxel to count triangle backfaces hit - int32 hitBackCount = 0, hitCount = 0; - for (int32 sample = 0; sample < sampleCount; sample++) - { - Ray sampleRay(voxelPos, sampleDirections[sample]); - sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin - if 
(scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle)) - { - if (hitDistance < minDistance) - minDistance = hitDistance; - hitCount++; - const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0; - if (backHit) - hitBackCount++; - } - } - - float distance = (float)minDistance; - // TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry - // if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0) - if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0) - { - // Voxel is inside the geometry so turn it into negative distance to the surface - distance *= -1; - } - const int32 xAddress = x + yAddress; - formatWrite((byte*)voxels + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y); - } + TextureMipData mipData; + const uint32 rowPitch = resolution.X * formatStride; + failed = sdfResult->GetData(0, 0, mipData, rowPitch); + failed |= voxels.Length() != mipData.Data.Length(); + if (!failed) + voxels = mipData.Data; } - }; - JobSystem::Execute(sdfJob, resolution.Z); + + SAFE_DELETE_GPU_RESOURCE(sdfResult); + if (failed) + return true; + } + else + { + // Setup acceleration structure for fast ray tracing the mesh triangles + MeshAccelerationStructure scene; + if (inputModel) + scene.Add(inputModel, lodIndex); + else if (modelData) + scene.Add(modelData, lodIndex); + scene.BuildBVH(); + + // Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel + constexpr int32 sampleCount = 12; + Float3 sampleDirections[sampleCount]; + { + RandomStream rand; + sampleDirections[0] = Float3::Up; + sampleDirections[1] = Float3::Down; + sampleDirections[2] = Float3::Left; + sampleDirections[3] = Float3::Right; + sampleDirections[4] = Float3::Forward; + sampleDirections[5] = Float3::Backward; + for (int32 i = 6; i < sampleCount; i++) + sampleDirections[i] = 
rand.GetUnitVector(); + } + Function sdfJob = [&sdf, &resolution, &backfacesThreshold, sampleDirections, &sampleCount, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z) + { + PROFILE_CPU_NAMED("Model SDF Job"); + Real hitDistance; + Vector3 hitNormal, hitPoint; + Triangle hitTriangle; + const int32 zAddress = resolution.Y * resolution.X * z; + for (int32 y = 0; y < resolution.Y; y++) + { + const int32 yAddress = resolution.X * y + zAddress; + for (int32 x = 0; x < resolution.X; x++) + { + Real minDistance = sdf.MaxDistance; + Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd; + + // Point query to find the distance to the closest surface + scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle); + + // Raycast samples around voxel to count triangle backfaces hit + int32 hitBackCount = 0, hitCount = 0; + for (int32 sample = 0; sample < sampleCount; sample++) + { + Ray sampleRay(voxelPos, sampleDirections[sample]); + sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin + if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle)) + { + if (hitDistance < minDistance) + minDistance = hitDistance; + hitCount++; + const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0; + if (backHit) + hitBackCount++; + } + } + + float distance = (float)minDistance; + // TODO: surface thickness threshold? 
shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry + // if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0) + if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0) + { + // Voxel is inside the geometry so turn it into negative distance to the surface + distance *= -1; + } + const int32 xAddress = x + yAddress; + formatWrite(voxels.Get() + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y); + } + } + }; + JobSystem::Execute(sdfJob, resolution.Z); + } // Cache SDF data on a CPU if (outputStream) @@ -240,20 +541,19 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float outputStream->WriteBytes(&data, sizeof(data)); ModelSDFMip mipData(0, resolution.X * formatStride, voxelsSize); outputStream->WriteBytes(&mipData, sizeof(mipData)); - outputStream->WriteBytes(voxels, voxelsSize); + outputStream->WriteBytes(voxels.Get(), voxelsSize); } // Upload data to the GPU if (outputSDF) { - BytesContainer data; - data.Link((byte*)voxels, voxelsSize); - auto task = outputSDF->Texture->UploadMipMapAsync(data, 0, resolution.X * formatStride, voxelsSize, true); + auto task = outputSDF->Texture->UploadMipMapAsync(voxels, 0, resolution.X * formatStride, voxelsSize, true); if (task) task->Start(); } // Generate mip maps + void* voxelsMipSrc = voxels.Get(); void* voxelsMip = nullptr; for (int32 mipLevel = 1; mipLevel < mipCount; mipLevel++) { @@ -263,7 +563,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float voxelsMip = Allocator::Allocate(voxelsMipSize); // Downscale mip - Function mipJob = [&voxelsMip, &voxels, &resolution, &resolutionMip, &encodeMAD, &decodeMAD, &formatStride, &formatRead, &formatWrite](int32 z) + Function mipJob = [&voxelsMip, &voxelsMipSrc, &resolution, &resolutionMip, &encodeMAD, &decodeMAD, &formatStride, &formatRead, &formatWrite](int32 z) { PROFILE_CPU_NAMED("Model SDF Mip Job"); const int32 zAddress = 
resolutionMip.Y * resolutionMip.X * z; @@ -284,7 +584,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float for (int32 dx = 0; dx < 2; dx++) { const int32 dxAddress = (x * 2 + dx) + dyAddress; - const float d = formatRead((byte*)voxels + dxAddress * formatStride) * decodeMAD.X + decodeMAD.Y; + const float d = formatRead((byte*)voxelsMipSrc + dxAddress * formatStride) * decodeMAD.X + decodeMAD.Y; distance += d; } } @@ -318,12 +618,11 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float // Go down voxelSizeSum += voxelsSize; - Swap(voxelsMip, voxels); + Swap(voxelsMip, voxelsMipSrc); resolution = resolutionMip; } Allocator::Free(voxelsMip); - Allocator::Free(voxels); #if !BUILD_RELEASE auto endTime = Platform::GetTimeSeconds(); diff --git a/Source/Engine/Tools/ModelTool/ModelTool.h b/Source/Engine/Tools/ModelTool/ModelTool.h index ed1736214..708b94342 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.h +++ b/Source/Engine/Tools/ModelTool/ModelTool.h @@ -98,7 +98,7 @@ API_CLASS(Namespace="FlaxEngine.Tools", Static) class FLAXENGINE_API ModelTool // Optional: inputModel or modelData // Optional: outputSDF or null, outputStream or null - static bool GenerateModelSDF(class Model* inputModel, class ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, class MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold = 0.6f); + static bool GenerateModelSDF(class Model* inputModel, class ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, class MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold = 0.6f, bool useGPU = true); #if USE_EDITOR diff --git a/Source/Shaders/SDF.shader b/Source/Shaders/SDF.shader new file mode 100644 index 000000000..a19e6d346 --- /dev/null +++ b/Source/Shaders/SDF.shader @@ -0,0 +1,272 @@ +// Copyright (c) 2012-2024 Wojciech Figat. 
All rights reserved. + +// Mesh SDF generation based on https://github.com/GPUOpen-Effects/TressFX + +#include "./Flax/Common.hlsl" +#include "./Flax/ThirdParty/TressFX/TressFXSDF.hlsl" + +#define THREAD_GROUP_SIZE 64 + +META_CB_BEGIN(0, Data) +int3 Resolution; +uint ResolutionSize; +float MaxDistance; +uint VertexStride; +bool Index16bit; +uint TriangleCount; +float3 VoxelToPosMul; +float WorldUnitsPerVoxel; +float3 VoxelToPosAdd; +uint ThreadGroupsX; +META_CB_END + +RWBuffer SDF : register(u0); + +uint GetVoxelIndex(uint3 groupId, uint groupIndex) +{ + return groupIndex + (groupId.x + groupId.y * ThreadGroupsX) * THREAD_GROUP_SIZE; +} + +int3 ClampVoxelCoord(int3 coord) +{ + return clamp(coord, 0, Resolution - 1); +} + +int GetVoxelIndex(int3 coord) +{ + return Resolution.x * Resolution.y * coord.z + Resolution.x * coord.y + coord.x; +} + +float3 GetVoxelPos(int3 coord) +{ + return float3((float)coord.x, (float)coord.y, (float)coord.z) * VoxelToPosMul + VoxelToPosAdd; +} + +int3 GetVoxelCoord(float3 pos) +{ + pos = (pos - VoxelToPosAdd) / VoxelToPosMul; + return int3((int)pos.x, (int)pos.y, (int)pos.z); +} + +int3 GetVoxelCoord(uint index) +{ + uint sizeX = (uint)Resolution.x; + uint sizeY = (uint)(Resolution.x * Resolution.y); + uint coordZ = index / sizeY; + uint coordXY = index % sizeY; + uint coordY = coordXY / sizeX; + uint coordX = coordXY % sizeX; + return int3((int)coordX, (int)coordY, (int)coordZ); +} + +// Clears SDF texture with the initial distance. +META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(THREAD_GROUP_SIZE, 1, 1)] +void CS_Init(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex) +{ + uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex); + if (voxelIndex >= ResolutionSize) + return; + float distance = MaxDistance * 10.0f; // Start with a very large value + SDF[voxelIndex] = FloatFlip3(distance); +} + +// Unpacks SDF texture into distances stores as normal float value (FloatFlip3 is used for interlocked operations on uint). 
+META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(THREAD_GROUP_SIZE, 1, 1)] +void CS_Resolve(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex) +{ + uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex); + if (voxelIndex >= ResolutionSize) + return; + SDF[voxelIndex] = IFloatFlip3(SDF[voxelIndex]); +} + +#ifdef _CS_RasterizeTriangle + +ByteAddressBuffer VertexBuffer : register(t0); +ByteAddressBuffer IndexBuffer : register(t1); + +uint LoadIndex(uint i) +{ + if (Index16bit) + { + uint index = IndexBuffer.Load((i >> 1u) << 2u); + index = (i & 1u) == 1u ? (index >> 16) : index; + return index & 0xffff; + } + return IndexBuffer.Load(i << 2u); +} + +float3 LoadVertex(uint i) +{ + return asfloat(VertexBuffer.Load3(i * VertexStride)); +} + +// Renders triangle mesh into the SDF texture by writing minimum distance to the triangle into all intersecting voxels. +META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(THREAD_GROUP_SIZE, 1, 1)] +void CS_RasterizeTriangle(uint3 DispatchThreadId : SV_DispatchThreadID) +{ + uint triangleIndex = DispatchThreadId.x; + if (triangleIndex >= TriangleCount) + return; + + // Load triangle + triangleIndex *= 3; + uint i0 = LoadIndex(triangleIndex + 0); + uint i1 = LoadIndex(triangleIndex + 1); + uint i2 = LoadIndex(triangleIndex + 2); + float3 v0 = LoadVertex(i0); + float3 v1 = LoadVertex(i1); + float3 v2 = LoadVertex(i2); + + // Project triangle into SDF voxels + float3 vMargin = float3(WorldUnitsPerVoxel, WorldUnitsPerVoxel, WorldUnitsPerVoxel); + float3 vMin = min(min(v0, v1), v2) - vMargin; + float3 vMax = max(max(v0, v1), v2) + vMargin; + int3 voxelMargin = int3(1, 1, 1); + int3 voxelMin = GetVoxelCoord(vMin) - voxelMargin; + int3 voxelMax = GetVoxelCoord(vMax) + voxelMargin; + voxelMin = ClampVoxelCoord(voxelMin); + voxelMax = ClampVoxelCoord(voxelMax); + + // Rasterize into SDF voxels + for (int z = voxelMin.z; z <= voxelMax.z; z++) + { + for (int y = voxelMin.y; y <= voxelMax.y; y++) + { + for (int x = voxelMin.x; x <= voxelMax.x; 
x++) + { + int3 voxelCoord = int3(x, y, z); + int voxelIndex = GetVoxelIndex(voxelCoord); + float3 voxelPos = GetVoxelPos(voxelCoord); + float distance = SignedDistancePointToTriangle(voxelPos, v0, v1, v2); + InterlockedMin(SDF[voxelIndex], FloatFlip3(distance)); + } + } + } +} + +#endif + +#if defined(_CS_FloodFill) || defined(_CS_Encode) + +Buffer InSDF : register(t0); + +float GetVoxel(int voxelIndex) +{ + return asfloat(InSDF[voxelIndex]); +} + +float GetVoxel(int3 coord) +{ + coord = ClampVoxelCoord(coord); + int voxelIndex = GetVoxelIndex(coord); + return GetVoxel(voxelIndex); +} + +float CombineSDF(float sdf, int3 nearbyCoord, float nearbyDistance) +{ + // Sample nearby voxel + float sdfNearby = GetVoxel(nearbyCoord); + + // Include distance to that nearby voxel + if (sdfNearby < 0.0f) + nearbyDistance *= -1; + sdfNearby += nearbyDistance; + + if (sdfNearby > MaxDistance) + { + // Ignore if nearby sample is invalid (see CS_Init) + } + else if (sdf > MaxDistance) + { + // Use nearby sample if current one is invalid (see CS_Init) + sdf = sdfNearby; + } + else + { + // Use distance closer to 0 + sdf = sdf >= 0 ? min(sdf, sdfNearby) : max(sdf, sdfNearby); + } + + return sdf; +} + +// Fills the voxels with minimum distances to the triangles. 
+META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(THREAD_GROUP_SIZE, 1, 1)] +void CS_FloodFill(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex) +{ + uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex); + if (voxelIndex >= ResolutionSize) + return; + float sdf = GetVoxel(voxelIndex); + + // Skip if the distance is already so small that we know that triangle is nearby + if (abs(sdf) > WorldUnitsPerVoxel * 1.2f) + { + int3 voxelCoord = GetVoxelCoord(voxelIndex); + int3 offset = int3(-1, 0, 1); + + // Sample nearby voxels + float nearbyDistance = WorldUnitsPerVoxel; + sdf = CombineSDF(sdf, voxelCoord + offset.zyy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yzy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yyz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.xyy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yxy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yyx, nearbyDistance); +#if 0 + nearbyDistance = WorldUnitsPerVoxel * 1.41421f; + sdf = CombineSDF(sdf, voxelCoord + offset.xxy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.xzy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zzy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zxy, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.xyx, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.xyz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zyz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zyx, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yxx, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yxz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yzz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.yzx, nearbyDistance); +#endif +#if 0 + nearbyDistance = WorldUnitsPerVoxel * 1.73205f; + sdf = CombineSDF(sdf, voxelCoord + offset.xxx, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + 
offset.xxz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.xzx, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.xzz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zxx, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zxz, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zzx, nearbyDistance); + sdf = CombineSDF(sdf, voxelCoord + offset.zzz, nearbyDistance); +#endif + } + + SDF[voxelIndex] = asuint(sdf); +} + +RWTexture3D SDFtex : register(u1); + +// Encodes SDF values into the packed format with normalized distances. +META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(THREAD_GROUP_SIZE, 1, 1)] +void CS_Encode(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex) +{ + uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex); + if (voxelIndex >= ResolutionSize) + return; + float sdf = GetVoxel(voxelIndex); + sdf = min(sdf, MaxDistance); + + // Pack from range [-MaxDistance; +MaxDistance] to [0; 1] + sdf = (sdf / MaxDistance) * 0.5f + 0.5f; + + int3 voxelCoord = GetVoxelCoord(voxelIndex); + SDFtex[voxelCoord] = sdf; +} + +#endif diff --git a/Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl b/Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl new file mode 100644 index 000000000..13c28f16d --- /dev/null +++ b/Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl @@ -0,0 +1,121 @@ +// Source: https://github.com/GPUOpen-Effects/TressFX +// License: MIT + +// +// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +//When building the SDF we want to find the lowest distance at each SDF cell. In order to allow multiple threads to write to the same +//cells, it is necessary to use atomics. However, there is no support for atomics with 32-bit floats so we convert the float into unsigned int +//and use atomic_min() / InterlockedMin() as a workaround. +// +//When used with atomic_min, both FloatFlip2() and FloatFlip3() will store the float with the lowest magnitude. +//The difference is that FloatFlip2() will prefer negative values ( InterlockedMin( FloatFlip2(1.0), FloatFlip2(-1.0) ) == -1.0 ), +//while FloatFlip3() prefers positive values ( InterlockedMin( FloatFlip3(1.0), FloatFlip3(-1.0) ) == 1.0 ). +//Using FloatFlip3() seems to result in a SDF with higher quality compared to FloatFlip2().
+uint FloatFlip2(float fl) +{ + uint f = asuint(fl); + return (f << 1) | (f >> 31 ^ 0x00000001); //Rotate sign bit to least significant and Flip sign bit so that (0 == negative) +} +uint IFloatFlip2(uint f2) +{ + return (f2 >> 1) | (f2 << 31 ^ 0x80000000); +} +uint FloatFlip3(float fl) +{ + uint f = asuint(fl); + return (f << 1) | (f >> 31); //Rotate sign bit to least significant +} +uint IFloatFlip3(uint f2) +{ + return (f2 >> 1) | (f2 << 31); +} + +float DistancePointToEdge(float3 p, float3 x0, float3 x1, out float3 n) +{ + float3 x10 = x1 - x0; + + float t = dot(x1 - p, x10) / dot(x10, x10); + t = max(0.0f, min(t, 1.0f)); + + float3 a = p - (t*x0 + (1.0f - t)*x1); + float d = length(a); + n = a / (d + 1e-30f); + + return d; +} + +// Check if p is in the positive or negative side of triangle (x0, x1, x2) +// Positive side is where the normal vector of triangle ( (x1-x0) x (x2-x0) ) is pointing to. +float SignedDistancePointToTriangle(float3 p, float3 x0, float3 x1, float3 x2) +{ + float d = 0; + float3 x02 = x0 - x2; + float l0 = length(x02) + 1e-30f; + x02 = x02 / l0; + float3 x12 = x1 - x2; + float l1 = dot(x12, x02); + x12 = x12 - l1*x02; + float l2 = length(x12) + 1e-30f; + x12 = x12 / l2; + float3 px2 = p - x2; + + float b = dot(x12, px2) / l2; + float a = (dot(x02, px2) - l1*b) / l0; + float c = 1 - a - b; + + // normal vector of triangle. Don't need to normalize this yet. + float3 nTri = cross((x1 - x0), (x2 - x0)); + float3 n; + + float tol = 1e-8f; + + if (a >= -tol && b >= -tol && c >= -tol) + { + n = p - (a*x0 + b*x1 + c*x2); + d = length(n); + + float3 n1 = n / d; + float3 n2 = nTri / (length(nTri) + 1e-30f); // if d == 0 + + n = (d > 0) ? n1 : n2; + } + else + { + float3 n_12; + float3 n_02; + d = DistancePointToEdge(p, x0, x1, n); + + float d12 = DistancePointToEdge(p, x1, x2, n_12); + float d02 = DistancePointToEdge(p, x0, x2, n_02); + + d = min(d, d12); + d = min(d, d02); + + n = (d == d12) ? n_12 : n; + n = (d == d02) ? 
n_02 : n; + } + + d = (dot(p - x0, nTri) < 0.f) ? -d : d; + + return d; +} From 9258c234f2aad292a0b47b9f1186260bc2c88fc2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 14:54:06 +0200 Subject: [PATCH 098/292] Add option to SDF generation via GPU in Model Window --- Source/Editor/Windows/Assets/ModelWindow.cs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/Source/Editor/Windows/Assets/ModelWindow.cs b/Source/Editor/Windows/Assets/ModelWindow.cs index 876736c55..4a0df7255 100644 --- a/Source/Editor/Windows/Assets/ModelWindow.cs +++ b/Source/Editor/Windows/Assets/ModelWindow.cs @@ -1,6 +1,7 @@ // Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. using System.Collections.Generic; +using System.Linq; using System.Reflection; using System.Threading.Tasks; using FlaxEditor.Content; @@ -205,12 +206,27 @@ namespace FlaxEditor.Windows.Assets resolution.ValueBox.BoxValueChanged += b => { proxy.Window._importSettings.Settings.SDFResolution = b.Value; }; proxy.Window._importSettings.Settings.SDFResolution = sdf.ResolutionScale; - var backfacesThreshold = group.FloatValue("Backfaces Threshold", "Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. Use lower value for more dense mesh."); + var gpu = group.Checkbox("Bake on GPU", "If checked, SDF generation will be calculated using GPU on Compute Shader, otherwise CPU will use Job System. GPU generation is fast but result in artifacts in various meshes (eg. foliage)."); + gpu.CheckBox.Checked = proxy.Window._gpuSDF; + + var backfacesThresholdProp = group.AddPropertyItem("Backfaces Threshold", "Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. 
Use lower value for more dense mesh."); + var backfacesThreshold = backfacesThresholdProp.FloatValue(); + var backfacesThresholdLabel = backfacesThresholdProp.Labels.Last(); backfacesThreshold.ValueBox.MinValue = 0.001f; backfacesThreshold.ValueBox.MaxValue = 1.0f; backfacesThreshold.ValueBox.Value = proxy.Window._backfacesThreshold; backfacesThreshold.ValueBox.BoxValueChanged += b => { proxy.Window._backfacesThreshold = b.Value; }; + // Toggle Backfaces Threshold visibility (CPU-only option) + gpu.CheckBox.StateChanged += c => + { + proxy.Window._gpuSDF = c.Checked; + backfacesThresholdLabel.Visible = !c.Checked; + backfacesThreshold.ValueBox.Visible = !c.Checked; + }; + backfacesThresholdLabel.Visible = !gpu.CheckBox.Checked; + backfacesThreshold.ValueBox.Visible = !gpu.CheckBox.Checked; + var lodIndex = group.IntegerValue("LOD Index", "Index of the model Level of Detail to use for SDF data building. By default uses the lowest quality LOD for fast building."); lodIndex.IntValue.MinValue = 0; lodIndex.IntValue.MaxValue = lods.Length - 1; @@ -298,7 +314,7 @@ namespace FlaxEditor.Windows.Assets proxy.Window.Enabled = false; Task.Run(() => { - bool failed = proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold); + bool failed = proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold, proxy.Window._gpuSDF); FlaxEngine.Scripting.InvokeOnUpdate(() => { proxy.Window.Enabled = true; @@ -789,6 +805,7 @@ namespace FlaxEditor.Windows.Assets private MeshDataCache _meshData; private ModelImportSettings _importSettings = new ModelImportSettings(); private float _backfacesThreshold = 0.6f; + private bool _gpuSDF = true; private ToolStripButton _showCurrentLODButton; /// From 33d1c8c68c725498f39be52fa404887e684ee050 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 14:54:28 +0200 Subject: 
[PATCH 099/292] Add async SDF generation for all meshes in the scene --- Source/Editor/Editor.cs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/Source/Editor/Editor.cs b/Source/Editor/Editor.cs index d0076068e..2a768099d 100644 --- a/Source/Editor/Editor.cs +++ b/Source/Editor/Editor.cs @@ -7,6 +7,7 @@ using System.Linq; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.InteropServices.Marshalling; +using System.Threading.Tasks; using FlaxEditor.Content; using FlaxEditor.Content.Settings; using FlaxEditor.Content.Thumbnails; @@ -1338,20 +1339,33 @@ namespace FlaxEditor /// public void BuildAllMeshesSDF() { - // TODO: async maybe with progress reporting? + var models = new List(); Scene.ExecuteOnGraph(node => { if (node is StaticModelNode staticModelNode && staticModelNode.Actor is StaticModel staticModel) { - if (staticModel.DrawModes.HasFlag(DrawPass.GlobalSDF) && staticModel.Model != null && !staticModel.Model.IsVirtual && staticModel.Model.SDF.Texture == null) + var model = staticModel.Model; + if (staticModel.DrawModes.HasFlag(DrawPass.GlobalSDF) && + model != null && + !models.Contains(model) && + !model.IsVirtual && + model.SDF.Texture == null) { - Log("Generating SDF for " + staticModel.Model); - if (!staticModel.Model.GenerateSDF()) - staticModel.Model.Save(); + models.Add(model); } } return true; }); + Task.Run(() => + { + for (int i = 0; i < models.Count; i++) + { + var model = models[i]; + Log($"[{i}/{models.Count}] Generating SDF for {model}"); + if (!model.GenerateSDF()) + model.Save(); + } + }); } #endregion From 80fbe048bfe234e150117f4a4798b4b30a30a946 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 14:55:30 +0200 Subject: [PATCH 100/292] Update `VulkanMemoryAllocator` to version`3.1.0` --- .../VulkanMemoryAllocator/LICENSE.txt | 2 +- .../VulkanMemoryAllocator/vk_mem_alloc.h | 26122 ++++++++-------- 
.../VulkanMemoryAllocator/vk_mem_alloc.natvis | 101 +- 3 files changed, 12814 insertions(+), 13411 deletions(-) diff --git a/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt b/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt index 71e824f80..b9fff388f 100644 --- a/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt +++ b/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h b/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h index a410c63e4..2307325d4 100644 --- a/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h +++ b/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -25,14 +25,17 @@ /** \mainpage Vulkan Memory Allocator -Version 3.0.0-development (2021-02-16) +Version 3.1.0 -Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. \n -License: MIT +Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
\n +License: MIT \n +See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/), +[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) -Documentation of all members: vk_mem_alloc.h -\section main_table_of_contents Table of contents +API documentation divided into groups: [Topics](topics.html) + +General documentation chapters: - User guide - \subpage quick_start @@ -46,1993 +49,96 @@ Documentation of all members: vk_mem_alloc.h - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools) - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations) - \subpage memory_mapping + - [Copy functions](@ref memory_mapping_copy_functions) - [Mapping functions](@ref memory_mapping_mapping_functions) - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) - [Cache flush and invalidate](@ref memory_mapping_cache_control) - - [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable) - \subpage staying_within_budget - [Querying for budget](@ref staying_within_budget_querying_for_budget) - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) - \subpage resource_aliasing - \subpage custom_memory_pools - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) + - [When not to use custom pools](@ref custom_memory_pools_when_not_use) - [Linear allocation algorithm](@ref linear_algorithm) - [Free-at-once](@ref linear_algorithm_free_at_once) - [Stack](@ref linear_algorithm_stack) - [Double stack](@ref linear_algorithm_double_stack) - [Ring buffer](@ref linear_algorithm_ring_buffer) - - [Buddy allocation algorithm](@ref buddy_algorithm) - \subpage defragmentation - - [Defragmenting CPU memory](@ref defragmentation_cpu) - - [Defragmenting GPU memory](@ref defragmentation_gpu) - - [Additional notes](@ref defragmentation_additional_notes) - - [Writing custom allocation algorithm](@ref 
defragmentation_custom_algorithm) - - \subpage lost_allocations - \subpage statistics - [Numeric statistics](@ref statistics_numeric_statistics) - [JSON dump](@ref statistics_json_dump) - \subpage allocation_annotation - [Allocation user data](@ref allocation_user_data) - [Allocation names](@ref allocation_names) + - \subpage virtual_allocator - \subpage debugging_memory_usage - [Memory initialization](@ref debugging_memory_usage_initialization) - [Margins](@ref debugging_memory_usage_margins) - [Corruption detection](@ref debugging_memory_usage_corruption_detection) - - \subpage record_and_replay + - [Leak detection features](@ref debugging_memory_usage_leak_detection) + - \subpage other_api_interop - \subpage usage_patterns - - [Common mistakes](@ref usage_patterns_common_mistakes) - - [Simple patterns](@ref usage_patterns_simple) - - [Advanced patterns](@ref usage_patterns_advanced) + - [GPU-only resource](@ref usage_patterns_gpu_only) + - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) + - [Readback](@ref usage_patterns_readback) + - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) + - [Other use cases](@ref usage_patterns_other_use_cases) - \subpage configuration - [Pointers to Vulkan functions](@ref config_Vulkan_functions) - [Custom host memory allocator](@ref custom_memory_allocator) - [Device memory allocation callbacks](@ref allocation_callbacks) - [Device heap memory limit](@ref heap_memory_limit) - - \subpage vk_khr_dedicated_allocation - - \subpage enabling_buffer_device_address - - \subpage vk_amd_device_coherent_memory +- Extension support + - \subpage vk_khr_dedicated_allocation + - \subpage enabling_buffer_device_address + - \subpage vk_ext_memory_priority + - \subpage vk_amd_device_coherent_memory - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) + - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) - [Validation layer 
warnings](@ref general_considerations_validation_layer_warnings) - [Allocation algorithm](@ref general_considerations_allocation_algorithm) - [Features not supported](@ref general_considerations_features_not_supported) -\section main_see_also See also +\defgroup group_init Library initialization -- [Product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/) -- [Source repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) +\brief API elements related to the initialization and management of the entire library, especially #VmaAllocator object. +\defgroup group_alloc Memory allocation +\brief API elements related to the allocation, deallocation, and management of Vulkan memory, buffers, images. +Most basic ones being: vmaCreateBuffer(), vmaCreateImage(). +\defgroup group_virtual Virtual allocator -\page quick_start Quick start +\brief API elements related to the mechanism of \ref virtual_allocator - using the core allocation algorithm +for user-defined purpose without allocating any real GPU memory. -\section quick_start_project_setup Project setup - -Vulkan Memory Allocator comes in form of a "stb-style" single header file. -You don't need to build it as a separate library project. -You can add this file directly to your project and submit it to code repository next to your other source files. - -"Single header" doesn't mean that everything is contained in C/C++ declarations, -like it tends to be in case of inline functions or C++ templates. -It means that implementation is bundled with interface in a single file and needs to be extracted using preprocessor macro. -If you don't do it properly, you will get linker errors. - -To do it properly: - --# Include "vk_mem_alloc.h" file in each CPP file where you want to use the library. - This includes declarations of all members of the library. --# In exacly one CPP file define following macro before this include. - It enables also internal definitions. 
- -\code -#define VMA_IMPLEMENTATION -#include "vk_mem_alloc.h" -\endcode - -It may be a good idea to create dedicated CPP file just for this purpose. - -Note on language: This library is written in C++, but has C-compatible interface. -Thus you can include and use vk_mem_alloc.h in C or C++ code, but full -implementation with `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C. - -Please note that this library includes header ``, which in turn -includes `` on Windows. If you need some specific macros defined -before including these headers (like `WIN32_LEAN_AND_MEAN` or -`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define -them before every `#include` of this library. - -You may need to configure the way you import Vulkan functions. - -- By default, VMA assumes you you link statically with Vulkan API. If this is not the case, - `#define VMA_STATIC_VULKAN_FUNCTIONS 0` before `#include` of the VMA implementation and use another way. -- You can `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1` and make sure `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` globals are defined. - All the remaining Vulkan functions will be fetched automatically. -- Finally, you can provide your own pointers to all Vulkan functions needed by VMA using structure member - VmaAllocatorCreateInfo::pVulkanFunctions, if you fetched them in some custom way e.g. using some loader like [Volk](https://github.com/zeux/volk). - - -\section quick_start_initialization Initialization - -At program startup: - --# Initialize Vulkan to have `VkPhysicalDevice`, `VkDevice` and `VkInstance` object. --# Fill VmaAllocatorCreateInfo structure and create #VmaAllocator object by - calling vmaCreateAllocator(). 
- -\code -VmaAllocatorCreateInfo allocatorInfo = {}; -allocatorInfo.vulkanApiVersion = VK_API_VERSION_1_2; -allocatorInfo.physicalDevice = physicalDevice; -allocatorInfo.device = device; -allocatorInfo.instance = instance; - -VmaAllocator allocator; -vmaCreateAllocator(&allocatorInfo, &allocator); -\endcode - -Only members `physicalDevice`, `device`, `instance` are required. -However, you should inform the library which Vulkan version do you use by setting -VmaAllocatorCreateInfo::vulkanApiVersion and which extensions did you enable -by setting VmaAllocatorCreateInfo::flags (like #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT for VK_KHR_buffer_device_address). -Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions. - - -\section quick_start_resource_allocation Resource allocation - -When you want to create a buffer or image: - --# Fill `VkBufferCreateInfo` / `VkImageCreateInfo` structure. --# Fill VmaAllocationCreateInfo structure. --# Call vmaCreateBuffer() / vmaCreateImage() to get `VkBuffer`/`VkImage` with memory - already allocated and bound to it. - -\code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufferInfo.size = 65536; -bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - -Don't forget to destroy your objects when no longer needed: - -\code -vmaDestroyBuffer(allocator, buffer, allocation); -vmaDestroyAllocator(allocator); -\endcode - - -\page choosing_memory_type Choosing memory type - -Physical devices in Vulkan support various combinations of memory heaps and -types. Help with choosing correct and optimal memory type for your specific -resource is one of the key features of this library. 
You can use it by filling -appropriate members of VmaAllocationCreateInfo structure, as described below. -You can also combine multiple methods. - --# If you just want to find memory type index that meets your requirements, you - can use function: vmaFindMemoryTypeIndex(), vmaFindMemoryTypeIndexForBufferInfo(), - vmaFindMemoryTypeIndexForImageInfo(). --# If you want to allocate a region of device memory without association with any - specific image or buffer, you can use function vmaAllocateMemory(). Usage of - this function is not recommended and usually not needed. - vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once, - which may be useful for sparse binding. --# If you already have a buffer or an image created, you want to allocate memory - for it and then you will bind it yourself, you can use function - vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). - For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() - or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). --# If you want to create a buffer or an image, allocate memory for it and bind - them together, all in one call, you can use function vmaCreateBuffer(), - vmaCreateImage(). This is the easiest and recommended way to use this library. - -When using 3. or 4., the library internally queries Vulkan for memory types -supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) -and uses only one of these types. - -If no memory type can be found that meets all the requirements, these functions -return `VK_ERROR_FEATURE_NOT_PRESENT`. - -You can leave VmaAllocationCreateInfo structure completely filled with zeros. -It means no requirements are specified for memory type. -It is valid, although not very useful. 
- -\section choosing_memory_type_usage Usage - -The easiest way to specify memory requirements is to fill member -VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage. -It defines high level, common usage types. -For more details, see description of this enum. - -For example, if you want to create a uniform buffer that will be filled using -transfer only once or infrequently and used for rendering every frame, you can -do it using following code: - -\code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufferInfo.size = 65536; -bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - -\section choosing_memory_type_required_preferred_flags Required and preferred flags - -You can specify more detailed requirements by filling members -VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags -with a combination of bits from enum `VkMemoryPropertyFlags`. For example, -if you want to create a buffer that will be persistently mapped on host (so it -must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`, -use following code: - -\code -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; -allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; -allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - -A memory type is chosen that has all the required flags and as many preferred -flags set as possible. 
- -If you use VmaAllocationCreateInfo::usage, it is just internally converted to -a set of required and preferred flags. - -\section choosing_memory_type_explicit_memory_types Explicit memory types - -If you inspected memory types available on the physical device and you have -a preference for memory types that you want to use, you can fill member -VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set -means that a memory type with that index is allowed to be used for the -allocation. Special value 0, just like `UINT32_MAX`, means there are no -restrictions to memory type index. - -Please note that this member is NOT just a memory type index. -Still you can use it to choose just one, specific memory type. -For example, if you already determined that your buffer should be created in -memory type 2, use following code: - -\code -uint32_t memoryTypeIndex = 2; - -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.memoryTypeBits = 1u << memoryTypeIndex; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - - -\section choosing_memory_type_custom_memory_pools Custom memory pools - -If you allocate from custom memory pool, all the ways of specifying memory -requirements described above are not applicable and the aforementioned members -of VmaAllocationCreateInfo structure are ignored. Memory type is selected -explicitly when creating the pool and then used to make all the allocations from -that pool. For further details, see \ref custom_memory_pools. - -\section choosing_memory_type_dedicated_allocations Dedicated allocations - -Memory for allocations is reserved out of larger block of `VkDeviceMemory` -allocated from Vulkan internally. That's the main feature of this whole library. -You can still request a separate memory block to be created for an allocation, -just like you would do in a trivial solution without using any allocator. 
-In that case, a buffer or image is always bound to that memory at offset 0. -This is called a "dedicated allocation". -You can explicitly request it by using flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. -The library can also internally decide to use dedicated allocation in some cases, e.g.: - -- When the size of the allocation is large. -- When [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled - and it reports that dedicated allocation is required or recommended for the resource. -- When allocation of next big memory block fails due to not enough device memory, - but allocation with the exact requested size succeeds. - - -\page memory_mapping Memory mapping - -To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`, -to be able to read from it or write to it in CPU code. -Mapping is possible only of memory allocated from a memory type that has -`VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. -Functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose. -You can use them directly with memory allocated by this library, -but it is not recommended because of following issue: -Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed. -This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan. -Because of this, Vulkan Memory Allocator provides following facilities: - -\section memory_mapping_mapping_functions Mapping functions - -The library provides following functions for mapping of a specific #VmaAllocation: vmaMapMemory(), vmaUnmapMemory(). -They are safer and more convenient to use than standard Vulkan functions. -You can map an allocation multiple times simultaneously - mapping is reference-counted internally. -You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block. 
-The way it's implemented is that the library always maps entire memory block, not just region of the allocation. -For further details, see description of vmaMapMemory() function. -Example: - -\code -// Having these objects initialized: - -struct ConstantBuffer -{ - ... -}; -ConstantBuffer constantBufferData; - -VmaAllocator allocator; -VkBuffer constantBuffer; -VmaAllocation constantBufferAllocation; - -// You can map and fill your buffer using following code: - -void* mappedData; -vmaMapMemory(allocator, constantBufferAllocation, &mappedData); -memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); -vmaUnmapMemory(allocator, constantBufferAllocation); -\endcode - -When mapping, you may see a warning from Vulkan validation layer similar to this one: - -Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used. - -It happens because the library maps entire `VkDeviceMemory` block, where different -types of images and buffers may end up together, especially on GPUs with unified memory like Intel. -You can safely ignore it if you are sure you access only memory of the intended -object that you wanted to map. - - -\section memory_mapping_persistently_mapped_memory Persistently mapped memory - -Kepping your memory persistently mapped is generally OK in Vulkan. -You don't need to unmap it before using its data on the GPU. -The library provides a special feature designed for that: -Allocations made with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in -VmaAllocationCreateInfo::flags stay mapped all the time, -so you can just access CPU pointer to it any time -without a need to call any "map" or "unmap" function. 
-Example: - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = sizeof(ConstantBuffer); -bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); - -// Buffer is already mapped. You can access its memory. -memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); -\endcode - -There are some exceptions though, when you should consider mapping memory only for a short period of time: - -- When operating system is Windows 7 or 8.x (Windows 10 is not affected because it uses WDDM2), - device is discrete AMD GPU, - and memory type is the special 256 MiB pool of `DEVICE_LOCAL + HOST_VISIBLE` memory - (selected when you use #VMA_MEMORY_USAGE_CPU_TO_GPU), - then whenever a memory block allocated from this memory type stays mapped - for the time of any call to `vkQueueSubmit()` or `vkQueuePresentKHR()`, this - block is migrated by WDDM to system RAM, which degrades performance. It doesn't - matter if that particular memory block is actually used by the command buffer - being submitted. -- On Mac/MoltenVK there is a known bug - [Issue #175](https://github.com/KhronosGroup/MoltenVK/issues/175) - which requires unmapping before GPU can see updated texture. -- Keeping many large memory blocks mapped may impact performance or stability of some debugging tools. 
- -\section memory_mapping_cache_control Cache flush and invalidate - -Memory in Vulkan doesn't need to be unmapped before using it on GPU, -but unless a memory types has `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set, -you need to manually **invalidate** cache before reading of mapped pointer -and **flush** cache after writing to mapped pointer. -Map/unmap operations don't do that automatically. -Vulkan provides following functions for this purpose `vkFlushMappedMemoryRanges()`, -`vkInvalidateMappedMemoryRanges()`, but this library provides more convenient -functions that refer to given allocation object: vmaFlushAllocation(), -vmaInvalidateAllocation(), -or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations(). - -Regions of memory specified for flush/invalidate must be aligned to -`VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library. -In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocations -within blocks are aligned to this value, so their offsets are always multiply of -`nonCoherentAtomSize` and two different allocations never share same "line" of this size. - -Please note that memory allocated with #VMA_MEMORY_USAGE_CPU_ONLY is guaranteed to be `HOST_COHERENT`. - -Also, Windows drivers from all 3 **PC** GPU vendors (AMD, Intel, NVIDIA) -currently provide `HOST_COHERENT` flag on all memory types that are -`HOST_VISIBLE`, so on this platform you may not need to bother. - -\section memory_mapping_finding_if_memory_mappable Finding out if memory is mappable - -It may happen that your allocation ends up in memory that is `HOST_VISIBLE` (available for mapping) -despite it wasn't explicitly requested. -For example, application may work on integrated graphics with unified memory (like Intel) or -allocation from video memory might have failed, so the library chose system memory as fallback. 
- -You can detect this case and map such allocation to access its memory on CPU directly, -instead of launching a transfer operation. -In order to do that: inspect `allocInfo.memoryType`, call vmaGetMemoryTypeProperties(), -and look for `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag in properties of that memory type. - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = sizeof(ConstantBuffer); -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); - -VkMemoryPropertyFlags memFlags; -vmaGetMemoryTypeProperties(allocator, allocInfo.memoryType, &memFlags); -if((memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) -{ - // Allocation ended up in mappable memory. You can map it and access it directly. - void* mappedData; - vmaMapMemory(allocator, alloc, &mappedData); - memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); - vmaUnmapMemory(allocator, alloc); -} -else -{ - // Allocation ended up in non-mappable memory. - // You need to create CPU-side buffer in VMA_MEMORY_USAGE_CPU_ONLY and make a transfer. -} -\endcode - -You can even use #VMA_ALLOCATION_CREATE_MAPPED_BIT flag while creating allocations -that are not necessarily `HOST_VISIBLE` (e.g. using #VMA_MEMORY_USAGE_GPU_ONLY). -If the allocation ends up in memory type that is `HOST_VISIBLE`, it will be persistently mapped and you can use it directly. -If not, the flag is just ignored. 
-Example: - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = sizeof(ConstantBuffer); -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); - -if(allocInfo.pMappedData != nullptr) -{ - // Allocation ended up in mappable memory. - // It's persistently mapped. You can access it directly. - memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); -} -else -{ - // Allocation ended up in non-mappable memory. - // You need to create CPU-side buffer in VMA_MEMORY_USAGE_CPU_ONLY and make a transfer. -} -\endcode - - -\page staying_within_budget Staying within budget - -When developing a graphics-intensive game or program, it is important to avoid allocating -more GPU memory than it's physically available. When the memory is over-committed, -various bad things can happen, depending on the specific GPU, graphics driver, and -operating system: - -- It may just work without any problems. -- The application may slow down because some memory blocks are moved to system RAM - and the GPU has to access them through PCI Express bus. -- A new allocation may take very long time to complete, even few seconds, and possibly - freeze entire system. -- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. -- It may even result in GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST` - returned somewhere later. - -\section staying_within_budget_querying_for_budget Querying for budget - -To query for current memory usage and available budget, use function vmaGetBudget(). 
-Returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap. - -Please note that this function returns different information and works faster than -vmaCalculateStats(). vmaGetBudget() can be called every frame or even before every -allocation, while vmaCalculateStats() is intended to be used rarely, -only to obtain statistical information, e.g. for debugging purposes. - -It is recommended to use VK_EXT_memory_budget device extension to obtain information -about the budget from Vulkan device. VMA is able to use this extension automatically. -When not enabled, the allocator behaves same way, but then it estimates current usage -and available budget based on its internal information and Vulkan memory heap sizes, -which may be less precise. In order to use this extension: - -1. Make sure extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2 - required by it are available and enable them. Please note that the first is a device - extension and the second is instance extension! -2. Use flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating #VmaAllocator object. -3. Make sure to call vmaSetCurrentFrameIndex() every frame. Budget is queried from - Vulkan inside of it to avoid overhead of querying it with every allocation. - -\section staying_within_budget_controlling_memory_usage Controlling memory usage - -There are many ways in which you can try to stay within the budget. - -First, when making new allocation requires allocating a new memory block, the library -tries not to exceed the budget automatically. If a block with default recommended size -(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even -dedicated memory for just this resource. - -If the size of the requested resource plus current memory usage is more than the -budget, by default the library still tries to create it, leaving it to the Vulkan -implementation whether the allocation succeeds or fails. 
You can change this behavior -by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is -not made if it would exceed the budget or if the budget is already exceeded. -Some other allocations become lost instead to make room for it, if the mechanism of -[lost allocations](@ref lost_allocations) is used. -If that is not possible, the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. -Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag -when creating resources that are not essential for the application (e.g. the texture -of a specific object) and not to pass it when creating critically important resources -(e.g. render targets). - -Finally, you can also use #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure -a new allocation is created only when it fits inside one of the existing memory blocks. -If it would require to allocate a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. -This also ensures that the function call is very fast because it never goes to Vulkan -to obtain a new block. - -Please note that creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount -set to more than 0 will try to allocate memory blocks without checking whether they -fit within budget. - - -\page resource_aliasing Resource aliasing (overlap) - -New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory -management, give an opportunity to alias (overlap) multiple resources in the -same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL). -It can be useful to save video memory, but it must be used with caution.
- -For example, if you know the flow of your whole render frame in advance, you -are going to use some intermediate textures or buffers only during a small range of render passes, -and you know these ranges don't overlap in time, you can bind these resources to -the same place in memory, even if they have completely different parameters (width, height, format etc.). - -![Resource aliasing (overlap)](../gfx/Aliasing.png) - -Such scenario is possible using VMA, but you need to create your images manually. -Then you need to calculate parameters of an allocation to be made using formula: - -- allocation size = max(size of each image) -- allocation alignment = max(alignment of each image) -- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image) - -Following example shows two different images bound to the same place in memory, -allocated to fit largest of them. - -\code -// A 512x512 texture to be sampled. -VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; -img1CreateInfo.imageType = VK_IMAGE_TYPE_2D; -img1CreateInfo.extent.width = 512; -img1CreateInfo.extent.height = 512; -img1CreateInfo.extent.depth = 1; -img1CreateInfo.mipLevels = 10; -img1CreateInfo.arrayLayers = 1; -img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB; -img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; -img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; -img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; -img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; - -// A full screen texture to be used as color attachment. 
-VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; -img2CreateInfo.imageType = VK_IMAGE_TYPE_2D; -img2CreateInfo.extent.width = 1920; -img2CreateInfo.extent.height = 1080; -img2CreateInfo.extent.depth = 1; -img2CreateInfo.mipLevels = 1; -img2CreateInfo.arrayLayers = 1; -img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; -img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; -img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; -img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; -img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; - -VkImage img1; -res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1); -VkImage img2; -res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2); - -VkMemoryRequirements img1MemReq; -vkGetImageMemoryRequirements(device, img1, &img1MemReq); -VkMemoryRequirements img2MemReq; -vkGetImageMemoryRequirements(device, img2, &img2MemReq); - -VkMemoryRequirements finalMemReq = {}; -finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size); -finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment); -finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits; -// Validate if(finalMemReq.memoryTypeBits != 0) - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - -VmaAllocation alloc; -res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr); - -res = vmaBindImageMemory(allocator, alloc, img1); -res = vmaBindImageMemory(allocator, alloc, img2); - -// You can use img1, img2 here, but not at the same time! - -vmaFreeMemory(allocator, alloc); -vkDestroyImage(device, img2, nullptr); -vkDestroyImage(device, img1, nullptr); -\endcode - -Remember that using resources that alias in memory requires proper synchronization. -You need to issue a memory barrier to make sure commands that use `img1` and `img2` -don't overlap on GPU timeline.
-You also need to treat a resource after aliasing as uninitialized - containing garbage data. -For example, if you use `img1` and then want to use `img2`, you need to issue -an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`. - -Additional considerations: - -- Vulkan also allows to interpret contents of memory between aliasing resources consistently in some cases. -See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ALIAS_BIT` flag. -- You can create more complex layout where different images and buffers are bound -at different offsets inside one large allocation. For example, one can imagine -a big texture used in some render passes, aliasing with a set of many small buffers -used between in some further passes. To bind a resource at non-zero offset of an allocation, -use vmaBindBufferMemory2() / vmaBindImageMemory2(). -- Before allocating memory for the resources you want to alias, check `memoryTypeBits` -returned in memory requirements of each resource to make sure the bits overlap. -Some GPUs may expose multiple memory types suitable e.g. only for buffers or -images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your -resources may be disjoint. Aliasing them is not possible in that case. - - -\page custom_memory_pools Custom memory pools - -A memory pool contains a number of `VkDeviceMemory` blocks. -The library automatically creates and manages default pool for each memory type available on the device. -Default memory pool automatically grows in size. -Size of allocated blocks is also variable and managed automatically. - -You can create custom pool and allocate memory out of it. -It can be useful if you want to: - -- Keep certain kind of allocations separate from others. -- Enforce particular, fixed size of Vulkan memory blocks. -- Limit maximum amount of Vulkan memory allocated for that pool. 
-- Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool. - -To use custom memory pools: - --# Fill VmaPoolCreateInfo structure. --# Call vmaCreatePool() to obtain #VmaPool handle. --# When making an allocation, set VmaAllocationCreateInfo::pool to this handle. - You don't need to specify any other parameters of this structure, like `usage`. - -Example: - -\code -// Create a pool that can have at most 2 blocks, 128 MiB each. -VmaPoolCreateInfo poolCreateInfo = {}; -poolCreateInfo.memoryTypeIndex = ... -poolCreateInfo.blockSize = 128ull * 1024 * 1024; -poolCreateInfo.maxBlockCount = 2; - -VmaPool pool; -vmaCreatePool(allocator, &poolCreateInfo, &pool); - -// Allocate a buffer out of it. -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = 1024; -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.pool = pool; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); -\endcode - -You have to free all allocations made from this pool before destroying it. - -\code -vmaDestroyBuffer(allocator, buf, alloc); -vmaDestroyPool(allocator, pool); -\endcode - -\section custom_memory_pools_MemTypeIndex Choosing memory type index - -When creating a pool, you must explicitly specify memory type index. -To find the one suitable for your buffers or images, you can use helper functions -vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo(). -You need to provide structures with example parameters of buffers or images -that you are going to create in that pool. - -\code -VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -exampleBufCreateInfo.size = 1024; // Whatever. 
-exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; // Change if needed. - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; // Change if needed. - -uint32_t memTypeIndex; -vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex); - -VmaPoolCreateInfo poolCreateInfo = {}; -poolCreateInfo.memoryTypeIndex = memTypeIndex; -// ... -\endcode - -When creating buffers/images allocated in that pool, provide following parameters: - -- `VkBufferCreateInfo`: Prefer to pass same parameters as above. - Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior. - Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers - or the other way around. -- VmaAllocationCreateInfo: You don't need to pass same parameters. Fill only `pool` member. - Other members are ignored anyway. - -\section linear_algorithm Linear allocation algorithm - -Each Vulkan memory block managed by this library has accompanying metadata that -keeps track of used and unused regions. By default, the metadata structure and -algorithm tries to find best place for new allocations among free regions to -optimize memory usage. This way you can allocate and free objects in any order. - -![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png) - -Sometimes there is a need to use simpler, linear allocation algorithm. You can -create custom pool that uses such algorithm by adding flag -#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating -#VmaPool object. Then an alternative metadata management is used. It always -creates new allocations after last one and doesn't reuse free regions after -allocations freed in the middle. It results in better allocation performance and -less memory consumed by metadata. 
- -![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png) - -With this one flag, you can create a custom pool that can be used in many ways: -free-at-once, stack, double stack, and ring buffer. See below for details. - -\subsection linear_algorithm_free_at_once Free-at-once - -In a pool that uses linear algorithm, you still need to free all the allocations -individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free -them in any order. New allocations are always made after last one - free space -in the middle is not reused. However, when you release all the allocation and -the pool becomes empty, allocation starts from the beginning again. This way you -can use linear algorithm to speed up creation of allocations that you are going -to release all at once. - -![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png) - -This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount -value that allows multiple memory blocks. - -\subsection linear_algorithm_stack Stack - -When you free an allocation that was created last, its space can be reused. -Thanks to this, if you always release allocations in the order opposite to their -creation (LIFO - Last In First Out), you can achieve behavior of a stack. - -![Stack](../gfx/Linear_allocator_4_stack.png) - -This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount -value that allows multiple memory blocks. - -\subsection linear_algorithm_double_stack Double stack - -The space reserved by a custom pool with linear algorithm may be used by two -stacks: - -- First, default one, growing up from offset 0. -- Second, "upper" one, growing down from the end towards lower offsets. - -To make allocation from upper stack, add flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT -to VmaAllocationCreateInfo::flags. 
- -![Double stack](../gfx/Linear_allocator_7_double_stack.png) - -Double stack is available only in pools with one memory block - -VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. - -When the two stacks' ends meet so there is not enough space between them for a -new allocation, such allocation fails with usual -`VK_ERROR_OUT_OF_DEVICE_MEMORY` error. - -\subsection linear_algorithm_ring_buffer Ring buffer - -When you free some allocations from the beginning and there is not enough free space -for a new one at the end of a pool, allocator's "cursor" wraps around to the -beginning and starts allocation there. Thanks to this, if you always release -allocations in the same order as you created them (FIFO - First In First Out), -you can achieve behavior of a ring buffer / queue. - -![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png) - -Pools with linear algorithm support [lost allocations](@ref lost_allocations) when used as ring buffer. -If there is not enough free space for a new allocation, but existing allocations -from the front of the queue can become lost, they become lost and the allocation -succeeds. - -![Ring buffer with lost allocations](../gfx/Linear_allocator_6_ring_buffer_lost.png) - -Ring buffer is available only in pools with one memory block - -VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. - -\section buddy_algorithm Buddy allocation algorithm - -There is another allocation algorithm that can be used with custom pools, called -"buddy". Its internal data structure is based on a tree of blocks, each having -size that is a power of two and a half of its parent's size. When you want to -allocate memory of certain size, a free node in the tree is located. If it's too -large, it is recursively split into two halves (called "buddies"). However, if -requested allocation size is not a power of two, the size of a tree node is -aligned up to the nearest power of two and the remaining space is wasted. 
When -two buddy nodes become free, they are merged back into one larger node. - -![Buddy allocator](../gfx/Buddy_allocator.png) - -The advantage of buddy allocation algorithm over default algorithm is faster -allocation and deallocation, as well as smaller external fragmentation. The -disadvantage is more wasted space (internal fragmentation). - -For more information, please read ["Buddy memory allocation" on Wikipedia](https://en.wikipedia.org/wiki/Buddy_memory_allocation) -or other sources that describe this concept in general. - -To use buddy allocation algorithm with a custom pool, add flag -#VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating -#VmaPool object. - -Several limitations apply to pools that use buddy algorithm: - -- It is recommended to use VmaPoolCreateInfo::blockSize that is a power of two. - Otherwise, only largest power of two smaller than the size is used for - allocations. The remaining space always stays unused. -- [Margins](@ref debugging_memory_usage_margins) and - [corruption detection](@ref debugging_memory_usage_corruption_detection) - don't work in such pools. -- [Lost allocations](@ref lost_allocations) don't work in such pools. You can - use them, but they never become lost. Support may be added in the future. -- [Defragmentation](@ref defragmentation) doesn't work with allocations made from - such pool. - -\page defragmentation Defragmentation - -Interleaved allocations and deallocations of many objects of varying size can -cause fragmentation over time, which can lead to a situation where the library is unable -to find a continuous range of free memory for a new allocation despite there is -enough free space, just scattered across many small free ranges between existing -allocations. - -To mitigate this problem, you can use defragmentation feature: -structure #VmaDefragmentationInfo2, function vmaDefragmentationBegin(), vmaDefragmentationEnd(). 
-Given set of allocations, -this function can move them to compact used memory, ensure more continuous free -space and possibly also free some `VkDeviceMemory` blocks. - -What the defragmentation does is: - -- Updates #VmaAllocation objects to point to new `VkDeviceMemory` and offset. - After allocation has been moved, its VmaAllocationInfo::deviceMemory and/or - VmaAllocationInfo::offset changes. You must query them again using - vmaGetAllocationInfo() if you need them. -- Moves actual data in memory. - -What it doesn't do, so you need to do it yourself: - -- Recreate buffers and images that were bound to allocations that were defragmented and - bind them with their new places in memory. - You must use `vkDestroyBuffer()`, `vkDestroyImage()`, - `vkCreateBuffer()`, `vkCreateImage()`, vmaBindBufferMemory(), vmaBindImageMemory() - for that purpose and NOT vmaDestroyBuffer(), - vmaDestroyImage(), vmaCreateBuffer(), vmaCreateImage(), because you don't need to - destroy or create allocation objects! -- Recreate views and update descriptors that point to these buffers and images. - -\section defragmentation_cpu Defragmenting CPU memory - -Following example demonstrates how you can run defragmentation on CPU. -Only allocations created in memory types that are `HOST_VISIBLE` can be defragmented. -Others are ignored. - -The way it works is: - -- It temporarily maps entire memory blocks when necessary. -- It moves data using `memmove()` function. - -\code -// Given following variables already initialized: -VkDevice device; -VmaAllocator allocator; -std::vector buffers; -std::vector allocations; - - -const uint32_t allocCount = (uint32_t)allocations.size(); -std::vector allocationsChanged(allocCount); - -VmaDefragmentationInfo2 defragInfo = {}; -defragInfo.allocationCount = allocCount; -defragInfo.pAllocations = allocations.data(); -defragInfo.pAllocationsChanged = allocationsChanged.data(); -defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE; // No limit. 
-defragInfo.maxCpuAllocationsToMove = UINT32_MAX; // No limit. - -VmaDefragmentationContext defragCtx; -vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &defragCtx); -vmaDefragmentationEnd(allocator, defragCtx); - -for(uint32_t i = 0; i < allocCount; ++i) -{ - if(allocationsChanged[i]) - { - // Destroy buffer that is immutably bound to memory region which is no longer valid. - vkDestroyBuffer(device, buffers[i], nullptr); - - // Create new buffer with same parameters. - VkBufferCreateInfo bufferInfo = ...; - vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - - // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - - // Bind new buffer to new memory region. Data contained in it is already moved. - VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vmaBindBufferMemory(allocator, allocations[i], buffers[i]); - } -} -\endcode - -Setting VmaDefragmentationInfo2::pAllocationsChanged is optional. -This output array tells whether particular allocation in VmaDefragmentationInfo2::pAllocations at the same index -has been modified during defragmentation. -You can pass null, but you then need to query every allocation passed to defragmentation -for new parameters using vmaGetAllocationInfo() if you might need to recreate and rebind a buffer or image associated with it. - -If you use [Custom memory pools](@ref choosing_memory_type_custom_memory_pools), -you can fill VmaDefragmentationInfo2::poolCount and VmaDefragmentationInfo2::pPools -instead of VmaDefragmentationInfo2::allocationCount and VmaDefragmentationInfo2::pAllocations -to defragment all allocations in given pools. -You cannot use VmaDefragmentationInfo2::pAllocationsChanged in that case. -You can also combine both methods. - -\section defragmentation_gpu Defragmenting GPU memory - -It is also possible to defragment allocations created in memory types that are not `HOST_VISIBLE`. 
-To do that, you need to pass a command buffer that meets requirements as described in -VmaDefragmentationInfo2::commandBuffer. The way it works is: - -- It creates temporary buffers and binds them to entire memory blocks when necessary. -- It issues `vkCmdCopyBuffer()` to passed command buffer. - -Example: - -\code -// Given following variables already initialized: -VkDevice device; -VmaAllocator allocator; -VkCommandBuffer commandBuffer; -std::vector buffers; -std::vector allocations; - - -const uint32_t allocCount = (uint32_t)allocations.size(); -std::vector allocationsChanged(allocCount); - -VkCommandBufferBeginInfo cmdBufBeginInfo = ...; -vkBeginCommandBuffer(commandBuffer, &cmdBufBeginInfo); - -VmaDefragmentationInfo2 defragInfo = {}; -defragInfo.allocationCount = allocCount; -defragInfo.pAllocations = allocations.data(); -defragInfo.pAllocationsChanged = allocationsChanged.data(); -defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE; // Notice it's "GPU" this time. -defragInfo.maxGpuAllocationsToMove = UINT32_MAX; // Notice it's "GPU" this time. -defragInfo.commandBuffer = commandBuffer; - -VmaDefragmentationContext defragCtx; -vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &defragCtx); - -vkEndCommandBuffer(commandBuffer); - -// Submit commandBuffer. -// Wait for a fence that ensures commandBuffer execution finished. - -vmaDefragmentationEnd(allocator, defragCtx); - -for(uint32_t i = 0; i < allocCount; ++i) -{ - if(allocationsChanged[i]) - { - // Destroy buffer that is immutably bound to memory region which is no longer valid. - vkDestroyBuffer(device, buffers[i], nullptr); - - // Create new buffer with same parameters. - VkBufferCreateInfo bufferInfo = ...; - vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - - // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - - // Bind new buffer to new memory region. Data contained in it is already moved. 
- VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vmaBindBufferMemory(allocator, allocations[i], buffers[i]); - } -} -\endcode - -You can combine these two methods by specifying non-zero `maxGpu*` as well as `maxCpu*` parameters. -The library automatically chooses best method to defragment each memory pool. - -You may try not to block your entire program to wait until defragmentation finishes, -but do it in the background, as long as you carefully fulfill requirements described -in function vmaDefragmentationBegin(). - -\section defragmentation_additional_notes Additional notes - -It is only legal to defragment allocations bound to: - -- buffers -- images created with `VK_IMAGE_CREATE_ALIAS_BIT`, `VK_IMAGE_TILING_LINEAR`, and - being currently in `VK_IMAGE_LAYOUT_GENERAL` or `VK_IMAGE_LAYOUT_PREINITIALIZED`. - -Defragmentation of images created with `VK_IMAGE_TILING_OPTIMAL` or in any other -layout may give undefined results. - -If you defragment allocations bound to images, new images to be bound to new -memory region after defragmentation should be created with `VK_IMAGE_LAYOUT_PREINITIALIZED` -and then transitioned to their original layout from before defragmentation if -needed using an image memory barrier. - -While using defragmentation, you may experience validation layer warnings, which you just need to ignore. -See [Validation layer warnings](@ref general_considerations_validation_layer_warnings). - -Please don't expect memory to be fully compacted after defragmentation. -Algorithms inside are based on some heuristics that try to maximize number of Vulkan -memory blocks to make totally empty to release them, as well as to maximize continuous -empty space inside remaining blocks, while minimizing the number and size of allocations that -need to be moved. Some fragmentation may still remain - this is normal.
- -\section defragmentation_custom_algorithm Writing custom defragmentation algorithm - -If you want to implement your own, custom defragmentation algorithm, -there is infrastructure prepared for that, -but it is not exposed through the library API - you need to hack its source code. -Here are steps needed to do this: - --# Main thing you need to do is to define your own class derived from base abstract - class `VmaDefragmentationAlgorithm` and implement your version of its pure virtual methods. - See definition and comments of this class for details. --# Your code needs to interact with device memory block metadata. - If you need more access to its data than it's provided by its public interface, - declare your new class as a friend class e.g. in class `VmaBlockMetadata_Generic`. --# If you want to create a flag that would enable your algorithm or pass some additional - flags to configure it, add them to `VmaDefragmentationFlagBits` and use them in - VmaDefragmentationInfo2::flags. --# Modify function `VmaBlockVectorDefragmentationContext::Begin` to create object - of your new class whenever needed. - - -\page lost_allocations Lost allocations - -If your game oversubscribes video memory, it may work OK in previous-generation -graphics APIs (DirectX 9, 10, 11, OpenGL) because resources are automatically -paged to system RAM. In Vulkan you can't do it because when you run out of -memory, an allocation just fails. If you have more data (e.g. textures) than can -fit into VRAM and you don't need it all at once, you may want to upload them to -GPU on demand and "push out" ones that are not used for a long time to make room -for the new ones, effectively using VRAM (or a certain memory pool) as a form of -cache. Vulkan Memory Allocator can help you with that by supporting a concept of -"lost allocations". - -To create an allocation that can become lost, include #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT -flag in VmaAllocationCreateInfo::flags.
Before using a buffer or image bound to -such allocation in every new frame, you need to query it if it's not lost. -To check it, call vmaTouchAllocation(). -If the allocation is lost, you should not use it or buffer/image bound to it. -You mustn't forget to destroy this allocation and this buffer/image. -vmaGetAllocationInfo() can also be used for checking status of the allocation. -Allocation is lost when returned VmaAllocationInfo::deviceMemory == `VK_NULL_HANDLE`. - -To create an allocation that can make some other allocations lost to make room -for it, use #VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT flag. You will -usually use both flags #VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT and -#VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT at the same time. - -Warning! Current implementation uses quite naive, brute force algorithm, -which can make allocation calls that use #VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT -flag quite slow. A new, more optimal algorithm and data structure to speed this -up is planned for the future. - -Q: When interleaving creation of new allocations with usage of existing ones, -how do you make sure that an allocation won't become lost while it's used in the -current frame? - -It is ensured because vmaTouchAllocation() / vmaGetAllocationInfo() not only returns allocation -status/parameters and checks whether it's not lost, but when it's not, it also -atomically marks it as used in the current frame, which makes it impossible to -become lost in that frame. It uses lockless algorithm, so it works fast and -doesn't involve locking any internal mutex. - -Q: What if my allocation may still be in use by the GPU when it's rendering a -previous frame while I already submit new frame on the CPU? 
- -You can make sure that allocations "touched" by vmaTouchAllocation() / vmaGetAllocationInfo() will not -become lost for a number of additional frames back from the current one by -specifying this number as VmaAllocatorCreateInfo::frameInUseCount (for default -memory pool) and VmaPoolCreateInfo::frameInUseCount (for custom pool). - -Q: How do you inform the library when new frame starts? - -You need to call function vmaSetCurrentFrameIndex(). - -Example code: - -\code -struct MyBuffer -{ - VkBuffer m_Buf = nullptr; - VmaAllocation m_Alloc = nullptr; - - // Called when the buffer is really needed in the current frame. - void EnsureBuffer(); -}; - -void MyBuffer::EnsureBuffer() -{ - // Buffer has been created. - if(m_Buf != VK_NULL_HANDLE) - { - // Check if its allocation is not lost + mark it as used in current frame. - if(vmaTouchAllocation(allocator, m_Alloc)) - { - // It's all OK - safe to use m_Buf. - return; - } - } - - // Buffer not yet exists or lost - destroy and recreate it. - - vmaDestroyBuffer(allocator, m_Buf, m_Alloc); - - VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - bufCreateInfo.size = 1024; - bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - - VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT | - VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT; - - vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &m_Buf, &m_Alloc, nullptr); -} -\endcode - -When using lost allocations, you may see some Vulkan validation layer warnings -about overlapping regions of memory bound to different kinds of buffers and -images. This is still valid as long as you implement proper handling of lost -allocations (like in the example above) and don't use them. 
- -You can create an allocation that is already in lost state from the beginning using function -vmaCreateLostAllocation(). It may be useful if you need a "dummy" allocation that is not null. - -You can call function vmaMakePoolAllocationsLost() to set all eligible allocations -in a specified custom pool to lost state. -Allocations that have been "touched" in current frame or VmaPoolCreateInfo::frameInUseCount frames back -cannot become lost. - -Q: Can I touch allocation that cannot become lost? - -Yes, although it has no visible effect. -Calls to vmaGetAllocationInfo() and vmaTouchAllocation() update last use frame index -also for allocations that cannot become lost, but the only way to observe it is to dump -internal allocator state using vmaBuildStatsString(). -You can use this feature for debugging purposes to explicitly mark allocations that you use -in current frame and then analyze JSON dump to see for how long each allocation stays unused. - - -\page statistics Statistics - -This library contains functions that return information about its internal state, -especially the amount of memory allocated from Vulkan. -Please keep in mind that these functions need to traverse all internal data structures -to gather this information, so they may be quite time-consuming. -Don't call them too often. - -\section statistics_numeric_statistics Numeric statistics - -You can query for overall statistics of the allocator using function vmaCalculateStats(). -Information is returned using structure #VmaStats. -It contains #VmaStatInfo - number of allocated blocks, number of allocations -(occupied ranges in these blocks), number of unused (free) ranges in these blocks, -number of bytes used and unused (but still allocated from Vulkan) and other information. -They are summed across memory heaps, memory types and total for whole allocator. - -You can query for statistics of a custom pool using function vmaGetPoolStats(). -Information is returned using structure #VmaPoolStats.
- -You can query for information about specific allocation using function vmaGetAllocationInfo(). -It fills structure #VmaAllocationInfo. - -\section statistics_json_dump JSON dump - -You can dump internal state of the allocator to a string in JSON format using function vmaBuildStatsString(). -The result is guaranteed to be correct JSON. -It uses ANSI encoding. -Any strings provided by user (see [Allocation names](@ref allocation_names)) -are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding, -this JSON string can be treated as using this encoding. -It must be freed using function vmaFreeStatsString(). - -The format of this JSON string is not part of official documentation of the library, -but it will not change in backward-incompatible way without increasing library major version number -and appropriate mention in changelog. - -The JSON string contains all the data that can be obtained using vmaCalculateStats(). -It can also contain detailed map of allocated memory blocks and their regions - -free and occupied by allocations. -This allows you e.g. to visualize the memory or assess fragmentation. - - -\page allocation_annotation Allocation names and user data - -\section allocation_user_data Allocation user data - -You can annotate allocations with your own information, e.g. for debugging purposes. -To do that, fill VmaAllocationCreateInfo::pUserData field when creating -an allocation. It's an opaque `void*` pointer. You can use it e.g. as a pointer, -some handle, index, key, ordinal number or any other value that would associate -the allocation with your custom metadata. - -\code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -// Fill bufferInfo...
- -MyBufferMetadata* pMetadata = CreateBufferMetadata(); - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.pUserData = pMetadata; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocCreateInfo, &buffer, &allocation, nullptr); -\endcode - -The pointer may be later retrieved as VmaAllocationInfo::pUserData: - -\code -VmaAllocationInfo allocInfo; -vmaGetAllocationInfo(allocator, allocation, &allocInfo); -MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; -\endcode - -It can also be changed using function vmaSetAllocationUserData(). - -Values of (non-zero) allocations' `pUserData` are printed in JSON report created by -vmaBuildStatsString(), in hexadecimal form. - -\section allocation_names Allocation names - -There is alternative mode available where `pUserData` pointer is used to point to -a null-terminated string, giving a name to the allocation. To use this mode, -set #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT flag in VmaAllocationCreateInfo::flags. -Then `pUserData` passed as VmaAllocationCreateInfo::pUserData or argument to -vmaSetAllocationUserData() must be either null or pointer to a null-terminated string. -The library creates internal copy of the string, so the pointer you pass doesn't need -to be valid for whole lifetime of the allocation. You can free it after the call. - -\code -VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; -// Fill imageInfo... 
- -std::string imageName = "Texture: "; -imageName += fileName; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; -allocCreateInfo.pUserData = imageName.c_str(); - -VkImage image; -VmaAllocation allocation; -vmaCreateImage(allocator, &imageInfo, &allocCreateInfo, &image, &allocation, nullptr); -\endcode - -The value of `pUserData` pointer of the allocation will be different than the one -you passed when setting allocation's name - pointing to a buffer managed -internally that holds copy of the string. - -\code -VmaAllocationInfo allocInfo; -vmaGetAllocationInfo(allocator, allocation, &allocInfo); -const char* imageName = (const char*)allocInfo.pUserData; -printf("Image name: %s\n", imageName); -\endcode - -That string is also printed in JSON report created by vmaBuildStatsString(). - -\note Passing string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. -You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. - - -\page debugging_memory_usage Debugging incorrect memory usage - -If you suspect a bug with memory usage, like usage of uninitialized memory or -memory being overwritten out of bounds of an allocation, -you can use debug features of this library to verify this. - -\section debugging_memory_usage_initialization Memory initialization - -If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used, -you can enable automatic memory initialization to verify this. -To do it, define macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1. - -\code -#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1 -#include "vk_mem_alloc.h" -\endcode - -It makes memory of all new allocations initialized to bit pattern `0xDCDCDCDC`. 
-Before an allocation is destroyed, its memory is filled with bit pattern `0xEFEFEFEF`. -Memory is automatically mapped and unmapped if necessary. - -If you find these values while debugging your program, good chances are that you incorrectly -read Vulkan memory that is allocated but not initialized, or already freed, respectively. - -Memory initialization works only with memory types that are `HOST_VISIBLE`. -It works also with dedicated allocations. -It doesn't work with allocations created with #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag, -as they cannot be mapped. - -\section debugging_memory_usage_margins Margins - -By default, allocations are laid out in memory blocks next to each other if possible -(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`). - -![Allocations without margin](../gfx/Margins_1.png) - -Define macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce specified -number of bytes as a margin before and after every allocation. - -\code -#define VMA_DEBUG_MARGIN 16 -#include "vk_mem_alloc.h" -\endcode - -![Allocations with margin](../gfx/Margins_2.png) - -If your bug goes away after enabling margins, it means it may be caused by memory -being overwritten outside of allocation boundaries. It is not 100% certain though. -Change in application behavior may also be caused by different order and distribution -of allocations across memory blocks after margins are applied. - -The margin is applied also before first and after last allocation in a block. -It may occur only once between two adjacent allocations. - -Margins work with all types of memory. - -Margin is applied only to allocations made out of memory blocks and not to dedicated -allocations, which have their own memory block of specific size. -It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag -or those automatically decided to put into dedicated allocations, e.g. 
due to its -large size or recommended by VK_KHR_dedicated_allocation extension. -Margins are also not active in custom pools created with #VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT flag. - -Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. - -Note that enabling margins increases memory usage and fragmentation. - -\section debugging_memory_usage_corruption_detection Corruption detection - -You can additionally define macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation -of contents of the margins. - -\code -#define VMA_DEBUG_MARGIN 16 -#define VMA_DEBUG_DETECT_CORRUPTION 1 -#include "vk_mem_alloc.h" -\endcode - -When this feature is enabled, number of bytes specified as `VMA_DEBUG_MARGIN` -(it must be a multiple of 4) before and after every allocation is filled with a magic number. -This idea is also known as "canary". -Memory is automatically mapped and unmapped if necessary. - -This number is validated automatically when the allocation is destroyed. -If it's not equal to the expected value, `VMA_ASSERT()` is executed. -It clearly means that either CPU or GPU has overwritten the memory outside of boundaries of the allocation, -which indicates a serious bug. - -You can also explicitly request checking margins of all allocations in all memory blocks -that belong to specified memory types by using function vmaCheckCorruption(), -or in memory blocks that belong to specified custom pool, by using function -vmaCheckPoolCorruption(). - -Margin validation (corruption detection) works only for memory types that are -`HOST_VISIBLE` and `HOST_COHERENT`. - - -\page record_and_replay Record and replay - -\section record_and_replay_introduction Introduction - -While using the library, sequence of calls to its functions together with their -parameters can be recorded to a file and later replayed using standalone player -application.
It can be useful to: - -- Test correctness - check if same sequence of calls will not cause crash or - failures on a target platform. -- Gather statistics - see number of allocations, peak memory usage, number of - calls etc. -- Benchmark performance - see how much time it takes to replay the whole - sequence. - -\section record_and_replay_usage Usage - -Recording functionality is disabled by default. -To enable it, define following macro before every include of this library: - -\code -#define VMA_RECORDING_ENABLED 1 -\endcode - -To record sequence of calls to a file: Fill in -VmaAllocatorCreateInfo::pRecordSettings member while creating #VmaAllocator -object. File is opened and written during whole lifetime of the allocator. - -To replay file: Use VmaReplay - standalone command-line program. -Precompiled binary can be found in "bin" directory. -Its source can be found in "src/VmaReplay" directory. -Its project is generated by Premake. -Command line syntax is printed when the program is launched without parameters. -Basic usage: - - VmaReplay.exe MyRecording.csv - -Documentation of file format can be found in file: "docs/Recording file format.md". -It's a human-readable, text file in CSV format (Comma Separated Values). - -\section record_and_replay_additional_considerations Additional considerations - -- Replaying file that was recorded on a different GPU (with different parameters - like `bufferImageGranularity`, `nonCoherentAtomSize`, and especially different - set of memory heaps and types) may give different performance and memory usage - results, as well as issue some warnings and errors. -- Current implementation of recording in VMA, as well as VmaReplay application, is - coded and tested only on Windows. Inclusion of recording code is driven by - `VMA_RECORDING_ENABLED` macro. Support for other platforms should be easy to - add. Contributions are welcomed. - - -\page usage_patterns Recommended usage patterns - -See also slides from talk: -[Sawicki, Adam. 
Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) - - -\section usage_patterns_common_mistakes Common mistakes - -Use of CPU_TO_GPU instead of CPU_ONLY memory - -#VMA_MEMORY_USAGE_CPU_TO_GPU is recommended only for resources that will be -mapped and written by the CPU, as well as read directly by the GPU - like some -buffers or textures updated every frame (dynamic). If you create a staging copy -of a resource to be written by CPU and then used as a source of transfer to -another resource placed in the GPU memory, that staging resource should be -created with #VMA_MEMORY_USAGE_CPU_ONLY. Please read the descriptions of these -enums carefully for details. - -Unnecessary use of custom pools - -\ref custom_memory_pools may be useful for special purposes - when you want to -keep certain type of resources separate e.g. to reserve minimum amount of memory -for them, limit maximum amount of memory they can occupy, or make some of them -push out the other through the mechanism of \ref lost_allocations. For most -resources this is not needed and so it is not recommended to create #VmaPool -objects and allocations out of them. Allocating from the default pool is sufficient. - -\section usage_patterns_simple Simple patterns - -\subsection usage_patterns_simple_render_targets Render targets - -When: -Any resources that you frequently write and read on GPU, -e.g. images used as color attachments (aka "render targets"), depth-stencil attachments, -images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)"). - -What to do: -Create them in video memory that is fastest to access from GPU using -#VMA_MEMORY_USAGE_GPU_ONLY. 
- -Consider using [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension -and/or manually creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, -especially if they are large or if you plan to destroy and recreate them e.g. when -display resolution changes. -Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. - -\subsection usage_patterns_simple_immutable_resources Immutable resources - -When: -Any resources that you fill on CPU only once (aka "immutable") or infrequently -and then read frequently on GPU, -e.g. textures, vertex and index buffers, constant buffers that don't change often. - -What to do: -Create them in video memory that is fastest to access from GPU using -#VMA_MEMORY_USAGE_GPU_ONLY. - -To initialize content of such resource, create a CPU-side (aka "staging") copy of it -in system memory - #VMA_MEMORY_USAGE_CPU_ONLY, map it, fill it, -and submit a transfer from it to the GPU resource. -You can keep the staging copy if you need it for another upload transfer in the future. -If you don't, you can destroy it or reuse this buffer for uploading different resource -after the transfer finishes. - -Prefer to create just buffers in system memory rather than images, even for uploading textures. -Use `vkCmdCopyBufferToImage()`. -Don't use images with `VK_IMAGE_TILING_LINEAR`. - -\subsection usage_patterns_dynamic_resources Dynamic resources - -When: -Any resources that change frequently (aka "dynamic"), e.g. every frame or every draw call, -written on CPU, read on GPU. - -What to do: -Create them using #VMA_MEMORY_USAGE_CPU_TO_GPU. -You can map it and write to it directly on CPU, as well as read from it on GPU. - -This is a more complex situation. Different solutions are possible, -and the best one depends on specific GPU type, but you can use this simple approach for the start. -Prefer to write to such resource sequentially (e.g. using `memcpy`).
-Don't perform random access or any reads from it on CPU, as it may be very slow. -Also note that textures written directly from the host through a mapped pointer need to be in LINEAR not OPTIMAL layout. - -\subsection usage_patterns_readback Readback - -When: -Resources that contain data written by GPU that you want to read back on CPU, -e.g. results of some computations. - -What to do: -Create them using #VMA_MEMORY_USAGE_GPU_TO_CPU. -You can write to them directly on GPU, as well as map and read them on CPU. - -\section usage_patterns_advanced Advanced patterns - -\subsection usage_patterns_integrated_graphics Detecting integrated graphics - -You can support integrated graphics (like Intel HD Graphics, AMD APU) better -by detecting it in Vulkan. -To do it, call `vkGetPhysicalDeviceProperties()`, inspect -`VkPhysicalDeviceProperties::deviceType` and look for `VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU`. -When you find it, you can assume that memory is unified and all memory types are comparably fast -to access from GPU, regardless of `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. - -You can then sum up sizes of all available memory heaps and treat them as useful for -your GPU resources, instead of only `DEVICE_LOCAL` ones. -You can also prefer to create your resources in memory types that are `HOST_VISIBLE` to map them -directly instead of submitting explicit transfer (see below). - -\subsection usage_patterns_direct_vs_transfer Direct access versus transfer - -For resources that you frequently write on CPU and read on GPU, many solutions are possible: - --# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY, - second copy in system memory using #VMA_MEMORY_USAGE_CPU_ONLY and submit explicit transfer each time. --# Create just a single copy using #VMA_MEMORY_USAGE_CPU_TO_GPU, map it and fill it on CPU, - read it directly on GPU. --# Create just a single copy using #VMA_MEMORY_USAGE_CPU_ONLY, map it and fill it on CPU, - read it directly on GPU. 
- -Which solution is the most efficient depends on your resource and especially on the GPU. -It is best to measure it and then make the decision. -Some general recommendations: - -- On integrated graphics use (2) or (3) to avoid unnecessary time and memory overhead - related to using a second copy and making transfer. -- For small resources (e.g. constant buffers) use (2). - Discrete AMD cards have special 256 MiB pool of video memory that is directly mappable. - Even if the resource ends up in system memory, its data may be cached on GPU after first - fetch over PCIe bus. -- For larger resources (e.g. textures), decide between (1) and (2). - You may want to differentiate NVIDIA and AMD, e.g. by looking for memory type that is - both `DEVICE_LOCAL` and `HOST_VISIBLE`. When you find it, use (2), otherwise use (1). - -Similarly, for resources that you frequently write on GPU and read on CPU, multiple -solutions are possible: - --# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY, - second copy in system memory using #VMA_MEMORY_USAGE_GPU_TO_CPU and submit explicit transfer each time. --# Create just single copy using #VMA_MEMORY_USAGE_GPU_TO_CPU, write to it directly on GPU, - map it and read it on CPU. - -You should take some measurements to decide which option is faster in case of your specific -resource. - -Note that textures accessed directly from the host through a mapped pointer need to be in LINEAR layout, -which may slow down their usage on the device. -Textures accessed only by the device and transfer operations can use OPTIMAL layout. - -If you don't want to specialize your code for specific types of GPUs, you can still make -a simple optimization for cases when your resource ends up in mappable memory to use it -directly in this case instead of creating CPU-side staging copy. -For details see [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable).
- - -\page configuration Configuration - -Please check "CONFIGURATION SECTION" in the code to find macros that you can define -before each include of this file or change directly in this file to provide -your own implementation of basic facilities like assert, `min()` and `max()` functions, -mutex, atomic etc. -The library uses its own implementation of containers by default, but you can switch to using -STL containers instead. - -For example, define `VMA_ASSERT(expr)` before including the library to provide -custom implementation of the assertion, compatible with your project. -By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration -and empty otherwise. - -\section config_Vulkan_functions Pointers to Vulkan functions - -There are multiple ways to import pointers to Vulkan functions in the library. -In the simplest case you don't need to do anything. -If the compilation or linking of your program or the initialization of the #VmaAllocator -doesn't work for you, you can try to reconfigure it. - -First, the allocator tries to fetch pointers to Vulkan functions linked statically, -like this: - -\code -m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; -\endcode - -If you want to disable this feature, set configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`. - -Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions. -You can fetch them e.g. using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or -by using a helper library like [volk](https://github.com/zeux/volk). - -Third, VMA tries to fetch remaining pointers that are still null by calling -`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. -If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. 
- -Finally, all the function pointers required by the library (considering selected -Vulkan version and enabled extensions) are checked with `VMA_ASSERT` if they are not null. - - -\section custom_memory_allocator Custom host memory allocator - -If you use custom allocator for CPU memory rather than default operator `new` -and `delete` from C++, you can make this library using your allocator as well -by filling optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These -functions will be passed to Vulkan, as well as used by the library itself to -make any CPU-side allocations. - -\section allocation_callbacks Device memory allocation callbacks - -The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally. -You can setup callbacks to be informed about these calls, e.g. for the purpose -of gathering some statistics. To do it, fill optional member -VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. - -\section heap_memory_limit Device heap memory limit - -When device memory of certain heap runs out of free space, new allocations may -fail (returning error code) or they may succeed, silently pushing some existing -memory blocks from GPU VRAM to system RAM (which degrades performance). This -behavior is implementation-dependent - it depends on GPU vendor and graphics -driver. - -On AMD cards it can be controlled while creating Vulkan device object by using -VK_AMD_memory_overallocation_behavior extension, if available. - -Alternatively, if you want to test how your program behaves with limited amount of Vulkan device -memory available without switching your graphics card to one that really has -smaller VRAM, you can use a feature of this library intended for this purpose. -To do it, fill optional member VmaAllocatorCreateInfo::pHeapSizeLimit. - - - -\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation - -VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve -performance on some GPUs. 
It augments Vulkan API with possibility to query -driver whether it prefers particular buffer or image to have its own, dedicated -allocation (separate `VkDeviceMemory` block) for better efficiency - to be able -to do some internal optimizations. - -The extension is supported by this library. It will be used automatically when -enabled. To enable it: - -1 . When creating Vulkan device, check if following 2 device extensions are -supported (call `vkEnumerateDeviceExtensionProperties()`). -If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`). - -- VK_KHR_get_memory_requirements2 -- VK_KHR_dedicated_allocation - -If you enabled these extensions: - -2 . Use #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating -your #VmaAllocator to inform the library that you enabled required extensions -and you want the library to use them. - -\code -allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; - -vmaCreateAllocator(&allocatorInfo, &allocator); -\endcode - -That's all. The extension will be automatically used whenever you create a -buffer using vmaCreateBuffer() or image using vmaCreateImage(). - -When using the extension together with Vulkan Validation Layer, you will receive -warnings like this: - - vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer. - -It is OK, you should just ignore it. It happens because you use function -`vkGetBufferMemoryRequirements2KHR()` instead of standard -`vkGetBufferMemoryRequirements()`, while the validation layer seems to be -unaware of it.
- -To learn more about this extension, see: - -- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap44.html#VK_KHR_dedicated_allocation) -- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5) - - - -\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory - -VK_AMD_device_coherent_memory is a device extension that enables access to -additional memory types with `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and -`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flag. It is useful mostly for -allocation of buffers intended for writing "breadcrumb markers" in between passes -or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases. - -When the extension is available but has not been enabled, Vulkan physical device -still exposes those memory types, but their usage is forbidden. VMA automatically -takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt -to allocate memory of such type is made. - -If you want to use this extension in connection with VMA, follow these steps: - -\section vk_amd_device_coherent_memory_initialization Initialization - -1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. -Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory". - -2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. -Attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned. -Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true. - -3) While creating device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory" -to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. 
- -4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. -Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. -Enable this device feature - attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to -`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`. - -5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you -have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT -to VmaAllocatorCreateInfo::flags. - -\section vk_amd_device_coherent_memory_usage Usage - -After following steps described above, you can create VMA allocations and custom pools -out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible -devices. There are multiple ways to do it, for example: - -- You can request or prefer to allocate out of such memory types by adding - `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags - or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with - other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage. -- If you manually found memory type index to use for this purpose, force allocation - from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`. - -\section vk_amd_device_coherent_memory_more_information More information - -To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap44.html#VK_AMD_device_coherent_memory) - -Example use of this extension can be found in the code of the sample and test suite -accompanying this library. 
- - -\page enabling_buffer_device_address Enabling buffer device address - -Device extension VK_KHR_buffer_device_address -allows fetching a raw GPU pointer to a buffer and passing it for usage in shader code. -It is promoted to core Vulkan 1.2. - -If you want to use this feature in connection with VMA, follow these steps: - -\section enabling_buffer_device_address_initialization Initialization - -1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device. -Check if the extension is supported - if returned array of `VkExtensionProperties` contains -"VK_KHR_buffer_device_address". - -2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. -Attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned. -Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures*::bufferDeviceAddress` is true. - -3) (For Vulkan version < 1.2) While creating device with `vkCreateDevice`, enable this extension - add -"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. - -4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. -Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. -Enable this device feature - attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to -`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`. - -5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you -have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT -to VmaAllocatorCreateInfo::flags. - -\section enabling_buffer_device_address_usage Usage - -After following steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA.
-The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to -allocated memory blocks wherever it might be needed. - -Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`. -The second part of this functionality related to "capture and replay" is not supported, -as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage. - -\section enabling_buffer_device_address_more_information More information - -To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address) - -Example use of this extension can be found in the code of the sample and test suite -accompanying this library. - -\page general_considerations General considerations - -\section general_considerations_thread_safety Thread safety - -- The library has no global state, so separate #VmaAllocator objects can be used - independently. - There should be no need to create multiple such objects though - one per `VkDevice` is enough. -- By default, all calls to functions that take #VmaAllocator as first parameter - are safe to call from multiple threads simultaneously because they are - synchronized internally when needed. -- When the allocator is created with #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT - flag, calls to functions that take such #VmaAllocator object must be - synchronized externally. -- Access to a #VmaAllocation object must be externally synchronized. For example, - you must not call vmaGetAllocationInfo() and vmaMapMemory() from different - threads at the same time if you pass the same #VmaAllocation object to these - functions. - -\section general_considerations_validation_layer_warnings Validation layer warnings - -When using this library, you can meet following types of warnings issued by -Vulkan validation layer. 
They don't necessarily indicate a bug, so you may need -to just ignore them. - -- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.* - - It happens when VK_KHR_dedicated_allocation extension is enabled. - `vkGetBufferMemoryRequirements2KHR` function is used instead, while validation layer seems to be unaware of it. -- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.* - - It happens when you map a buffer or image, because the library maps entire - `VkDeviceMemory` block, where different types of images and buffers may end - up together, especially on GPUs with unified memory like Intel. -- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.* - - It happens when you use lost allocations, and a new image or buffer is - created in place of an existing object that bacame lost. - - It may happen also when you use [defragmentation](@ref defragmentation). - -\section general_considerations_allocation_algorithm Allocation algorithm - -The library uses following algorithm for allocation, in order: - --# Try to find free range of memory in existing blocks. --# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. --# If failed, try to create such block with size/2, size/4, size/8. --# If failed and #VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT flag was - specified, try to find space in existing blocks, possilby making some other - allocations lost. --# If failed, try to allocate separate `VkDeviceMemory` for this allocation, - just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. --# If failed, choose other memory type that meets the requirements specified in - VmaAllocationCreateInfo and go to point 1. --# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. 
- -\section general_considerations_features_not_supported Features not supported - -Features deliberately excluded from the scope of this library: - -- Data transfer. Uploading (straming) and downloading data of buffers and images - between CPU and GPU memory and related synchronization is responsibility of the user. - Defining some "texture" object that would automatically stream its data from a - staging copy in CPU memory to GPU memory would rather be a feature of another, - higher-level library implemented on top of VMA. -- Allocations for imported/exported external memory. They tend to require - explicit memory type index and dedicated allocation anyway, so they don't - interact with main features of this library. Such special purpose allocations - should be made manually, using `vkCreateBuffer()` and `vkAllocateMemory()`. -- Sub-allocation of parts of one large buffer. Although recommended as a good practice, - it is the user's responsibility to implement such logic on top of VMA. -- Recreation of buffers and images. Although the library has functions for - buffer and image creation (vmaCreateBuffer(), vmaCreateImage()), you need to - recreate these objects yourself after defragmentation. That's because the big - structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in - #VmaAllocation object. -- Handling CPU memory allocation failures. When dynamically creating small C++ - objects in CPU memory (not Vulkan memory), allocation failures are not checked - and handled gracefully, because that would complicate code significantly and - is usually not needed in desktop PC applications anyway. - Success of an allocation is just checked with an assert. -- Code free of any compiler warnings. Maintaining the library to compile and - work correctly on so many different platforms is hard enough. Being free of - any warnings, on any version of any compiler, is simply not feasible. -- This is a C++ library with C interface. 
- Bindings or ports to any other programming languages are welcomed as external projects and - are not going to be included into this repository. +\defgroup group_stats Statistics +\brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format. +See documentation chapter: \ref statistics. */ + #ifdef __cplusplus extern "C" { #endif -/* -Define this macro to 0/1 to disable/enable support for recording functionality, -available through VmaAllocatorCreateInfo::pRecordSettings. -*/ -#ifndef VMA_RECORDING_ENABLED - #define VMA_RECORDING_ENABLED 0 -#endif +#include <vulkan/vulkan.h> -#if !defined(NOMINMAX) && defined(VMA_IMPLEMENTATION) - #define NOMINMAX // For windows.h +#if !defined(VMA_VULKAN_VERSION) + #if defined(VK_VERSION_1_3) + #define VMA_VULKAN_VERSION 1003000 + #elif defined(VK_VERSION_1_2) + #define VMA_VULKAN_VERSION 1002000 + #elif defined(VK_VERSION_1_1) + #define VMA_VULKAN_VERSION 1001000 + #else + #define VMA_VULKAN_VERSION 1000000 + #endif #endif #if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS @@ -2064,23 +170,6 @@ available through VmaAllocatorCreateInfo::pRecordSettings. #endif // #if VMA_VULKAN_VERSION >= 1001000 #endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES -#ifndef VULKAN_H_ - #include <vulkan/vulkan.h> -#endif - -// Define this macro to declare maximum supported Vulkan version in format AAABBBCCC, -// where AAA = major, BBB = minor, CCC = patch. -// If you want to use version > 1.0, it still needs to be enabled via VmaAllocatorCreateInfo::vulkanApiVersion.
-#if !defined(VMA_VULKAN_VERSION) - #if defined(VK_VERSION_1_2) - #define VMA_VULKAN_VERSION 1002000 - #elif defined(VK_VERSION_1_1) - #define VMA_VULKAN_VERSION 1001000 - #else - #define VMA_VULKAN_VERSION 1000000 - #endif -#endif - #if !defined(VMA_DEDICATED_ALLOCATION) #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation #define VMA_DEDICATED_ALLOCATION 1 @@ -2123,6 +212,34 @@ available through VmaAllocatorCreateInfo::pRecordSettings. #endif #endif +// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE4) + #if VK_KHR_maintenance4 + #define VMA_KHR_MAINTENANCE4 1 + #else + #define VMA_KHR_MAINTENANCE4 0 + #endif +#endif + +// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE5) + #if VK_KHR_maintenance5 + #define VMA_KHR_MAINTENANCE5 1 + #else + #define VMA_KHR_MAINTENANCE5 0 + #endif +#endif + + +// Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY) + #if VK_KHR_external_memory + #define VMA_EXTERNAL_MEMORY 1 + #else + #define VMA_EXTERNAL_MEMORY 0 + #endif +#endif + // Define these macros to decorate all public functions with additional code, // before and after returned type, appropriately. This may be useful for // exporting the functions when compiling VMA as a separate library. Example: @@ -2135,6 +252,12 @@ available through VmaAllocatorCreateInfo::pRecordSettings. #define VMA_CALL_POST #endif +// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan +// structure that will be extended via the pNext chain. +#ifndef VMA_EXTENDS_VK_STRUCT + #define VMA_EXTENDS_VK_STRUCT(vkStruct) +#endif + // Define this macro to decorate pointers with an attribute specifying the // length of the array they point to if they are not null. // @@ -2188,50 +311,29 @@ available through VmaAllocatorCreateInfo::pRecordSettings. 
#endif #endif -/** \struct VmaAllocator -\brief Represents main object of this library initialized. +#ifndef VMA_STATS_STRING_ENABLED + #define VMA_STATS_STRING_ENABLED 1 +#endif -Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it. -Call function vmaDestroyAllocator() to destroy it. +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// INTERFACE +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// -It is recommended to create just one object of this type per `VkDevice` object, -right after Vulkan is initialized and keep it alive until before Vulkan device is destroyed. +// Sections for managing code placement in file, only for development purposes e.g. for convenient folding inside an IDE. +#ifndef _VMA_ENUM_DECLARATIONS + +/** +\addtogroup group_init +@{ */ -VK_DEFINE_HANDLE(VmaAllocator) - -/// Callback function called after successful vkAllocateMemory. -typedef void (VKAPI_PTR *PFN_vmaAllocateDeviceMemoryFunction)( - VmaAllocator VMA_NOT_NULL allocator, - uint32_t memoryType, - VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, - VkDeviceSize size, - void* VMA_NULLABLE pUserData); -/// Callback function called before vkFreeMemory. -typedef void (VKAPI_PTR *PFN_vmaFreeDeviceMemoryFunction)( - VmaAllocator VMA_NOT_NULL allocator, - uint32_t memoryType, - VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, - VkDeviceSize size, - void* VMA_NULLABLE pUserData); - -/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. - -Provided for informative purpose, e.g. to gather statistics about number of -allocations or total amount of memory allocated in Vulkan. - -Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. 
-*/ -typedef struct VmaDeviceMemoryCallbacks { - /// Optional, can be null. - PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; - /// Optional, can be null. - PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; - /// Optional, can be null. - void* VMA_NULLABLE pUserData; -} VmaDeviceMemoryCallbacks; /// Flags for created #VmaAllocator. -typedef enum VmaAllocatorCreateFlagBits { +typedef enum VmaAllocatorCreateFlagBits +{ /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you. Using this flag may increase performance because internal mutexes are not used. @@ -2240,9 +342,9 @@ typedef enum VmaAllocatorCreateFlagBits { /** \brief Enables usage of VK_KHR_dedicated_allocation extension. The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. - When it's `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. - Using this extenion will automatically allocate dedicated blocks of memory for + Using this extension will automatically allocate dedicated blocks of memory for some buffers and images instead of suballocating place for them out of bigger memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag) when it is recommended by the driver. It may improve performance on some @@ -2266,7 +368,7 @@ typedef enum VmaAllocatorCreateFlagBits { Enables usage of VK_KHR_bind_memory2 extension. The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. - When it's `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. 
You may set this flag only if you found out that this device extension is supported, you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, @@ -2342,16 +444,556 @@ typedef enum VmaAllocatorCreateFlagBits { For more details, see the documentation of the VK_EXT_memory_priority extension. */ VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, + /** + Enables usage of VK_KHR_maintenance4 extension in the library. + + You may set this flag only if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080, + /** + Enables usage of VK_KHR_maintenance5 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. typedef VkFlags VmaAllocatorCreateFlags; +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/// \brief Intended usage of the allocated memory. +typedef enum VmaMemoryUsage +{ + /** No intended memory usage specified. + Use other members of VmaAllocationCreateInfo to specify your requirements. + */ + VMA_MEMORY_USAGE_UNKNOWN = 0, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_GPU_ONLY = 1, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. + */ + VMA_MEMORY_USAGE_CPU_ONLY = 2, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. 
+ */ + VMA_MEMORY_USAGE_CPU_TO_GPU = 3, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. + */ + VMA_MEMORY_USAGE_GPU_TO_CPU = 4, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_CPU_COPY = 5, + /** + Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. + Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. + + Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. + + Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + */ + VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, + /** + Selects best memory type automatically. + This flag is recommended for most common use cases. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO = 7, + /** + Selects best memory type automatically with preference for GPU (device) memory. 
+ + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, + /** + Selects best memory type automatically with preference for CPU (host) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, + + VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF +} VmaMemoryUsage; + +/// Flags to be passed as VmaAllocationCreateInfo::flags. +typedef enum VmaAllocationCreateFlagBits +{ + /** \brief Set this flag if the allocation should have its own memory block. + + Use it for special, big resources, like fullscreen images used as attachments. + + If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo` + structure is applied if possible. + */ + VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, + + /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. 
+ + If new allocation cannot be placed in any of the existing blocks, allocation + fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + + You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and + #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. + */ + VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, + /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. + + Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. + + It is valid to use this flag for allocation made from memory type that is not + `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is + useful if you need an allocation that is efficient to use on GPU + (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that + support it (e.g. Intel GPU). + */ + VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, + /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + + Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a + null-terminated string. Instead of copying pointer value, a local copy of the + string is made and stored in allocation's `pName`. The string is automatically + freed together with the allocation. It is also used in vmaBuildStatsString(). + */ + VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, + /** Allocation will be created from upper stack in a double stack pool. + + This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. + */ + VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, + /** Create both buffer/image and allocation, but don't bind them together. + It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. + The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). + Otherwise it is ignored. 
+ + If you want to make sure the new buffer/image is not tied to the new memory allocation + through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, + use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. + */ + VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, + /** Create allocation only if additional device memory required for it, if any, won't exceed + memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + */ + VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, + /** \brief Set this flag if the allocated memory will have aliasing resources. + + Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. + Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. + */ + VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number, + never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + + \warning Violating this declaration may work correctly, but will likely be very slow. + Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` + Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. 
+ */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory can be read, written, and accessed in random order, + so a `HOST_CACHED` memory type is preferred. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, + /** + Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected + if it may improve performance. + + By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type + (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and + issue an explicit transfer to write/read your data. + To prepare for this possibility, don't forget to add appropriate flags like + `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, + /** Allocation strategy that chooses smallest possible free range for the allocation + to minimize memory usage and fragmentation, possibly at the expense of allocation time. 
+ */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = 0x00010000, + /** Allocation strategy that chooses first suitable free range for the allocation - + not necessarily in terms of the smallest offset but the one that is easiest and fastest to find + to minimize allocation time, possibly at the expense of allocation quality. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + Used internally by defragmentation, not recommended in typical usage. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = 0x00040000, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** A bit mask to extract only `STRATEGY` bits from entire set of flags. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MASK = + VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + + VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocationCreateFlagBits; +/// See #VmaAllocationCreateFlagBits. +typedef VkFlags VmaAllocationCreateFlags; + +/// Flags to be passed as VmaPoolCreateInfo::flags. +typedef enum VmaPoolCreateFlagBits +{ + /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored. + + This is an optional optimization flag. 
+ + If you always allocate using vmaCreateBuffer(), vmaCreateImage(), + vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator + knows exact type of your allocations so it can handle Buffer-Image Granularity + in the optimal way. + + If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(), + exact type of such allocations is not known, so allocator must be conservative + in handling Buffer-Image Granularity, which can lead to suboptimal allocation + (wasted memory). In that case, if you can make sure you always allocate only + buffers and linear images or only optimal images out of this pool, use this flag + to make allocator disregard Buffer-Image Granularity and so make allocations + faster and more optimal. + */ + VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002, + + /** \brief Enables alternative, linear allocation algorithm in this pool. + + Specify this flag to enable linear allocation algorithm, which always creates + new allocations after last one and doesn't reuse space from allocations freed in + between. It trades memory consumption for simplified algorithm and data + structure, which has better performance and uses less memory for metadata. + + By using this flag, you can achieve behavior of free-at-once, stack, + ring buffer, and double stack. + For details, see documentation chapter \ref linear_algorithm. + */ + VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004, + + /** Bit mask to extract only `ALGORITHM` bits from entire set of flags. + */ + VMA_POOL_CREATE_ALGORITHM_MASK = + VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT, + + VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaPoolCreateFlagBits; +/// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits. +typedef VkFlags VmaPoolCreateFlags; + +/// Flags to be passed as VmaDefragmentationInfo::flags. +typedef enum VmaDefragmentationFlagBits +{ + /** \brief Use simple but fast algorithm for defragmentation. 
+ May not achieve best results but will require least time to compute and least allocations to copy. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1, + /** \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified. + Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2, + /** \brief Perform full defragmentation of memory. + Can result in notably more time to compute and allocations to copy, but will achieve best memory packing. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4, + /** \brief Use the most robust algorithm at the cost of time to compute and number of copies to make. + Only available when bufferImageGranularity is greater than 1, since it aims to reduce + alignment issues between different types of resources. + Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8, + + /// A bit mask to extract only `ALGORITHM` bits from entire set of flags. + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK = + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT, + + VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaDefragmentationFlagBits; +/// See #VmaDefragmentationFlagBits. +typedef VkFlags VmaDefragmentationFlags; + +/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove. +typedef enum VmaDefragmentationMoveOperation +{ + /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass().
+ VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0, + /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged. + VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1, + /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed. + VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2, +} VmaDefragmentationMoveOperation; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. +typedef enum VmaVirtualBlockCreateFlagBits +{ + /** \brief Enables alternative, linear allocation algorithm in this virtual block. + + Specify this flag to enable linear allocation algorithm, which always creates + new allocations after last one and doesn't reuse space from allocations freed in + between. It trades memory consumption for simplified algorithm and data + structure, which has better performance and uses less memory for metadata. + + By using this flag, you can achieve behavior of free-at-once, stack, + ring buffer, and double stack. + For details, see documentation chapter \ref linear_algorithm. + */ + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001, + + /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags. + */ + VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK = + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT, + + VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualBlockCreateFlagBits; +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. See #VmaVirtualBlockCreateFlagBits. +typedef VkFlags VmaVirtualBlockCreateFlags; + +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. +typedef enum VmaVirtualAllocationCreateFlagBits +{ + /** \brief Allocation will be created from upper stack in a double stack pool. 
+ + This flag is only allowed for virtual blocks created with #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT flag. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT, + /** \brief Allocation strategy that tries to minimize memory usage. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, + /** \brief Allocation strategy that tries to minimize allocation time. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. + + These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MASK, + + VMA_VIRTUAL_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualAllocationCreateFlagBits; +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. See #VmaVirtualAllocationCreateFlagBits. +typedef VkFlags VmaVirtualAllocationCreateFlags; + +/** @} */ + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_DATA_TYPES_DECLARATIONS + +/** +\addtogroup group_init +@{ */ + +/** \struct VmaAllocator +\brief Represents main object of this library initialized. + +Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it. +Call function vmaDestroyAllocator() to destroy it. + +It is recommended to create just one object of this type per `VkDevice` object, +right after Vulkan is initialized and keep it alive until before Vulkan device is destroyed. 
+*/ +VK_DEFINE_HANDLE(VmaAllocator) + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \struct VmaPool +\brief Represents custom memory pool + +Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it. +Call function vmaDestroyPool() to destroy it. + +For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools). +*/ +VK_DEFINE_HANDLE(VmaPool) + +/** \struct VmaAllocation +\brief Represents single memory allocation. + +It may be either dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type +plus unique offset. + +There are multiple ways to create such object. +You need to fill structure VmaAllocationCreateInfo. +For more information see [Choosing memory type](@ref choosing_memory_type). + +Although the library provides convenience functions that create Vulkan buffer or image, +allocate memory for it and bind them together, +binding of the allocation to a buffer or an image is out of scope of the allocation itself. +Allocation object can exist without buffer/image bound, +binding can be done manually by the user, and destruction of it can be done +independently of destruction of the allocation. + +The object also remembers its size and some other information. +To retrieve this information, use function vmaGetAllocationInfo() and inspect +returned structure VmaAllocationInfo. +*/ +VK_DEFINE_HANDLE(VmaAllocation) + +/** \struct VmaDefragmentationContext +\brief An opaque object that represents started defragmentation process. + +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. +*/ +VK_DEFINE_HANDLE(VmaDefragmentationContext) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualAllocation +\brief Represents single memory allocation done inside VmaVirtualBlock. + +Use it as a unique identifier to virtual allocation within the single block. 
+ +Use value `VK_NULL_HANDLE` to represent a null/invalid allocation. +*/ +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualBlock +\brief Handle to a virtual block object that allows to use core allocation algorithm without allocating any real GPU memory. + +Fill in #VmaVirtualBlockCreateInfo structure and use vmaCreateVirtualBlock() to create it. Use vmaDestroyVirtualBlock() to destroy it. +For more information, see documentation chapter \ref virtual_allocator. + +This object is not thread-safe - should not be used from multiple threads simultaneously, must be synchronized externally. +*/ +VK_DEFINE_HANDLE(VmaVirtualBlock) + +/** @} */ + +/** +\addtogroup group_init +@{ +*/ + +/// Callback function called after successful vkAllocateMemory. +typedef void (VKAPI_PTR* PFN_vmaAllocateDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/// Callback function called before vkFreeMemory. +typedef void (VKAPI_PTR* PFN_vmaFreeDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. + +Provided for informative purpose, e.g. to gather statistics about number of +allocations or total amount of memory allocated in Vulkan. + +Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. +*/ +typedef struct VmaDeviceMemoryCallbacks +{ + /// Optional, can be null. + PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; + /// Optional, can be null. + PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; + /// Optional, can be null. 
+ void* VMA_NULLABLE pUserData; +} VmaDeviceMemoryCallbacks; + /** \brief Pointers to some Vulkan functions - a subset used by the library. Used in VmaAllocatorCreateInfo::pVulkanFunctions. */ -typedef struct VmaVulkanFunctions { +typedef struct VmaVulkanFunctions +{ + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + PFN_vkGetInstanceProcAddr VMA_NULLABLE vkGetInstanceProcAddr; + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + PFN_vkGetDeviceProcAddr VMA_NULLABLE vkGetDeviceProcAddr; PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties; PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties; PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory; @@ -2370,46 +1012,29 @@ typedef struct VmaVulkanFunctions { PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage; PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer; #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR; + /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR; #endif #if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR; + /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. 
PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR; #endif #if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR; #endif +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements; + /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements; +#endif } VmaVulkanFunctions; -/// Flags to be used in VmaRecordSettings::flags. -typedef enum VmaRecordFlagBits { - /** \brief Enables flush after recording every function call. - - Enable it if you expect your application to crash, which may leave recording file truncated. - It may degrade performance though. - */ - VMA_RECORD_FLUSH_AFTER_CALL_BIT = 0x00000001, - - VMA_RECORD_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VmaRecordFlagBits; -typedef VkFlags VmaRecordFlags; - -/// Parameters for recording calls to VMA functions. To be used in VmaAllocatorCreateInfo::pRecordSettings. -typedef struct VmaRecordSettings -{ - /// Flags for recording. Use #VmaRecordFlagBits enum. - VmaRecordFlags flags; - /** \brief Path to the file that should be written by the recording. - - Suggested extension: "csv". - If the file already exists, it will be overwritten. - It will be opened for the whole time #VmaAllocator object is alive. 
- If opening this file fails, creation of the whole allocator object fails. - */ - const char* VMA_NOT_NULL pFilePath; -} VmaRecordSettings; - /// Description of a Allocator to be created. typedef struct VmaAllocatorCreateInfo { @@ -2430,20 +1055,6 @@ typedef struct VmaAllocatorCreateInfo /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional. /** Optional, can be null. */ const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks; - /** \brief Maximum number of additional frames that are in use at the same time as current frame. - - This value is used only when you make allocations with - VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag. Such allocation cannot become - lost if allocation.lastUseFrameIndex >= allocator.currentFrameIndex - frameInUseCount. - - For example, if you double-buffer your command buffers, so resources used for - rendering in previous frame may still be in use by the GPU at the moment you - allocate resources needed for the current frame, set this value to 1. - - If you want to allow any allocations other than used in the current frame to - become lost, set this value to 0. - */ - uint32_t frameInUseCount; /** \brief Either null or a pointer to an array of limits on maximum number of bytes that can be allocated out of particular Vulkan memory heap. If not NULL, it must be a pointer to an array of @@ -2475,40 +1086,37 @@ typedef struct VmaAllocatorCreateInfo For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions). */ const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions; - /** \brief Parameters for recording of VMA calls. Can be null. - - If not null, it enables recording of calls to VMA functions to a file. - If support for recording is not enabled using `VMA_RECORDING_ENABLED` macro, - creation of the allocator object fails with `VK_ERROR_FEATURE_NOT_PRESENT`. - */ - const VmaRecordSettings* VMA_NULLABLE pRecordSettings; /** \brief Handle to Vulkan instance object. 
Starting from version 3.0.0 this member is no longer optional, it must be set! */ VkInstance VMA_NOT_NULL instance; - /** \brief Optional. The highest version of Vulkan that the application is designed to use. + /** \brief Optional. Vulkan version that the application uses. It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`. The patch version number specified is ignored. Only the major and minor versions are considered. - It must be less or equal (preferably equal) to value as passed to `vkCreateInstance` as `VkApplicationInfo::apiVersion`. - Only versions 1.0, 1.1, 1.2 are supported by the current implementation. + Only versions 1.0, 1.1, 1.2, 1.3 are supported by the current implementation. Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`. + It must match the Vulkan version used by the application and supported on the selected physical device, + so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance` + and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used. */ uint32_t vulkanApiVersion; +#if VMA_EXTERNAL_MEMORY + /** \brief Either null or a pointer to an array of external memory handle types for each Vulkan memory type. + + If not NULL, it must be a pointer to an array of `VkPhysicalDeviceMemoryProperties::memoryTypeCount` + elements, defining external memory handle types of particular Vulkan memory type, + to be passed using `VkExportMemoryAllocateInfoKHR`. + + Any of the elements may be equal to 0, which means not to use `VkExportMemoryAllocateInfoKHR` on this memory type. + This is also the default in case of `pTypeExternalMemoryHandleTypes` = NULL. 
+ */ + const VkExternalMemoryHandleTypeFlagsKHR* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryTypeCount") pTypeExternalMemoryHandleTypes; +#endif // #if VMA_EXTERNAL_MEMORY } VmaAllocatorCreateInfo; -/// Creates Allocator object. -VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( - const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, - VmaAllocator VMA_NULLABLE * VMA_NOT_NULL pAllocator); - -/// Destroys allocator object. -VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( - VmaAllocator VMA_NULLABLE allocator); - -/** \brief Information about existing #VmaAllocator object. -*/ +/// Information about existing #VmaAllocator object. typedef struct VmaAllocatorInfo { /** \brief Handle to Vulkan instance object. @@ -2528,355 +1136,129 @@ typedef struct VmaAllocatorInfo VkDevice VMA_NOT_NULL device; } VmaAllocatorInfo; -/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. - -It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to -`VkPhysicalDevice`, `VkDevice` etc. every time using this function. -*/ -VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator VMA_NOT_NULL allocator, VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); +/** @} */ /** -PhysicalDeviceProperties are fetched from physicalDevice by the allocator. -You can access it here, without fetching it again on your own. +\addtogroup group_stats +@{ */ -VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( - VmaAllocator VMA_NOT_NULL allocator, - const VkPhysicalDeviceProperties* VMA_NULLABLE * VMA_NOT_NULL ppPhysicalDeviceProperties); -/** -PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. -You can access it here, without fetching it again on your own. +/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total. + +These are fast to calculate. 
+See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics(). */ -VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( - VmaAllocator VMA_NOT_NULL allocator, - const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE * VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); - -/** -\brief Given Memory Type Index, returns Property Flags of this memory type. - -This is just a convenience function. Same information can be obtained using -vmaGetMemoryProperties(). -*/ -VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( - VmaAllocator VMA_NOT_NULL allocator, - uint32_t memoryTypeIndex, - VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); - -/** \brief Sets index of the current frame. - -This function must be used if you make allocations with -#VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT and -#VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT flags to inform the allocator -when a new frame begins. Allocations queried using vmaGetAllocationInfo() cannot -become lost in the current frame. -*/ -VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( - VmaAllocator VMA_NOT_NULL allocator, - uint32_t frameIndex); - -/** \brief Calculated statistics of memory usage in entire allocator. -*/ -typedef struct VmaStatInfo +typedef struct VmaStatistics { - /// Number of `VkDeviceMemory` Vulkan memory blocks allocated. + /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated. + */ uint32_t blockCount; - /// Number of #VmaAllocation allocation objects allocated. + /** \brief Number of #VmaAllocation objects allocated. + + Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`. + */ uint32_t allocationCount; + /** \brief Number of bytes allocated in `VkDeviceMemory` blocks. + + \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object + (e.g. 
+ as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls + "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image. + */ + VkDeviceSize blockBytes; + /** \brief Total number of bytes occupied by all #VmaAllocation objects. + + Always less than or equal to `blockBytes`. + Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan + but unused by any #VmaAllocation. + */ + VkDeviceSize allocationBytes; +} VmaStatistics; + +/** \brief More detailed statistics than #VmaStatistics. + +These are slower to calculate. Use for debugging purposes. +See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics(). + +Previous version of the statistics API provided averages, but they have been removed +because they can be easily calculated as: + +\code +VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount; +VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes; +VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount; +\endcode +*/ +typedef struct VmaDetailedStatistics +{ + /// Basic statistics. + VmaStatistics statistics; /// Number of free ranges of memory between allocations. uint32_t unusedRangeCount; - /// Total number of bytes occupied by all allocations. - VkDeviceSize usedBytes; - /// Total number of bytes occupied by unused ranges. - VkDeviceSize unusedBytes; - VkDeviceSize allocationSizeMin, allocationSizeAvg, allocationSizeMax; - VkDeviceSize unusedRangeSizeMin, unusedRangeSizeAvg, unusedRangeSizeMax; -} VmaStatInfo; + /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations. + VkDeviceSize allocationSizeMin; + /// Largest allocation size. 0 if there are 0 allocations. + VkDeviceSize allocationSizeMax; + /// Smallest empty range size.
`VK_WHOLE_SIZE` if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMin; + /// Largest empty range size. 0 if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMax; +} VmaDetailedStatistics; -/// General statistics from current state of Allocator. -typedef struct VmaStats -{ - VmaStatInfo memoryType[VK_MAX_MEMORY_TYPES]; - VmaStatInfo memoryHeap[VK_MAX_MEMORY_HEAPS]; - VmaStatInfo total; -} VmaStats; +/** \brief General statistics from current state of the Allocator - +total memory usage across all memory heaps and types. -/** \brief Retrieves statistics from current state of the Allocator. - -This function is called "calculate" not "get" because it has to traverse all -internal data structures, so it may be quite slow. For faster but more brief statistics -suitable to be called every frame or every allocation, use vmaGetBudget(). - -Note that when using allocator from multiple threads, returned information may immediately -become outdated. +These are slower to calculate. Use for debugging purposes. +See function vmaCalculateStatistics(). */ -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( - VmaAllocator VMA_NOT_NULL allocator, - VmaStats* VMA_NOT_NULL pStats); +typedef struct VmaTotalStatistics +{ + VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES]; + VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS]; + VmaDetailedStatistics total; +} VmaTotalStatistics; -/** \brief Statistics of current memory usage and available budget, in bytes, for specific memory heap. +/** \brief Statistics of current memory usage and available budget for a specific memory heap. + +These are fast to calculate. +See function vmaGetHeapBudgets(). */ typedef struct VmaBudget { - /** \brief Sum size of all `VkDeviceMemory` blocks allocated from particular heap, in bytes. + /** \brief Statistics fetched from the library. */ - VkDeviceSize blockBytes; - - /** \brief Sum size of all allocations created in particular heap, in bytes. 
- - Usually less or equal than `blockBytes`. - Difference `blockBytes - allocationBytes` is the amount of memory allocated but unused - - available for new allocations or wasted due to fragmentation. - - It might be greater than `blockBytes` if there are some allocations in lost state, as they account - to this value as well. - */ - VkDeviceSize allocationBytes; - + VmaStatistics statistics; /** \brief Estimated current memory usage of the program, in bytes. - Fetched from system using `VK_EXT_memory_budget` extension if enabled. + Fetched from system using VK_EXT_memory_budget extension if enabled. - It might be different than `blockBytes` (usually higher) due to additional implicit objects + It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or `VkDeviceMemory` blocks allocated outside of this library, if any. */ VkDeviceSize usage; - /** \brief Estimated amount of memory available to the program, in bytes. - Fetched from system using `VK_EXT_memory_budget` extension if enabled. + Fetched from system using VK_EXT_memory_budget extension if enabled. It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors - external to the program, like other programs also consuming system resources. + external to the program, decided by the operating system. Difference `budget - usage` is the amount of additional memory that can probably be allocated without problems. Exceeding the budget may result in various problems. */ VkDeviceSize budget; } VmaBudget; -/** \brief Retrieves information about current memory budget for all memory heaps. +/** @} */ -\param[out] pBudget Must point to array with number of elements at least equal to number of memory heaps in physical device used. 
- -This function is called "get" not "calculate" because it is very fast, suitable to be called -every frame or every allocation. For more detailed statistics use vmaCalculateStats(). - -Note that when using allocator from multiple threads, returned information may immediately -become outdated. +/** +\addtogroup group_alloc +@{ */ -VMA_CALL_PRE void VMA_CALL_POST vmaGetBudget( - VmaAllocator VMA_NOT_NULL allocator, - VmaBudget* VMA_NOT_NULL pBudget); -#ifndef VMA_STATS_STRING_ENABLED -#define VMA_STATS_STRING_ENABLED 1 -#endif +/** \brief Parameters of new #VmaAllocation. -#if VMA_STATS_STRING_ENABLED - -/// Builds and returns statistics as string in JSON format. -/** @param[out] ppStatsString Must be freed using vmaFreeStatsString() function. +To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others. */ -VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( - VmaAllocator VMA_NOT_NULL allocator, - char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, - VkBool32 detailedMap); - -VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( - VmaAllocator VMA_NOT_NULL allocator, - char* VMA_NULLABLE pStatsString); - -#endif // #if VMA_STATS_STRING_ENABLED - -/** \struct VmaPool -\brief Represents custom memory pool - -Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it. -Call function vmaDestroyPool() to destroy it. - -For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools). -*/ -VK_DEFINE_HANDLE(VmaPool) - -typedef enum VmaMemoryUsage -{ - /** No intended memory usage specified. - Use other members of VmaAllocationCreateInfo to specify your requirements. - */ - VMA_MEMORY_USAGE_UNKNOWN = 0, - /** Memory will be used on device only, so fast access from the device is preferred. - It usually means device-local GPU (video) memory. - No need to be mappable on host. - It is roughly equivalent of `D3D12_HEAP_TYPE_DEFAULT`. - - Usage: - - - Resources written and read by device, e.g. 
images used as attachments. - - Resources transferred from host once (immutable) or infrequently and read by - device multiple times, e.g. textures to be sampled, vertex buffers, uniform - (constant) buffers, and majority of other types of resources used on GPU. - - Allocation may still end up in `HOST_VISIBLE` memory on some implementations. - In such case, you are free to map it. - You can use #VMA_ALLOCATION_CREATE_MAPPED_BIT with this usage type. - */ - VMA_MEMORY_USAGE_GPU_ONLY = 1, - /** Memory will be mappable on host. - It usually means CPU (system) memory. - Guarantees to be `HOST_VISIBLE` and `HOST_COHERENT`. - CPU access is typically uncached. Writes may be write-combined. - Resources created in this pool may still be accessible to the device, but access to them can be slow. - It is roughly equivalent of `D3D12_HEAP_TYPE_UPLOAD`. - - Usage: Staging copy of resources used as transfer source. - */ - VMA_MEMORY_USAGE_CPU_ONLY = 2, - /** - Memory that is both mappable on host (guarantees to be `HOST_VISIBLE`) and preferably fast to access by GPU. - CPU access is typically uncached. Writes may be write-combined. - - Usage: Resources written frequently by host (dynamic), read by device. E.g. textures (with LINEAR layout), vertex buffers, uniform buffers updated every frame or every draw call. - */ - VMA_MEMORY_USAGE_CPU_TO_GPU = 3, - /** Memory mappable on host (guarantees to be `HOST_VISIBLE`) and cached. - It is roughly equivalent of `D3D12_HEAP_TYPE_READBACK`. - - Usage: - - - Resources written by device, read by host - results of some computations, e.g. screen capture, average scene luminance for HDR tone mapping. - - Any resources read or accessed randomly on host, e.g. CPU-side copy of vertex buffer used as source of transfer, but also used for collision detection. - */ - VMA_MEMORY_USAGE_GPU_TO_CPU = 4, - /** CPU memory - memory that is preferably not `DEVICE_LOCAL`, but also not guaranteed to be `HOST_VISIBLE`. 
- - Usage: Staging copy of resources moved from GPU memory to CPU memory as part - of custom paging/residency mechanism, to be moved back to GPU memory when needed. - */ - VMA_MEMORY_USAGE_CPU_COPY = 5, - /** Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. - Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. - - Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. - - Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. - */ - VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, - - VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF -} VmaMemoryUsage; - -/// Flags to be passed as VmaAllocationCreateInfo::flags. -typedef enum VmaAllocationCreateFlagBits { - /** \brief Set this flag if the allocation should have its own memory block. - - Use it for special, big resources, like fullscreen images used as attachments. - - You should not use this flag if VmaAllocationCreateInfo::pool is not null. - */ - VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, - - /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. - - If new allocation cannot be placed in any of the existing blocks, allocation - fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. - - You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and - #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. - - If VmaAllocationCreateInfo::pool is not null, this flag is implied and ignored. */ - VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, - /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. - - Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. 
- - It is valid to use this flag for allocation made from memory type that is not - `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is - useful if you need an allocation that is efficient to use on GPU - (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that - support it (e.g. Intel GPU). - - You should not use this flag together with #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT. - */ - VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, - /** Allocation created with this flag can become lost as a result of another - allocation with #VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT flag, so you - must check it before use. - - To check if allocation is not lost, call vmaGetAllocationInfo() and check if - VmaAllocationInfo::deviceMemory is not `VK_NULL_HANDLE`. - - For details about supporting lost allocations, see Lost Allocations - chapter of User Guide on Main Page. - - You should not use this flag together with #VMA_ALLOCATION_CREATE_MAPPED_BIT. - */ - VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT = 0x00000008, - /** While creating allocation using this flag, other allocations that were - created with flag #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT can become lost. - - For details about supporting lost allocations, see Lost Allocations - chapter of User Guide on Main Page. - */ - VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT = 0x00000010, - /** Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a - null-terminated string. Instead of copying pointer value, a local copy of the - string is made and stored in allocation's `pUserData`. The string is automatically - freed together with the allocation. It is also used in vmaBuildStatsString(). - */ - VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, - /** Allocation will be created from upper stack in a double stack pool. - - This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. 
- */ - VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, - /** Create both buffer/image and allocation, but don't bind them together. - It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. - The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). - Otherwise it is ignored. - */ - VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, - /** Create allocation only if additional device memory required for it, if any, won't exceed - memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. - */ - VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, - - /** Allocation strategy that chooses smallest possible free range for the - allocation. - */ - VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = 0x00010000, - /** Allocation strategy that chooses biggest possible free range for the - allocation. - */ - VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT = 0x00020000, - /** Allocation strategy that chooses first suitable free range for the - allocation. - - "First" doesn't necessarily means the one with smallest offset in memory, - but rather the one that is easiest and fastest to find. - */ - VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = 0x00040000, - - /** Allocation strategy that tries to minimize memory usage. - */ - VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT, - /** Allocation strategy that tries to minimize allocation time. - */ - VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT, - /** Allocation strategy that tries to minimize memory fragmentation. - */ - VMA_ALLOCATION_CREATE_STRATEGY_MIN_FRAGMENTATION_BIT = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT, - - /** A bit mask to extract only `STRATEGY` bits from entire set of flags. 
- */ - VMA_ALLOCATION_CREATE_STRATEGY_MASK = - VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT | - VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT | - VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT, - - VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VmaAllocationCreateFlagBits; -typedef VkFlags VmaAllocationCreateFlags; - typedef struct VmaAllocationCreateInfo { /// Use #VmaAllocationCreateFlagBits enum. @@ -2919,7 +1301,7 @@ typedef struct VmaAllocationCreateInfo */ void* VMA_NULLABLE pUserData; /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. - + It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. Otherwise, it has the priority of a memory block where it is placed and this variable is ignored. @@ -2927,6 +1309,432 @@ typedef struct VmaAllocationCreateInfo float priority; } VmaAllocationCreateInfo; +/// Describes parameter of created #VmaPool. +typedef struct VmaPoolCreateInfo +{ + /** \brief Vulkan memory type index to allocate this pool from. + */ + uint32_t memoryTypeIndex; + /** \brief Use combination of #VmaPoolCreateFlagBits. + */ + VmaPoolCreateFlags flags; + /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional. + + Specify nonzero to set explicit, constant size of memory blocks used by this + pool. + + Leave 0 to use default and let the library manage block sizes automatically. + Sizes of particular blocks may vary. + In this case, the pool will also support dedicated allocations. + */ + VkDeviceSize blockSize; + /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty. + + Set to 0 to have no preallocated blocks and allow the pool be completely empty. 
+ */ + size_t minBlockCount; + /** \brief Maximum number of blocks that can be allocated in this pool. Optional. + + Set to 0 to use default, which is `SIZE_MAX`, which means no limit. + + Set to same value as VmaPoolCreateInfo::minBlockCount to have fixed amount of memory allocated + throughout whole lifetime of this pool. + */ + size_t maxBlockCount; + /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations. + + It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object. + Otherwise, this variable is ignored. + */ + float priority; + /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0. + + Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two. + It can be useful in cases where alignment returned by Vulkan by functions like `vkGetBufferMemoryRequirements` is not enough, + e.g. when doing interop with OpenGL. + */ + VkDeviceSize minAllocationAlignment; + /** \brief Additional `pNext` chain to be attached to `VkMemoryAllocateInfo` used for every allocation made by this pool. Optional. + + Optional, can be null. If not null, it must point to a `pNext` chain of structures that can be attached to `VkMemoryAllocateInfo`. + It can be useful for special needs such as adding `VkExportMemoryAllocateInfoKHR`. + Structures pointed by this member must remain alive and unchanged for the whole lifetime of the custom pool. + + Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`, + can be attached automatically by this library when using other, more convenient of its features. 
+ */ + void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext; +} VmaPoolCreateInfo; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo(). + +There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2. +*/ +typedef struct VmaAllocationInfo +{ + /** \brief Memory type index that this allocation was allocated from. + + It never changes. + */ + uint32_t memoryType; + /** \brief Handle to Vulkan memory object. + + Same memory object can be shared by multiple allocations. + + It can change after the allocation is moved during \ref defragmentation. + */ + VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; + /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. + + You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function + vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image, + not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation + and apply this offset automatically. + + It can change after the allocation is moved during \ref defragmentation. + */ + VkDeviceSize offset; + /** \brief Size of this allocation, in bytes. + + It never changes. + + \note Allocation size returned in this variable may be greater than the size + requested for the resource e.g. as `VkBufferCreateInfo::size`. Whole size of the + allocation is accessible for operations on memory e.g. using a pointer after + mapping with vmaMapMemory(), but operations on the resource e.g. using + `vkCmdCopyBuffer` must be limited to the size of the resource. 
+ */ + VkDeviceSize size; + /** \brief Pointer to the beginning of this allocation as mapped data. + + If the allocation hasn't been mapped using vmaMapMemory() and hasn't been + created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. + + It can change after call to vmaMapMemory(), vmaUnmapMemory(). + It can also change after the allocation is moved during \ref defragmentation. + */ + void* VMA_NULLABLE pMappedData; + /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). + + It can change after call to vmaSetAllocationUserData() for this allocation. + */ + void* VMA_NULLABLE pUserData; + /** \brief Custom allocation name that was set with vmaSetAllocationName(). + + It can change after call to vmaSetAllocationName() for this allocation. + + Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with + additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED]. + */ + const char* VMA_NULLABLE pName; +} VmaAllocationInfo; + +/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2(). +typedef struct VmaAllocationInfo2 +{ + /** \brief Basic parameters of the allocation. + + If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead. + */ + VmaAllocationInfo allocationInfo; + /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to. + + In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`. + */ + VkDeviceSize blockSize; + /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block. + + When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation + (if VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled). 
+ */ + VkBool32 dedicatedMemory; +} VmaAllocationInfo2; + +/** Callback function called during vmaBeginDefragmentation() to check custom criterion about ending current defragmentation pass. + +Should return true if the defragmentation needs to stop current pass. +*/ +typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData); + +/** \brief Parameters for defragmentation. + +To be used with function vmaBeginDefragmentation(). +*/ +typedef struct VmaDefragmentationInfo +{ + /// \brief Use combination of #VmaDefragmentationFlagBits. + VmaDefragmentationFlags flags; + /** \brief Custom pool to be defragmented. + + If null then default pools will undergo defragmentation process. + */ + VmaPool VMA_NULLABLE pool; + /** \brief Maximum numbers of bytes that can be copied during single pass, while moving allocations to different places. + + `0` means no limit. + */ + VkDeviceSize maxBytesPerPass; + /** \brief Maximum number of allocations that can be moved during single pass to a different place. + + `0` means no limit. + */ + uint32_t maxAllocationsPerPass; + /** \brief Optional custom callback for stopping vmaBeginDefragmentation(). + + Have to return true for breaking current defragmentation pass. + */ + PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback; + /// \brief Optional data to pass to custom callback for stopping pass of defragmentation. + void* VMA_NULLABLE pBreakCallbackUserData; +} VmaDefragmentationInfo; + +/// Single move of an allocation to be done for defragmentation. +typedef struct VmaDefragmentationMove +{ + /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it. + VmaDefragmentationMoveOperation operation; + /// Allocation that should be moved. + VmaAllocation VMA_NOT_NULL srcAllocation; + /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`. 
+ + \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass, + to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory(). + vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory. + */ + VmaAllocation VMA_NOT_NULL dstTmpAllocation; +} VmaDefragmentationMove; + +/** \brief Parameters for incremental defragmentation steps. + +To be used with function vmaBeginDefragmentationPass(). +*/ +typedef struct VmaDefragmentationPassMoveInfo +{ + /// Number of elements in the `pMoves` array. + uint32_t moveCount; + /** \brief Array of moves to be performed by the user in the current defragmentation pass. + + Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass(). + + For each element, you should: + + 1. Create a new buffer/image in the place pointed by VmaDefragmentationMove::dstMemory + VmaDefragmentationMove::dstOffset. + 2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`. + 3. Make sure these commands finished executing on the GPU. + 4. Destroy the old buffer/image. + + Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass(). + After this call, the allocation will point to the new place in memory. + + Alternatively, if you cannot move specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + + Alternatively, if you decide you want to completely remove the allocation: + + 1. Destroy its buffer/image. + 2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + + Then, after vmaEndDefragmentationPass() the allocation will be freed. 
+ */ + VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves; +} VmaDefragmentationPassMoveInfo; + +/// Statistics returned for defragmentation process in function vmaEndDefragmentation(). +typedef struct VmaDefragmentationStats +{ + /// Total number of bytes that have been copied while moving allocations to different places. + VkDeviceSize bytesMoved; + /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects. + VkDeviceSize bytesFreed; + /// Number of allocations that have been moved to different places. + uint32_t allocationsMoved; + /// Number of empty `VkDeviceMemory` objects that have been released to the system. + uint32_t deviceMemoryBlocksFreed; +} VmaDefragmentationStats; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Parameters of created #VmaVirtualBlock object to be passed to vmaCreateVirtualBlock(). +typedef struct VmaVirtualBlockCreateInfo +{ + /** \brief Total size of the virtual block. + + Sizes can be expressed in bytes or any units you want as long as you are consistent in using them. + For example, if you allocate from some array of structures, 1 can mean single instance of entire structure. + */ + VkDeviceSize size; + + /** \brief Use combination of #VmaVirtualBlockCreateFlagBits. + */ + VmaVirtualBlockCreateFlags flags; + + /** \brief Custom CPU memory allocation callbacks. Optional. + + Optional, can be null. When specified, they will be used for all CPU-side memory allocations. + */ + const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; +} VmaVirtualBlockCreateInfo; + +/// Parameters of created virtual allocation to be passed to vmaVirtualAllocate(). +typedef struct VmaVirtualAllocationCreateInfo +{ + /** \brief Size of the allocation. + + Cannot be zero. + */ + VkDeviceSize size; + /** \brief Required alignment of the allocation. Optional. + + Must be power of two. 
Special value 0 has the same meaning as 1 - means no special alignment is required, so allocation can start at any offset. + */ + VkDeviceSize alignment; + /** \brief Use combination of #VmaVirtualAllocationCreateFlagBits. + */ + VmaVirtualAllocationCreateFlags flags; + /** \brief Custom pointer to be associated with the allocation. Optional. + + It can be any value and can be used for user-defined purposes. It can be fetched or changed later. + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationCreateInfo; + +/// Parameters of an existing virtual allocation, returned by vmaGetVirtualAllocationInfo(). +typedef struct VmaVirtualAllocationInfo +{ + /** \brief Offset of the allocation. + + Offset at which the allocation was made. + */ + VkDeviceSize offset; + /** \brief Size of the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::size. + */ + VkDeviceSize size; + /** \brief Custom pointer associated with the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::pUserData or to vmaSetVirtualAllocationUserData(). + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationInfo; + +/** @} */ + +#endif // _VMA_DATA_TYPES_DECLARATIONS + +#ifndef _VMA_FUNCTION_HEADERS + +/** +\addtogroup group_init +@{ +*/ + +/// Creates #VmaAllocator object. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocator VMA_NULLABLE* VMA_NOT_NULL pAllocator); + +/// Destroys allocator object. +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator VMA_NULLABLE allocator); + +/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. + +It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to +`VkPhysicalDevice`, `VkDevice` etc. every time using this function. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); + +/** +PhysicalDeviceProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceProperties); + +/** +PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); + +/** +\brief Given Memory Type Index, returns Property Flags of this memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeIndex, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +/** \brief Sets index of the current frame. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t frameIndex); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics from current state of the Allocator. + +This function is called "calculate" not "get" because it has to traverse all +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaTotalStatistics* VMA_NOT_NULL pStats); + +/** \brief Retrieves information about current memory usage and budget for all memory heaps. + +\param allocator +\param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. + +This function is called "get" not "calculate" because it is very fast, suitable to be called +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( + VmaAllocator VMA_NOT_NULL allocator, + VmaBudget* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pBudgets); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + /** \brief Helps to find memoryTypeIndex, given memoryTypeBits and VmaAllocationCreateInfo. @@ -2954,12 +1762,6 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. It internally creates a temporary, dummy buffer that never has memory bound. -It is just a convenience function, equivalent to calling: - -- `vkCreateBuffer` -- `vkGetBufferMemoryRequirements` -- `vmaFindMemoryTypeIndex` -- `vkDestroyBuffer` */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( VmaAllocator VMA_NOT_NULL allocator, @@ -2972,12 +1774,6 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. It internally creates a temporary, dummy image that never has memory bound. 
-It is just a convenience function, equivalent to calling: - -- `vkCreateImage` -- `vkGetImageMemoryRequirements` -- `vmaFindMemoryTypeIndex` -- `vkDestroyImage` */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( VmaAllocator VMA_NOT_NULL allocator, @@ -2985,155 +1781,16 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, uint32_t* VMA_NOT_NULL pMemoryTypeIndex); -/// Flags to be passed as VmaPoolCreateInfo::flags. -typedef enum VmaPoolCreateFlagBits { - /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored. - - This is an optional optimization flag. - - If you always allocate using vmaCreateBuffer(), vmaCreateImage(), - vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator - knows exact type of your allocations so it can handle Buffer-Image Granularity - in the optimal way. - - If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(), - exact type of such allocations is not known, so allocator must be conservative - in handling Buffer-Image Granularity, which can lead to suboptimal allocation - (wasted memory). In that case, if you can make sure you always allocate only - buffers and linear images or only optimal images out of this pool, use this flag - to make allocator disregard Buffer-Image Granularity and so make allocations - faster and more optimal. - */ - VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002, - - /** \brief Enables alternative, linear allocation algorithm in this pool. - - Specify this flag to enable linear allocation algorithm, which always creates - new allocations after last one and doesn't reuse space from allocations freed in - between. 
It trades memory consumption for simplified algorithm and data - structure, which has better performance and uses less memory for metadata. - - By using this flag, you can achieve behavior of free-at-once, stack, - ring buffer, and double stack. For details, see documentation chapter - \ref linear_algorithm. - - When using this flag, you must specify VmaPoolCreateInfo::maxBlockCount == 1 (or 0 for default). - - For more details, see [Linear allocation algorithm](@ref linear_algorithm). - */ - VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004, - - /** \brief Enables alternative, buddy allocation algorithm in this pool. - - It operates on a tree of blocks, each having size that is a power of two and - a half of its parent's size. Comparing to default algorithm, this one provides - faster allocation and deallocation and decreased external fragmentation, - at the expense of more memory wasted (internal fragmentation). - - For more details, see [Buddy allocation algorithm](@ref buddy_algorithm). - */ - VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT = 0x00000008, - - /** Bit mask to extract only `ALGORITHM` bits from entire set of flags. - */ - VMA_POOL_CREATE_ALGORITHM_MASK = - VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT | - VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT, - - VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VmaPoolCreateFlagBits; -typedef VkFlags VmaPoolCreateFlags; - -/** \brief Describes parameter of created #VmaPool. -*/ -typedef struct VmaPoolCreateInfo { - /** \brief Vulkan memory type index to allocate this pool from. - */ - uint32_t memoryTypeIndex; - /** \brief Use combination of #VmaPoolCreateFlagBits. - */ - VmaPoolCreateFlags flags; - /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional. - - Specify nonzero to set explicit, constant size of memory blocks used by this - pool. - - Leave 0 to use default and let the library manage block sizes automatically. - Sizes of particular blocks may vary. 
- */ - VkDeviceSize blockSize; - /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty. - - Set to 0 to have no preallocated blocks and allow the pool be completely empty. - */ - size_t minBlockCount; - /** \brief Maximum number of blocks that can be allocated in this pool. Optional. - - Set to 0 to use default, which is `SIZE_MAX`, which means no limit. - - Set to same value as VmaPoolCreateInfo::minBlockCount to have fixed amount of memory allocated - throughout whole lifetime of this pool. - */ - size_t maxBlockCount; - /** \brief Maximum number of additional frames that are in use at the same time as current frame. - - This value is used only when you make allocations with - #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag. Such allocation cannot become - lost if allocation.lastUseFrameIndex >= allocator.currentFrameIndex - frameInUseCount. - - For example, if you double-buffer your command buffers, so resources used for - rendering in previous frame may still be in use by the GPU at the moment you - allocate resources needed for the current frame, set this value to 1. - - If you want to allow any allocations other than used in the current frame to - become lost, set this value to 0. - */ - uint32_t frameInUseCount; - /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations. - - It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object. - Otherwise, this variable is ignored. - */ - float priority; -} VmaPoolCreateInfo; - -/** \brief Describes parameter of existing #VmaPool. -*/ -typedef struct VmaPoolStats { - /** \brief Total amount of `VkDeviceMemory` allocated from Vulkan for this pool, in bytes. - */ - VkDeviceSize size; - /** \brief Total number of bytes in the pool not used by any #VmaAllocation. 
- */ - VkDeviceSize unusedSize; - /** \brief Number of #VmaAllocation objects created from this pool that were not destroyed or lost. - */ - size_t allocationCount; - /** \brief Number of continuous memory ranges in the pool not used by any #VmaAllocation. - */ - size_t unusedRangeCount; - /** \brief Size of the largest continuous free memory region available for new allocation. - - Making a new allocation of that size is not guaranteed to succeed because of - possible additional margin required to respect alignment and buffer/image - granularity. - */ - VkDeviceSize unusedRangeSizeMax; - /** \brief Number of `VkDeviceMemory` blocks allocated for this pool. - */ - size_t blockCount; -} VmaPoolStats; - /** \brief Allocates Vulkan device memory and creates #VmaPool object. -@param allocator Allocator object. -@param pCreateInfo Parameters of pool to create. -@param[out] pPool Handle to created pool. +\param allocator Allocator object. +\param pCreateInfo Parameters of pool to create. +\param[out] pPool Handle to created pool. */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( VmaAllocator VMA_NOT_NULL allocator, const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo, - VmaPool VMA_NULLABLE * VMA_NOT_NULL pPool); + VmaPool VMA_NULLABLE* VMA_NOT_NULL pPool); /** \brief Destroys #VmaPool object and frees Vulkan device memory. */ @@ -3141,27 +1798,41 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NULLABLE pool); +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + /** \brief Retrieves statistics of existing #VmaPool object. -@param allocator Allocator object. -@param pool Pool object. -@param[out] pPoolStats Statistics of specified pool. +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. 
*/ -VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NOT_NULL pool, - VmaPoolStats* VMA_NOT_NULL pPoolStats); + VmaStatistics* VMA_NOT_NULL pPoolStats); -/** \brief Marks all allocations in given pool as lost if they are not used in current frame or VmaPoolCreateInfo::frameInUseCount back from now. +/** \brief Retrieves detailed statistics of existing #VmaPool object. -@param allocator Allocator object. -@param pool Pool. -@param[out] pLostAllocationCount Number of allocations marked as lost. Optional - pass null if you don't need this information. +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. */ -VMA_CALL_PRE void VMA_CALL_POST vmaMakePoolAllocationsLost( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NOT_NULL pool, - size_t* VMA_NULLABLE pLostAllocationCount); + VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ /** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions. @@ -3173,11 +1844,13 @@ Possible return values: - `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for specified pool. - `VK_SUCCESS` - corruption detection has been performed and succeeded. -- `VK_ERROR_VALIDATION_FAILED_EXT` - corruption detection has been performed and found memory corruptions around one of the allocations. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. `VMA_ASSERT` is also fired in that case. - Other value: Error returned by Vulkan, e.g. memory mapping failure. 
*/ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NOT_NULL pool); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool); /** \brief Retrieves name of a custom pool. @@ -3188,7 +1861,7 @@ destroyed or its name is changed using vmaSetPoolName(). VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NOT_NULL pool, - const char* VMA_NULLABLE * VMA_NOT_NULL ppName); + const char* VMA_NULLABLE* VMA_NOT_NULL ppName); /** \brief Sets name of a custom pool. @@ -3200,90 +1873,13 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( VmaPool VMA_NOT_NULL pool, const char* VMA_NULLABLE pName); -/** \struct VmaAllocation -\brief Represents single memory allocation. - -It may be either dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type -plus unique offset. - -There are multiple ways to create such object. -You need to fill structure VmaAllocationCreateInfo. -For more information see [Choosing memory type](@ref choosing_memory_type). - -Although the library provides convenience functions that create Vulkan buffer or image, -allocate memory for it and bind them together, -binding of the allocation to a buffer or an image is out of scope of the allocation itself. -Allocation object can exist without buffer/image bound, -binding can be done manually by the user, and destruction of it can be done -independently of destruction of the allocation. - -The object also remembers its size and some other information. -To retrieve this information, use function vmaGetAllocationInfo() and inspect -returned structure VmaAllocationInfo. - -Some kinds allocations can be in lost state. -For more information, see [Lost allocations](@ref lost_allocations). 
-*/ -VK_DEFINE_HANDLE(VmaAllocation) - -/** \brief Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo(). -*/ -typedef struct VmaAllocationInfo { - /** \brief Memory type index that this allocation was allocated from. - - It never changes. - */ - uint32_t memoryType; - /** \brief Handle to Vulkan memory object. - - Same memory object can be shared by multiple allocations. - - It can change after call to vmaDefragment() if this allocation is passed to the function, or if allocation is lost. - - If the allocation is lost, it is equal to `VK_NULL_HANDLE`. - */ - VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; - /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. - - You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function - vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image, - not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation - and apply this offset automatically. - - It can change after call to vmaDefragment() if this allocation is passed to the function, or if allocation is lost. - */ - VkDeviceSize offset; - /** \brief Size of this allocation, in bytes. - - It never changes, unless allocation is lost. - - \note Allocation size returned in this variable may be greater than the size - requested for the resource e.g. as `VkBufferCreateInfo::size`. Whole size of the - allocation is accessible for operations on memory e.g. using a pointer after - mapping with vmaMapMemory(), but operations on the resource e.g. using - `vkCmdCopyBuffer` must be limited to the size of the resource. - */ - VkDeviceSize size; - /** \brief Pointer to the beginning of this allocation as mapped data. 
- - If the allocation hasn't been mapped using vmaMapMemory() and hasn't been - created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. - - It can change after call to vmaMapMemory(), vmaUnmapMemory(). - It can also change after call to vmaDefragment() if this allocation is passed to the function. - */ - void* VMA_NULLABLE pMappedData; - /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). - - It can change after call to vmaSetAllocationUserData() for this allocation. - */ - void* VMA_NULLABLE pUserData; -} VmaAllocationInfo; - /** \brief General purpose memory allocation. -@param[out] pAllocation Handle to allocated memory. -@param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). +\param allocator +\param pVkMemoryRequirements +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). @@ -3294,17 +1890,17 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( VmaAllocator VMA_NOT_NULL allocator, const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements, const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, - VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); /** \brief General purpose memory allocation for multiple allocation objects at once. -@param allocator Allocator object. -@param pVkMemoryRequirements Memory requirements for each allocation. -@param pCreateInfo Creation parameters for each alloction. -@param allocationCount Number of allocations to make. 
-@param[out] pAllocations Pointer to array that will be filled with handles to created allocations. -@param[out] pAllocationInfo Optional. Pointer to array that will be filled with parameters of created allocations. +\param allocator Allocator object. +\param pVkMemoryRequirements Memory requirements for each allocation. +\param pCreateInfo Creation parameters for each allocation. +\param allocationCount Number of allocations to make. +\param[out] pAllocations Pointer to array that will be filled with handles to created allocations. +\param[out] pAllocationInfo Optional. Pointer to array that will be filled with parameters of created allocations. You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). @@ -3321,28 +1917,49 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements, const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo, size_t allocationCount, - VmaAllocation VMA_NULLABLE * VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo); -/** -@param[out] pAllocation Handle to allocated memory. -@param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). +/** \brief Allocates memory suitable for given `VkBuffer`. -You should free the memory using vmaFreeMemory(). +\param allocator +\param buffer +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory(). + +This is a special-purpose function. 
In most cases you should use vmaCreateBuffer(). + +You must free the allocation using vmaFreeMemory() when no longer needed. */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( VmaAllocator VMA_NOT_NULL allocator, VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, - VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); -/// Function similar to vmaAllocateMemoryForBuffer(). +/** \brief Allocates memory suitable for given `VkImage`. + +\param allocator +\param image +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +It only creates #VmaAllocation. To bind the memory to the image, use vmaBindImageMemory(). + +This is a special-purpose function. In most cases you should use vmaCreateImage(). + +You must free the allocation using vmaFreeMemory() when no longer needed. +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( VmaAllocator VMA_NOT_NULL allocator, VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, - VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); /** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage(). @@ -3366,70 +1983,42 @@ Passing `VK_NULL_HANDLE` as elements of `pAllocations` array is valid.
Such entr VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( VmaAllocator VMA_NOT_NULL allocator, size_t allocationCount, - const VmaAllocation VMA_NULLABLE * VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations); + const VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations); -/** \brief Deprecated. +/** \brief Returns current information about specified allocation. -\deprecated -In version 2.2.0 it used to try to change allocation's size without moving or reallocating it. -In current version it returns `VK_SUCCESS` only if `newSize` equals current allocation's size. -Otherwise returns `VK_ERROR_OUT_OF_POOL_MEMORY`, indicating that allocation's size could not be changed. -*/ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaResizeAllocation( - VmaAllocator VMA_NOT_NULL allocator, - VmaAllocation VMA_NOT_NULL allocation, - VkDeviceSize newSize); +Current parameters of given allocation are returned in `pAllocationInfo`. -/** \brief Returns current information about specified allocation and atomically marks it as used in current frame. +Although this function doesn't lock any mutex, so it should be quite efficient, +you should avoid calling it too often. +You can retrieve same VmaAllocationInfo structure while creating your resource, from function +vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure parameters don't change +(e.g. due to defragmentation). -Current paramteres of given allocation are returned in `pAllocationInfo`. - -This function also atomically "touches" allocation - marks it as used in current frame, -just like vmaTouchAllocation(). -If the allocation is in lost state, `pAllocationInfo->deviceMemory == VK_NULL_HANDLE`. - -Although this function uses atomics and doesn't lock any mutex, so it should be quite efficient, -you can avoid calling it too often. - -- You can retrieve same VmaAllocationInfo structure while creating your resource, from function - vmaCreateBuffer(), vmaCreateImage(). 
You can remember it if you are sure parameters don't change - (e.g. due to defragmentation or allocation becoming lost). -- If you just want to check if allocation is not lost, vmaTouchAllocation() will work faster. +There is also a new function vmaGetAllocationInfo2() that offers extended information +about the allocation, returned using new structure #VmaAllocationInfo2. */ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( VmaAllocator VMA_NOT_NULL allocator, VmaAllocation VMA_NOT_NULL allocation, VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo); -/** \brief Returns `VK_TRUE` if allocation is not lost and atomically marks it as used in current frame. +/** \brief Returns extended information about specified allocation. -If the allocation has been created with #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag, -this function returns `VK_TRUE` if it's not in lost state, so it can still be used. -It then also atomically "touches" the allocation - marks it as used in current frame, -so that you can be sure it won't become lost in current frame or next `frameInUseCount` frames. - -If the allocation is in lost state, the function returns `VK_FALSE`. -Memory of such allocation, as well as buffer or image bound to it, should not be used. -Lost allocation and the buffer/image still need to be destroyed. - -If the allocation has been created without #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag, -this function always returns `VK_TRUE`. +Current parameters of given allocation are returned in `pAllocationInfo`. +Extended parameters in structure #VmaAllocationInfo2 include memory block size +and a flag telling whether the allocation has dedicated memory. +It can be useful e.g. for interop with OpenGL. 
*/ -VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaTouchAllocation( +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( VmaAllocator VMA_NOT_NULL allocator, - VmaAllocation VMA_NOT_NULL allocation); + VmaAllocation VMA_NOT_NULL allocation, + VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo); /** \brief Sets pUserData in given allocation to new value. -If the allocation was created with VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT, -pUserData must be either null, or pointer to a null-terminated string. The function -makes local copy of the string and sets it as allocation's `pUserData`. String -passed as pUserData doesn't need to be valid for whole lifetime of the allocation - -you can free it after this call. String previously pointed by allocation's -pUserData is freed from memory. - -If the flag was not used, the value of pointer `pUserData` is just copied to -allocation's `pUserData`. It is opaque, so you can use it however you want - e.g. +The value of pointer `pUserData` is copied to allocation's `pUserData`. +It is opaque, so you can use it however you want - e.g. as a pointer, ordinal number or some handle to you own data. */ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( @@ -3437,27 +2026,40 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( VmaAllocation VMA_NOT_NULL allocation, void* VMA_NULLABLE pUserData); -/** \brief Creates new allocation that is in lost state from the beginning. +/** \brief Sets pName in given allocation to new value. -It can be useful if you need a dummy, non-null allocation. - -You still need to destroy created object using vmaFreeMemory(). - -Returned allocation is not tied to any specific memory pool or memory type and -not bound to any image or buffer. It has size = 0. It cannot be turned into -a real, non-empty allocation. +`pName` must be either null, or pointer to a null-terminated string. The function +makes local copy of the string and sets it as allocation's `pName`. 
String +passed as pName doesn't need to be valid for whole lifetime of the allocation - +you can free it after this call. String previously pointed by allocation's +`pName` is freed from memory. */ -VMA_CALL_PRE void VMA_CALL_POST vmaCreateLostAllocation( +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( VmaAllocator VMA_NOT_NULL allocator, - VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation); + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName); + +/** +\brief Given an allocation, returns Property Flags of its memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetAllocationInfo() + vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); /** \brief Maps memory represented by given allocation and returns pointer to it. Maps memory represented by given allocation to make it accessible to CPU code. When succeeded, `*ppData` contains pointer to first byte of this memory. -If the allocation is part of bigger `VkDeviceMemory` block, the pointer is -correctly offseted to the beginning of region assigned to this particular -allocation. + +\warning +If the allocation is part of a bigger `VkDeviceMemory` block, returned pointer is +correctly offsetted to the beginning of region assigned to this particular allocation. +Unlike the result of `vkMapMemory`, it points to the allocation, not to the beginning of the whole block. +You should not add VmaAllocationInfo::offset to it! Mapping is internally reference-counted and synchronized, so despite raw Vulkan function `vkMapMemory()` cannot be used to map same block of `VkDeviceMemory` @@ -3481,10 +2083,6 @@ vmaMapMemory(). You must not call vmaUnmapMemory() additional time to free the This function fails when used on allocation made in memory type that is not `HOST_VISIBLE`. 
-This function always fails when called for allocation that was created with -#VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag. Such allocations cannot be -mapped. - This function doesn't automatically flush or invalidate caches. If the allocation is made from a memory types that is not `HOST_COHERENT`, you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification. @@ -3492,7 +2090,7 @@ you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as requir VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( VmaAllocator VMA_NOT_NULL allocator, VmaAllocation VMA_NOT_NULL allocation, - void* VMA_NULLABLE * VMA_NOT_NULL ppData); + void* VMA_NULLABLE* VMA_NOT_NULL ppData); /** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory(). @@ -3568,7 +2166,7 @@ For more information, see documentation of vmaFlushAllocation(). \param allocator \param allocationCount \param allocations -\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all ofsets are zero. +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero. \param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is @@ -3577,7 +2175,7 @@ called, otherwise `VK_SUCCESS`. 
VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( VmaAllocator VMA_NOT_NULL allocator, uint32_t allocationCount, - const VmaAllocation VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, + const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); @@ -3589,7 +2187,7 @@ For more information, see documentation of vmaInvalidateAllocation(). \param allocator \param allocationCount \param allocations -\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all ofsets are zero. +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero. \param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is @@ -3598,13 +2196,69 @@ called, otherwise `VK_SUCCESS`. VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( VmaAllocator VMA_NOT_NULL allocator, uint32_t allocationCount, - const VmaAllocation VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, + const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); +/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed. 
+ +\param allocator +\param pSrcHostPointer Pointer to the host data that becomes source of the copy. +\param dstAllocation Handle to the allocation that becomes destination of the copy. +\param dstAllocationLocalOffset Offset within `dstAllocation` where to write copied data, in bytes. +\param size Number of bytes to copy. + +This is a convenience function that allows to copy data from a host pointer to an allocation easily. +Same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation(). + +This function can be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function will fail and generate a Validation Layers error. + +`dstAllocationLocalOffset` is relative to the contents of given `dstAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block as this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( + VmaAllocator VMA_NOT_NULL allocator, + const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer, + VmaAllocation VMA_NOT_NULL dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + +/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer. + +\param allocator +\param srcAllocation Handle to the allocation that becomes source of the copy. +\param srcAllocationLocalOffset Offset within `srcAllocation` where to read copied data, in bytes. +\param pDstHostPointer Pointer to the host memory that becomes destination of the copy. +\param size Number of bytes to copy. + +This is a convenience function that allows to copy data from an allocation to a host pointer easily.
+Same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory(). + +This function should be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function may fail and generate a Validation Layers error. +It may also work very slowly when reading from an uncached memory. + +`srcAllocationLocalOffset` is relative to the contents of given `srcAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block as this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer, + VkDeviceSize size); + /** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. -@param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked. +\param allocator +\param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked. Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, `VMA_DEBUG_MARGIN` is defined to nonzero and only for memory types that are @@ -3614,248 +2268,81 @@ Possible return values: - `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for any of specified memory types. - `VK_SUCCESS` - corruption detection has been performed and succeeded. -- `VK_ERROR_VALIDATION_FAILED_EXT` - corruption detection has been performed and found memory corruptions around one of the allocations. 
+- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. `VMA_ASSERT` is also fired in that case. - Other value: Error returned by Vulkan, e.g. memory mapping failure. */ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption(VmaAllocator VMA_NOT_NULL allocator, uint32_t memoryTypeBits); - -/** \struct VmaDefragmentationContext -\brief Represents Opaque object that represents started defragmentation process. - -Fill structure #VmaDefragmentationInfo2 and call function vmaDefragmentationBegin() to create it. -Call function vmaDefragmentationEnd() to destroy it. -*/ -VK_DEFINE_HANDLE(VmaDefragmentationContext) - -/// Flags to be used in vmaDefragmentationBegin(). None at the moment. Reserved for future use. -typedef enum VmaDefragmentationFlagBits { - VMA_DEFRAGMENTATION_FLAG_INCREMENTAL = 0x1, - VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VmaDefragmentationFlagBits; -typedef VkFlags VmaDefragmentationFlags; - -/** \brief Parameters for defragmentation. - -To be used with function vmaDefragmentationBegin(). -*/ -typedef struct VmaDefragmentationInfo2 { - /** \brief Reserved for future use. Should be 0. - */ - VmaDefragmentationFlags flags; - /** \brief Number of allocations in `pAllocations` array. - */ - uint32_t allocationCount; - /** \brief Pointer to array of allocations that can be defragmented. - - The array should have `allocationCount` elements. - The array should not contain nulls. - Elements in the array should be unique - same allocation cannot occur twice. - It is safe to pass allocations that are in the lost state - they are ignored. - All allocations not present in this array are considered non-moveable during this defragmentation. - */ - const VmaAllocation VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations; - /** \brief Optional, output. 
Pointer to array that will be filled with information whether the allocation at certain index has been changed during defragmentation. - - The array should have `allocationCount` elements. - You can pass null if you are not interested in this information. - */ - VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged; - /** \brief Numer of pools in `pPools` array. - */ - uint32_t poolCount; - /** \brief Either null or pointer to array of pools to be defragmented. - - All the allocations in the specified pools can be moved during defragmentation - and there is no way to check if they were really moved as in `pAllocationsChanged`, - so you must query all the allocations in all these pools for new `VkDeviceMemory` - and offset using vmaGetAllocationInfo() if you might need to recreate buffers - and images bound to them. - - The array should have `poolCount` elements. - The array should not contain nulls. - Elements in the array should be unique - same pool cannot occur twice. - - Using this array is equivalent to specifying all allocations from the pools in `pAllocations`. - It might be more efficient. - */ - const VmaPool VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(poolCount) pPools; - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on CPU side, like `memcpy()`, `memmove()`. - - `VK_WHOLE_SIZE` means no limit. - */ - VkDeviceSize maxCpuBytesToMove; - /** \brief Maximum number of allocations that can be moved to a different place using transfers on CPU side, like `memcpy()`, `memmove()`. - - `UINT32_MAX` means no limit. - */ - uint32_t maxCpuAllocationsToMove; - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on GPU side, posted to `commandBuffer`. - - `VK_WHOLE_SIZE` means no limit. 
- */ - VkDeviceSize maxGpuBytesToMove; - /** \brief Maximum number of allocations that can be moved to a different place using transfers on GPU side, posted to `commandBuffer`. - - `UINT32_MAX` means no limit. - */ - uint32_t maxGpuAllocationsToMove; - /** \brief Optional. Command buffer where GPU copy commands will be posted. - - If not null, it must be a valid command buffer handle that supports Transfer queue type. - It must be in the recording state and outside of a render pass instance. - You need to submit it and make sure it finished execution before calling vmaDefragmentationEnd(). - - Passing null means that only CPU defragmentation will be performed. - */ - VkCommandBuffer VMA_NULLABLE commandBuffer; -} VmaDefragmentationInfo2; - -typedef struct VmaDefragmentationPassMoveInfo { - VmaAllocation VMA_NOT_NULL allocation; - VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory; - VkDeviceSize offset; -} VmaDefragmentationPassMoveInfo; - -/** \brief Parameters for incremental defragmentation steps. - -To be used with function vmaBeginDefragmentationPass(). -*/ -typedef struct VmaDefragmentationPassInfo { - uint32_t moveCount; - VmaDefragmentationPassMoveInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(moveCount) pMoves; -} VmaDefragmentationPassInfo; - -/** \brief Deprecated. Optional configuration parameters to be passed to function vmaDefragment(). - -\deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. -*/ -typedef struct VmaDefragmentationInfo { - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places. - - Default is `VK_WHOLE_SIZE`, which means no limit. - */ - VkDeviceSize maxBytesToMove; - /** \brief Maximum number of allocations that can be moved to different place. - - Default is `UINT32_MAX`, which means no limit. 
- */ - uint32_t maxAllocationsToMove; -} VmaDefragmentationInfo; - -/** \brief Statistics returned by function vmaDefragment(). */ -typedef struct VmaDefragmentationStats { - /// Total number of bytes that have been copied while moving allocations to different places. - VkDeviceSize bytesMoved; - /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects. - VkDeviceSize bytesFreed; - /// Number of allocations that have been moved to different places. - uint32_t allocationsMoved; - /// Number of empty `VkDeviceMemory` objects that have been released to the system. - uint32_t deviceMemoryBlocksFreed; -} VmaDefragmentationStats; +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeBits); /** \brief Begins defragmentation process. -@param allocator Allocator object. -@param pInfo Structure filled with parameters of defragmentation. -@param[out] pStats Optional. Statistics of defragmentation. You can pass null if you are not interested in this information. -@param[out] pContext Context object that must be passed to vmaDefragmentationEnd() to finish defragmentation. -@return `VK_SUCCESS` and `*pContext == null` if defragmentation finished within this function call. `VK_NOT_READY` and `*pContext != null` if defragmentation has been started and you need to call vmaDefragmentationEnd() to finish it. Negative value in case of error. +\param allocator Allocator object. +\param pInfo Structure filled with parameters of defragmentation. +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. -Use this function instead of old, deprecated vmaDefragment(). - -Warning! 
Between the call to vmaDefragmentationBegin() and vmaDefragmentationEnd(): - -- You should not use any of allocations passed as `pInfo->pAllocations` or - any allocations that belong to pools passed as `pInfo->pPools`, - including calling vmaGetAllocationInfo(), vmaTouchAllocation(), or access - their data. -- Some mutexes protecting internal data structures may be locked, so trying to - make or free any allocations, bind buffers or images, map memory, or launch - another simultaneous defragmentation in between may cause stall (when done on - another thread) or deadlock (when done on the same thread), unless you are - 100% sure that defragmented allocations are in different pools. -- Information returned via `pStats` and `pInfo->pAllocationsChanged` are undefined. - They become valid after call to vmaDefragmentationEnd(). -- If `pInfo->commandBuffer` is not null, you must submit that command buffer - and make sure it finished execution before calling vmaDefragmentationEnd(). - -For more information and important limitations regarding defragmentation, see documentation chapter: +For more information about defragmentation, see documentation chapter: [Defragmentation](@ref defragmentation). */ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( VmaAllocator VMA_NOT_NULL allocator, - const VmaDefragmentationInfo2* VMA_NOT_NULL pInfo, - VmaDefragmentationStats* VMA_NULLABLE pStats, - VmaDefragmentationContext VMA_NULLABLE * VMA_NOT_NULL pContext); + const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, + VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); /** \brief Ends defragmentation process. -Use this function to finish defragmentation started by vmaDefragmentationBegin(). -It is safe to pass `context == null`. The function then does nothing. 
-*/ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( - VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context); +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. +Use this function to finish defragmentation started by vmaBeginDefragmentation(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed information for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), + and then preferably try another pass with vmaBeginDefragmentationPass(). +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context, - VmaDefragmentationPassInfo* VMA_NOT_NULL pInfo -); + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Ends single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed information for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. + +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. 
+ +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY + (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY + will be freed. + +If no more moves are possible you can end whole defragmentation. +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context -); - -/** \brief Deprecated. Compacts memory by moving allocations. - -@param pAllocations Array of allocations that can be moved during this compation. -@param allocationCount Number of elements in pAllocations and pAllocationsChanged arrays. -@param[out] pAllocationsChanged Array of boolean values that will indicate whether matching allocation in pAllocations array has been moved. This parameter is optional. Pass null if you don't need this information. -@param pDefragmentationInfo Configuration parameters. Optional - pass null to use default values. -@param[out] pDefragmentationStats Statistics returned by the function. Optional - pass null if you don't need this information. -@return `VK_SUCCESS` if completed, negative error code in case of error. - -\deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. - -This function works by moving allocations to different places (different -`VkDeviceMemory` objects and/or different offsets) in order to optimize memory -usage. Only allocations that are in `pAllocations` array can be moved. All other -allocations are considered nonmovable in this call. 
Basic rules: - -- Only allocations made in memory types that have - `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` - flags can be compacted. You may pass other allocations but it makes no sense - - these will never be moved. -- Custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT or - #VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT flag are not defragmented. Allocations - passed to this function that come from such pools are ignored. -- Allocations created with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT or - created as dedicated allocations for any other reason are also ignored. -- Both allocations made with or without #VMA_ALLOCATION_CREATE_MAPPED_BIT - flag can be compacted. If not persistently mapped, memory will be mapped - temporarily inside this function if needed. -- You must not pass same #VmaAllocation object multiple times in `pAllocations` array. - -The function also frees empty `VkDeviceMemory` blocks. - -Warning: This function may be time-consuming, so you shouldn't call it too often -(like after every resource creation/destruction). -You can call it on special occasions (like when reloading a game level or -when you just destroyed a lot of objects). Calling it every frame may be OK, but -you should measure that on your platform. - -For more information, see [Defragmentation](@ref defragmentation) chapter. -*/ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( - VmaAllocator VMA_NOT_NULL allocator, - const VmaAllocation VMA_NOT_NULL * VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, - size_t allocationCount, - VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged, - const VmaDefragmentationInfo* VMA_NULLABLE pDefragmentationInfo, - VmaDefragmentationStats* VMA_NULLABLE pDefragmentationStats); + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); /** \brief Binds buffer to allocation. 
@@ -3876,8 +2363,11 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( /** \brief Binds buffer to allocation with additional parameters. -@param allocationLocalOffset Additional offset to be added while binding, relative to the beginnig of the `allocation`. Normally it should be 0. -@param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. Normally it should be null. +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param buffer +\param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. Normally it should be null. This function is similar to vmaBindBufferMemory(), but it provides additional parameters. @@ -3889,7 +2379,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( VmaAllocation VMA_NOT_NULL allocation, VkDeviceSize allocationLocalOffset, VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, - const void* VMA_NULLABLE pNext); + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext); /** \brief Binds image to allocation. @@ -3910,8 +2400,11 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( /** \brief Binds image to allocation with additional parameters. -@param allocationLocalOffset Additional offset to be added while binding, relative to the beginnig of the `allocation`. Normally it should be 0. -@param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param image +\param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. 
This function is similar to vmaBindImageMemory(), but it provides additional parameters. @@ -3923,12 +2416,16 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( VmaAllocation VMA_NOT_NULL allocation, VkDeviceSize allocationLocalOffset, VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, - const void* VMA_NULLABLE pNext); + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext); -/** -@param[out] pBuffer Buffer that was created. -@param[out] pAllocation Allocation that was created. -@param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + +\param allocator +\param pBufferCreateInfo +\param pAllocationCreateInfo +\param[out] pBuffer Buffer that was created. +\param[out] pAllocation Allocation that was created. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). This function automatically: @@ -3937,7 +2434,7 @@ This function automatically: -# Binds the buffer with the memory. If any of these operations fail, buffer and allocation are not created, -returned value is negative error code, *pBuffer and *pAllocation are null. +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. If the function succeeded, you must destroy both buffer and allocation when you no longer need them using either convenience function vmaDestroyBuffer() or @@ -3946,8 +2443,8 @@ separately, using `vkDestroyBuffer()` and vmaFreeMemory(). If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, VK_KHR_dedicated_allocation extension is used internally to query driver whether it requires or prefers the new buffer to have dedicated allocation. 
If yes, -and if dedicated allocation is possible (VmaAllocationCreateInfo::pool is null -and #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated +and if dedicated allocation is possible +(#VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated allocation for this buffer, just like when using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. @@ -3959,10 +2456,81 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( VmaAllocator VMA_NOT_NULL allocator, const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, - VkBuffer VMA_NULLABLE_NON_DISPATCHABLE * VMA_NOT_NULL pBuffer, - VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); +/** \brief Creates a buffer with additional minimum alignment. + +Similar to vmaCreateBuffer() but provides additional parameter `minAlignment` which allows to specify custom, +minimum alignment to be used when placing the buffer inside a larger memory block, which may be needed e.g. +for interop with OpenGL. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. 
+-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note There is a new version of this function augmented with parameter `allocationLocalOffset` - see vmaCreateAliasingBuffer2(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the allocation. Normally it should be 0. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note This is a new version of the function augmented with parameter `allocationLocalOffset`. 
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + /** \brief Destroys Vulkan buffer and frees allocated memory. This is just a convenience function equivalent to: @@ -3972,7 +2540,7 @@ vkDestroyBuffer(device, buffer, allocationCallbacks); vmaFreeMemory(allocator, allocation); \endcode -It it safe to pass null as buffer and/or allocation. +It is safe to pass null as buffer and/or allocation. */ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( VmaAllocator VMA_NOT_NULL allocator, @@ -3984,10 +2552,25 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( VmaAllocator VMA_NOT_NULL allocator, const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, - VkImage VMA_NULLABLE_NON_DISPATCHABLE * VMA_NOT_NULL pImage, - VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); +/// Function similar to vmaCreateAliasingBuffer() but for images. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + +/// Function similar to vmaCreateAliasingBuffer2() but for images. 
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + /** \brief Destroys Vulkan image and frees allocated memory. This is just a convenience function equivalent to: @@ -3997,19 +2580,171 @@ vkDestroyImage(device, image, allocationCallbacks); vmaFreeMemory(allocator, allocation); \endcode -It it safe to pass null as image and/or allocation. +It is safe to pass null as image and/or allocation. */ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( VmaAllocator VMA_NOT_NULL allocator, VkImage VMA_NULLABLE_NON_DISPATCHABLE image, VmaAllocation VMA_NULLABLE allocation); +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \brief Creates new #VmaVirtualBlock object. + +\param pCreateInfo Parameters for creation. +\param[out] pVirtualBlock Returned virtual block object or `VMA_NULL` if creation failed. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( + const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualBlock VMA_NULLABLE* VMA_NOT_NULL pVirtualBlock); + +/** \brief Destroys #VmaVirtualBlock object. + +Please note that you should consciously handle virtual allocations that could remain unfreed in the block. +You should either free them individually using vmaVirtualFree() or call vmaClearVirtualBlock() +if you are sure this is what you want. If you do neither, an assert is called. + +If you keep pointers to some additional metadata associated with your virtual allocations in their `pUserData`, +don't forget to free them. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock( + VmaVirtualBlock VMA_NULLABLE virtualBlock); + +/** \brief Returns true of the #VmaVirtualBlock is empty - contains 0 virtual allocations and has all its space available for new allocations. 
+*/ +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty( + VmaVirtualBlock VMA_NOT_NULL virtualBlock); + +/** \brief Returns information about a specific virtual allocation within a virtual block, like its size and `pUserData` pointer. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo); + +/** \brief Allocates new virtual allocation inside given #VmaVirtualBlock. + +If the allocation fails due to not enough free space available, `VK_ERROR_OUT_OF_DEVICE_MEMORY` is returned +(despite the function doesn't ever allocate actual GPU memory). +`pAllocation` is then set to `VK_NULL_HANDLE` and `pOffset`, if not null, it set to `UINT64_MAX`. + +\param virtualBlock Virtual block +\param pCreateInfo Parameters for the allocation +\param[out] pAllocation Returned handle of the new allocation +\param[out] pOffset Returned offset of the new allocation. Optional, can be null. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, + VkDeviceSize* VMA_NULLABLE pOffset); + +/** \brief Frees virtual allocation inside given #VmaVirtualBlock. + +It is correct to call this function with `allocation == VK_NULL_HANDLE` - it does nothing. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation); + +/** \brief Frees all virtual allocations inside given #VmaVirtualBlock. + +You must either call this function or free each virtual allocation individually with vmaVirtualFree() +before destroying a virtual block. Otherwise, an assert is called. 
+ +If you keep pointer to some additional metadata associated with your virtual allocation in its `pUserData`, +don't forget to free it as well. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock( + VmaVirtualBlock VMA_NOT_NULL virtualBlock); + +/** \brief Changes custom pointer associated with given virtual allocation. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, + void* VMA_NULLABLE pUserData); + +/** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats); + +/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is slow to call. Use for debugging purposes. +For less detailed statistics, see vmaGetVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats); + +/** @} */ + +#if VMA_STATS_STRING_ENABLED +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock. +\param virtualBlock Virtual block. +\param[out] ppStatsString Returned string. +\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. + +Returned string must be freed using vmaFreeVirtualBlockStatsString(). 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, + VkBool32 detailedMap); + +/// Frees a string returned by vmaBuildVirtualBlockStatsString(). +VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE pStatsString); + +/** \brief Builds and returns statistics as a null-terminated string in JSON format. +\param allocator +\param[out] ppStatsString Must be freed using vmaFreeStatsString() function. +\param detailedMap +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( + VmaAllocator VMA_NOT_NULL allocator, + char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, + VkBool32 detailedMap); + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( + VmaAllocator VMA_NOT_NULL allocator, + char* VMA_NULLABLE pStatsString); + +/** @} */ + +#endif // VMA_STATS_STRING_ENABLED + +#endif // _VMA_FUNCTION_HEADERS + #ifdef __cplusplus } #endif #endif // AMD_VULKAN_MEMORY_ALLOCATOR_H +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// IMPLEMENTATION +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + // For Visual Studio IntelliSense. #if defined(__cplusplus) && defined(__INTELLISENSE__) #define VMA_IMPLEMENTATION @@ -4021,24 +2756,36 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( #include #include #include +#include #include +#include -#if VMA_RECORDING_ENABLED - #include - #if defined(_WIN32) - #include +#if !defined(VMA_CPP20) + #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 + #define VMA_CPP20 1 #else - #include - #include + #define VMA_CPP20 0 #endif #endif +#ifdef _MSC_VER + #include // For functions like __popcnt, _BitScanForward etc. 
+#endif +#if VMA_CPP20 + #include +#endif + +#if VMA_STATS_STRING_ENABLED + #include // For snprintf +#endif + /******************************************************************************* CONFIGURATION SECTION Define some of these macros before each #include of this header or change them here if you need other then default behavior depending on your environment. */ +#ifndef _VMA_CONFIGURATION /* Define this macro to 1 to make the library fetch pointers to Vulkan functions @@ -4054,38 +2801,21 @@ internally, like: Define this macro to 1 to make the library fetch pointers to Vulkan functions internally, like: - vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(m_hDevice, vkAllocateMemory); + vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory"); + +To use this feature in new versions of VMA you now have to pass +VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as +VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null. */ #if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS) #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 - #if defined(VK_NO_PROTOTYPES) - extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; - extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; - #endif -#endif - -// Define this macro to 1 to make the library use STL containers instead of its own implementation. -//#define VMA_USE_STL_CONTAINERS 1 - -/* Set this macro to 1 to make the library including and using STL containers: -std::pair, std::vector, std::list, std::unordered_map. - -Set it to 0 or undefined to make the library using its own implementation of -the containers. -*/ -#if VMA_USE_STL_CONTAINERS - #define VMA_USE_STL_VECTOR 1 - #define VMA_USE_STL_UNORDERED_MAP 1 - #define VMA_USE_STL_LIST 1 #endif #ifndef VMA_USE_STL_SHARED_MUTEX - // Compiler conforms to C++17. 
- #if __cplusplus >= 201703L + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 #define VMA_USE_STL_SHARED_MUTEX 1 // Visual studio defines __cplusplus properly only when passed additional parameter: /Zc:__cplusplus - // Otherwise it's always 199711L, despite shared_mutex works since Visual Studio 2015 Update 2. - // See: https://blogs.msdn.microsoft.com/vcblog/2018/04/09/msvc-now-correctly-reports-__cplusplus/ + // Otherwise it is always 199711L, despite shared_mutex works since Visual Studio 2015 Update 2. #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918 && __cplusplus == 199711L && _MSVC_LANG >= 201703L #define VMA_USE_STL_SHARED_MUTEX 1 #else @@ -4094,34 +2824,76 @@ the containers. #endif /* -THESE INCLUDES ARE NOT ENABLED BY DEFAULT. -Library has its own container implementation. -*/ -#if VMA_USE_STL_VECTOR - #include -#endif +Define this macro to include custom header files without having to edit this file directly, e.g.: -#if VMA_USE_STL_UNORDERED_MAP - #include -#endif + // Inside of "my_vma_configuration_user_includes.h": -#if VMA_USE_STL_LIST - #include -#endif + #include "my_custom_assert.h" // for MY_CUSTOM_ASSERT + #include "my_custom_min.h" // for my_custom_min + #include + #include -/* -Following headers are used in this CONFIGURATION section only, so feel free to + // Inside a different file, which includes "vk_mem_alloc.h": + + #define VMA_CONFIGURATION_USER_INCLUDES_H "my_vma_configuration_user_includes.h" + #define VMA_ASSERT(expr) MY_CUSTOM_ASSERT(expr) + #define VMA_MIN(v1, v2) (my_custom_min(v1, v2)) + #include "vk_mem_alloc.h" + ... + +The following headers are used in this CONFIGURATION section only, so feel free to remove them if not needed. 
*/ -#include // for assert -#include // for min, max -#include +#if !defined(VMA_CONFIGURATION_USER_INCLUDES_H) + #include // for assert + #include // for min, max, swap + #include +#else + #include VMA_CONFIGURATION_USER_INCLUDES_H +#endif #ifndef VMA_NULL // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. #define VMA_NULL nullptr #endif +#ifndef VMA_FALLTHROUGH + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 + #define VMA_FALLTHROUGH [[fallthrough]] + #else + #define VMA_FALLTHROUGH + #endif +#endif + +// Normal assert to check for programmer's errors, especially in Debug configuration. +#ifndef VMA_ASSERT + #ifdef NDEBUG + #define VMA_ASSERT(expr) + #else + #define VMA_ASSERT(expr) assert(expr) + #endif +#endif + +// Assert that will be called very often, like inside data structures e.g. operator[]. +// Making it non-empty can make program slow. +#ifndef VMA_HEAVY_ASSERT + #ifdef NDEBUG + #define VMA_HEAVY_ASSERT(expr) + #else + #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr) + #endif +#endif + +// Assert used for reporting memory leaks - unfreed allocations. 
+#ifndef VMA_ASSERT_LEAK + #define VMA_ASSERT_LEAK(expr) VMA_ASSERT(expr) +#endif + +// If your compiler is not compatible with C++17 and definition of +// aligned_alloc() function is missing, uncommenting following line may help: + +//#include + #if defined(__ANDROID_API__) && (__ANDROID_API__ < 16) #include static void* vma_aligned_alloc(size_t alignment, size_t size) @@ -4143,18 +2915,21 @@ static void* vma_aligned_alloc(size_t alignment, size_t size) static void* vma_aligned_alloc(size_t alignment, size_t size) { -#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) -#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 - // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only - // with the MacOSX11.0 SDK in Xcode 12 (which is what adds - // MAC_OS_X_VERSION_10_16), even though the function is marked - // availabe for 10.15. That's why the preprocessor checks for 10.16 but - // the __builtin_available checks for 10.15. - // People who use C++17 could call aligned_alloc with the 10.15 SDK already. - if (__builtin_available(macOS 10.15, iOS 13, *)) - return aligned_alloc(alignment, size); -#endif -#endif + // Unfortunately, aligned_alloc causes VMA to crash due to it returning null pointers. (At least under 11.4) + // Therefore, for now disable this specific exception until a proper solution is found. + //#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) + //#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 + // // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only + // // with the MacOSX11.0 SDK in Xcode 12 (which is what adds + // // MAC_OS_X_VERSION_10_16), even though the function is marked + // // available for 10.15. That is why the preprocessor checks for 10.16 but + // // the __builtin_available checks for 10.15. 
+ // // People who use C++17 could call aligned_alloc with the 10.15 SDK already. + // if (__builtin_available(macOS 10.15, iOS 13, *)) + // return aligned_alloc(alignment, size); + //#endif + //#endif + // alignment must be >= sizeof(void*) if(alignment < sizeof(void*)) { @@ -4171,11 +2946,17 @@ static void* vma_aligned_alloc(size_t alignment, size_t size) { return _aligned_malloc(size, alignment); } -#else +#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 static void* vma_aligned_alloc(size_t alignment, size_t size) { return aligned_alloc(alignment, size); } +#else +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + VMA_ASSERT(0 && "Could not implement aligned_alloc automatically. Please enable C++17 or later in your compiler or provide custom implementation of macro VMA_SYSTEM_ALIGNED_MALLOC (and VMA_SYSTEM_ALIGNED_FREE if needed) using the API of your system."); + return VMA_NULL; +} #endif #if defined(_WIN32) @@ -4184,38 +2965,14 @@ static void vma_aligned_free(void* ptr) _aligned_free(ptr); } #else -static void vma_aligned_free(void* ptr) +static void vma_aligned_free(void* VMA_NULLABLE ptr) { free(ptr); } #endif -// If your compiler is not compatible with C++11 and definition of -// aligned_alloc() function is missing, uncommeting following line may help: - -//#include - -// Normal assert to check for programmer's errors, especially in Debug configuration. -#ifndef VMA_ASSERT - #ifdef NDEBUG - #define VMA_ASSERT(expr) - #else - #define VMA_ASSERT(expr) assert(expr) - #endif -#endif - -// Assert that will be called very often, like inside data structures e.g. operator[]. -// Making it non-empty can make program slow. 
-#ifndef VMA_HEAVY_ASSERT - #ifdef NDEBUG - #define VMA_HEAVY_ASSERT(expr) - #else - #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr) - #endif -#endif - #ifndef VMA_ALIGN_OF - #define VMA_ALIGN_OF(type) (__alignof(type)) + #define VMA_ALIGN_OF(type) (alignof(type)) #endif #ifndef VMA_SYSTEM_ALIGNED_MALLOC @@ -4231,43 +2988,77 @@ static void vma_aligned_free(void* ptr) #endif #endif +#ifndef VMA_COUNT_BITS_SET + // Returns number of bits set to 1 in (v) + #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + +#ifndef VMA_BITSCAN_LSB + // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) +#endif + +#ifndef VMA_BITSCAN_MSB + // Scans integer for index of first nonzero value from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_MSB(mask) VmaBitScanMSB(mask) +#endif + #ifndef VMA_MIN - #define VMA_MIN(v1, v2) (std::min((v1), (v2))) + #define VMA_MIN(v1, v2) ((std::min)((v1), (v2))) #endif #ifndef VMA_MAX - #define VMA_MAX(v1, v2) (std::max((v1), (v2))) -#endif - -#ifndef VMA_SWAP - #define VMA_SWAP(v1, v2) std::swap((v1), (v2)) + #define VMA_MAX(v1, v2) ((std::max)((v1), (v2))) #endif #ifndef VMA_SORT #define VMA_SORT(beg, end, cmp) std::sort(beg, end, cmp) #endif -#ifndef VMA_DEBUG_LOG - #define VMA_DEBUG_LOG(format, ...) +#ifndef VMA_DEBUG_LOG_FORMAT + #define VMA_DEBUG_LOG_FORMAT(format, ...) /* - #define VMA_DEBUG_LOG(format, ...) do { \ - printf(format, __VA_ARGS__); \ + #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ printf("\n"); \ } while(false) */ #endif +#ifndef VMA_DEBUG_LOG + #define VMA_DEBUG_LOG(str) VMA_DEBUG_LOG_FORMAT("%s", (str)) +#endif + +#ifndef VMA_LEAK_LOG_FORMAT + #define VMA_LEAK_LOG_FORMAT(format, ...) 
VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__) +#endif + +#ifndef VMA_CLASS_NO_COPY + #define VMA_CLASS_NO_COPY(className) \ + private: \ + className(const className&) = delete; \ + className& operator=(const className&) = delete; +#endif +#ifndef VMA_CLASS_NO_COPY_NO_MOVE + #define VMA_CLASS_NO_COPY_NO_MOVE(className) \ + private: \ + className(const className&) = delete; \ + className(className&&) = delete; \ + className& operator=(const className&) = delete; \ + className& operator=(className&&) = delete; +#endif + // Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString. #if VMA_STATS_STRING_ENABLED - static inline void VmaUint32ToStr(char* outStr, size_t strLen, uint32_t num) + static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num) { - snprintf(outStr, strLen, "%u", static_cast(num)); + snprintf(outStr, strLen, "%" PRIu32, num); } - static inline void VmaUint64ToStr(char* outStr, size_t strLen, uint64_t num) + static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num) { - snprintf(outStr, strLen, "%llu", static_cast(num)); + snprintf(outStr, strLen, "%" PRIu64, num); } - static inline void VmaPtrToStr(char* outStr, size_t strLen, const void* ptr) + static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr) { snprintf(outStr, strLen, "%p", ptr); } @@ -4276,7 +3067,9 @@ static void vma_aligned_free(void* ptr) #ifndef VMA_MUTEX class VmaMutex { + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex) public: + VmaMutex() { } void Lock() { m_Mutex.lock(); } void Unlock() { m_Mutex.unlock(); } bool TryLock() { return m_Mutex.try_lock(); } @@ -4360,17 +3153,21 @@ If providing your own implementation, you need to implement a subset of std::ato #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0) #endif -#ifndef VMA_DEBUG_ALIGNMENT +#ifndef VMA_MIN_ALIGNMENT /** Minimum alignment of all allocations, in bytes. - Set to more than 1 for debugging purposes only. 
Must be power of two. + Set to more than 1 for debugging purposes. Must be power of two. */ - #define VMA_DEBUG_ALIGNMENT (1) + #ifdef VMA_DEBUG_ALIGNMENT // Old name + #define VMA_MIN_ALIGNMENT VMA_DEBUG_ALIGNMENT + #else + #define VMA_MIN_ALIGNMENT (1) + #endif #endif #ifndef VMA_DEBUG_MARGIN /** - Minimum margin before and after every allocation, in bytes. + Minimum margin after every allocation, in bytes. Set nonzero for debugging purposes only. */ #define VMA_DEBUG_MARGIN (0) @@ -4387,7 +3184,7 @@ If providing your own implementation, you need to implement a subset of std::ato #ifndef VMA_DEBUG_DETECT_CORRUPTION /** Define this macro to 1 together with non-zero value of VMA_DEBUG_MARGIN to - enable writing magic value to the margin before and after every allocation and + enable writing magic value to the margin after every allocation and validating it, so that memory corruptions (out-of-bounds writes) are detected. */ #define VMA_DEBUG_DETECT_CORRUPTION (0) @@ -4409,6 +3206,14 @@ If providing your own implementation, you need to implement a subset of std::ato #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1) #endif +#ifndef VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT + /* + Set this to 1 to make VMA never exceed VkPhysicalDeviceLimits::maxMemoryAllocationCount + and return error instead of leaving up to Vulkan implementation what to do in such cases. + */ + #define VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT (0) +#endif + #ifndef VMA_SMALL_HEAP_MAX_SIZE /// Maximum size of a memory heap in Vulkan to consider it "small". 
#define VMA_SMALL_HEAP_MAX_SIZE (1024ull * 1024 * 1024) @@ -4419,45 +3224,295 @@ If providing your own implementation, you need to implement a subset of std::ato #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) #endif -#ifndef VMA_CLASS_NO_COPY - #define VMA_CLASS_NO_COPY(className) \ - private: \ - className(const className&) = delete; \ - className& operator=(const className&) = delete; +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. +It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDoc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED + #define VMA_MAPPING_HYSTERESIS_ENABLED 1 #endif -static const uint32_t VMA_FRAME_INDEX_LOST = UINT32_MAX; - -// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F. -static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; - -static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC; -static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; +#define VMA_VALIDATE(cond) do { if(!(cond)) { \ + VMA_ASSERT(0 && "Validation failed: " #cond); \ + return false; \ + } } while(false) /******************************************************************************* END OF CONFIGURATION */ +#endif // _VMA_CONFIGURATION -// # Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC; +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; +// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F. +static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; + +// Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. 
static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040; static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080; static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000; - +static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200; +static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000; static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; +static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; +static const uint32_t VMA_VENDOR_ID_AMD = 4098; -static VkAllocationCallbacks VmaEmptyAllocationCallbacks = { - VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL }; +// This one is tricky. Vulkan specification defines this code as available since +// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. +// See pull request #207. +#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13) -// Returns number of bits set to 1 in (v). + +#if VMA_STATS_STRING_ENABLED +// Correspond to values of enum VmaSuballocationType. +static const char* VMA_SUBALLOCATION_TYPE_NAMES[] = +{ + "FREE", + "UNKNOWN", + "BUFFER", + "IMAGE_UNKNOWN", + "IMAGE_LINEAR", + "IMAGE_OPTIMAL", +}; +#endif + +static VkAllocationCallbacks VmaEmptyAllocationCallbacks = + { VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL }; + + +#ifndef _VMA_ENUM_DECLARATIONS + +enum VmaSuballocationType +{ + VMA_SUBALLOCATION_TYPE_FREE = 0, + VMA_SUBALLOCATION_TYPE_UNKNOWN = 1, + VMA_SUBALLOCATION_TYPE_BUFFER = 2, + VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3, + VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4, + VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5, + VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF +}; + +enum VMA_CACHE_OPERATION +{ + VMA_CACHE_FLUSH, + VMA_CACHE_INVALIDATE +}; + +enum class VmaAllocationRequestType +{ + Normal, + TLSF, + // Used by "Linear" algorithm. 
+ UpperAddress, + EndOf1st, + EndOf2nd, +}; + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_FORWARD_DECLARATIONS +// Opaque handle used by allocation algorithms to identify single allocation in any conforming way. +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaAllocHandle); + +struct VmaMutexLock; +struct VmaMutexLockRead; +struct VmaMutexLockWrite; + +template +struct AtomicTransactionalIncrement; + +template +struct VmaStlAllocator; + +template +class VmaVector; + +template +class VmaSmallVector; + +template +class VmaPoolAllocator; + +template +struct VmaListItem; + +template +class VmaRawList; + +template +class VmaList; + +template +class VmaIntrusiveLinkedList; + +#if VMA_STATS_STRING_ENABLED +class VmaStringBuilder; +class VmaJsonWriter; +#endif + +class VmaDeviceMemoryBlock; + +struct VmaDedicatedAllocationListItemTraits; +class VmaDedicatedAllocationList; + +struct VmaSuballocation; +struct VmaSuballocationOffsetLess; +struct VmaSuballocationOffsetGreater; +struct VmaSuballocationItemSizeLess; + +typedef VmaList> VmaSuballocationList; + +struct VmaAllocationRequest; + +class VmaBlockMetadata; +class VmaBlockMetadata_Linear; +class VmaBlockMetadata_TLSF; + +class VmaBlockVector; + +struct VmaPoolListItemTraits; + +struct VmaCurrentBudgetData; + +class VmaAllocationObjectAllocator; + +#endif // _VMA_FORWARD_DECLARATIONS + + +#ifndef _VMA_FUNCTIONS + +/* +Returns number of bits set to 1 in (v). + +On specific platforms and compilers you can use intrinsics like: + +Visual Studio: + return __popcnt(v); +GCC, Clang: + return static_cast(__builtin_popcount(v)); + +Define macro VMA_COUNT_BITS_SET to provide your optimized implementation. +But you need to check in runtime whether user's CPU supports these, as some old processors don't. 
+*/ static inline uint32_t VmaCountBitsSet(uint32_t v) { +#if VMA_CPP20 + return std::popcount(v); +#else uint32_t c = v - ((v >> 1) & 0x55555555); - c = ((c >> 2) & 0x33333333) + (c & 0x33333333); - c = ((c >> 4) + c) & 0x0F0F0F0F; - c = ((c >> 8) + c) & 0x00FF00FF; + c = ((c >> 2) & 0x33333333) + (c & 0x33333333); + c = ((c >> 4) + c) & 0x0F0F0F0F; + c = ((c >> 8) + c) & 0x00FF00FF; c = ((c >> 16) + c) & 0x0000FFFF; return c; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanForward64(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif VMA_CPP20 + if(mask) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffsll(mask)) - 1U; +#else + uint8_t pos = 0; + uint64_t bit = 1; + do + { + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 63); + return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint32_t mask) +{ +#ifdef _MSC_VER + unsigned long pos; + if (_BitScanForward(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif VMA_CPP20 + if(mask) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffs(mask)) - 1U; +#else + uint8_t pos = 0; + uint32_t bit = 1; + do + { + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 31); + return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanMSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanReverse64(&pos, mask)) + return static_cast(pos); +#elif VMA_CPP20 + if(mask) + return 63 - static_cast(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 63 - static_cast(__builtin_clzll(mask)); +#else + uint8_t pos = 63; + uint64_t bit = 1ULL << 63; + do + { + if (mask & bit) + return pos; + bit >>= 1; + } while 
(pos-- > 0); +#endif + return UINT8_MAX; +} + +static inline uint8_t VmaBitScanMSB(uint32_t mask) +{ +#ifdef _MSC_VER + unsigned long pos; + if (_BitScanReverse(&pos, mask)) + return static_cast(pos); +#elif VMA_CPP20 + if(mask) + return 31 - static_cast(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 31 - static_cast(__builtin_clz(mask)); +#else + uint8_t pos = 31; + uint32_t bit = 1UL << 31; + do + { + if (mask & bit) + return pos; + bit >>= 1; + } while (pos-- > 0); +#endif + return UINT8_MAX; } /* @@ -4468,7 +3523,7 @@ For 0 returns true. template inline bool VmaIsPow2(T x) { - return (x & (x-1)) == 0; + return (x & (x - 1)) == 0; } // Aligns given value up to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 16. @@ -4479,7 +3534,8 @@ static inline T VmaAlignUp(T val, T alignment) VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); return (val + alignment - 1) & ~(alignment - 1); } -// Aligns given value down to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 8. + +// Aligns given value down to nearest multiply of align value. For example: VmaAlignDown(11, 8) = 8. // Use types like uint32_t, uint64_t as T. template static inline T VmaAlignDown(T val, T alignment) @@ -4495,6 +3551,13 @@ static inline T VmaRoundDiv(T x, T y) return (x + (y / (T)2)) / y; } +// Divide by 'y' and round up to nearest integer. +template +static inline T VmaDivideRoundingUp(T x, T y) +{ + return (x + y - (T)1) / y; +} + // Returns smallest power of 2 greater or equal to v. 
static inline uint32_t VmaNextPow2(uint32_t v) { @@ -4507,6 +3570,7 @@ static inline uint32_t VmaNextPow2(uint32_t v) v++; return v; } + static inline uint64_t VmaNextPow2(uint64_t v) { v--; @@ -4531,6 +3595,7 @@ static inline uint32_t VmaPrevPow2(uint32_t v) v = v ^ (v >> 1); return v; } + static inline uint64_t VmaPrevPow2(uint64_t v) { v |= v >> 1; @@ -4548,66 +3613,6 @@ static inline bool VmaStrIsEmpty(const char* pStr) return pStr == VMA_NULL || *pStr == '\0'; } -#if VMA_STATS_STRING_ENABLED - -static const char* VmaAlgorithmToStr(uint32_t algorithm) -{ - switch(algorithm) - { - case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: - return "Linear"; - case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT: - return "Buddy"; - case 0: - return "Default"; - default: - VMA_ASSERT(0); - return ""; - } -} - -#endif // #if VMA_STATS_STRING_ENABLED - -#ifndef VMA_SORT - -template -Iterator VmaQuickSortPartition(Iterator beg, Iterator end, Compare cmp) -{ - Iterator centerValue = end; --centerValue; - Iterator insertIndex = beg; - for(Iterator memTypeIndex = beg; memTypeIndex < centerValue; ++memTypeIndex) - { - if(cmp(*memTypeIndex, *centerValue)) - { - if(insertIndex != memTypeIndex) - { - VMA_SWAP(*memTypeIndex, *insertIndex); - } - ++insertIndex; - } - } - if(insertIndex != centerValue) - { - VMA_SWAP(*insertIndex, *centerValue); - } - return insertIndex; -} - -template -void VmaQuickSort(Iterator beg, Iterator end, Compare cmp) -{ - if(beg < end) - { - Iterator it = VmaQuickSortPartition(beg, end, cmp); - VmaQuickSort(beg, it, cmp); - VmaQuickSort(it + 1, end, cmp); - } -} - -#define VMA_SORT(beg, end, cmp) VmaQuickSort(beg, end, cmp) - -#endif // #ifndef VMA_SORT - /* Returns true if two memory blocks occupy overlapping pages. ResourceA must be in less memory offset than ResourceB. 
@@ -4629,17 +3634,6 @@ static inline bool VmaBlocksOnSamePage( return resourceAEndPage == resourceBStartPage; } -enum VmaSuballocationType -{ - VMA_SUBALLOCATION_TYPE_FREE = 0, - VMA_SUBALLOCATION_TYPE_UNKNOWN = 1, - VMA_SUBALLOCATION_TYPE_BUFFER = 2, - VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3, - VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4, - VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5, - VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF -}; - /* Returns true if given suballocation types could conflict and must respect VkPhysicalDeviceLimits::bufferImageGranularity. They conflict if one is buffer @@ -4650,12 +3644,12 @@ static inline bool VmaIsBufferImageGranularityConflict( VmaSuballocationType suballocType1, VmaSuballocationType suballocType2) { - if(suballocType1 > suballocType2) + if (suballocType1 > suballocType2) { - VMA_SWAP(suballocType1, suballocType2); + std::swap(suballocType1, suballocType2); } - switch(suballocType1) + switch (suballocType1) { case VMA_SUBALLOCATION_TYPE_FREE: return false; @@ -4686,7 +3680,7 @@ static void VmaWriteMagicValue(void* pData, VkDeviceSize offset) #if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION uint32_t* pDst = (uint32_t*)((char*)pData + offset); const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); - for(size_t i = 0; i < numberCount; ++i, ++pDst) + for (size_t i = 0; i < numberCount; ++i, ++pDst) { *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE; } @@ -4700,9 +3694,9 @@ static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) #if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset); const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); - for(size_t i = 0; i < numberCount; ++i, ++pSrc) + for (size_t i = 0; i < numberCount; ++i, ++pSrc) { - if(*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE) + if (*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE) { return false; } @@ -4723,55 +3717,6 @@ static void 
VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBuf outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size. } -// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). -struct VmaMutexLock -{ - VMA_CLASS_NO_COPY(VmaMutexLock) -public: - VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : - m_pMutex(useMutex ? &mutex : VMA_NULL) - { if(m_pMutex) { m_pMutex->Lock(); } } - ~VmaMutexLock() - { if(m_pMutex) { m_pMutex->Unlock(); } } -private: - VMA_MUTEX* m_pMutex; -}; - -// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. -struct VmaMutexLockRead -{ - VMA_CLASS_NO_COPY(VmaMutexLockRead) -public: - VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : - m_pMutex(useMutex ? &mutex : VMA_NULL) - { if(m_pMutex) { m_pMutex->LockRead(); } } - ~VmaMutexLockRead() { if(m_pMutex) { m_pMutex->UnlockRead(); } } -private: - VMA_RW_MUTEX* m_pMutex; -}; - -// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. -struct VmaMutexLockWrite -{ - VMA_CLASS_NO_COPY(VmaMutexLockWrite) -public: - VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex) : - m_pMutex(useMutex ? &mutex : VMA_NULL) - { if(m_pMutex) { m_pMutex->LockWrite(); } } - ~VmaMutexLockWrite() { if(m_pMutex) { m_pMutex->UnlockWrite(); } } -private: - VMA_RW_MUTEX* m_pMutex; -}; - -#if VMA_DEBUG_GLOBAL_MUTEX - static VMA_MUTEX gDebugGlobalMutex; - #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true); -#else - #define VMA_DEBUG_GLOBAL_MUTEX_LOCK -#endif - -// Minimum size of a free suballocation to register it in the free suballocation collection. 
-static const VkDeviceSize VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16; /* Performs binary search and returns iterator to first element that is greater or @@ -4783,13 +3728,13 @@ Returned value is the found element, if present in the collection or place where new element with value (key) should be inserted. */ template -static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT &key, const CmpLess& cmp) +static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) { - size_t down = 0, up = (end - beg); - while(down < up) + size_t down = 0, up = size_t(end - beg); + while (down < up) { const size_t mid = down + (up - down) / 2; // Overflow-safe midpoint calculation - if(cmp(*(beg+mid), key)) + if (cmp(*(beg + mid), key)) { down = mid + 1; } @@ -4806,7 +3751,7 @@ IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, { IterT it = VmaBinaryFindFirstNotLess( beg, end, value, cmp); - if(it == end || + if (it == end || (!cmp(*it, value) && !cmp(value, *it))) { return it; @@ -4822,16 +3767,16 @@ T must be pointer type, e.g. VmaAllocation, VmaPool. template static bool VmaValidatePointerArray(uint32_t count, const T* arr) { - for(uint32_t i = 0; i < count; ++i) + for (uint32_t i = 0; i < count; ++i) { const T iPtr = arr[i]; - if(iPtr == VMA_NULL) + if (iPtr == VMA_NULL) { return false; } - for(uint32_t j = i + 1; j < count; ++j) + for (uint32_t j = i + 1; j < count; ++j) { - if(iPtr == arr[j]) + if (iPtr == arr[j]) { return false; } @@ -4846,6 +3791,239 @@ static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) newStruct->pNext = mainStruct->pNext; mainStruct->pNext = newStruct; } +// Finds structure with s->sType == sType in mainStruct->pNext chain. +// Returns pointer to it. If not found, returns null. 
+template +static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType) +{ + for(const VkBaseInStructure* s = (const VkBaseInStructure*)mainStruct->pNext; + s != VMA_NULL; s = s->pNext) + { + if(s->sType == sType) + { + return (const FindT*)s; + } + } + return VMA_NULL; +} + +// An abstraction over buffer or image `usage` flags, depending on available extensions. +struct VmaBufferImageUsage +{ +#if VMA_KHR_MAINTENANCE5 + typedef uint64_t BaseType; // VkFlags64 +#else + typedef uint32_t BaseType; // VkFlags32 +#endif + + static const VmaBufferImageUsage UNKNOWN; + + BaseType Value; + + VmaBufferImageUsage() { *this = UNKNOWN; } + explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { } + VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5); + explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo); + + bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; } + bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; } + + bool Contains(BaseType flag) const { return (Value & flag) != 0; } + bool ContainsDeviceAccess() const + { + // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_IMAGE_TRANSFER*. + return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; + } +}; + +const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); + +static void swap(VmaBufferImageUsage& lhs, VmaBufferImageUsage& rhs) noexcept +{ + using std::swap; + swap(lhs.Value, rhs.Value); +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, + bool useKhrMaintenance5) +{ +#if VMA_KHR_MAINTENANCE5 + if(useKhrMaintenance5) + { + // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR, + // take usage from it and ignore VkBufferCreateInfo::usage, per specification + // of the VK_KHR_maintenance5 extension. 
+ const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 = + VmaPnextChainFind(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR); + if(usageFlags2) + { + this->Value = usageFlags2->usage; + return; + } + } +#endif + + this->Value = (BaseType)createInfo.usage; +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo) +{ + // Maybe in the future there will be VK_KHR_maintenanceN extension with structure + // VkImageUsageFlags2CreateInfoKHR, like the one for buffers... + + this->Value = (BaseType)createInfo.usage; +} + +// This is the main algorithm that guides the selection of a memory type best for an allocation - +// converts usage to required/preferred/not preferred flags. +static bool FindMemoryPreferences( + bool isIntegratedGPU, + const VmaAllocationCreateInfo& allocCreateInfo, + VmaBufferImageUsage bufImgUsage, + VkMemoryPropertyFlags& outRequiredFlags, + VkMemoryPropertyFlags& outPreferredFlags, + VkMemoryPropertyFlags& outNotPreferredFlags) +{ + outRequiredFlags = allocCreateInfo.requiredFlags; + outPreferredFlags = allocCreateInfo.preferredFlags; + outNotPreferredFlags = 0; + + switch(allocCreateInfo.usage) + { + case VMA_MEMORY_USAGE_UNKNOWN: + break; + case VMA_MEMORY_USAGE_GPU_ONLY: + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_CPU_ONLY: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + case VMA_MEMORY_USAGE_CPU_TO_GPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_GPU_TO_CPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + break; + case 
VMA_MEMORY_USAGE_CPU_COPY: + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: + outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; + break; + case VMA_MEMORY_USAGE_AUTO: + case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: + case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: + { + if(bufImgUsage == VmaBufferImageUsage::UNKNOWN) + { + VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known." + " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" ); + return false; + } + + const bool deviceAccess = bufImgUsage.ContainsDeviceAccess(); + const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; + const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; + const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; + const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + + // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. + if(hostAccessRandom) + { + // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)! + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. + // Omitting HOST_VISIBLE here is intentional. + // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. 
+ // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + else + { + // Always CPU memory. + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + } + // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined. + else if(hostAccessSequentialWrite) + { + // Want uncached and write-combined. + outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + else + { + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame) + if(deviceAccess) + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU) + else + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory. + if(preferDevice) + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + } + } + // No CPU access + else + { + // if(deviceAccess) + // + // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory, + // unless there is a clear preference from the user not to do so. + // + // else: + // + // No direct GPU access, no CPU access, just transfers. + // It may be staging copy intended for e.g. 
preserving image for next frame (then better GPU memory) or + // a "swap file" copy to free some GPU memory (then better CPU memory). + // Up to the user to decide. If no preferece, assume the former and choose GPU memory. + + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + } + default: + VMA_ASSERT(0); + } + + // Avoid DEVICE_COHERENT unless explicitly requested. + if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) & + (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) + { + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY; + } + + return true; +} //////////////////////////////////////////////////////////////////////////////// // Memory allocation @@ -4853,7 +4031,7 @@ static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment) { void* result = VMA_NULL; - if((pAllocationCallbacks != VMA_NULL) && + if ((pAllocationCallbacks != VMA_NULL) && (pAllocationCallbacks->pfnAllocation != VMA_NULL)) { result = (*pAllocationCallbacks->pfnAllocation)( @@ -4872,7 +4050,7 @@ static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr) { - if((pAllocationCallbacks != VMA_NULL) && + if ((pAllocationCallbacks != VMA_NULL) && (pAllocationCallbacks->pfnFree != VMA_NULL)) { (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr); @@ -4909,9 +4087,9 @@ static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr template static void vma_delete_array(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr, size_t count) { - if(ptr != VMA_NULL) + if (ptr != VMA_NULL) { - for(size_t i = count; i--; ) + for (size_t i = count; 
i--; ) { ptr[i].~T(); } @@ -4921,299 +4099,39 @@ static void vma_delete_array(const VkAllocationCallbacks* pAllocationCallbacks, static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr) { - if(srcStr != VMA_NULL) + if (srcStr != VMA_NULL) { const size_t len = strlen(srcStr); char* const result = vma_new_array(allocs, char, len + 1); memcpy(result, srcStr, len + 1); return result; } - else - { - return VMA_NULL; - } + return VMA_NULL; } +#if VMA_STATS_STRING_ENABLED +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr, size_t strLen) +{ + if (srcStr != VMA_NULL) + { + char* const result = vma_new_array(allocs, char, strLen + 1); + memcpy(result, srcStr, strLen); + result[strLen] = '\0'; + return result; + } + return VMA_NULL; +} +#endif // VMA_STATS_STRING_ENABLED + static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str) { - if(str != VMA_NULL) + if (str != VMA_NULL) { const size_t len = strlen(str); vma_delete_array(allocs, str, len + 1); } } -// STL-compatible allocator. 
-template -class VmaStlAllocator -{ -public: - const VkAllocationCallbacks* const m_pCallbacks; - typedef T value_type; - - VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) { } - template VmaStlAllocator(const VmaStlAllocator& src) : m_pCallbacks(src.m_pCallbacks) { } - - T* allocate(size_t n) { return VmaAllocateArray(m_pCallbacks, n); } - void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); } - - template - bool operator==(const VmaStlAllocator& rhs) const - { - return m_pCallbacks == rhs.m_pCallbacks; - } - template - bool operator!=(const VmaStlAllocator& rhs) const - { - return m_pCallbacks != rhs.m_pCallbacks; - } - - VmaStlAllocator& operator=(const VmaStlAllocator& x) = delete; -}; - -#if VMA_USE_STL_VECTOR - -#define VmaVector std::vector - -template -static void VmaVectorInsert(std::vector& vec, size_t index, const T& item) -{ - vec.insert(vec.begin() + index, item); -} - -template -static void VmaVectorRemove(std::vector& vec, size_t index) -{ - vec.erase(vec.begin() + index); -} - -#else // #if VMA_USE_STL_VECTOR - -/* Class with interface compatible with subset of std::vector. -T must be POD because constructors and destructors are not called and memcpy is -used for these objects. */ -template -class VmaVector -{ -public: - typedef T value_type; - - VmaVector(const AllocatorT& allocator) : - m_Allocator(allocator), - m_pArray(VMA_NULL), - m_Count(0), - m_Capacity(0) - { - } - - VmaVector(size_t count, const AllocatorT& allocator) : - m_Allocator(allocator), - m_pArray(count ? (T*)VmaAllocateArray(allocator.m_pCallbacks, count) : VMA_NULL), - m_Count(count), - m_Capacity(count) - { - } - - // This version of the constructor is here for compatibility with pre-C++14 std::vector. - // value is unused. - VmaVector(size_t count, const T& value, const AllocatorT& allocator) - : VmaVector(count, allocator) {} - - VmaVector(const VmaVector& src) : - m_Allocator(src.m_Allocator), - m_pArray(src.m_Count ? 
(T*)VmaAllocateArray(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL), - m_Count(src.m_Count), - m_Capacity(src.m_Count) - { - if(m_Count != 0) - { - memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); - } - } - - ~VmaVector() - { - VmaFree(m_Allocator.m_pCallbacks, m_pArray); - } - - VmaVector& operator=(const VmaVector& rhs) - { - if(&rhs != this) - { - resize(rhs.m_Count); - if(m_Count != 0) - { - memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); - } - } - return *this; - } - - bool empty() const { return m_Count == 0; } - size_t size() const { return m_Count; } - T* data() { return m_pArray; } - const T* data() const { return m_pArray; } - - T& operator[](size_t index) - { - VMA_HEAVY_ASSERT(index < m_Count); - return m_pArray[index]; - } - const T& operator[](size_t index) const - { - VMA_HEAVY_ASSERT(index < m_Count); - return m_pArray[index]; - } - - T& front() - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[0]; - } - const T& front() const - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[0]; - } - T& back() - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[m_Count - 1]; - } - const T& back() const - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[m_Count - 1]; - } - - void reserve(size_t newCapacity, bool freeMemory = false) - { - newCapacity = VMA_MAX(newCapacity, m_Count); - - if((newCapacity < m_Capacity) && !freeMemory) - { - newCapacity = m_Capacity; - } - - if(newCapacity != m_Capacity) - { - T* const newArray = newCapacity ? 
VmaAllocateArray(m_Allocator, newCapacity) : VMA_NULL; - if(m_Count != 0) - { - memcpy(newArray, m_pArray, m_Count * sizeof(T)); - } - VmaFree(m_Allocator.m_pCallbacks, m_pArray); - m_Capacity = newCapacity; - m_pArray = newArray; - } - } - - void resize(size_t newCount, bool freeMemory = false) - { - size_t newCapacity = m_Capacity; - if(newCount > m_Capacity) - { - newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8)); - } - else if(freeMemory) - { - newCapacity = newCount; - } - - if(newCapacity != m_Capacity) - { - T* const newArray = newCapacity ? VmaAllocateArray(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL; - const size_t elementsToCopy = VMA_MIN(m_Count, newCount); - if(elementsToCopy != 0) - { - memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); - } - VmaFree(m_Allocator.m_pCallbacks, m_pArray); - m_Capacity = newCapacity; - m_pArray = newArray; - } - - m_Count = newCount; - } - - void clear(bool freeMemory = false) - { - resize(0, freeMemory); - } - - void insert(size_t index, const T& src) - { - VMA_HEAVY_ASSERT(index <= m_Count); - const size_t oldCount = size(); - resize(oldCount + 1); - if(index < oldCount) - { - memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); - } - m_pArray[index] = src; - } - - void remove(size_t index) - { - VMA_HEAVY_ASSERT(index < m_Count); - const size_t oldCount = size(); - if(index < oldCount - 1) - { - memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); - } - resize(oldCount - 1); - } - - void push_back(const T& src) - { - const size_t newIndex = size(); - resize(newIndex + 1); - m_pArray[newIndex] = src; - } - - void pop_back() - { - VMA_HEAVY_ASSERT(m_Count > 0); - resize(size() - 1); - } - - void push_front(const T& src) - { - insert(0, src); - } - - void pop_front() - { - VMA_HEAVY_ASSERT(m_Count > 0); - remove(0); - } - - typedef T* iterator; - - iterator begin() { return m_pArray; } - iterator end() { return m_pArray + 
m_Count; } - -private: - AllocatorT m_Allocator; - T* m_pArray; - size_t m_Count; - size_t m_Capacity; -}; - -template -static void VmaVectorInsert(VmaVector& vec, size_t index, const T& item) -{ - vec.insert(index, item); -} - -template -static void VmaVectorRemove(VmaVector& vec, size_t index) -{ - vec.remove(index); -} - -#endif // #if VMA_USE_STL_VECTOR - template size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value) { @@ -5235,7 +4153,7 @@ bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& vector.end(), value, comparator); - if((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) + if ((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) { size_t indexToRemove = it - vector.begin(); VmaVectorRemove(vector, indexToRemove); @@ -5243,10 +4161,390 @@ bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& } return false; } +#endif // _VMA_FUNCTIONS -//////////////////////////////////////////////////////////////////////////////// -// class VmaSmallVector +#ifndef _VMA_STATISTICS_FUNCTIONS +static void VmaClearStatistics(VmaStatistics& outStats) +{ + outStats.blockCount = 0; + outStats.allocationCount = 0; + outStats.blockBytes = 0; + outStats.allocationBytes = 0; +} + +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) +{ + inoutStats.blockCount += src.blockCount; + inoutStats.allocationCount += src.allocationCount; + inoutStats.blockBytes += src.blockBytes; + inoutStats.allocationBytes += src.allocationBytes; +} + +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) +{ + VmaClearStatistics(outStats.statistics); + outStats.unusedRangeCount = 0; + outStats.allocationSizeMin = VK_WHOLE_SIZE; + outStats.allocationSizeMax = 0; + outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; + outStats.unusedRangeSizeMax = 0; +} + +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& 
inoutStats, VkDeviceSize size) +{ + inoutStats.statistics.allocationCount++; + inoutStats.statistics.allocationBytes += size; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); +} + +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.unusedRangeCount++; + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); +} + +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) +{ + VmaAddStatistics(inoutStats.statistics, src.statistics); + inoutStats.unusedRangeCount += src.unusedRangeCount; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); +} + +#endif // _VMA_STATISTICS_FUNCTIONS + +#ifndef _VMA_MUTEX_LOCK +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct VmaMutexLock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock) +public: + VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : + m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->Lock(); } + } + ~VmaMutexLock() { if (m_pMutex) { m_pMutex->Unlock(); } } + +private: + VMA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct VmaMutexLockRead +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead) +public: + VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : + m_pMutex(useMutex ? 
&mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->LockRead(); } + } + ~VmaMutexLockRead() { if (m_pMutex) { m_pMutex->UnlockRead(); } } + +private: + VMA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. +struct VmaMutexLockWrite +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite) +public: + VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex) + : m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->LockWrite(); } + } + ~VmaMutexLockWrite() { if (m_pMutex) { m_pMutex->UnlockWrite(); } } + +private: + VMA_RW_MUTEX* m_pMutex; +}; + +#if VMA_DEBUG_GLOBAL_MUTEX + static VMA_MUTEX gDebugGlobalMutex; + #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true); +#else + #define VMA_DEBUG_GLOBAL_MUTEX_LOCK +#endif +#endif // _VMA_MUTEX_LOCK + +#ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT +// An object that increments given atomic but decrements it back in the destructor unless Commit() is called. +template +struct AtomicTransactionalIncrement +{ +public: + using T = decltype(AtomicT().load()); + + ~AtomicTransactionalIncrement() + { + if(m_Atomic) + --(*m_Atomic); + } + + void Commit() { m_Atomic = VMA_NULL; } + T Increment(AtomicT* atomic) + { + m_Atomic = atomic; + return m_Atomic->fetch_add(1); + } + +private: + AtomicT* m_Atomic = VMA_NULL; +}; +#endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT + +#ifndef _VMA_STL_ALLOCATOR +// STL-compatible allocator. 
+template +struct VmaStlAllocator +{ + const VkAllocationCallbacks* const m_pCallbacks; + typedef T value_type; + + VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) {} + template + VmaStlAllocator(const VmaStlAllocator& src) : m_pCallbacks(src.m_pCallbacks) {} + VmaStlAllocator(const VmaStlAllocator&) = default; + VmaStlAllocator& operator=(const VmaStlAllocator&) = delete; + + T* allocate(size_t n) { return VmaAllocateArray(m_pCallbacks, n); } + void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); } + + template + bool operator==(const VmaStlAllocator& rhs) const + { + return m_pCallbacks == rhs.m_pCallbacks; + } + template + bool operator!=(const VmaStlAllocator& rhs) const + { + return m_pCallbacks != rhs.m_pCallbacks; + } +}; +#endif // _VMA_STL_ALLOCATOR + +#ifndef _VMA_VECTOR +/* Class with interface compatible with subset of std::vector. +T must be POD because constructors and destructors are not called and memcpy is +used for these objects. */ +template +class VmaVector +{ +public: + typedef T value_type; + typedef T* iterator; + typedef const T* const_iterator; + + VmaVector(const AllocatorT& allocator); + VmaVector(size_t count, const AllocatorT& allocator); + // This version of the constructor is here for compatibility with pre-C++14 std::vector. + // value is unused. 
+ VmaVector(size_t count, const T& value, const AllocatorT& allocator) : VmaVector(count, allocator) {} + VmaVector(const VmaVector& src); + VmaVector& operator=(const VmaVector& rhs); + ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); } + + bool empty() const { return m_Count == 0; } + size_t size() const { return m_Count; } + T* data() { return m_pArray; } + T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } + T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } + const T* data() const { return m_pArray; } + const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } + const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } + + iterator begin() { return m_pArray; } + iterator end() { return m_pArray + m_Count; } + const_iterator cbegin() const { return m_pArray; } + const_iterator cend() const { return m_pArray + m_Count; } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } + void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } + void push_front(const T& src) { insert(0, src); } + + void push_back(const T& src); + void reserve(size_t newCapacity, bool freeMemory = false); + void resize(size_t newCount); + void clear() { resize(0); } + void shrink_to_fit(); + void insert(size_t index, const T& src); + void remove(size_t index); + + T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } + const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } + +private: + AllocatorT m_Allocator; + T* m_pArray; + size_t m_Count; + size_t m_Capacity; +}; + +#ifndef _VMA_VECTOR_FUNCTIONS +template +VmaVector::VmaVector(const AllocatorT& allocator) + : m_Allocator(allocator), + m_pArray(VMA_NULL), + m_Count(0), + m_Capacity(0) {} + +template +VmaVector::VmaVector(size_t 
count, const AllocatorT& allocator) + : m_Allocator(allocator), + m_pArray(count ? (T*)VmaAllocateArray(allocator.m_pCallbacks, count) : VMA_NULL), + m_Count(count), + m_Capacity(count) {} + +template +VmaVector::VmaVector(const VmaVector& src) + : m_Allocator(src.m_Allocator), + m_pArray(src.m_Count ? (T*)VmaAllocateArray(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL), + m_Count(src.m_Count), + m_Capacity(src.m_Count) +{ + if (m_Count != 0) + { + memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); + } +} + +template +VmaVector& VmaVector::operator=(const VmaVector& rhs) +{ + if (&rhs != this) + { + resize(rhs.m_Count); + if (m_Count != 0) + { + memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); + } + } + return *this; +} + +template +void VmaVector::push_back(const T& src) +{ + const size_t newIndex = size(); + resize(newIndex + 1); + m_pArray[newIndex] = src; +} + +template +void VmaVector::reserve(size_t newCapacity, bool freeMemory) +{ + newCapacity = VMA_MAX(newCapacity, m_Count); + + if ((newCapacity < m_Capacity) && !freeMemory) + { + newCapacity = m_Capacity; + } + + if (newCapacity != m_Capacity) + { + T* const newArray = newCapacity ? VmaAllocateArray(m_Allocator, newCapacity) : VMA_NULL; + if (m_Count != 0) + { + memcpy(newArray, m_pArray, m_Count * sizeof(T)); + } + VmaFree(m_Allocator.m_pCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } +} + +template +void VmaVector::resize(size_t newCount) +{ + size_t newCapacity = m_Capacity; + if (newCount > m_Capacity) + { + newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8)); + } + + if (newCapacity != m_Capacity) + { + T* const newArray = newCapacity ? 
VmaAllocateArray(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL; + const size_t elementsToCopy = VMA_MIN(m_Count, newCount); + if (elementsToCopy != 0) + { + memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); + } + VmaFree(m_Allocator.m_pCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } + + m_Count = newCount; +} + +template +void VmaVector::shrink_to_fit() +{ + if (m_Capacity > m_Count) + { + T* newArray = VMA_NULL; + if (m_Count > 0) + { + newArray = VmaAllocateArray(m_Allocator.m_pCallbacks, m_Count); + memcpy(newArray, m_pArray, m_Count * sizeof(T)); + } + VmaFree(m_Allocator.m_pCallbacks, m_pArray); + m_Capacity = m_Count; + m_pArray = newArray; + } +} + +template +void VmaVector::insert(size_t index, const T& src) +{ + VMA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + if (index < oldCount) + { + memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); + } + m_pArray[index] = src; +} + +template +void VmaVector::remove(size_t index) +{ + VMA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if (index < oldCount - 1) + { + memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); + } + resize(oldCount - 1); +} +#endif // _VMA_VECTOR_FUNCTIONS + +template +static void VmaVectorInsert(VmaVector& vec, size_t index, const T& item) +{ + vec.insert(index, item); +} + +template +static void VmaVectorRemove(VmaVector& vec, size_t index) +{ + vec.remove(index); +} +#endif // _VMA_VECTOR + +#ifndef _VMA_SMALL_VECTOR /* This is a vector (a variable-sized array), optimized for the case when the array is small. @@ -5254,168 +4552,155 @@ It contains some number of elements in-place, which allows it to avoid heap allo when the actual number of elements is below that threshold. This allows normal "small" cases to be fast without losing generality for large inputs. 
*/ - template class VmaSmallVector { public: typedef T value_type; + typedef T* iterator; - VmaSmallVector(const AllocatorT& allocator) : - m_Count(0), - m_DynamicArray(allocator) - { - } - VmaSmallVector(size_t count, const AllocatorT& allocator) : - m_Count(count), - m_DynamicArray(count > N ? count : 0, allocator) - { - } + VmaSmallVector(const AllocatorT& allocator); + VmaSmallVector(size_t count, const AllocatorT& allocator); template - VmaSmallVector(const VmaSmallVector& src) = delete; + VmaSmallVector(const VmaSmallVector&) = delete; template - VmaSmallVector& operator=(const VmaSmallVector& rhs) = delete; + VmaSmallVector& operator=(const VmaSmallVector&) = delete; + ~VmaSmallVector() = default; bool empty() const { return m_Count == 0; } size_t size() const { return m_Count; } T* data() { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; } + T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } + T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } const T* data() const { return m_Count > N ? 
m_DynamicArray.data() : m_StaticArray; } - - T& operator[](size_t index) - { - VMA_HEAVY_ASSERT(index < m_Count); - return data()[index]; - } - const T& operator[](size_t index) const - { - VMA_HEAVY_ASSERT(index < m_Count); - return data()[index]; - } - - T& front() - { - VMA_HEAVY_ASSERT(m_Count > 0); - return data()[0]; - } - const T& front() const - { - VMA_HEAVY_ASSERT(m_Count > 0); - return data()[0]; - } - T& back() - { - VMA_HEAVY_ASSERT(m_Count > 0); - return data()[m_Count - 1]; - } - const T& back() const - { - VMA_HEAVY_ASSERT(m_Count > 0); - return data()[m_Count - 1]; - } - - void resize(size_t newCount, bool freeMemory = false) - { - if(newCount > N && m_Count > N) - { - // Any direction, staying in m_DynamicArray - m_DynamicArray.resize(newCount, freeMemory); - } - else if(newCount > N && m_Count <= N) - { - // Growing, moving from m_StaticArray to m_DynamicArray - m_DynamicArray.resize(newCount, freeMemory); - if(m_Count > 0) - { - memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T)); - } - } - else if(newCount <= N && m_Count > N) - { - // Shrinking, moving from m_DynamicArray to m_StaticArray - if(newCount > 0) - { - memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T)); - } - m_DynamicArray.resize(0, freeMemory); - } - else - { - // Any direction, staying in m_StaticArray - nothing to do here - } - m_Count = newCount; - } - - void clear(bool freeMemory = false) - { - m_DynamicArray.clear(freeMemory); - m_Count = 0; - } - - void insert(size_t index, const T& src) - { - VMA_HEAVY_ASSERT(index <= m_Count); - const size_t oldCount = size(); - resize(oldCount + 1); - T* const dataPtr = data(); - if(index < oldCount) - { - // I know, this could be more optimal for case where memmove can be memcpy directly from m_StaticArray to m_DynamicArray. 
- memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T)); - } - dataPtr[index] = src; - } - - void remove(size_t index) - { - VMA_HEAVY_ASSERT(index < m_Count); - const size_t oldCount = size(); - if(index < oldCount - 1) - { - // I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray. - T* const dataPtr = data(); - memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T)); - } - resize(oldCount - 1); - } - - void push_back(const T& src) - { - const size_t newIndex = size(); - resize(newIndex + 1); - data()[newIndex] = src; - } - - void pop_back() - { - VMA_HEAVY_ASSERT(m_Count > 0); - resize(size() - 1); - } - - void push_front(const T& src) - { - insert(0, src); - } - - void pop_front() - { - VMA_HEAVY_ASSERT(m_Count > 0); - remove(0); - } - - typedef T* iterator; + const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } + const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } iterator begin() { return data(); } iterator end() { return data() + m_Count; } + void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } + void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } + void push_front(const T& src) { insert(0, src); } + + void push_back(const T& src); + void resize(size_t newCount, bool freeMemory = false); + void clear(bool freeMemory = false); + void insert(size_t index, const T& src); + void remove(size_t index); + + T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + private: size_t m_Count; T m_StaticArray[N]; // Used when m_Size <= N VmaVector m_DynamicArray; // Used when m_Size > N }; -//////////////////////////////////////////////////////////////////////////////// -// class VmaPoolAllocator +#ifndef _VMA_SMALL_VECTOR_FUNCTIONS 
+template +VmaSmallVector::VmaSmallVector(const AllocatorT& allocator) + : m_Count(0), + m_DynamicArray(allocator) {} +template +VmaSmallVector::VmaSmallVector(size_t count, const AllocatorT& allocator) + : m_Count(count), + m_DynamicArray(count > N ? count : 0, allocator) {} + +template +void VmaSmallVector::push_back(const T& src) +{ + const size_t newIndex = size(); + resize(newIndex + 1); + data()[newIndex] = src; +} + +template +void VmaSmallVector::resize(size_t newCount, bool freeMemory) +{ + if (newCount > N && m_Count > N) + { + // Any direction, staying in m_DynamicArray + m_DynamicArray.resize(newCount); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + } + else if (newCount > N && m_Count <= N) + { + // Growing, moving from m_StaticArray to m_DynamicArray + m_DynamicArray.resize(newCount); + if (m_Count > 0) + { + memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T)); + } + } + else if (newCount <= N && m_Count > N) + { + // Shrinking, moving from m_DynamicArray to m_StaticArray + if (newCount > 0) + { + memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T)); + } + m_DynamicArray.resize(0); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + } + else + { + // Any direction, staying in m_StaticArray - nothing to do here + } + m_Count = newCount; +} + +template +void VmaSmallVector::clear(bool freeMemory) +{ + m_DynamicArray.clear(); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + m_Count = 0; +} + +template +void VmaSmallVector::insert(size_t index, const T& src) +{ + VMA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + T* const dataPtr = data(); + if (index < oldCount) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_StaticArray to m_DynamicArray. 
+ memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T)); + } + dataPtr[index] = src; +} + +template +void VmaSmallVector::remove(size_t index) +{ + VMA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if (index < oldCount - 1) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray. + T* const dataPtr = data(); + memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T)); + } + resize(oldCount - 1); +} +#endif // _VMA_SMALL_VECTOR_FUNCTIONS +#endif // _VMA_SMALL_VECTOR + +#ifndef _VMA_POOL_ALLOCATOR /* Allocator for objects of type T using a list of arrays (pools) to speed up allocation. Number of elements that can be allocated is not bounded because @@ -5424,11 +4709,11 @@ allocator can create multiple blocks. template class VmaPoolAllocator { - VMA_CLASS_NO_COPY(VmaPoolAllocator) + VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator) public: VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity); ~VmaPoolAllocator(); - template T* Alloc(Types... args); + template T* Alloc(Types&&... 
args); void Free(T* ptr); private: @@ -5437,7 +4722,6 @@ private: uint32_t NextFreeIndex; alignas(T) char Value[sizeof(T)]; }; - struct ItemBlock { Item* pItems; @@ -5447,14 +4731,15 @@ private: const VkAllocationCallbacks* m_pAllocationCallbacks; const uint32_t m_FirstBlockCapacity; - VmaVector< ItemBlock, VmaStlAllocator > m_ItemBlocks; + VmaVector> m_ItemBlocks; ItemBlock& CreateNewBlock(); }; +#ifndef _VMA_POOL_ALLOCATOR_FUNCTIONS template -VmaPoolAllocator::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity) : - m_pAllocationCallbacks(pAllocationCallbacks), +VmaPoolAllocator::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity) + : m_pAllocationCallbacks(pAllocationCallbacks), m_FirstBlockCapacity(firstBlockCapacity), m_ItemBlocks(VmaStlAllocator(pAllocationCallbacks)) { @@ -5464,19 +4749,19 @@ VmaPoolAllocator::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCa template VmaPoolAllocator::~VmaPoolAllocator() { - for(size_t i = m_ItemBlocks.size(); i--; ) + for (size_t i = m_ItemBlocks.size(); i--;) vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity); m_ItemBlocks.clear(); } template -template T* VmaPoolAllocator::Alloc(Types... args) +template T* VmaPoolAllocator::Alloc(Types&&... args) { - for(size_t i = m_ItemBlocks.size(); i--; ) + for (size_t i = m_ItemBlocks.size(); i--; ) { ItemBlock& block = m_ItemBlocks[i]; // This block has some free items: Use first one. - if(block.FirstFreeIndex != UINT32_MAX) + if (block.FirstFreeIndex != UINT32_MAX) { Item* const pItem = &block.pItems[block.FirstFreeIndex]; block.FirstFreeIndex = pItem->NextFreeIndex; @@ -5491,7 +4776,7 @@ template T* VmaPoolAllocator::Alloc(Types... args) Item* const pItem = &newBlock.pItems[0]; newBlock.FirstFreeIndex = pItem->NextFreeIndex; T* result = (T*)&pItem->Value; - new(result)T(std::forward(args)...); // Explicit constructor call. 
+ new(result) T(std::forward(args)...); // Explicit constructor call. return result; } @@ -5499,7 +4784,7 @@ template void VmaPoolAllocator::Free(T* ptr) { // Search all memory blocks to find ptr. - for(size_t i = m_ItemBlocks.size(); i--; ) + for (size_t i = m_ItemBlocks.size(); i--; ) { ItemBlock& block = m_ItemBlocks[i]; @@ -5508,7 +4793,7 @@ void VmaPoolAllocator::Free(T* ptr) memcpy(&pItemPtr, &ptr, sizeof(pItemPtr)); // Check if pItemPtr is in address range of this block. - if((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity)) + if ((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity)) { ptr->~T(); // Explicit destructor call. const uint32_t index = static_cast(pItemPtr - block.pItems); @@ -5526,29 +4811,25 @@ typename VmaPoolAllocator::ItemBlock& VmaPoolAllocator::CreateNewBlock() const uint32_t newBlockCapacity = m_ItemBlocks.empty() ? m_FirstBlockCapacity : m_ItemBlocks.back().Capacity * 3 / 2; - const ItemBlock newBlock = { + const ItemBlock newBlock = + { vma_new_array(m_pAllocationCallbacks, Item, newBlockCapacity), newBlockCapacity, - 0 }; + 0 + }; m_ItemBlocks.push_back(newBlock); // Setup singly-linked list of all free items in this block. 
- for(uint32_t i = 0; i < newBlockCapacity - 1; ++i) + for (uint32_t i = 0; i < newBlockCapacity - 1; ++i) newBlock.pItems[i].NextFreeIndex = i + 1; newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX; return m_ItemBlocks.back(); } +#endif // _VMA_POOL_ALLOCATOR_FUNCTIONS +#endif // _VMA_POOL_ALLOCATOR -//////////////////////////////////////////////////////////////////////////////// -// class VmaRawList, VmaList - -#if VMA_USE_STL_LIST - -#define VmaList std::list - -#else // #if VMA_USE_STL_LIST - +#ifndef _VMA_RAW_LIST template struct VmaListItem { @@ -5561,37 +4842,38 @@ struct VmaListItem template class VmaRawList { - VMA_CLASS_NO_COPY(VmaRawList) + VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList) public: typedef VmaListItem ItemType; VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks); - ~VmaRawList(); - void Clear(); + // Intentionally not calling Clear, because that would be unnecessary + // computations to return all items to m_ItemAllocator as free. + ~VmaRawList() = default; size_t GetCount() const { return m_Count; } bool IsEmpty() const { return m_Count == 0; } ItemType* Front() { return m_pFront; } - const ItemType* Front() const { return m_pFront; } ItemType* Back() { return m_pBack; } + const ItemType* Front() const { return m_pFront; } const ItemType* Back() const { return m_pBack; } - ItemType* PushBack(); ItemType* PushFront(); - ItemType* PushBack(const T& value); + ItemType* PushBack(); ItemType* PushFront(const T& value); - void PopBack(); + ItemType* PushBack(const T& value); void PopFront(); + void PopBack(); // Item can be null - it means PushBack. ItemType* InsertBefore(ItemType* pItem); // Item can be null - it means PushFront. 
ItemType* InsertAfter(ItemType* pItem); - ItemType* InsertBefore(ItemType* pItem, const T& value); ItemType* InsertAfter(ItemType* pItem, const T& value); + void Clear(); void Remove(ItemType* pItem); private: @@ -5602,39 +4884,35 @@ private: size_t m_Count; }; +#ifndef _VMA_RAW_LIST_FUNCTIONS template -VmaRawList::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks) : - m_pAllocationCallbacks(pAllocationCallbacks), +VmaRawList::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks) + : m_pAllocationCallbacks(pAllocationCallbacks), m_ItemAllocator(pAllocationCallbacks, 128), m_pFront(VMA_NULL), m_pBack(VMA_NULL), - m_Count(0) -{ -} + m_Count(0) {} template -VmaRawList::~VmaRawList() +VmaListItem* VmaRawList::PushFront() { - // Intentionally not calling Clear, because that would be unnecessary - // computations to return all items to m_ItemAllocator as free. -} - -template -void VmaRawList::Clear() -{ - if(IsEmpty() == false) + ItemType* const pNewItem = m_ItemAllocator.Alloc(); + pNewItem->pPrev = VMA_NULL; + if (IsEmpty()) { - ItemType* pItem = m_pBack; - while(pItem != VMA_NULL) - { - ItemType* const pPrevItem = pItem->pPrev; - m_ItemAllocator.Free(pItem); - pItem = pPrevItem; - } - m_pFront = VMA_NULL; - m_pBack = VMA_NULL; - m_Count = 0; + pNewItem->pNext = VMA_NULL; + m_pFront = pNewItem; + m_pBack = pNewItem; + m_Count = 1; } + else + { + pNewItem->pNext = m_pFront; + m_pFront->pPrev = pNewItem; + m_pFront = pNewItem; + ++m_Count; + } + return pNewItem; } template @@ -5660,24 +4938,10 @@ VmaListItem* VmaRawList::PushBack() } template -VmaListItem* VmaRawList::PushFront() +VmaListItem* VmaRawList::PushFront(const T& value) { - ItemType* const pNewItem = m_ItemAllocator.Alloc(); - pNewItem->pPrev = VMA_NULL; - if(IsEmpty()) - { - pNewItem->pNext = VMA_NULL; - m_pFront = pNewItem; - m_pBack = pNewItem; - m_Count = 1; - } - else - { - pNewItem->pNext = m_pFront; - m_pFront->pPrev = pNewItem; - m_pFront = pNewItem; - ++m_Count; - } + ItemType* 
const pNewItem = PushFront(); + pNewItem->Value = value; return pNewItem; } @@ -5690,11 +4954,18 @@ VmaListItem* VmaRawList::PushBack(const T& value) } template -VmaListItem* VmaRawList::PushFront(const T& value) +void VmaRawList::PopFront() { - ItemType* const pNewItem = PushFront(); - pNewItem->Value = value; - return pNewItem; + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const pFrontItem = m_pFront; + ItemType* const pNextItem = pFrontItem->pNext; + if (pNextItem != VMA_NULL) + { + pNextItem->pPrev = VMA_NULL; + } + m_pFront = pNextItem; + m_ItemAllocator.Free(pFrontItem); + --m_Count; } template @@ -5713,18 +4984,21 @@ void VmaRawList::PopBack() } template -void VmaRawList::PopFront() +void VmaRawList::Clear() { - VMA_HEAVY_ASSERT(m_Count > 0); - ItemType* const pFrontItem = m_pFront; - ItemType* const pNextItem = pFrontItem->pNext; - if(pNextItem != VMA_NULL) + if (IsEmpty() == false) { - pNextItem->pPrev = VMA_NULL; + ItemType* pItem = m_pBack; + while (pItem != VMA_NULL) + { + ItemType* const pPrevItem = pItem->pPrev; + m_ItemAllocator.Free(pItem); + pItem = pPrevItem; + } + m_pFront = VMA_NULL; + m_pBack = VMA_NULL; + m_Count = 0; } - m_pFront = pNextItem; - m_ItemAllocator.Free(pFrontItem); - --m_Count; } template @@ -5824,173 +5098,129 @@ VmaListItem* VmaRawList::InsertAfter(ItemType* pItem, const T& value) newItem->Value = value; return newItem; } +#endif // _VMA_RAW_LIST_FUNCTIONS +#endif // _VMA_RAW_LIST +#ifndef _VMA_LIST template class VmaList { - VMA_CLASS_NO_COPY(VmaList) + VMA_CLASS_NO_COPY_NO_MOVE(VmaList) public: + class reverse_iterator; + class const_iterator; + class const_reverse_iterator; + class iterator { + friend class const_iterator; + friend class VmaList; public: - iterator() : - m_pList(VMA_NULL), - m_pItem(VMA_NULL) - { - } + iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} - T& operator*() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - 
return m_pItem->Value; - } - T* operator->() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - return &m_pItem->Value; - } + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } - iterator& operator++() - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - m_pItem = m_pItem->pNext; - return *this; - } - iterator& operator--() - { - if(m_pItem != VMA_NULL) - { - m_pItem = m_pItem->pPrev; - } - else - { - VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); - m_pItem = m_pList->Back(); - } - return *this; - } + bool operator==(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } - iterator operator++(int) - { - iterator result = *this; - ++*this; - return result; - } - iterator operator--(int) - { - iterator result = *this; - --*this; - return result; - } + iterator operator++(int) { iterator result = *this; ++*this; return result; } + iterator operator--(int) { iterator result = *this; --*this; return result; } - bool operator==(const iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } + iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + iterator& operator--(); private: VmaRawList* m_pList; VmaListItem* m_pItem; - iterator(VmaRawList* pList, VmaListItem* pItem) : - m_pList(pList), - m_pItem(pItem) - { - } - - friend class VmaList; + iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} }; - - class const_iterator + class reverse_iterator { + friend class const_reverse_iterator; + friend class VmaList; public: - 
const_iterator() : - m_pList(VMA_NULL), - m_pItem(VMA_NULL) - { - } + reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} - const_iterator(const iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } - const T& operator*() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - return m_pItem->Value; - } - const T* operator->() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - return &m_pItem->Value; - } + bool operator==(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } - const_iterator& operator++() - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - m_pItem = m_pItem->pNext; - return *this; - } - const_iterator& operator--() - { - if(m_pItem != VMA_NULL) - { - m_pItem = m_pItem->pPrev; - } - else - { - VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); - m_pItem = m_pList->Back(); - } - return *this; - } + reverse_iterator operator++(int) { reverse_iterator result = *this; ++* this; return result; } + reverse_iterator operator--(int) { reverse_iterator result = *this; --* this; return result; } - const_iterator operator++(int) - { - const_iterator result = *this; - ++*this; - return result; - } - const_iterator operator--(int) - { - const_iterator result = *this; - --*this; - return result; - } - - bool operator==(const const_iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const const_iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } + reverse_iterator& operator++() { 
VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } + reverse_iterator& operator--(); private: - const_iterator(const VmaRawList* pList, const VmaListItem* pItem) : - m_pList(pList), - m_pItem(pItem) - { - } + VmaRawList* m_pList; + VmaListItem* m_pItem; + reverse_iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_iterator + { + friend class VmaList; + public: + const_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + const_iterator operator++(int) { const_iterator result = *this; ++* this; return result; } + const_iterator operator--(int) { const_iterator result = *this; --* this; return result; } + + const_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + const_iterator& operator--(); + + private: const VmaRawList* m_pList; const VmaListItem* m_pItem; + const_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_reverse_iterator + { friend class VmaList; + public: + const_reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + 
const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + reverse_iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + const_reverse_iterator operator++(int) { const_reverse_iterator result = *this; ++* this; return result; } + const_reverse_iterator operator--(int) { const_reverse_iterator result = *this; --* this; return result; } + + const_reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } + const_reverse_iterator& operator--(); + + private: + const VmaRawList* m_pList; + const VmaListItem* m_pItem; + + const_reverse_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} }; - VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) { } + VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) {} bool empty() const { return m_RawList.IsEmpty(); } size_t size() const { return m_RawList.GetCount(); } @@ -6001,133 +5231,942 @@ public: const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); } const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); } - void clear() { m_RawList.Clear(); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); } + reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); } + + 
const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } + const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); } + + const_reverse_iterator rbegin() const { return crbegin(); } + const_reverse_iterator rend() const { return crend(); } + void push_back(const T& value) { m_RawList.PushBack(value); } - void erase(iterator it) { m_RawList.Remove(it.m_pItem); } iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); } + void clear() { m_RawList.Clear(); } + void erase(iterator it) { m_RawList.Remove(it.m_pItem); } + private: VmaRawList m_RawList; }; -#endif // #if VMA_USE_STL_LIST - -//////////////////////////////////////////////////////////////////////////////// -// class VmaMap - -// Unused in this version. -#if 0 - -#if VMA_USE_STL_UNORDERED_MAP - -#define VmaPair std::pair - -#define VMA_MAP_TYPE(KeyT, ValueT) \ - std::unordered_map< KeyT, ValueT, std::hash, std::equal_to, VmaStlAllocator< std::pair > > - -#else // #if VMA_USE_STL_UNORDERED_MAP - -template -struct VmaPair +#ifndef _VMA_LIST_FUNCTIONS +template +typename VmaList::iterator& VmaList::iterator::operator--() { - T1 first; - T2 second; - - VmaPair() : first(), second() { } - VmaPair(const T1& firstSrc, const T2& secondSrc) : first(firstSrc), second(secondSrc) { } -}; - -/* Class compatible with subset of interface of std::unordered_map. -KeyT, ValueT must be POD because they will be stored in VmaVector. 
-*/ -template -class VmaMap -{ -public: - typedef VmaPair PairType; - typedef PairType* iterator; - - VmaMap(const VmaStlAllocator& allocator) : m_Vector(allocator) { } - - iterator begin() { return m_Vector.begin(); } - iterator end() { return m_Vector.end(); } - - void insert(const PairType& pair); - iterator find(const KeyT& key); - void erase(iterator it); - -private: - VmaVector< PairType, VmaStlAllocator > m_Vector; -}; - -#define VMA_MAP_TYPE(KeyT, ValueT) VmaMap - -template -struct VmaPairFirstLess -{ - bool operator()(const VmaPair& lhs, const VmaPair& rhs) const + if (m_pItem != VMA_NULL) { - return lhs.first < rhs.first; - } - bool operator()(const VmaPair& lhs, const FirstT& rhsFirst) const - { - return lhs.first < rhsFirst; - } -}; - -template -void VmaMap::insert(const PairType& pair) -{ - const size_t indexToInsert = VmaBinaryFindFirstNotLess( - m_Vector.data(), - m_Vector.data() + m_Vector.size(), - pair, - VmaPairFirstLess()) - m_Vector.data(); - VmaVectorInsert(m_Vector, indexToInsert, pair); -} - -template -VmaPair* VmaMap::find(const KeyT& key) -{ - PairType* it = VmaBinaryFindFirstNotLess( - m_Vector.data(), - m_Vector.data() + m_Vector.size(), - key, - VmaPairFirstLess()); - if((it != m_Vector.end()) && (it->first == key)) - { - return it; + m_pItem = m_pItem->pPrev; } else { - return m_Vector.end(); + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename VmaList::reverse_iterator& VmaList::reverse_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} + +template +typename VmaList::const_iterator& VmaList::const_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename 
VmaList::const_reverse_iterator& VmaList::const_reverse_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} +#endif // _VMA_LIST_FUNCTIONS +#endif // _VMA_LIST + +#ifndef _VMA_INTRUSIVE_LINKED_LIST +/* +Expected interface of ItemTypeTraits: +struct MyItemTypeTraits +{ + typedef MyItem ItemType; + static ItemType* GetPrev(const ItemType* item) { return item->myPrevPtr; } + static ItemType* GetNext(const ItemType* item) { return item->myNextPtr; } + static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; } + static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; } +}; +*/ +template +class VmaIntrusiveLinkedList +{ +public: + typedef typename ItemTypeTraits::ItemType ItemType; + static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } + static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + + // Movable, not copyable. + VmaIntrusiveLinkedList() = default; + VmaIntrusiveLinkedList(VmaIntrusiveLinkedList && src); + VmaIntrusiveLinkedList(const VmaIntrusiveLinkedList&) = delete; + VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src); + VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete; + ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); } + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + ItemType* Front() { return m_Front; } + ItemType* Back() { return m_Back; } + const ItemType* Front() const { return m_Front; } + const ItemType* Back() const { return m_Back; } + + void PushBack(ItemType* item); + void PushFront(ItemType* item); + ItemType* PopBack(); + ItemType* PopFront(); + + // MyItem can be null - it means PushBack. + void InsertBefore(ItemType* existingItem, ItemType* newItem); + // MyItem can be null - it means PushFront. 
+ void InsertAfter(ItemType* existingItem, ItemType* newItem); + void Remove(ItemType* item); + void RemoveAll(); + +private: + ItemType* m_Front = VMA_NULL; + ItemType* m_Back = VMA_NULL; + size_t m_Count = 0; +}; + +#ifndef _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template +VmaIntrusiveLinkedList::VmaIntrusiveLinkedList(VmaIntrusiveLinkedList&& src) + : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; +} + +template +VmaIntrusiveLinkedList& VmaIntrusiveLinkedList::operator=(VmaIntrusiveLinkedList&& src) +{ + if (&src != this) + { + VMA_HEAVY_ASSERT(IsEmpty()); + m_Front = src.m_Front; + m_Back = src.m_Back; + m_Count = src.m_Count; + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; + } + return *this; +} + +template +void VmaIntrusiveLinkedList::PushBack(ItemType* item) +{ + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessPrev(item) = m_Back; + ItemTypeTraits::AccessNext(m_Back) = item; + m_Back = item; + ++m_Count; } } -template -void VmaMap::erase(iterator it) +template +void VmaIntrusiveLinkedList::PushFront(ItemType* item) { - VmaVectorRemove(m_Vector, it - m_Vector.begin()); + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessNext(item) = m_Front; + ItemTypeTraits::AccessPrev(m_Front) = item; + m_Front = item; + ++m_Count; + } } -#endif // #if VMA_USE_STL_UNORDERED_MAP +template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopBack() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const backItem = m_Back; + ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); + if (prevItem != VMA_NULL) + { + 
ItemTypeTraits::AccessNext(prevItem) = VMA_NULL; + } + m_Back = prevItem; + --m_Count; + ItemTypeTraits::AccessPrev(backItem) = VMA_NULL; + ItemTypeTraits::AccessNext(backItem) = VMA_NULL; + return backItem; +} -#endif // #if 0 +template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopFront() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const frontItem = m_Front; + ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = VMA_NULL; + } + m_Front = nextItem; + --m_Count; + ItemTypeTraits::AccessPrev(frontItem) = VMA_NULL; + ItemTypeTraits::AccessNext(frontItem) = VMA_NULL; + return frontItem; +} -//////////////////////////////////////////////////////////////////////////////// +template +void VmaIntrusiveLinkedList::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); + ItemTypeTraits::AccessPrev(newItem) = prevItem; + ItemTypeTraits::AccessNext(newItem) = existingItem; + ItemTypeTraits::AccessPrev(existingItem) = newItem; + if (prevItem != VMA_NULL) + { + ItemTypeTraits::AccessNext(prevItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Front == existingItem); + m_Front = newItem; + } + ++m_Count; + } + else + PushBack(newItem); +} -class VmaDeviceMemoryBlock; +template +void VmaIntrusiveLinkedList::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); + ItemTypeTraits::AccessNext(newItem) = nextItem; + ItemTypeTraits::AccessPrev(newItem) = existingItem; + 
ItemTypeTraits::AccessNext(existingItem) = newItem; + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Back == existingItem); + m_Back = newItem; + } + ++m_Count; + } + else + return PushFront(newItem); +} -enum VMA_CACHE_OPERATION { VMA_CACHE_FLUSH, VMA_CACHE_INVALIDATE }; +template +void VmaIntrusiveLinkedList::Remove(ItemType* item) +{ + VMA_HEAVY_ASSERT(item != VMA_NULL && m_Count > 0); + if (ItemTypeTraits::GetPrev(item) != VMA_NULL) + { + ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); + } + else + { + VMA_HEAVY_ASSERT(m_Front == item); + m_Front = ItemTypeTraits::GetNext(item); + } + if (ItemTypeTraits::GetNext(item) != VMA_NULL) + { + ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); + } + else + { + VMA_HEAVY_ASSERT(m_Back == item); + m_Back = ItemTypeTraits::GetPrev(item); + } + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + --m_Count; +} + +template +void VmaIntrusiveLinkedList::RemoveAll() +{ + if (!IsEmpty()) + { + ItemType* item = m_Back; + while (item != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + item = prevItem; + } + m_Front = VMA_NULL; + m_Back = VMA_NULL; + m_Count = 0; + } +} +#endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _VMA_INTRUSIVE_LINKED_LIST + +#if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED +class VmaStringBuilder +{ +public: + VmaStringBuilder(const VkAllocationCallbacks* allocationCallbacks) : m_Data(VmaStlAllocator(allocationCallbacks)) {} + ~VmaStringBuilder() = default; + + size_t GetLength() const { return m_Data.size(); } + const char* GetData() const { return m_Data.data(); } + void AddNewLine() { Add('\n'); } + void Add(char ch) { m_Data.push_back(ch); } + + void Add(const 
char* pStr); + void AddNumber(uint32_t num); + void AddNumber(uint64_t num); + void AddPointer(const void* ptr); + +private: + VmaVector> m_Data; +}; + +#ifndef _VMA_STRING_BUILDER_FUNCTIONS +void VmaStringBuilder::Add(const char* pStr) +{ + const size_t strLen = strlen(pStr); + if (strLen > 0) + { + const size_t oldCount = m_Data.size(); + m_Data.resize(oldCount + strLen); + memcpy(m_Data.data() + oldCount, pStr, strLen); + } +} + +void VmaStringBuilder::AddNumber(uint32_t num) +{ + char buf[11]; + buf[10] = '\0'; + char* p = &buf[10]; + do + { + *--p = '0' + (char)(num % 10); + num /= 10; + } while (num); + Add(p); +} + +void VmaStringBuilder::AddNumber(uint64_t num) +{ + char buf[21]; + buf[20] = '\0'; + char* p = &buf[20]; + do + { + *--p = '0' + (char)(num % 10); + num /= 10; + } while (num); + Add(p); +} + +void VmaStringBuilder::AddPointer(const void* ptr) +{ + char buf[21]; + VmaPtrToStr(buf, sizeof(buf), ptr); + Add(buf); +} +#endif //_VMA_STRING_BUILDER_FUNCTIONS +#endif // _VMA_STRING_BUILDER + +#if !defined(_VMA_JSON_WRITER) && VMA_STATS_STRING_ENABLED +/* +Allows to conveniently build a correct JSON document to be written to the +VmaStringBuilder passed to the constructor. +*/ +class VmaJsonWriter +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter) +public: + // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object. + VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb); + ~VmaJsonWriter(); + + // Begins object by writing "{". + // Inside an object, you must call pairs of WriteString and a value, e.g.: + // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject(); + // Will write: { "A": 1, "B": 2 } + void BeginObject(bool singleLine = false); + // Ends object by writing "}". + void EndObject(); + + // Begins array by writing "[". + // Inside an array, you can write a sequence of any values. 
+ void BeginArray(bool singleLine = false); + // Ends array by writing "]". + void EndArray(); + + // Writes a string value inside "". + // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped. + void WriteString(const char* pStr); + + // Begins writing a string value. + // Call BeginString, ContinueString, ContinueString, ..., EndString instead of + // WriteString to conveniently build the string content incrementally, made of + // parts including numbers. + void BeginString(const char* pStr = VMA_NULL); + // Posts next part of an open string. + void ContinueString(const char* pStr); + // Posts next part of an open string. The number is converted to decimal characters. + void ContinueString(uint32_t n); + void ContinueString(uint64_t n); + // Posts next part of an open string. Pointer value is converted to characters + // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00 + void ContinueString_Pointer(const void* ptr); + // Ends writing a string value by writing '"'. + void EndString(const char* pStr = VMA_NULL); + + // Writes a number value. + void WriteNumber(uint32_t n); + void WriteNumber(uint64_t n); + // Writes a boolean value - false or true. + void WriteBool(bool b); + // Writes a null value. 
+ void WriteNull(); + +private: + enum COLLECTION_TYPE + { + COLLECTION_TYPE_OBJECT, + COLLECTION_TYPE_ARRAY, + }; + struct StackItem + { + COLLECTION_TYPE type; + uint32_t valueCount; + bool singleLineMode; + }; + + static const char* const INDENT; + + VmaStringBuilder& m_SB; + VmaVector< StackItem, VmaStlAllocator > m_Stack; + bool m_InsideString; + + void BeginValue(bool isString); + void WriteIndent(bool oneLess = false); +}; +const char* const VmaJsonWriter::INDENT = " "; + +#ifndef _VMA_JSON_WRITER_FUNCTIONS +VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) + : m_SB(sb), + m_Stack(VmaStlAllocator(pAllocationCallbacks)), + m_InsideString(false) {} + +VmaJsonWriter::~VmaJsonWriter() +{ + VMA_ASSERT(!m_InsideString); + VMA_ASSERT(m_Stack.empty()); +} + +void VmaJsonWriter::BeginObject(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('{'); + + StackItem item; + item.type = COLLECTION_TYPE_OBJECT; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndObject() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add('}'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); + m_Stack.pop_back(); +} + +void VmaJsonWriter::BeginArray(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('['); + + StackItem item; + item.type = COLLECTION_TYPE_ARRAY; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndArray() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add(']'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); + m_Stack.pop_back(); +} + +void VmaJsonWriter::WriteString(const char* pStr) +{ + BeginString(pStr); + EndString(); +} + +void VmaJsonWriter::BeginString(const char* pStr) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(true); + 
m_SB.Add('"'); + m_InsideString = true; + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } +} + +void VmaJsonWriter::ContinueString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + + const size_t strLen = strlen(pStr); + for (size_t i = 0; i < strLen; ++i) + { + char ch = pStr[i]; + if (ch == '\\') + { + m_SB.Add("\\\\"); + } + else if (ch == '"') + { + m_SB.Add("\\\""); + } + else if ((uint8_t)ch >= 32) + { + m_SB.Add(ch); + } + else switch (ch) + { + case '\b': + m_SB.Add("\\b"); + break; + case '\f': + m_SB.Add("\\f"); + break; + case '\n': + m_SB.Add("\\n"); + break; + case '\r': + m_SB.Add("\\r"); + break; + case '\t': + m_SB.Add("\\t"); + break; + default: + VMA_ASSERT(0 && "Character not currently supported."); + } + } +} + +void VmaJsonWriter::ContinueString(uint32_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString(uint64_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString_Pointer(const void* ptr) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddPointer(ptr); +} + +void VmaJsonWriter::EndString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } + m_SB.Add('"'); + m_InsideString = false; +} + +void VmaJsonWriter::WriteNumber(uint32_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteNumber(uint64_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteBool(bool b) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add(b ? 
"true" : "false"); +} + +void VmaJsonWriter::WriteNull() +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add("null"); +} + +void VmaJsonWriter::BeginValue(bool isString) +{ + if (!m_Stack.empty()) + { + StackItem& currItem = m_Stack.back(); + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 == 0) + { + VMA_ASSERT(isString); + } + + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 != 0) + { + m_SB.Add(": "); + } + else if (currItem.valueCount > 0) + { + m_SB.Add(", "); + WriteIndent(); + } + else + { + WriteIndent(); + } + ++currItem.valueCount; + } +} + +void VmaJsonWriter::WriteIndent(bool oneLess) +{ + if (!m_Stack.empty() && !m_Stack.back().singleLineMode) + { + m_SB.AddNewLine(); + + size_t count = m_Stack.size(); + if (count > 0 && oneLess) + { + --count; + } + for (size_t i = 0; i < count; ++i) + { + m_SB.Add(INDENT); + } + } +} +#endif // _VMA_JSON_WRITER_FUNCTIONS + +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) +{ + json.BeginObject(); + + json.WriteString("BlockCount"); + json.WriteNumber(stat.statistics.blockCount); + json.WriteString("BlockBytes"); + json.WriteNumber(stat.statistics.blockBytes); + json.WriteString("AllocationCount"); + json.WriteNumber(stat.statistics.allocationCount); + json.WriteString("AllocationBytes"); + json.WriteNumber(stat.statistics.allocationBytes); + json.WriteString("UnusedRangeCount"); + json.WriteNumber(stat.unusedRangeCount); + + if (stat.statistics.allocationCount > 1) + { + json.WriteString("AllocationSizeMin"); + json.WriteNumber(stat.allocationSizeMin); + json.WriteString("AllocationSizeMax"); + json.WriteNumber(stat.allocationSizeMax); + } + if (stat.unusedRangeCount > 1) + { + json.WriteString("UnusedRangeSizeMin"); + json.WriteNumber(stat.unusedRangeSizeMin); + json.WriteString("UnusedRangeSizeMax"); + json.WriteNumber(stat.unusedRangeSizeMax); + } + json.EndObject(); +} +#endif // _VMA_JSON_WRITER + 
+#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis) +public: + VmaMappingHysteresis() = default; + + uint32_t GetExtraMapping() const { return m_ExtraMapping; } + + // Call when Map was called. + // Returns true if switched to extra +1 mapping reference count. + bool PostMap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) + { + m_ExtraMapping = 1; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + + // Call when Unmap was called. + void PostUnmap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + ++m_MajorCounter; + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was made from the memory block. + void PostAlloc() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + ++m_MajorCounter; + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was freed from the memory block. + // Returns true if switched to extra -1 mapping reference count. + bool PostFree() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && + m_MajorCounter > m_MinorCounter + 1) + { + m_ExtraMapping = 0; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + +private: + static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + + uint32_t m_MinorCounter = 0; + uint32_t m_MajorCounter = 0; + uint32_t m_ExtraMapping = 0; // 0 or 1. 
+ + void PostMinorCounter() + { + if(m_MinorCounter < m_MajorCounter) + { + ++m_MinorCounter; + } + else if(m_MajorCounter > 0) + { + --m_MajorCounter; + --m_MinorCounter; + } + } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + +#ifndef _VMA_DEVICE_MEMORY_BLOCK +/* +Represents a single block of device memory (`VkDeviceMemory`) with all the +data about its regions (aka suballocations, #VmaAllocation), assigned and free. + +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. +*/ +class VmaDeviceMemoryBlock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock) +public: + VmaBlockMetadata* m_pMetadata; + + VmaDeviceMemoryBlock(VmaAllocator hAllocator); + ~VmaDeviceMemoryBlock(); + + // Always call after construction. + void Init( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t newMemoryTypeIndex, + VkDeviceMemory newMemory, + VkDeviceSize newSize, + uint32_t id, + uint32_t algorithm, + VkDeviceSize bufferImageGranularity); + // Always call before destruction. + void Destroy(VmaAllocator allocator); + + VmaPool GetParentPool() const { return m_hParentPool; } + VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } + uint32_t GetId() const { return m_Id; } + void* GetMappedData() const { return m_pMappedData; } + uint32_t GetMapRefCount() const { return m_MapCount; } + + // Call when allocation/free was made from m_pMetadata. + // Used for m_MappingHysteresis. + void PostAlloc(VmaAllocator hAllocator); + void PostFree(VmaAllocator hAllocator); + + // Validates all data structures inside this object. If not valid, returns false. + bool Validate() const; + VkResult CheckCorruption(VmaAllocator hAllocator); + + // ppData can be null. 
+ VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData); + void Unmap(VmaAllocator hAllocator, uint32_t count); + + VkResult WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + VkResult ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + + VkResult BindBufferMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); + VkResult BindImageMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); + +private: + VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. + uint32_t m_MemoryTypeIndex; + uint32_t m_Id; + VkDeviceMemory m_hMemory; + + /* + Protects access to m_hMemory so it is not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory. + Also protects m_MapCount, m_pMappedData. + Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. + */ + VMA_MUTEX m_MapAndBindMutex; + VmaMappingHysteresis m_MappingHysteresis; + uint32_t m_MapCount; + void* m_pMappedData; +}; +#endif // _VMA_DEVICE_MEMORY_BLOCK + +#ifndef _VMA_ALLOCATION_T struct VmaAllocation_T { -private: - static const uint8_t MAP_COUNT_FLAG_PERSISTENT_MAP = 0x80; + friend struct VmaDedicatedAllocationListItemTraits; enum FLAGS { - FLAG_USER_DATA_STRING = 0x01, + FLAG_PERSISTENT_MAP = 0x01, + FLAG_MAPPING_ALLOWED = 0x02, }; public: @@ -6138,146 +6177,48 @@ public: ALLOCATION_TYPE_DEDICATED, }; - /* - This struct is allocated using VmaPoolAllocator. 
- */ - - VmaAllocation_T(uint32_t currentFrameIndex, bool userDataString) : - m_Alignment{1}, - m_Size{0}, - m_pUserData{VMA_NULL}, - m_LastUseFrameIndex{currentFrameIndex}, - m_MemoryTypeIndex{0}, - m_Type{(uint8_t)ALLOCATION_TYPE_NONE}, - m_SuballocationType{(uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN}, - m_MapCount{0}, - m_Flags{userDataString ? (uint8_t)FLAG_USER_DATA_STRING : (uint8_t)0} - { -#if VMA_STATS_STRING_ENABLED - m_CreationFrameIndex = currentFrameIndex; - m_BufferImageUsage = 0; -#endif - } - - ~VmaAllocation_T() - { - VMA_ASSERT((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) == 0 && "Allocation was not unmapped before destruction."); - - // Check if owned string was freed. - VMA_ASSERT(m_pUserData == VMA_NULL); - } + // This struct is allocated using VmaPoolAllocator. + VmaAllocation_T(bool mappingAllowed); + ~VmaAllocation_T(); void InitBlockAllocation( VmaDeviceMemoryBlock* block, - VkDeviceSize offset, + VmaAllocHandle allocHandle, VkDeviceSize alignment, VkDeviceSize size, uint32_t memoryTypeIndex, VmaSuballocationType suballocationType, - bool mapped, - bool canBecomeLost) - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); - VMA_ASSERT(block != VMA_NULL); - m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; - m_Alignment = alignment; - m_Size = size; - m_MemoryTypeIndex = memoryTypeIndex; - m_MapCount = mapped ? 
MAP_COUNT_FLAG_PERSISTENT_MAP : 0; - m_SuballocationType = (uint8_t)suballocationType; - m_BlockAllocation.m_Block = block; - m_BlockAllocation.m_Offset = offset; - m_BlockAllocation.m_CanBecomeLost = canBecomeLost; - } - - void InitLost() - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); - VMA_ASSERT(m_LastUseFrameIndex.load() == VMA_FRAME_INDEX_LOST); - m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; - m_MemoryTypeIndex = 0; - m_BlockAllocation.m_Block = VMA_NULL; - m_BlockAllocation.m_Offset = 0; - m_BlockAllocation.m_CanBecomeLost = true; - } - - void ChangeBlockAllocation( - VmaAllocator hAllocator, - VmaDeviceMemoryBlock* block, - VkDeviceSize offset); - - void ChangeOffset(VkDeviceSize newOffset); - + bool mapped); // pMappedData not null means allocation is created with MAPPED flag. void InitDedicatedAllocation( + VmaPool hParentPool, uint32_t memoryTypeIndex, VkDeviceMemory hMemory, VmaSuballocationType suballocationType, void* pMappedData, - VkDeviceSize size) - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); - VMA_ASSERT(hMemory != VK_NULL_HANDLE); - m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; - m_Alignment = 0; - m_Size = size; - m_MemoryTypeIndex = memoryTypeIndex; - m_SuballocationType = (uint8_t)suballocationType; - m_MapCount = (pMappedData != VMA_NULL) ? 
MAP_COUNT_FLAG_PERSISTENT_MAP : 0; - m_DedicatedAllocation.m_hMemory = hMemory; - m_DedicatedAllocation.m_pMappedData = pMappedData; - } + VkDeviceSize size); ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } VkDeviceSize GetAlignment() const { return m_Alignment; } VkDeviceSize GetSize() const { return m_Size; } - bool IsUserDataString() const { return (m_Flags & FLAG_USER_DATA_STRING) != 0; } void* GetUserData() const { return m_pUserData; } - void SetUserData(VmaAllocator hAllocator, void* pUserData); + const char* GetName() const { return m_pName; } VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } - VmaDeviceMemoryBlock* GetBlock() const - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - return m_BlockAllocation.m_Block; - } - VkDeviceSize GetOffset() const; - VkDeviceMemory GetMemory() const; + VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } - bool IsPersistentMap() const { return (m_MapCount & MAP_COUNT_FLAG_PERSISTENT_MAP) != 0; } + bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } + bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } + + void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; } + void SetName(VmaAllocator hAllocator, const char* pName); + void FreeName(VmaAllocator hAllocator); + uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation); + VmaAllocHandle GetAllocHandle() const; + VkDeviceSize GetOffset() const; + VmaPool GetParentPool() const; + VkDeviceMemory GetMemory() const; void* GetMappedData() const; - bool CanBecomeLost() const; - - uint32_t GetLastUseFrameIndex() const - { - return m_LastUseFrameIndex.load(); - } - bool CompareExchangeLastUseFrameIndex(uint32_t& expected, uint32_t desired) - { - return 
m_LastUseFrameIndex.compare_exchange_weak(expected, desired); - } - /* - - If hAllocation.LastUseFrameIndex + frameInUseCount < allocator.CurrentFrameIndex, - makes it lost by setting LastUseFrameIndex = VMA_FRAME_INDEX_LOST and returns true. - - Else, returns false. - - If hAllocation is already lost, assert - you should not call it then. - If hAllocation was not created with CAN_BECOME_LOST_BIT, assert. - */ - bool MakeLost(uint32_t currentFrameIndex, uint32_t frameInUseCount); - - void DedicatedAllocCalcStatsInfo(VmaStatInfo& outInfo) - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_DEDICATED); - outInfo.blockCount = 1; - outInfo.allocationCount = 1; - outInfo.unusedRangeCount = 0; - outInfo.usedBytes = m_Size; - outInfo.unusedBytes = 0; - outInfo.allocationSizeMin = outInfo.allocationSizeMax = m_Size; - outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMax = 0; - } void BlockAllocMap(); void BlockAllocUnmap(); @@ -6285,46 +6226,36 @@ public: void DedicatedAllocUnmap(VmaAllocator hAllocator); #if VMA_STATS_STRING_ENABLED - uint32_t GetCreationFrameIndex() const { return m_CreationFrameIndex; } - uint32_t GetBufferImageUsage() const { return m_BufferImageUsage; } - - void InitBufferImageUsage(uint32_t bufferImageUsage) + VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; } + void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) { - VMA_ASSERT(m_BufferImageUsage == 0); - m_BufferImageUsage = bufferImageUsage; + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5); + } + void InitImageUsage(const VkImageCreateInfo &createInfo) + { + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo); } - void PrintParameters(class VmaJsonWriter& json) const; #endif private: - VkDeviceSize m_Alignment; - VkDeviceSize m_Size; - void* m_pUserData; - VMA_ATOMIC_UINT32 
m_LastUseFrameIndex; - uint32_t m_MemoryTypeIndex; - uint8_t m_Type; // ALLOCATION_TYPE - uint8_t m_SuballocationType; // VmaSuballocationType - // Bit 0x80 is set when allocation was created with VMA_ALLOCATION_CREATE_MAPPED_BIT. - // Bits with mask 0x7F are reference counter for vmaMapMemory()/vmaUnmapMemory(). - uint8_t m_MapCount; - uint8_t m_Flags; // enum FLAGS - // Allocation out of VmaDeviceMemoryBlock. struct BlockAllocation { VmaDeviceMemoryBlock* m_Block; - VkDeviceSize m_Offset; - bool m_CanBecomeLost; + VmaAllocHandle m_AllocHandle; }; - // Allocation for an object that has its own private VkDeviceMemory. struct DedicatedAllocation { + VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. VkDeviceMemory m_hMemory; void* m_pMappedData; // Not null means memory is mapped. + VmaAllocation_T* m_Prev; + VmaAllocation_T* m_Next; }; - union { // Allocation out of VmaDeviceMemoryBlock. @@ -6333,14 +6264,175 @@ private: DedicatedAllocation m_DedicatedAllocation; }; + VkDeviceSize m_Alignment; + VkDeviceSize m_Size; + void* m_pUserData; + char* m_pName; + uint32_t m_MemoryTypeIndex; + uint8_t m_Type; // ALLOCATION_TYPE + uint8_t m_SuballocationType; // VmaSuballocationType + // Reference counter for vmaMapMemory()/vmaUnmapMemory(). + uint8_t m_MapCount; + uint8_t m_Flags; // enum FLAGS #if VMA_STATS_STRING_ENABLED - uint32_t m_CreationFrameIndex; - uint32_t m_BufferImageUsage; // 0 if unknown. + VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. 
+#endif +}; +#endif // _VMA_ALLOCATION_T + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS +struct VmaDedicatedAllocationListItemTraits +{ + typedef VmaAllocation_T ItemType; + + static ItemType* GetPrev(const ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Prev; + } + static ItemType* GetNext(const ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Next; + } + static ItemType*& AccessPrev(ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Prev; + } + static ItemType*& AccessNext(ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Next; + } +}; +#endif // _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST +/* +Stores linked list of VmaAllocation_T objects. +Thread-safe, synchronized internally. +*/ +class VmaDedicatedAllocationList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList) +public: + VmaDedicatedAllocationList() {} + ~VmaDedicatedAllocationList(); + + void Init(bool useMutex) { m_UseMutex = useMutex; } + bool Validate(); + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); + void AddStatistics(VmaStatistics& inoutStats); +#if VMA_STATS_STRING_ENABLED + // Writes JSON array with the list of allocations. 
+ void BuildStatsString(VmaJsonWriter& json); #endif - void FreeUserDataString(VmaAllocator hAllocator); + bool IsEmpty(); + void Register(VmaAllocation alloc); + void Unregister(VmaAllocation alloc); + +private: + typedef VmaIntrusiveLinkedList DedicatedAllocationLinkedList; + + bool m_UseMutex = true; + VMA_RW_MUTEX m_Mutex; + DedicatedAllocationLinkedList m_AllocationList; }; +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS + +VmaDedicatedAllocationList::~VmaDedicatedAllocationList() +{ + VMA_HEAVY_ASSERT(Validate()); + + if (!m_AllocationList.IsEmpty()) + { + VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!"); + } +} + +bool VmaDedicatedAllocationList::Validate() +{ + const size_t declaredCount = m_AllocationList.GetCount(); + size_t actualCount = 0; + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + for (VmaAllocation alloc = m_AllocationList.Front(); + alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + { + ++actualCount; + } + VMA_VALIDATE(actualCount == declaredCount); + + return true; +} + +void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) + { + const VkDeviceSize size = item->GetSize(); + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize()); + } +} + +void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + + const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount(); + inoutStats.blockCount += allocCount; + inoutStats.allocationCount += allocCount; + + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) + { + const VkDeviceSize size = item->GetSize(); + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size; + } +} + +#if 
VMA_STATS_STRING_ENABLED +void VmaDedicatedAllocationList::BuildStatsString(VmaJsonWriter& json) +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + json.BeginArray(); + for (VmaAllocation alloc = m_AllocationList.Front(); + alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + { + json.BeginObject(true); + alloc->PrintParameters(json); + json.EndObject(); + } + json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaDedicatedAllocationList::IsEmpty() +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + return m_AllocationList.IsEmpty(); +} + +void VmaDedicatedAllocationList::Register(VmaAllocation alloc) +{ + VmaMutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.PushBack(alloc); +} + +void VmaDedicatedAllocationList::Unregister(VmaAllocation alloc) +{ + VmaMutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.Remove(alloc); +} +#endif // _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS +#endif // _VMA_DEDICATED_ALLOCATION_LIST + +#ifndef _VMA_SUBALLOCATION /* Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as allocated memory block or free. @@ -6349,7 +6441,7 @@ struct VmaSuballocation { VkDeviceSize offset; VkDeviceSize size; - VmaAllocation hAllocation; + void* userData; VmaSuballocationType type; }; @@ -6361,6 +6453,7 @@ struct VmaSuballocationOffsetLess return lhs.offset < rhs.offset; } }; + struct VmaSuballocationOffsetGreater { bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const @@ -6369,72 +6462,74 @@ struct VmaSuballocationOffsetGreater } }; -typedef VmaList< VmaSuballocation, VmaStlAllocator > VmaSuballocationList; - -// Cost of one additional allocation lost, as equivalent in bytes. -static const VkDeviceSize VMA_LOST_ALLOCATION_COST = 1048576; - -enum class VmaAllocationRequestType +struct VmaSuballocationItemSizeLess { - Normal, - // Used by "Linear" algorithm. 
- UpperAddress, - EndOf1st, - EndOf2nd, -}; + bool operator()(const VmaSuballocationList::iterator lhs, + const VmaSuballocationList::iterator rhs) const + { + return lhs->size < rhs->size; + } + bool operator()(const VmaSuballocationList::iterator lhs, + VkDeviceSize rhsSize) const + { + return lhs->size < rhsSize; + } +}; +#endif // _VMA_SUBALLOCATION + +#ifndef _VMA_ALLOCATION_REQUEST /* Parameters of planned allocation inside a VmaDeviceMemoryBlock. - -If canMakeOtherLost was false: -- item points to a FREE suballocation. -- itemsToMakeLostCount is 0. - -If canMakeOtherLost was true: -- item points to first of sequence of suballocations, which are either FREE, - or point to VmaAllocations that can become lost. -- itemsToMakeLostCount is the number of VmaAllocations that need to be made lost for - the requested allocation to succeed. +item points to a FREE suballocation. */ struct VmaAllocationRequest { - VkDeviceSize offset; - VkDeviceSize sumFreeSize; // Sum size of free items that overlap with proposed allocation. - VkDeviceSize sumItemSize; // Sum size of items to make lost that overlap with proposed allocation. + VmaAllocHandle allocHandle; + VkDeviceSize size; VmaSuballocationList::iterator item; - size_t itemsToMakeLostCount; void* customData; + uint64_t algorithmData; VmaAllocationRequestType type; - - VkDeviceSize CalcCost() const - { - return sumItemSize + itemsToMakeLostCount * VMA_LOST_ALLOCATION_COST; - } }; +#endif // _VMA_ALLOCATION_REQUEST +#ifndef _VMA_BLOCK_METADATA /* Data structure used for bookkeeping of allocations and unused ranges of memory in a single VkDeviceMemory block. */ class VmaBlockMetadata { + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata) public: - VmaBlockMetadata(VmaAllocator hAllocator); - virtual ~VmaBlockMetadata() { } + // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object. 
+ VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata() = default; + virtual void Init(VkDeviceSize size) { m_Size = size; } + bool IsVirtual() const { return m_IsVirtual; } + VkDeviceSize GetSize() const { return m_Size; } // Validates all data structures inside this object. If not valid, returns false. virtual bool Validate() const = 0; - VkDeviceSize GetSize() const { return m_Size; } virtual size_t GetAllocationCount() const = 0; + virtual size_t GetFreeRegionsCount() const = 0; virtual VkDeviceSize GetSumFreeSize() const = 0; - virtual VkDeviceSize GetUnusedRangeSizeMax() const = 0; // Returns true if this block is empty - contains only single free suballocation. virtual bool IsEmpty() const = 0; + virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0; + virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0; + virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0; + + virtual VmaAllocHandle GetAllocationListBegin() const = 0; + virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; + virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0; - virtual void CalcAllocationStatInfo(VmaStatInfo& outInfo) const = 0; // Shouldn't modify blockCount. - virtual void AddPoolStats(VmaPoolStats& inoutStats) const = 0; + virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; + virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; #if VMA_STATS_STRING_ENABLED virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; @@ -6444,49 +6539,46 @@ public: // If succeeded, fills pAllocationRequest and returns true. // If failed, returns false. 
virtual bool CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, VkDeviceSize allocSize, VkDeviceSize allocAlignment, bool upperAddress, VmaSuballocationType allocType, - bool canMakeOtherLost, // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* or VMA_ALLOCATION_INTERNAL_STRATEGY_* flags. uint32_t strategy, VmaAllocationRequest* pAllocationRequest) = 0; - virtual bool MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest) = 0; - - virtual uint32_t MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) = 0; - virtual VkResult CheckCorruption(const void* pBlockData) = 0; // Makes actual allocation based on request. Request must already be checked and valid. virtual void Alloc( const VmaAllocationRequest& request, VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation) = 0; + void* userData) = 0; // Frees suballocation assigned to given memory region. - virtual void Free(const VmaAllocation allocation) = 0; - virtual void FreeAtOffset(VkDeviceSize offset) = 0; + virtual void Free(VmaAllocHandle allocHandle) = 0; + + // Frees all allocations. + // Careful! Don't call it if there are VmaAllocation objects owned by userData of cleared allocations! + virtual void Clear() = 0; + + virtual void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) = 0; + virtual void DebugLogAllAllocations() const = 0; protected: const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; } + VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } + VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); } + void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; #if VMA_STATS_STRING_ENABLED + // mapRefCount == UINT32_MAX means unspecified. 
void PrintDetailedMap_Begin(class VmaJsonWriter& json, VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const; void PrintDetailedMap_Allocation(class VmaJsonWriter& json, - VkDeviceSize offset, - VmaAllocation hAllocation) const; + VkDeviceSize offset, VkDeviceSize size, void* userData) const; void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, VkDeviceSize offset, VkDeviceSize size) const; @@ -6496,113 +6588,355 @@ protected: private: VkDeviceSize m_Size; const VkAllocationCallbacks* m_pAllocationCallbacks; + const VkDeviceSize m_BufferImageGranularity; + const bool m_IsVirtual; }; -#define VMA_VALIDATE(cond) do { if(!(cond)) { \ - VMA_ASSERT(0 && "Validation failed: " #cond); \ - return false; \ - } } while(false) +#ifndef _VMA_BLOCK_METADATA_FUNCTIONS +VmaBlockMetadata::VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : m_Size(0), + m_pAllocationCallbacks(pAllocationCallbacks), + m_BufferImageGranularity(bufferImageGranularity), + m_IsVirtual(isVirtual) {} -class VmaBlockMetadata_Generic : public VmaBlockMetadata +void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const { - VMA_CLASS_NO_COPY(VmaBlockMetadata_Generic) -public: - VmaBlockMetadata_Generic(VmaAllocator hAllocator); - virtual ~VmaBlockMetadata_Generic(); - virtual void Init(VkDeviceSize size); + if (IsVirtual()) + { + VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData); + } + else + { + VMA_ASSERT(userData != VMA_NULL); + VmaAllocation allocation = reinterpret_cast(userData); - virtual bool Validate() const; - virtual size_t GetAllocationCount() const { return m_Suballocations.size() - m_FreeCount; } - virtual VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize; } - virtual VkDeviceSize GetUnusedRangeSizeMax() const; - virtual bool IsEmpty() const; - - virtual 
void CalcAllocationStatInfo(VmaStatInfo& outInfo) const; - virtual void AddPoolStats(VmaPoolStats& inoutStats) const; + userData = allocation->GetUserData(); + const char* name = allocation->GetName(); #if VMA_STATS_STRING_ENABLED - virtual void PrintDetailedMap(class VmaJsonWriter& json) const; -#endif + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64, + offset, size, userData, name ? name : "vma_empty", + VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], + (uint64_t)allocation->GetBufferImageUsage().Value); +#else + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u", + offset, size, userData, name ? name : "vma_empty", + (unsigned)allocation->GetSuballocationType()); +#endif // VMA_STATS_STRING_ENABLED + } - virtual bool CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ + json.WriteString("TotalBytes"); + json.WriteNumber(GetSize()); + + json.WriteString("UnusedBytes"); + json.WriteNumber(unusedBytes); + + json.WriteString("Allocations"); + json.WriteNumber((uint64_t)allocationCount); + + json.WriteString("UnusedRanges"); + json.WriteNumber((uint64_t)unusedRangeCount); + + json.WriteString("Suballocations"); + json.BeginArray(); +} + +void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + if (IsVirtual()) + { + json.WriteString("Size"); + json.WriteNumber(size); + if (userData) + { + json.WriteString("CustomData"); + json.BeginString(); + 
json.ContinueString_Pointer(userData); + json.EndString(); + } + } + else + { + ((VmaAllocation)userData)->PrintParameters(json); + } + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + json.WriteString("Type"); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE]); + + json.WriteString("Size"); + json.WriteNumber(size); + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const +{ + json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_BLOCK_METADATA_FUNCTIONS +#endif // _VMA_BLOCK_METADATA + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +// Before deleting object of this class remember to call 'Destroy()' +class VmaBlockBufferImageGranularity final +{ +public: + struct ValidationContext + { + const VkAllocationCallbacks* allocCallbacks; + uint16_t* pageAllocs; + }; + + VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity); + ~VmaBlockBufferImageGranularity(); + + bool IsEnabled() const { return m_BufferImageGranularity > MAX_LOW_BUFFER_IMAGE_GRANULARITY; } + + void Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size); + // Before destroying object you must call free it's memory + void Destroy(const VkAllocationCallbacks* pAllocationCallbacks); + + void RoundupAllocRequest(VmaSuballocationType allocType, + VkDeviceSize& inOutAllocSize, + VkDeviceSize& inOutAllocAlignment) const; + + bool CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - bool canMakeOtherLost, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest); + VkDeviceSize blockOffset, + VkDeviceSize blockSize, + VmaSuballocationType allocType) const; - virtual bool 
MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest); + void AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size); + void FreePages(VkDeviceSize offset, VkDeviceSize size); + void Clear(); - virtual uint32_t MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount); - - virtual VkResult CheckCorruption(const void* pBlockData); - - virtual void Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation); - - virtual void Free(const VmaAllocation allocation); - virtual void FreeAtOffset(VkDeviceSize offset); - - //////////////////////////////////////////////////////////////////////////////// - // For defragmentation - - bool IsBufferImageGranularityConflictPossible( - VkDeviceSize bufferImageGranularity, - VmaSuballocationType& inOutPrevSuballocType) const; + ValidationContext StartValidation(const VkAllocationCallbacks* pAllocationCallbacks, + bool isVirutal) const; + bool Validate(ValidationContext& ctx, VkDeviceSize offset, VkDeviceSize size) const; + bool FinishValidation(ValidationContext& ctx) const; private: - friend class VmaDefragmentationAlgorithm_Generic; - friend class VmaDefragmentationAlgorithm_Fast; + static const uint16_t MAX_LOW_BUFFER_IMAGE_GRANULARITY = 256; - uint32_t m_FreeCount; - VkDeviceSize m_SumFreeSize; - VmaSuballocationList m_Suballocations; - // Suballocations that are free and have size greater than certain threshold. - // Sorted by size, ascending. 
- VmaVector< VmaSuballocationList::iterator, VmaStlAllocator< VmaSuballocationList::iterator > > m_FreeSuballocationsBySize; + struct RegionInfo + { + uint8_t allocType; + uint16_t allocCount; + }; - bool ValidateFreeSuballocationList() const; + VkDeviceSize m_BufferImageGranularity; + uint32_t m_RegionCount; + RegionInfo* m_RegionInfo; - // Checks if requested suballocation with given parameters can be placed in given pFreeSuballocItem. - // If yes, fills pOffset and returns true. If no, returns false. - bool CheckAllocation( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - VmaSuballocationList::const_iterator suballocItem, - bool canMakeOtherLost, - VkDeviceSize* pOffset, - size_t* itemsToMakeLostCount, - VkDeviceSize* pSumFreeSize, - VkDeviceSize* pSumItemSize) const; - // Given free suballocation, it merges it with following one, which must also be free. - void MergeFreeWithNext(VmaSuballocationList::iterator item); - // Releases given suballocation, making it free. - // Merges it with adjacent free suballocations if applicable. - // Returns iterator to new free suballocation at this place. - VmaSuballocationList::iterator FreeSuballocation(VmaSuballocationList::iterator suballocItem); - // Given free suballocation, it inserts it into sorted list of - // m_FreeSuballocationsBySize if it's suitable. - void RegisterFreeSuballocation(VmaSuballocationList::iterator item); - // Given free suballocation, it removes it from sorted list of - // m_FreeSuballocationsBySize if it's suitable. 
- void UnregisterFreeSuballocation(VmaSuballocationList::iterator item); + uint32_t GetStartPage(VkDeviceSize offset) const { return OffsetToPageIndex(offset & ~(m_BufferImageGranularity - 1)); } + uint32_t GetEndPage(VkDeviceSize offset, VkDeviceSize size) const { return OffsetToPageIndex((offset + size - 1) & ~(m_BufferImageGranularity - 1)); } + + uint32_t OffsetToPageIndex(VkDeviceSize offset) const; + void AllocPage(RegionInfo& page, uint8_t allocType); }; +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +VmaBlockBufferImageGranularity::VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity) + : m_BufferImageGranularity(bufferImageGranularity), + m_RegionCount(0), + m_RegionInfo(VMA_NULL) {} + +VmaBlockBufferImageGranularity::~VmaBlockBufferImageGranularity() +{ + VMA_ASSERT(m_RegionInfo == VMA_NULL && "Free not called before destroying object!"); +} + +void VmaBlockBufferImageGranularity::Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size) +{ + if (IsEnabled()) + { + m_RegionCount = static_cast(VmaDivideRoundingUp(size, m_BufferImageGranularity)); + m_RegionInfo = vma_new_array(pAllocationCallbacks, RegionInfo, m_RegionCount); + memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); + } +} + +void VmaBlockBufferImageGranularity::Destroy(const VkAllocationCallbacks* pAllocationCallbacks) +{ + if (m_RegionInfo) + { + vma_delete_array(pAllocationCallbacks, m_RegionInfo, m_RegionCount); + m_RegionInfo = VMA_NULL; + } +} + +void VmaBlockBufferImageGranularity::RoundupAllocRequest(VmaSuballocationType allocType, + VkDeviceSize& inOutAllocSize, + VkDeviceSize& inOutAllocAlignment) const +{ + if (m_BufferImageGranularity > 1 && + m_BufferImageGranularity <= MAX_LOW_BUFFER_IMAGE_GRANULARITY) + { + if (allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN || + allocType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || + allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL) + { + inOutAllocAlignment = VMA_MAX(inOutAllocAlignment, 
m_BufferImageGranularity); + inOutAllocSize = VmaAlignUp(inOutAllocSize, m_BufferImageGranularity); + } + } +} + +bool VmaBlockBufferImageGranularity::CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, + VkDeviceSize allocSize, + VkDeviceSize blockOffset, + VkDeviceSize blockSize, + VmaSuballocationType allocType) const +{ + if (IsEnabled()) + { + uint32_t startPage = GetStartPage(inOutAllocOffset); + if (m_RegionInfo[startPage].allocCount > 0 && + VmaIsBufferImageGranularityConflict(static_cast(m_RegionInfo[startPage].allocType), allocType)) + { + inOutAllocOffset = VmaAlignUp(inOutAllocOffset, m_BufferImageGranularity); + if (blockSize < allocSize + inOutAllocOffset - blockOffset) + return true; + ++startPage; + } + uint32_t endPage = GetEndPage(inOutAllocOffset, allocSize); + if (endPage != startPage && + m_RegionInfo[endPage].allocCount > 0 && + VmaIsBufferImageGranularityConflict(static_cast(m_RegionInfo[endPage].allocType), allocType)) + { + return true; + } + } + return false; +} + +void VmaBlockBufferImageGranularity::AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size) +{ + if (IsEnabled()) + { + uint32_t startPage = GetStartPage(offset); + AllocPage(m_RegionInfo[startPage], allocType); + + uint32_t endPage = GetEndPage(offset, size); + if (startPage != endPage) + AllocPage(m_RegionInfo[endPage], allocType); + } +} + +void VmaBlockBufferImageGranularity::FreePages(VkDeviceSize offset, VkDeviceSize size) +{ + if (IsEnabled()) + { + uint32_t startPage = GetStartPage(offset); + --m_RegionInfo[startPage].allocCount; + if (m_RegionInfo[startPage].allocCount == 0) + m_RegionInfo[startPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; + uint32_t endPage = GetEndPage(offset, size); + if (startPage != endPage) + { + --m_RegionInfo[endPage].allocCount; + if (m_RegionInfo[endPage].allocCount == 0) + m_RegionInfo[endPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; + } + } +} + +void VmaBlockBufferImageGranularity::Clear() +{ + if (m_RegionInfo) + 
memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); +} + +VmaBlockBufferImageGranularity::ValidationContext VmaBlockBufferImageGranularity::StartValidation( + const VkAllocationCallbacks* pAllocationCallbacks, bool isVirutal) const +{ + ValidationContext ctx{ pAllocationCallbacks, VMA_NULL }; + if (!isVirutal && IsEnabled()) + { + ctx.pageAllocs = vma_new_array(pAllocationCallbacks, uint16_t, m_RegionCount); + memset(ctx.pageAllocs, 0, m_RegionCount * sizeof(uint16_t)); + } + return ctx; +} + +bool VmaBlockBufferImageGranularity::Validate(ValidationContext& ctx, + VkDeviceSize offset, VkDeviceSize size) const +{ + if (IsEnabled()) + { + uint32_t start = GetStartPage(offset); + ++ctx.pageAllocs[start]; + VMA_VALIDATE(m_RegionInfo[start].allocCount > 0); + + uint32_t end = GetEndPage(offset, size); + if (start != end) + { + ++ctx.pageAllocs[end]; + VMA_VALIDATE(m_RegionInfo[end].allocCount > 0); + } + } + return true; +} + +bool VmaBlockBufferImageGranularity::FinishValidation(ValidationContext& ctx) const +{ + // Check proper page structure + if (IsEnabled()) + { + VMA_ASSERT(ctx.pageAllocs != VMA_NULL && "Validation context not initialized!"); + + for (uint32_t page = 0; page < m_RegionCount; ++page) + { + VMA_VALIDATE(ctx.pageAllocs[page] == m_RegionInfo[page].allocCount); + } + vma_delete_array(ctx.allocCallbacks, ctx.pageAllocs, m_RegionCount); + ctx.pageAllocs = VMA_NULL; + } + return true; +} + +uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) const +{ + return static_cast(offset >> VMA_BITSCAN_MSB(m_BufferImageGranularity)); +} + +void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType) +{ + // When current alloc type is free then it can be overridden by new type + if (page.allocCount == 0 || (page.allocCount > 0 && page.allocType == VMA_SUBALLOCATION_TYPE_FREE)) + page.allocType = allocType; + + ++page.allocCount; +} +#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +#endif // 
_VMA_BLOCK_BUFFER_IMAGE_GRANULARITY + +#ifndef _VMA_BLOCK_METADATA_LINEAR /* Allocations and their references in internal data structure look like this: @@ -6683,54 +7017,52 @@ GetSize() +-------+ */ class VmaBlockMetadata_Linear : public VmaBlockMetadata { - VMA_CLASS_NO_COPY(VmaBlockMetadata_Linear) + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear) public: - VmaBlockMetadata_Linear(VmaAllocator hAllocator); - virtual ~VmaBlockMetadata_Linear(); - virtual void Init(VkDeviceSize size); + VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata_Linear() = default; - virtual bool Validate() const; - virtual size_t GetAllocationCount() const; - virtual VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize; } - virtual VkDeviceSize GetUnusedRangeSizeMax() const; - virtual bool IsEmpty() const { return GetAllocationCount() == 0; } + VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; } + bool IsEmpty() const override { return GetAllocationCount() == 0; } + VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; } - virtual void CalcAllocationStatInfo(VmaStatInfo& outInfo) const; - virtual void AddPoolStats(VmaPoolStats& inoutStats) const; + void Init(VkDeviceSize size) override; + bool Validate() const override; + size_t GetAllocationCount() const override; + size_t GetFreeRegionsCount() const override; + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - virtual void PrintDetailedMap(class VmaJsonWriter& json) const; + void PrintDetailedMap(class VmaJsonWriter& json) const override; #endif - virtual bool CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, + bool CreateAllocationRequest( 
VkDeviceSize allocSize, VkDeviceSize allocAlignment, bool upperAddress, VmaSuballocationType allocType, - bool canMakeOtherLost, uint32_t strategy, - VmaAllocationRequest* pAllocationRequest); + VmaAllocationRequest* pAllocationRequest) override; - virtual bool MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest); + VkResult CheckCorruption(const void* pBlockData) override; - virtual uint32_t MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount); - - virtual VkResult CheckCorruption(const void* pBlockData); - - virtual void Alloc( + void Alloc( const VmaAllocationRequest& request, VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation); + void* userData) override; - virtual void Free(const VmaAllocation allocation); - virtual void FreeAtOffset(VkDeviceSize offset); + void Free(VmaAllocHandle allocHandle) override; + void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; + void Clear() override; + void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; + void DebugLogAllAllocations() const override; private: /* @@ -6740,7 +7072,7 @@ private: 2nd can be non-empty only when 1st is not empty. When 2nd is not empty, m_2ndVectorMode indicates its mode of operation. 
*/ - typedef VmaVector< VmaSuballocation, VmaStlAllocator > SuballocationVectorType; + typedef VmaVector> SuballocationVectorType; enum SECOND_VECTOR_MODE { @@ -6762,12 +7094,6 @@ private: SuballocationVectorType m_Suballocations0, m_Suballocations1; uint32_t m_1stVectorIndex; SECOND_VECTOR_MODE m_2ndVectorMode; - - SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } - SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } - const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } - const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } - // Number of items in 1st vector with hAllocation = null at the beginning. size_t m_1stNullItemsBeginCount; // Number of other items in 1st vector with hAllocation = null somewhere in the middle. @@ -6775,288 +7101,2477 @@ private: // Number of items in 2nd vector with hAllocation = null. size_t m_2ndNullItemsCount; + SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } + SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } + const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } + const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? 
m_Suballocations0 : m_Suballocations1; } + + VmaSuballocation& FindSuballocation(VkDeviceSize offset) const; bool ShouldCompact1st() const; void CleanupAfterFree(); bool CreateAllocationRequest_LowerAddress( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, VkDeviceSize allocSize, VkDeviceSize allocAlignment, VmaSuballocationType allocType, - bool canMakeOtherLost, uint32_t strategy, VmaAllocationRequest* pAllocationRequest); bool CreateAllocationRequest_UpperAddress( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, VkDeviceSize allocSize, VkDeviceSize allocAlignment, VmaSuballocationType allocType, - bool canMakeOtherLost, uint32_t strategy, VmaAllocationRequest* pAllocationRequest); }; -/* -- GetSize() is the original size of allocated memory block. -- m_UsableSize is this size aligned down to a power of two. - All allocations and calculations happen relative to m_UsableSize. -- GetUnusableSize() is the difference between them. - It is repoted as separate, unused range, not available for allocations. +#ifndef _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), + m_SumFreeSize(0), + m_Suballocations0(VmaStlAllocator(pAllocationCallbacks)), + m_Suballocations1(VmaStlAllocator(pAllocationCallbacks)), + m_1stVectorIndex(0), + m_2ndVectorMode(SECOND_VECTOR_EMPTY), + m_1stNullItemsBeginCount(0), + m_1stNullItemsMiddleCount(0), + m_2ndNullItemsCount(0) {} -Node at level 0 has size = m_UsableSize. -Each next level contains nodes with size 2 times smaller than current level. -m_LevelCount is the maximum number of levels to use in the current object. 
-*/ -class VmaBlockMetadata_Buddy : public VmaBlockMetadata +void VmaBlockMetadata_Linear::Init(VkDeviceSize size) { - VMA_CLASS_NO_COPY(VmaBlockMetadata_Buddy) -public: - VmaBlockMetadata_Buddy(VmaAllocator hAllocator); - virtual ~VmaBlockMetadata_Buddy(); - virtual void Init(VkDeviceSize size); + VmaBlockMetadata::Init(size); + m_SumFreeSize = size; +} - virtual bool Validate() const; - virtual size_t GetAllocationCount() const { return m_AllocationCount; } - virtual VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize + GetUnusableSize(); } - virtual VkDeviceSize GetUnusedRangeSizeMax() const; - virtual bool IsEmpty() const { return m_Root->type == Node::TYPE_FREE; } +bool VmaBlockMetadata_Linear::Validate() const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - virtual void CalcAllocationStatInfo(VmaStatInfo& outInfo) const; - virtual void AddPoolStats(VmaPoolStats& inoutStats) const; + VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY)); + VMA_VALIDATE(!suballocations1st.empty() || + suballocations2nd.empty() || + m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER); + + if (!suballocations1st.empty()) + { + // Null item at the beginning should be accounted into m_1stNullItemsBeginCount. + VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != VMA_SUBALLOCATION_TYPE_FREE); + // Null item at the end should be just pop_back(). + VMA_VALIDATE(suballocations1st.back().type != VMA_SUBALLOCATION_TYPE_FREE); + } + if (!suballocations2nd.empty()) + { + // Null item at the end should be just pop_back(). 
+ VMA_VALIDATE(suballocations2nd.back().type != VMA_SUBALLOCATION_TYPE_FREE); + } + + VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); + VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + + VkDeviceSize sumUsedSize = 0; + const size_t suballoc1stCount = suballocations1st.size(); + const VkDeviceSize debugMargin = GetDebugMargin(); + VkDeviceSize offset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = 0; i < suballoc2ndCount; ++i) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE && + suballoc.userData == VMA_NULL); + } + + size_t nullItem1stCount = m_1stNullItemsBeginCount; + + for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= 
offset); + VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem1stCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = suballoc2ndCount; i--; ) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + VMA_VALIDATE(offset <= GetSize()); + VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + + return true; +} + +size_t VmaBlockMetadata_Linear::GetAllocationCount() const +{ + return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + + AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} + +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return SIZE_MAX; +} + +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const 
+{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + if (lastOffset < freeSpace2ndTo1stEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. 
+ lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + if (lastOffset < freeSpace1stTo2ndEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. 
+ if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to size. + if (lastOffset < size) + { + const VkDeviceSize unusedRangeSize = size - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } +} + +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const VkDeviceSize size = GetSize(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.blockCount++; + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size - m_SumFreeSize; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. 
+ while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. 
+ if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = size; + } + } + } +} #if VMA_STATS_STRING_ENABLED - virtual void PrintDetailedMap(class VmaJsonWriter& json) const; +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + // FIRST PASS + + size_t unusedRangeCount = 0; + VkDeviceSize usedBytes = 0; + + VkDeviceSize lastOffset = 0; + + size_t alloc2ndCount = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. 
+ ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + size_t alloc1stCount = 0; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc1stCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. 
+ while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = size; + } + } + } + + const VkDeviceSize unusedBytes = size - usedBytes; + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + + // SECOND PASS + lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. 
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + nextAlloc1stIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. 
+ else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + const VkDeviceSize unusedRangeSize = size - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. 
+ lastOffset = size; + } + } + } + + PrintDetailedMap_End(json); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaBlockMetadata_Linear::CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + VMA_ASSERT(allocSize > 0); + VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); + VMA_ASSERT(pAllocationRequest != VMA_NULL); + VMA_HEAVY_ASSERT(Validate()); + + if(allocSize > GetSize()) + return false; + + pAllocationRequest->size = allocSize; + return upperAddress ? + CreateAllocationRequest_UpperAddress( + allocSize, allocAlignment, allocType, strategy, pAllocationRequest) : + CreateAllocationRequest_LowerAddress( + allocSize, allocAlignment, allocType, strategy, pAllocationRequest); +} + +VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) +{ + VMA_ASSERT(!IsVirtual()); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + for (size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + { + if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + for (size_t i = 0, count = suballocations2nd.size(); i < count; ++i) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + { + if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + return VK_SUCCESS; +} + +void VmaBlockMetadata_Linear::Alloc( + const VmaAllocationRequest& 
request, + VmaSuballocationType type, + void* userData) +{ + const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1; + const VmaSuballocation newSuballoc = { offset, request.size, userData, type }; + + switch (request.type) + { + case VmaAllocationRequestType::UpperAddress: + { + VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && + "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + suballocations2nd.push_back(newSuballoc); + m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; + } + break; + case VmaAllocationRequestType::EndOf1st: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + + VMA_ASSERT(suballocations1st.empty() || + offset >= suballocations1st.back().offset + suballocations1st.back().size); + // Check if it fits before the end of the block. + VMA_ASSERT(offset + request.size <= GetSize()); + + suballocations1st.push_back(newSuballoc); + } + break; + case VmaAllocationRequestType::EndOf2nd: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. + VMA_ASSERT(!suballocations1st.empty() && + offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + switch (m_2ndVectorMode) + { + case SECOND_VECTOR_EMPTY: + // First allocation from second part ring buffer. + VMA_ASSERT(suballocations2nd.empty()); + m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; + break; + case SECOND_VECTOR_RING_BUFFER: + // 2-part ring buffer is already started. 
+ VMA_ASSERT(!suballocations2nd.empty()); + break; + case SECOND_VECTOR_DOUBLE_STACK: + VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); + break; + default: + VMA_ASSERT(0); + } + + suballocations2nd.push_back(newSuballoc); + } + break; + default: + VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); + } + + m_SumFreeSize -= newSuballoc.size; +} + +void VmaBlockMetadata_Linear::Free(VmaAllocHandle allocHandle) +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + VkDeviceSize offset = (VkDeviceSize)allocHandle - 1; + + if (!suballocations1st.empty()) + { + // First allocation: Mark it as next empty at the beginning. + VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; + if (firstSuballoc.offset == offset) + { + firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; + firstSuballoc.userData = VMA_NULL; + m_SumFreeSize += firstSuballoc.size; + ++m_1stNullItemsBeginCount; + CleanupAfterFree(); + return; + } + } + + // Last allocation in 2-part ring buffer or top of upper stack (same logic). + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + VmaSuballocation& lastSuballoc = suballocations2nd.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations2nd.pop_back(); + CleanupAfterFree(); + return; + } + } + // Last allocation in 1st vector. + else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) + { + VmaSuballocation& lastSuballoc = suballocations1st.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations1st.pop_back(); + CleanupAfterFree(); + return; + } + } + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. 
+ + // Item from the middle of 1st vector. + { + const SuballocationVectorType::iterator it = VmaBinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + VmaSuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_1stNullItemsMiddleCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Item from the middle of 2nd vector. + const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_2ndNullItemsCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + VMA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ + outInfo.offset = (VkDeviceSize)allocHandle - 1; + VmaSuballocation& suballoc = FindSuballocation(outInfo.offset); + outInfo.size = suballoc.size; + outInfo.pUserData = suballoc.userData; +} + +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled 
for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return 0; +} + +void VmaBlockMetadata_Linear::Clear() +{ + m_SumFreeSize = GetSize(); + m_Suballocations0.clear(); + m_Suballocations1.clear(); + // Leaving m_1stVectorIndex unchanged - it doesn't matter. + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; +} + +void VmaBlockMetadata_Linear::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ + VmaSuballocation& suballoc = FindSuballocation((VkDeviceSize)allocHandle - 1); + suballoc.userData = userData; +} + +void VmaBlockMetadata_Linear::DebugLogAllAllocations() const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + for (auto it = suballocations1st.begin() + m_1stNullItemsBeginCount; it != suballocations1st.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); + + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + for (auto it = suballocations2nd.begin(); it != suballocations2nd.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); +} + +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the 1st vector. 
+ { + SuballocationVectorType::const_iterator it = VmaBinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + VmaSuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + return const_cast(*it); + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Rest of members stays uninitialized intentionally for better performance. + SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + return const_cast(*it); + } + } + + VMA_ASSERT(0 && "Allocation not found in linear allocator!"); + return const_cast(suballocations1st.back()); // Should never occur. +} + +bool VmaBlockMetadata_Linear::ShouldCompact1st() const +{ + const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + const size_t suballocCount = AccessSuballocations1st().size(); + return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3; +} + +void VmaBlockMetadata_Linear::CleanupAfterFree() +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (IsEmpty()) + { + suballocations1st.clear(); + suballocations2nd.clear(); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + else + { + const size_t suballoc1stCount = suballocations1st.size(); + const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + VMA_ASSERT(nullItem1stCount <= suballoc1stCount); + + // Find more null items at the beginning of 1st vector. 
+ while (m_1stNullItemsBeginCount < suballoc1stCount && + suballocations1st[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + + // Find more null items at the end of 1st vector. + while (m_1stNullItemsMiddleCount > 0 && + suballocations1st.back().type == VMA_SUBALLOCATION_TYPE_FREE) + { + --m_1stNullItemsMiddleCount; + suballocations1st.pop_back(); + } + + // Find more null items at the end of 2nd vector. + while (m_2ndNullItemsCount > 0 && + suballocations2nd.back().type == VMA_SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + suballocations2nd.pop_back(); + } + + // Find more null items at the beginning of 2nd vector. + while (m_2ndNullItemsCount > 0 && + suballocations2nd[0].type == VMA_SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + VmaVectorRemove(suballocations2nd, 0); + } + + if (ShouldCompact1st()) + { + const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount; + size_t srcIndex = m_1stNullItemsBeginCount; + for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex) + { + while (suballocations1st[srcIndex].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++srcIndex; + } + if (dstIndex != srcIndex) + { + suballocations1st[dstIndex] = suballocations1st[srcIndex]; + } + ++srcIndex; + } + suballocations1st.resize(nonNullItemCount); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + } + + // 2nd vector became empty. + if (suballocations2nd.empty()) + { + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + + // 1st vector became empty. + if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) + { + suballocations1st.clear(); + m_1stNullItemsBeginCount = 0; + + if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + // Swap 1st with 2nd. Now 2nd is empty. 
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsMiddleCount = m_2ndNullItemsCount; + while (m_1stNullItemsBeginCount < suballocations2nd.size() && + suballocations2nd[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + m_2ndNullItemsCount = 0; + m_1stVectorIndex ^= 1; + } + } + } + + VMA_HEAVY_ASSERT(Validate()); +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize debugMargin = GetDebugMargin(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + // Try to allocate at the end of 1st vector. + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations1st.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations1st.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. 
+ if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? + suballocations2nd.back().offset : blockSize; + + // There is enough free space at the end after alignment. + if (resultOffset + allocSize + debugMargin <= freeSpaceEnd) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if ((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on previous page. + break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + // pAllocationRequest->item, customData unused. + pAllocationRequest->type = VmaAllocationRequestType::EndOf1st; + return true; + } + } + + // Wrap-around to end of 2nd vector. 
Try to allocate there, watching for the + // beginning of 1st vector as the end of free space. + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(!suballocations1st.empty()); + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + size_t index1st = m_1stNullItemsBeginCount; + + // There is enough free space at the end after alignment. + if ((index1st == suballocations1st.size() && resultOffset + allocSize + debugMargin <= blockSize) || + (index1st < suballocations1st.size() && resultOffset + allocSize + debugMargin <= suballocations1st[index1st].offset)) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. 
+ if (allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) + { + for (size_t nextSuballocIndex = index1st; + nextSuballocIndex < suballocations1st.size(); + nextSuballocIndex++) + { + const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. + break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd; + // pAllocationRequest->item, customData unused. + return true; + } + } + + return false; +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); + return false; + } + + // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). + if (allocSize > blockSize) + { + return false; + } + VkDeviceSize resultBaseOffset = blockSize - allocSize; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset - allocSize; + if (allocSize > lastSuballoc.offset) + { + return false; + } + } + + // Start from offset equal to end of free space. 
+ VkDeviceSize resultOffset = resultBaseOffset; + + const VkDeviceSize debugMargin = GetDebugMargin(); + + // Apply debugMargin at the end. + if (debugMargin > 0) + { + if (resultOffset < debugMargin) + { + return false; + } + resultOffset -= debugMargin; + } + + // Apply alignment. + resultOffset = VmaAlignDown(resultOffset, allocAlignment); + + // Check next suballocations from 2nd for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity); + } + } + + // There is enough free space. + const VkDeviceSize endOf1st = !suballocations1st.empty() ? + suballocations1st.back().offset + suballocations1st.back().size : + 0; + if (endOf1st + debugMargin <= resultOffset) + { + // Check previous suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if (bufferImageGranularity > 1) + { + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. 
+ break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + // pAllocationRequest->item unused. + pAllocationRequest->type = VmaAllocationRequestType::UpperAddress; + return true; + } + + return false; +} +#endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +#endif // _VMA_BLOCK_METADATA_LINEAR + +#ifndef _VMA_BLOCK_METADATA_TLSF +// To not search current larger region if first allocation won't succeed and skip to smaller range +// use with VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as strategy in CreateAllocationRequest(). +// When fragmentation and reusal of previous blocks doesn't matter then use with +// VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for fastest alloc time possible. +class VmaBlockMetadata_TLSF : public VmaBlockMetadata +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF) +public: + VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata_TLSF(); + + size_t GetAllocationCount() const override { return m_AllocCount; } + size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } + VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } + bool IsEmpty() const override { return m_NullBlock->offset == 0; } + VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; } + + void Init(VkDeviceSize size) override; + bool Validate() const override; + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json) const override; #endif - virtual bool CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, + bool CreateAllocationRequest( VkDeviceSize 
allocSize, VkDeviceSize allocAlignment, bool upperAddress, VmaSuballocationType allocType, - bool canMakeOtherLost, uint32_t strategy, - VmaAllocationRequest* pAllocationRequest); + VmaAllocationRequest* pAllocationRequest) override; - virtual bool MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest); - - virtual uint32_t MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount); - - virtual VkResult CheckCorruption(const void* pBlockData) { return VK_ERROR_FEATURE_NOT_PRESENT; } - - virtual void Alloc( + VkResult CheckCorruption(const void* pBlockData) override; + void Alloc( const VmaAllocationRequest& request, VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation); + void* userData) override; - virtual void Free(const VmaAllocation allocation) { FreeAtOffset(allocation, allocation->GetOffset()); } - virtual void FreeAtOffset(VkDeviceSize offset) { FreeAtOffset(VMA_NULL, offset); } + void Free(VmaAllocHandle allocHandle) override; + void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; + void Clear() override; + void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; + void DebugLogAllAllocations() const override; private: - static const VkDeviceSize MIN_NODE_SIZE = 32; - static const size_t MAX_LEVELS = 30; + // According to original paper it should be preferable 4 or 5: + // M. Masmano, I. Ripoll, A. Crespo, and J. 
Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems" + // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf + static const uint8_t SECOND_LEVEL_INDEX = 5; + static const uint16_t SMALL_BUFFER_SIZE = 256; + static const uint32_t INITIAL_BLOCK_ALLOC_COUNT = 16; + static const uint8_t MEMORY_CLASS_SHIFT = 7; + static const uint8_t MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT; - struct ValidationContext - { - size_t calculatedAllocationCount; - size_t calculatedFreeCount; - VkDeviceSize calculatedSumFreeSize; - - ValidationContext() : - calculatedAllocationCount(0), - calculatedFreeCount(0), - calculatedSumFreeSize(0) { } - }; - - struct Node + class Block { + public: VkDeviceSize offset; - enum TYPE - { - TYPE_FREE, - TYPE_ALLOCATION, - TYPE_SPLIT, - TYPE_COUNT - } type; - Node* parent; - Node* buddy; + VkDeviceSize size; + Block* prevPhysical; + Block* nextPhysical; + void MarkFree() { prevFree = VMA_NULL; } + void MarkTaken() { prevFree = this; } + bool IsFree() const { return prevFree != this; } + void*& UserData() { VMA_HEAVY_ASSERT(!IsFree()); return userData; } + Block*& PrevFree() { return prevFree; } + Block*& NextFree() { VMA_HEAVY_ASSERT(IsFree()); return nextFree; } + + private: + Block* prevFree; // Address of the same block here indicates that block is taken union { - struct - { - Node* prev; - Node* next; - } free; - struct - { - VmaAllocation alloc; - } allocation; - struct - { - Node* leftChild; - } split; + Block* nextFree; + void* userData; }; }; - // Size of the memory block aligned down to a power of two. 
- VkDeviceSize m_UsableSize; - uint32_t m_LevelCount; + size_t m_AllocCount; + // Total number of free blocks besides null block + size_t m_BlocksFreeCount; + // Total size of free blocks excluding null block + VkDeviceSize m_BlocksFreeSize; + uint32_t m_IsFreeBitmap; + uint8_t m_MemoryClasses; + uint32_t m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES]; + uint32_t m_ListsCount; + /* + * 0: 0-3 lists for small buffers + * 1+: 0-(2^SLI-1) lists for normal buffers + */ + Block** m_FreeList; + VmaPoolAllocator m_BlockAllocator; + Block* m_NullBlock; + VmaBlockBufferImageGranularity m_GranularityHandler; - Node* m_Root; - struct { - Node* front; - Node* back; - } m_FreeList[MAX_LEVELS]; - // Number of nodes in the tree with type == TYPE_ALLOCATION. - size_t m_AllocationCount; - // Number of nodes in the tree with type == TYPE_FREE. - size_t m_FreeCount; - // This includes space wasted due to internal fragmentation. Doesn't include unusable size. - VkDeviceSize m_SumFreeSize; + uint8_t SizeToMemoryClass(VkDeviceSize size) const; + uint16_t SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const; + uint32_t GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const; + uint32_t GetListIndex(VkDeviceSize size) const; - VkDeviceSize GetUnusableSize() const { return GetSize() - m_UsableSize; } - void DeleteNode(Node* node); - bool ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const; - uint32_t AllocSizeToLevel(VkDeviceSize allocSize) const; - inline VkDeviceSize LevelToNodeSize(uint32_t level) const { return m_UsableSize >> level; } - // Alloc passed just for validation. Can be null. - void FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset); - void CalcAllocationStatInfoNode(VmaStatInfo& outInfo, const Node* node, VkDeviceSize levelNodeSize) const; - // Adds node to the front of FreeList at given level. - // node->type must be FREE. - // node->free.prev, next can be undefined. 
- void AddToFreeListFront(uint32_t level, Node* node); - // Removes node from FreeList at given level. - // node->type must be FREE. - // node->free.prev, next stay untouched. - void RemoveFromFreeList(uint32_t level, Node* node); + void RemoveFreeBlock(Block* block); + void InsertFreeBlock(Block* block); + void MergeBlock(Block* block, Block* prev); + + Block* FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const; + bool CheckBlock( + Block& block, + uint32_t listIndex, + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_TLSF_FUNCTIONS +VmaBlockMetadata_TLSF::VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), + m_AllocCount(0), + m_BlocksFreeCount(0), + m_BlocksFreeSize(0), + m_IsFreeBitmap(0), + m_MemoryClasses(0), + m_ListsCount(0), + m_FreeList(VMA_NULL), + m_BlockAllocator(pAllocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT), + m_NullBlock(VMA_NULL), + m_GranularityHandler(bufferImageGranularity) {} + +VmaBlockMetadata_TLSF::~VmaBlockMetadata_TLSF() +{ + if (m_FreeList) + vma_delete_array(GetAllocationCallbacks(), m_FreeList, m_ListsCount); + m_GranularityHandler.Destroy(GetAllocationCallbacks()); +} + +void VmaBlockMetadata_TLSF::Init(VkDeviceSize size) +{ + VmaBlockMetadata::Init(size); + + if (!IsVirtual()) + m_GranularityHandler.Init(GetAllocationCallbacks(), size); + + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = size; + m_NullBlock->offset = 0; + m_NullBlock->prevPhysical = VMA_NULL; + m_NullBlock->nextPhysical = VMA_NULL; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = VMA_NULL; + m_NullBlock->PrevFree() = VMA_NULL; + uint8_t memoryClass = SizeToMemoryClass(size); + uint16_t sli = SizeToSecondIndex(size, memoryClass); + m_ListsCount = (memoryClass == 0 ? 
0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; + if (IsVirtual()) + m_ListsCount += 1UL << SECOND_LEVEL_INDEX; + else + m_ListsCount += 4; + + m_MemoryClasses = memoryClass + uint8_t(2); + memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t)); + + m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount); + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool VmaBlockMetadata_TLSF::Validate() const +{ + VMA_VALIDATE(GetSumFreeSize() <= GetSize()); + + VkDeviceSize calculatedSize = m_NullBlock->size; + VkDeviceSize calculatedFreeSize = m_NullBlock->size; + size_t allocCount = 0; + size_t freeCount = 0; + + // Check integrity of free lists + for (uint32_t list = 0; list < m_ListsCount; ++list) + { + Block* block = m_FreeList[list]; + if (block != VMA_NULL) + { + VMA_VALIDATE(block->IsFree()); + VMA_VALIDATE(block->PrevFree() == VMA_NULL); + while (block->NextFree()) + { + VMA_VALIDATE(block->NextFree()->IsFree()); + VMA_VALIDATE(block->NextFree()->PrevFree() == block); + block = block->NextFree(); + } + } + } + + VkDeviceSize nextOffset = m_NullBlock->offset; + auto validateCtx = m_GranularityHandler.StartValidation(GetAllocationCallbacks(), IsVirtual()); + + VMA_VALIDATE(m_NullBlock->nextPhysical == VMA_NULL); + if (m_NullBlock->prevPhysical) + { + VMA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); + } + // Check all blocks + for (Block* prev = m_NullBlock->prevPhysical; prev != VMA_NULL; prev = prev->prevPhysical) + { + VMA_VALIDATE(prev->offset + prev->size == nextOffset); + nextOffset = prev->offset; + calculatedSize += prev->size; + + uint32_t listIndex = GetListIndex(prev->size); + if (prev->IsFree()) + { + ++freeCount; + // Check if free block belongs to free list + Block* freeBlock = m_FreeList[listIndex]; + VMA_VALIDATE(freeBlock != VMA_NULL); + + bool found = false; + do + { + if (freeBlock == prev) + found = true; + + freeBlock = freeBlock->NextFree(); + } while (!found && 
freeBlock != VMA_NULL); + + VMA_VALIDATE(found); + calculatedFreeSize += prev->size; + } + else + { + ++allocCount; + // Check if taken block is not on a free list + Block* freeBlock = m_FreeList[listIndex]; + while (freeBlock) + { + VMA_VALIDATE(freeBlock != prev); + freeBlock = freeBlock->NextFree(); + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.Validate(validateCtx, prev->offset, prev->size)); + } + } + + if (prev->prevPhysical) + { + VMA_VALIDATE(prev->prevPhysical->nextPhysical == prev); + } + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.FinishValidation(validateCtx)); + } + + VMA_VALIDATE(nextOffset == 0); + VMA_VALIDATE(calculatedSize == GetSize()); + VMA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); + VMA_VALIDATE(allocCount == m_AllocCount); + VMA_VALIDATE(freeCount == m_BlocksFreeCount); + + return true; +} + +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); + if (m_NullBlock->size > 0) + VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); + + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree()) + VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); + else + VmaAddDetailedStatisticsAllocation(inoutStats, block->size); + } +} + +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const +{ + inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_AllocCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - GetSumFreeSize(); +} #if VMA_STATS_STRING_ENABLED - void PrintDetailedMapNode(class VmaJsonWriter& json, const Node* node, VkDeviceSize levelNodeSize) const; +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const +{ + size_t blockCount = m_AllocCount + m_BlocksFreeCount; + VmaStlAllocator 
allocator(GetAllocationCallbacks()); + VmaVector> blockList(blockCount, allocator); + + size_t i = blockCount; + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + blockList[--i] = block; + } + VMA_ASSERT(i == 0); + + VmaDetailedStatistics stats; + VmaClearDetailedStatistics(stats); + AddDetailedStatistics(stats); + + PrintDetailedMap_Begin(json, + stats.statistics.blockBytes - stats.statistics.allocationBytes, + stats.statistics.allocationCount, + stats.unusedRangeCount); + + for (; i < blockCount; ++i) + { + Block* block = blockList[i]; + if (block->IsFree()) + PrintDetailedMap_UnusedRange(json, block->offset, block->size); + else + PrintDetailedMap_Allocation(json, block->offset, block->size, block->UserData()); + } + if (m_NullBlock->size > 0) + PrintDetailedMap_UnusedRange(json, m_NullBlock->offset, m_NullBlock->size); + + PrintDetailedMap_End(json); +} #endif -}; -/* -Represents a single block of device memory (`VkDeviceMemory`) with all the -data about its regions (aka suballocations, #VmaAllocation), assigned and free. - -Thread-safety: This class must be externally synchronized. 
-*/ -class VmaDeviceMemoryBlock +bool VmaBlockMetadata_TLSF::CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) { - VMA_CLASS_NO_COPY(VmaDeviceMemoryBlock) -public: - VmaBlockMetadata* m_pMetadata; + VMA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); + VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm."); - VmaDeviceMemoryBlock(VmaAllocator hAllocator); + // For small granularity round up + if (!IsVirtual()) + m_GranularityHandler.RoundupAllocRequest(allocType, allocSize, allocAlignment); - ~VmaDeviceMemoryBlock() + allocSize += GetDebugMargin(); + // Quick check for too small pool + if (allocSize > GetSumFreeSize()) + return false; + + // If no free blocks in pool then check only null block + if (m_BlocksFreeCount == 0) + return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest); + + // Round up to the next block + VkDeviceSize sizeForNextList = allocSize; + VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ? 
1 << SECOND_LEVEL_INDEX : 4)); + if (allocSize > SMALL_BUFFER_SIZE) { - VMA_ASSERT(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); - VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); + sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX)); + } + else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep) + sizeForNextList = SMALL_BUFFER_SIZE + 1; + else + sizeForNextList += smallSizeStep; + + uint32_t nextListIndex = m_ListsCount; + uint32_t prevListIndex = m_ListsCount; + Block* nextListBlock = VMA_NULL; + Block* prevListBlock = VMA_NULL; + + // Check blocks according to strategies + if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) + { + // Quick check for larger block first + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + if (nextListBlock != VMA_NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // If not fitted then null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Null block failed, search larger bucket + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + + // Failed again, check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + } + else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT) + { + // Check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + prevListBlock = 
prevListBlock->NextFree(); + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Check larger bucket + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + } + else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT ) + { + // Perform search from the start + VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); + VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator); + + size_t i = m_BlocksFreeCount; + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree() && block->size >= allocSize) + blockList[--i] = block; + } + + for (; i < m_BlocksFreeCount; ++i) + { + Block& block = *blockList[i]; + if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Whole range searched, no more memory + return false; + } + else + { + // Check larger bucket + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, 
pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } } - // Always call after construction. - void Init( - VmaAllocator hAllocator, - VmaPool hParentPool, - uint32_t newMemoryTypeIndex, - VkDeviceMemory newMemory, - VkDeviceSize newSize, - uint32_t id, - uint32_t algorithm); - // Always call before destruction. - void Destroy(VmaAllocator allocator); - - VmaPool GetParentPool() const { return m_hParentPool; } - VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } - uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } - uint32_t GetId() const { return m_Id; } - void* GetMappedData() const { return m_pMappedData; } - - // Validates all data structures inside this object. If not valid, returns false. - bool Validate() const; - - VkResult CheckCorruption(VmaAllocator hAllocator); - - // ppData can be null. - VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData); - void Unmap(VmaAllocator hAllocator, uint32_t count); - - VkResult WriteMagicValueAroundAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); - VkResult ValidateMagicValueAroundAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); - - VkResult BindBufferMemory( - const VmaAllocator hAllocator, - const VmaAllocation hAllocation, - VkDeviceSize allocationLocalOffset, - VkBuffer hBuffer, - const void* pNext); - VkResult BindImageMemory( - const VmaAllocator hAllocator, - const VmaAllocation hAllocation, - VkDeviceSize allocationLocalOffset, - VkImage hImage, - const void* pNext); - -private: - VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. - uint32_t m_MemoryTypeIndex; - uint32_t m_Id; - VkDeviceMemory m_hMemory; - - /* - Protects access to m_hMemory so it's not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory. - Also protects m_MapCount, m_pMappedData. 
- Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. - */ - VMA_MUTEX m_Mutex; - uint32_t m_MapCount; - void* m_pMappedData; -}; - -struct VmaPointerLess -{ - bool operator()(const void* lhs, const void* rhs) const + // Worst case, full search has to be done + while (++nextListIndex < m_ListsCount) { - return lhs < rhs; + nextListBlock = m_FreeList[nextListIndex]; + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } } -}; -struct VmaDefragmentationMove + // No more memory sadly + return false; +} + +VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData) { - size_t srcBlockIndex; - size_t dstBlockIndex; - VkDeviceSize srcOffset; - VkDeviceSize dstOffset; - VkDeviceSize size; - VmaAllocation hAllocation; - VmaDeviceMemoryBlock* pSrcBlock; - VmaDeviceMemoryBlock* pDstBlock; -}; + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (!block->IsFree()) + { + if (!VmaValidateMagicValue(pBlockData, block->offset + block->size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } -class VmaDefragmentationAlgorithm; + return VK_SUCCESS; +} +void VmaBlockMetadata_TLSF::Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) +{ + VMA_ASSERT(request.type == VmaAllocationRequestType::TLSF); + + // Get block and pop it from the free list + Block* currentBlock = (Block*)request.allocHandle; + VkDeviceSize offset = request.algorithmData; + VMA_ASSERT(currentBlock != VMA_NULL); + VMA_ASSERT(currentBlock->offset <= offset); + + if (currentBlock != m_NullBlock) + RemoveFreeBlock(currentBlock); + + VkDeviceSize debugMargin = GetDebugMargin(); + VkDeviceSize misssingAlignment = offset - currentBlock->offset; + + // 
Append missing alignment to prev block or create new one + if (misssingAlignment) + { + Block* prevBlock = currentBlock->prevPhysical; + VMA_ASSERT(prevBlock != VMA_NULL && "There should be no missing alignment at offset 0!"); + + if (prevBlock->IsFree() && prevBlock->size != debugMargin) + { + uint32_t oldList = GetListIndex(prevBlock->size); + prevBlock->size += misssingAlignment; + // Check if new size crosses list bucket + if (oldList != GetListIndex(prevBlock->size)) + { + prevBlock->size -= misssingAlignment; + RemoveFreeBlock(prevBlock); + prevBlock->size += misssingAlignment; + InsertFreeBlock(prevBlock); + } + else + m_BlocksFreeSize += misssingAlignment; + } + else + { + Block* newBlock = m_BlockAllocator.Alloc(); + currentBlock->prevPhysical = newBlock; + prevBlock->nextPhysical = newBlock; + newBlock->prevPhysical = prevBlock; + newBlock->nextPhysical = currentBlock; + newBlock->size = misssingAlignment; + newBlock->offset = currentBlock->offset; + newBlock->MarkTaken(); + + InsertFreeBlock(newBlock); + } + + currentBlock->size -= misssingAlignment; + currentBlock->offset += misssingAlignment; + } + + VkDeviceSize size = request.size + debugMargin; + if (currentBlock->size == size) + { + if (currentBlock == m_NullBlock) + { + // Setup new null block + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = 0; + m_NullBlock->offset = currentBlock->offset + size; + m_NullBlock->prevPhysical = currentBlock; + m_NullBlock->nextPhysical = VMA_NULL; + m_NullBlock->MarkFree(); + m_NullBlock->PrevFree() = VMA_NULL; + m_NullBlock->NextFree() = VMA_NULL; + currentBlock->nextPhysical = m_NullBlock; + currentBlock->MarkTaken(); + } + } + else + { + VMA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!"); + + // Create new free block + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = currentBlock->size - size; + newBlock->offset = currentBlock->offset + size; + newBlock->prevPhysical = currentBlock; + 
newBlock->nextPhysical = currentBlock->nextPhysical; + currentBlock->nextPhysical = newBlock; + currentBlock->size = size; + + if (currentBlock == m_NullBlock) + { + m_NullBlock = newBlock; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = VMA_NULL; + m_NullBlock->PrevFree() = VMA_NULL; + currentBlock->MarkTaken(); + } + else + { + newBlock->nextPhysical->prevPhysical = newBlock; + newBlock->MarkTaken(); + InsertFreeBlock(newBlock); + } + } + currentBlock->UserData() = userData; + + if (debugMargin > 0) + { + currentBlock->size -= debugMargin; + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = debugMargin; + newBlock->offset = currentBlock->offset + currentBlock->size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + newBlock->MarkTaken(); + currentBlock->nextPhysical->prevPhysical = newBlock; + currentBlock->nextPhysical = newBlock; + InsertFreeBlock(newBlock); + } + + if (!IsVirtual()) + m_GranularityHandler.AllocPages((uint8_t)(uintptr_t)request.customData, + currentBlock->offset, currentBlock->size); + ++m_AllocCount; +} + +void VmaBlockMetadata_TLSF::Free(VmaAllocHandle allocHandle) +{ + Block* block = (Block*)allocHandle; + Block* next = block->nextPhysical; + VMA_ASSERT(!block->IsFree() && "Block is already free!"); + + if (!IsVirtual()) + m_GranularityHandler.FreePages(block->offset, block->size); + --m_AllocCount; + + VkDeviceSize debugMargin = GetDebugMargin(); + if (debugMargin > 0) + { + RemoveFreeBlock(next); + MergeBlock(next, block); + block = next; + next = next->nextPhysical; + } + + // Try merging + Block* prev = block->prevPhysical; + if (prev != VMA_NULL && prev->IsFree() && prev->size != debugMargin) + { + RemoveFreeBlock(prev); + MergeBlock(block, prev); + } + + if (!next->IsFree()) + InsertFreeBlock(block); + else if (next == m_NullBlock) + MergeBlock(m_NullBlock, block); + else + { + RemoveFreeBlock(next); + MergeBlock(next, block); + InsertFreeBlock(next); + } +} + +void 
VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!"); + outInfo.offset = block->offset; + outInfo.size = block->size; + outInfo.pUserData = block->UserData(); +} + +void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); + return block->UserData(); +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const +{ + if (m_AllocCount == 0) + return VK_NULL_HANDLE; + + for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + Block* startBlock = (Block*)prevAlloc; + VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + + for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + Block* block = (Block*)alloc; + VMA_ASSERT(!block->IsFree() && "Incorrect block!"); + + if (block->prevPhysical) + return block->prevPhysical->IsFree() ? 
block->prevPhysical->size : 0; + return 0; +} + +void VmaBlockMetadata_TLSF::Clear() +{ + m_AllocCount = 0; + m_BlocksFreeCount = 0; + m_BlocksFreeSize = 0; + m_IsFreeBitmap = 0; + m_NullBlock->offset = 0; + m_NullBlock->size = GetSize(); + Block* block = m_NullBlock->prevPhysical; + m_NullBlock->prevPhysical = VMA_NULL; + while (block) + { + Block* prev = block->prevPhysical; + m_BlockAllocator.Free(block); + block = prev; + } + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); + memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(uint32_t)); + m_GranularityHandler.Clear(); +} + +void VmaBlockMetadata_TLSF::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Trying to set user data for not allocated block!"); + block->UserData() = userData; +} + +void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const +{ + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + if (!block->IsFree()) + DebugLogAllocation(block->offset, block->size, block->UserData()); +} + +uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const +{ + if (size > SMALL_BUFFER_SIZE) + return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT); + return 0; +} + +uint16_t VmaBlockMetadata_TLSF::SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const +{ + if (memoryClass == 0) + { + if (IsVirtual()) + return static_cast<uint16_t>((size - 1) / 8); + else + return static_cast<uint16_t>((size - 1) / 64); + } + return static_cast<uint16_t>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX)); +} + +uint32_t VmaBlockMetadata_TLSF::GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const +{ + if (memoryClass == 0) + return secondIndex; + + const uint32_t index = static_cast<uint32_t>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex; + if (IsVirtual()) + return index + (1 << SECOND_LEVEL_INDEX); + else + return index + 4; +} + 
+uint32_t VmaBlockMetadata_TLSF::GetListIndex(VkDeviceSize size) const +{ + uint8_t memoryClass = SizeToMemoryClass(size); + return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass)); +} + +void VmaBlockMetadata_TLSF::RemoveFreeBlock(Block* block) +{ + VMA_ASSERT(block != m_NullBlock); + VMA_ASSERT(block->IsFree()); + + if (block->NextFree() != VMA_NULL) + block->NextFree()->PrevFree() = block->PrevFree(); + if (block->PrevFree() != VMA_NULL) + block->PrevFree()->NextFree() = block->NextFree(); + else + { + uint8_t memClass = SizeToMemoryClass(block->size); + uint16_t secondIndex = SizeToSecondIndex(block->size, memClass); + uint32_t index = GetListIndex(memClass, secondIndex); + VMA_ASSERT(m_FreeList[index] == block); + m_FreeList[index] = block->NextFree(); + if (block->NextFree() == VMA_NULL) + { + m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex); + if (m_InnerIsFreeBitmap[memClass] == 0) + m_IsFreeBitmap &= ~(1UL << memClass); + } + } + block->MarkTaken(); + block->UserData() = VMA_NULL; + --m_BlocksFreeCount; + m_BlocksFreeSize -= block->size; +} + +void VmaBlockMetadata_TLSF::InsertFreeBlock(Block* block) +{ + VMA_ASSERT(block != m_NullBlock); + VMA_ASSERT(!block->IsFree() && "Cannot insert block twice!"); + + uint8_t memClass = SizeToMemoryClass(block->size); + uint16_t secondIndex = SizeToSecondIndex(block->size, memClass); + uint32_t index = GetListIndex(memClass, secondIndex); + VMA_ASSERT(index < m_ListsCount); + block->PrevFree() = VMA_NULL; + block->NextFree() = m_FreeList[index]; + m_FreeList[index] = block; + if (block->NextFree() != VMA_NULL) + block->NextFree()->PrevFree() = block; + else + { + m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex; + m_IsFreeBitmap |= 1UL << memClass; + } + ++m_BlocksFreeCount; + m_BlocksFreeSize += block->size; +} + +void VmaBlockMetadata_TLSF::MergeBlock(Block* block, Block* prev) +{ + VMA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!"); + VMA_ASSERT(!prev->IsFree() 
&& "Cannot merge block that belongs to free list!"); + + block->offset = prev->offset; + block->size += prev->size; + block->prevPhysical = prev->prevPhysical; + if (block->prevPhysical) + block->prevPhysical->nextPhysical = block; + m_BlockAllocator.Free(prev); +} + +VmaBlockMetadata_TLSF::Block* VmaBlockMetadata_TLSF::FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const +{ + uint8_t memoryClass = SizeToMemoryClass(size); + uint32_t innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass)); + if (!innerFreeMap) + { + // Check higher levels for available blocks + uint32_t freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1)); + if (!freeMap) + return VMA_NULL; // No more memory available + + // Find lowest free region + memoryClass = VMA_BITSCAN_LSB(freeMap); + innerFreeMap = m_InnerIsFreeBitmap[memoryClass]; + VMA_ASSERT(innerFreeMap != 0); + } + // Find lowest free subregion + listIndex = GetListIndex(memoryClass, VMA_BITSCAN_LSB(innerFreeMap)); + VMA_ASSERT(m_FreeList[listIndex]); + return m_FreeList[listIndex]; +} + +bool VmaBlockMetadata_TLSF::CheckBlock( + Block& block, + uint32_t listIndex, + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + VmaAllocationRequest* pAllocationRequest) +{ + VMA_ASSERT(block.IsFree() && "Block is already taken!"); + + VkDeviceSize alignedOffset = VmaAlignUp(block.offset, allocAlignment); + if (block.size < allocSize + alignedOffset - block.offset) + return false; + + // Check for granularity conflicts + if (!IsVirtual() && + m_GranularityHandler.CheckConflictAndAlignUp(alignedOffset, allocSize, block.offset, block.size, allocType)) + return false; + + // Alloc successful + pAllocationRequest->type = VmaAllocationRequestType::TLSF; + pAllocationRequest->allocHandle = (VmaAllocHandle)&block; + pAllocationRequest->size = allocSize - GetDebugMargin(); + pAllocationRequest->customData = (void*)allocType; + pAllocationRequest->algorithmData = 
alignedOffset; + + // Place block at the start of list if it's normal block + if (listIndex != m_ListsCount && block.PrevFree()) + { + block.PrevFree()->NextFree() = block.NextFree(); + if (block.NextFree()) + block.NextFree()->PrevFree() = block.PrevFree(); + block.PrevFree() = VMA_NULL; + block.NextFree() = m_FreeList[listIndex]; + m_FreeList[listIndex] = &block; + if (block.NextFree()) + block.NextFree()->PrevFree() = &block; + } + + return true; +} +#endif // _VMA_BLOCK_METADATA_TLSF_FUNCTIONS +#endif // _VMA_BLOCK_METADATA_TLSF + +#ifndef _VMA_BLOCK_VECTOR /* Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific Vulkan memory type. Synchronized internally with a mutex. */ -struct VmaBlockVector +class VmaBlockVector { - VMA_CLASS_NO_COPY(VmaBlockVector) + friend struct VmaDefragmentationContext_T; + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector) public: VmaBlockVector( VmaAllocator hAllocator, @@ -7066,30 +9581,36 @@ public: size_t minBlockCount, size_t maxBlockCount, VkDeviceSize bufferImageGranularity, - uint32_t frameInUseCount, bool explicitBlockSize, uint32_t algorithm, - float priority); + float priority, + VkDeviceSize minAllocationAlignment, + void* pMemoryAllocateNext); ~VmaBlockVector(); - VkResult CreateMinBlocks(); - VmaAllocator GetAllocator() const { return m_hAllocator; } VmaPool GetParentPool() const { return m_hParentPool; } bool IsCustomPool() const { return m_hParentPool != VMA_NULL; } uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; } VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } - uint32_t GetFrameInUseCount() const { return m_FrameInUseCount; } uint32_t GetAlgorithm() const { return m_Algorithm; } + bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; } + float GetPriority() const { return m_Priority; } + const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; } + // To 
be used only while the m_Mutex is locked. Used during defragmentation. + size_t GetBlockCount() const { return m_Blocks.size(); } + // To be used only while the m_Mutex is locked. Used during defragmentation. + VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } + VMA_RW_MUTEX &GetMutex() { return m_Mutex; } - void GetPoolStats(VmaPoolStats* pStats); - + VkResult CreateMinBlocks(); + void AddStatistics(VmaStatistics& inoutStats); + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); bool IsEmpty(); bool IsCorruptionDetectionEnabled() const; VkResult Allocate( - uint32_t currentFrameIndex, VkDeviceSize size, VkDeviceSize alignment, const VmaAllocationCreateInfo& createInfo, @@ -7099,49 +9620,13 @@ public: void Free(const VmaAllocation hAllocation); - // Adds statistics of this BlockVector to pStats. - void AddStats(VmaStats* pStats); - #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json); #endif - void MakePoolAllocationsLost( - uint32_t currentFrameIndex, - size_t* pLostAllocationCount); VkResult CheckCorruption(); - // Saves results in pCtx->res. - void Defragment( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, - VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, - VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer); - void DefragmentationEnd( - class VmaBlockVectorDefragmentationContext* pCtx, - uint32_t flags, - VmaDefragmentationStats* pStats); - - uint32_t ProcessDefragmentations( - class VmaBlockVectorDefragmentationContext *pCtx, - VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves); - - void CommitDefragmentations( - class VmaBlockVectorDefragmentationContext *pCtx, - VmaDefragmentationStats* pStats); - - //////////////////////////////////////////////////////////////////////////////// - // To be used only while the m_Mutex is locked. 
Used during defragmentation. - - size_t GetBlockCount() const { return m_Blocks.size(); } - VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } - size_t CalcAllocationCount() const; - bool IsBufferImageGranularityConflictPossible() const; - private: - friend class VmaDefragmentationAlgorithm_Generic; - const VmaAllocator m_hAllocator; const VmaPool m_hParentPool; const uint32_t m_MemoryTypeIndex; @@ -7149,40 +9634,37 @@ private: const size_t m_MinBlockCount; const size_t m_MaxBlockCount; const VkDeviceSize m_BufferImageGranularity; - const uint32_t m_FrameInUseCount; const bool m_ExplicitBlockSize; const uint32_t m_Algorithm; const float m_Priority; - VMA_RW_MUTEX m_Mutex; + const VkDeviceSize m_MinAllocationAlignment; - /* There can be at most one allocation that is completely empty (except when minBlockCount > 0) - - a hysteresis to avoid pessimistic case of alternating creation and destruction of a VkDeviceMemory. */ - bool m_HasEmptyBlock; + void* const m_pMemoryAllocateNext; + VMA_RW_MUTEX m_Mutex; // Incrementally sorted by sumFreeSize, ascending. - VmaVector< VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*> > m_Blocks; + VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks; uint32_t m_NextBlockId; + bool m_IncrementalSort = true; + + void SetIncrementalSort(bool val) { m_IncrementalSort = val; } VkDeviceSize CalcMaxBlockSize() const; - // Finds and removes given block from vector. void Remove(VmaDeviceMemoryBlock* pBlock); - // Performs single step in sorting m_Blocks. They may not be fully sorted // after this call. void IncrementallySortBlocks(); + void SortByFreeSize(); VkResult AllocatePage( - uint32_t currentFrameIndex, VkDeviceSize size, VkDeviceSize alignment, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, VmaAllocation* pAllocation); - // To be used only without CAN_MAKE_OTHER_LOST flag. 
VkResult AllocateFromBlock( VmaDeviceMemoryBlock* pBlock, - uint32_t currentFrameIndex, VkDeviceSize size, VkDeviceSize alignment, VmaAllocationCreateFlags allocFlags, @@ -7191,32 +9673,116 @@ private: uint32_t strategy, VmaAllocation* pAllocation); + VkResult CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation); + VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); - - // Saves result to pCtx->res. - void ApplyDefragmentationMovesCpu( - class VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves); - // Saves result to pCtx->res. - void ApplyDefragmentationMovesGpu( - class VmaBlockVectorDefragmentationContext* pDefragCtx, - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkCommandBuffer commandBuffer); - - /* - Used during defragmentation. pDefragmentationStats is optional. It's in/out - - updated with new data. 
- */ - void FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats); - - void UpdateHasEmptyBlock(); + bool HasEmptyBlock(); }; +#endif // _VMA_BLOCK_VECTOR +#ifndef _VMA_DEFRAGMENTATION_CONTEXT +struct VmaDefragmentationContext_T +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T) +public: + VmaDefragmentationContext_T( + VmaAllocator hAllocator, + const VmaDefragmentationInfo& info); + ~VmaDefragmentationContext_T(); + + void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; } + + VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo); + VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo); + +private: + // Max number of allocations to ignore due to size constraints before ending single pass + static const uint8_t MAX_ALLOCS_TO_IGNORE = 16; + enum class CounterStatus { Pass, Ignore, End }; + + struct FragmentedBlock + { + uint32_t data; + VmaDeviceMemoryBlock* block; + }; + struct StateBalanced + { + VkDeviceSize avgFreeSize = 0; + VkDeviceSize avgAllocSize = UINT64_MAX; + }; + struct StateExtensive + { + enum class Operation : uint8_t + { + FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll, + MoveBuffers, MoveTextures, MoveAll, + Cleanup, Done + }; + + Operation operation = Operation::FindFreeBlockTexture; + size_t firstFreeBlock = SIZE_MAX; + }; + struct MoveAllocationData + { + VkDeviceSize size; + VkDeviceSize alignment; + VmaSuballocationType type; + VmaAllocationCreateFlags flags; + VmaDefragmentationMove move = {}; + }; + + const VkDeviceSize m_MaxPassBytes; + const uint32_t m_MaxPassAllocations; + const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback; + void* m_BreakCallbackUserData; + + VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator; + VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves; + + uint8_t m_IgnoredAllocs = 0; + uint32_t m_Algorithm; + uint32_t m_BlockVectorCount; + VmaBlockVector* m_PoolBlockVector; + VmaBlockVector** m_pBlockVectors; + size_t m_ImmovableBlockCount = 0; + VmaDefragmentationStats 
m_GlobalStats = { 0 }; + VmaDefragmentationStats m_PassStats = { 0 }; + void* m_AlgorithmState = VMA_NULL; + + static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata); + CounterStatus CheckCounters(VkDeviceSize bytes); + bool IncrementCounters(VkDeviceSize bytes); + bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block); + bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector); + + bool ComputeDefragmentation(VmaBlockVector& vector, size_t index); + bool ComputeDefragmentation_Fast(VmaBlockVector& vector); + bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update); + bool ComputeDefragmentation_Full(VmaBlockVector& vector); + bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index); + + void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); + bool MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent); +}; +#endif // _VMA_DEFRAGMENTATION_CONTEXT + +#ifndef _VMA_POOL_T struct VmaPool_T { - VMA_CLASS_NO_COPY(VmaPool_T) + friend struct VmaPoolListItemTraits; + VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T) public: VmaBlockVector m_BlockVector; + VmaDedicatedAllocationList m_DedicatedAllocations; VmaPool_T( VmaAllocator hAllocator, @@ -7237,606 +9803,29 @@ public: private: uint32_t m_Id; char* m_Name; + VmaPool_T* m_PrevPool = VMA_NULL; + VmaPool_T* m_NextPool = VMA_NULL; }; -/* -Performs defragmentation: - -- Updates `pBlockVector->m_pMetadata`. -- Updates allocations by calling ChangeBlockAllocation() or ChangeOffset(). -- Does not move actual data, only returns requested moves as `moves`. 
-*/ -class VmaDefragmentationAlgorithm +struct VmaPoolListItemTraits { - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm) -public: - VmaDefragmentationAlgorithm( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - uint32_t currentFrameIndex) : - m_hAllocator(hAllocator), - m_pBlockVector(pBlockVector), - m_CurrentFrameIndex(currentFrameIndex) - { - } - virtual ~VmaDefragmentationAlgorithm() - { - } + typedef VmaPool_T ItemType; - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) = 0; - virtual void AddAll() = 0; - - virtual VkResult Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) = 0; - - virtual VkDeviceSize GetBytesMoved() const = 0; - virtual uint32_t GetAllocationsMoved() const = 0; - -protected: - VmaAllocator const m_hAllocator; - VmaBlockVector* const m_pBlockVector; - const uint32_t m_CurrentFrameIndex; - - struct AllocationInfo - { - VmaAllocation m_hAllocation; - VkBool32* m_pChanged; - - AllocationInfo() : - m_hAllocation(VK_NULL_HANDLE), - m_pChanged(VMA_NULL) - { - } - AllocationInfo(VmaAllocation hAlloc, VkBool32* pChanged) : - m_hAllocation(hAlloc), - m_pChanged(pChanged) - { - } - }; -}; - -class VmaDefragmentationAlgorithm_Generic : public VmaDefragmentationAlgorithm -{ - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm_Generic) -public: - VmaDefragmentationAlgorithm_Generic( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - uint32_t currentFrameIndex, - bool overlappingMoveSupported); - virtual ~VmaDefragmentationAlgorithm_Generic(); - - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); - virtual void AddAll() { m_AllAllocations = true; } - - virtual VkResult Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags); - - virtual VkDeviceSize GetBytesMoved() 
const { return m_BytesMoved; } - virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } - -private: - uint32_t m_AllocationCount; - bool m_AllAllocations; - - VkDeviceSize m_BytesMoved; - uint32_t m_AllocationsMoved; - - struct AllocationInfoSizeGreater - { - bool operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const - { - return lhs.m_hAllocation->GetSize() > rhs.m_hAllocation->GetSize(); - } - }; - - struct AllocationInfoOffsetGreater - { - bool operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const - { - return lhs.m_hAllocation->GetOffset() > rhs.m_hAllocation->GetOffset(); - } - }; - - struct BlockInfo - { - size_t m_OriginalBlockIndex; - VmaDeviceMemoryBlock* m_pBlock; - bool m_HasNonMovableAllocations; - VmaVector< AllocationInfo, VmaStlAllocator > m_Allocations; - - BlockInfo(const VkAllocationCallbacks* pAllocationCallbacks) : - m_OriginalBlockIndex(SIZE_MAX), - m_pBlock(VMA_NULL), - m_HasNonMovableAllocations(true), - m_Allocations(pAllocationCallbacks) - { - } - - void CalcHasNonMovableAllocations() - { - const size_t blockAllocCount = m_pBlock->m_pMetadata->GetAllocationCount(); - const size_t defragmentAllocCount = m_Allocations.size(); - m_HasNonMovableAllocations = blockAllocCount != defragmentAllocCount; - } - - void SortAllocationsBySizeDescending() - { - VMA_SORT(m_Allocations.begin(), m_Allocations.end(), AllocationInfoSizeGreater()); - } - - void SortAllocationsByOffsetDescending() - { - VMA_SORT(m_Allocations.begin(), m_Allocations.end(), AllocationInfoOffsetGreater()); - } - }; - - struct BlockPointerLess - { - bool operator()(const BlockInfo* pLhsBlockInfo, const VmaDeviceMemoryBlock* pRhsBlock) const - { - return pLhsBlockInfo->m_pBlock < pRhsBlock; - } - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const - { - return pLhsBlockInfo->m_pBlock < pRhsBlockInfo->m_pBlock; - } - }; - - // 1. Blocks with some non-movable allocations go first. - // 2. 
Blocks with smaller sumFreeSize go first. - struct BlockInfoCompareMoveDestination - { - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const - { - if(pLhsBlockInfo->m_HasNonMovableAllocations && !pRhsBlockInfo->m_HasNonMovableAllocations) - { - return true; - } - if(!pLhsBlockInfo->m_HasNonMovableAllocations && pRhsBlockInfo->m_HasNonMovableAllocations) - { - return false; - } - if(pLhsBlockInfo->m_pBlock->m_pMetadata->GetSumFreeSize() < pRhsBlockInfo->m_pBlock->m_pMetadata->GetSumFreeSize()) - { - return true; - } - return false; - } - }; - - typedef VmaVector< BlockInfo*, VmaStlAllocator > BlockInfoVector; - BlockInfoVector m_Blocks; - - VkResult DefragmentRound( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - bool freeOldAllocations); - - size_t CalcBlocksWithNonMovableCount() const; - - static bool MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset); -}; - -class VmaDefragmentationAlgorithm_Fast : public VmaDefragmentationAlgorithm -{ - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm_Fast) -public: - VmaDefragmentationAlgorithm_Fast( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - uint32_t currentFrameIndex, - bool overlappingMoveSupported); - virtual ~VmaDefragmentationAlgorithm_Fast(); - - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) { ++m_AllocationCount; } - virtual void AddAll() { m_AllAllocations = true; } - - virtual VkResult Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags); - - virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } - virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } - -private: - struct BlockInfo - { - size_t origBlockIndex; - }; - - class FreeSpaceDatabase - { - public: 
- FreeSpaceDatabase() - { - FreeSpace s = {}; - s.blockInfoIndex = SIZE_MAX; - for(size_t i = 0; i < MAX_COUNT; ++i) - { - m_FreeSpaces[i] = s; - } - } - - void Register(size_t blockInfoIndex, VkDeviceSize offset, VkDeviceSize size) - { - if(size < VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - return; - } - - // Find first invalid or the smallest structure. - size_t bestIndex = SIZE_MAX; - for(size_t i = 0; i < MAX_COUNT; ++i) - { - // Empty structure. - if(m_FreeSpaces[i].blockInfoIndex == SIZE_MAX) - { - bestIndex = i; - break; - } - if(m_FreeSpaces[i].size < size && - (bestIndex == SIZE_MAX || m_FreeSpaces[bestIndex].size > m_FreeSpaces[i].size)) - { - bestIndex = i; - } - } - - if(bestIndex != SIZE_MAX) - { - m_FreeSpaces[bestIndex].blockInfoIndex = blockInfoIndex; - m_FreeSpaces[bestIndex].offset = offset; - m_FreeSpaces[bestIndex].size = size; - } - } - - bool Fetch(VkDeviceSize alignment, VkDeviceSize size, - size_t& outBlockInfoIndex, VkDeviceSize& outDstOffset) - { - size_t bestIndex = SIZE_MAX; - VkDeviceSize bestFreeSpaceAfter = 0; - for(size_t i = 0; i < MAX_COUNT; ++i) - { - // Structure is valid. - if(m_FreeSpaces[i].blockInfoIndex != SIZE_MAX) - { - const VkDeviceSize dstOffset = VmaAlignUp(m_FreeSpaces[i].offset, alignment); - // Allocation fits into this structure. - if(dstOffset + size <= m_FreeSpaces[i].offset + m_FreeSpaces[i].size) - { - const VkDeviceSize freeSpaceAfter = (m_FreeSpaces[i].offset + m_FreeSpaces[i].size) - - (dstOffset + size); - if(bestIndex == SIZE_MAX || freeSpaceAfter > bestFreeSpaceAfter) - { - bestIndex = i; - bestFreeSpaceAfter = freeSpaceAfter; - } - } - } - } - - if(bestIndex != SIZE_MAX) - { - outBlockInfoIndex = m_FreeSpaces[bestIndex].blockInfoIndex; - outDstOffset = VmaAlignUp(m_FreeSpaces[bestIndex].offset, alignment); - - if(bestFreeSpaceAfter >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - // Leave this structure for remaining empty space. 
- const VkDeviceSize alignmentPlusSize = (outDstOffset - m_FreeSpaces[bestIndex].offset) + size; - m_FreeSpaces[bestIndex].offset += alignmentPlusSize; - m_FreeSpaces[bestIndex].size -= alignmentPlusSize; - } - else - { - // This structure becomes invalid. - m_FreeSpaces[bestIndex].blockInfoIndex = SIZE_MAX; - } - - return true; - } - - return false; - } - - private: - static const size_t MAX_COUNT = 4; - - struct FreeSpace - { - size_t blockInfoIndex; // SIZE_MAX means this structure is invalid. - VkDeviceSize offset; - VkDeviceSize size; - } m_FreeSpaces[MAX_COUNT]; - }; - - const bool m_OverlappingMoveSupported; - - uint32_t m_AllocationCount; - bool m_AllAllocations; - - VkDeviceSize m_BytesMoved; - uint32_t m_AllocationsMoved; - - VmaVector< BlockInfo, VmaStlAllocator > m_BlockInfos; - - void PreprocessMetadata(); - void PostprocessMetadata(); - void InsertSuballoc(VmaBlockMetadata_Generic* pMetadata, const VmaSuballocation& suballoc); -}; - -struct VmaBlockDefragmentationContext -{ - enum BLOCK_FLAG - { - BLOCK_FLAG_USED = 0x00000001, - }; - uint32_t flags; - VkBuffer hBuffer; -}; - -class VmaBlockVectorDefragmentationContext -{ - VMA_CLASS_NO_COPY(VmaBlockVectorDefragmentationContext) -public: - VkResult res; - bool mutexLocked; - VmaVector< VmaBlockDefragmentationContext, VmaStlAllocator > blockContexts; - VmaVector< VmaDefragmentationMove, VmaStlAllocator > defragmentationMoves; - uint32_t defragmentationMovesProcessed; - uint32_t defragmentationMovesCommitted; - bool hasDefragmentationPlan; - - VmaBlockVectorDefragmentationContext( - VmaAllocator hAllocator, - VmaPool hCustomPool, // Optional. 
- VmaBlockVector* pBlockVector, - uint32_t currFrameIndex); - ~VmaBlockVectorDefragmentationContext(); - - VmaPool GetCustomPool() const { return m_hCustomPool; } - VmaBlockVector* GetBlockVector() const { return m_pBlockVector; } - VmaDefragmentationAlgorithm* GetAlgorithm() const { return m_pAlgorithm; } - - void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); - void AddAll() { m_AllAllocations = true; } - - void Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags); - -private: - const VmaAllocator m_hAllocator; - // Null if not from custom pool. - const VmaPool m_hCustomPool; - // Redundant, for convenience not to fetch from m_hCustomPool->m_BlockVector or m_hAllocator->m_pBlockVectors. - VmaBlockVector* const m_pBlockVector; - const uint32_t m_CurrFrameIndex; - // Owner of this object. - VmaDefragmentationAlgorithm* m_pAlgorithm; - - struct AllocInfo - { - VmaAllocation hAlloc; - VkBool32* pChanged; - }; - // Used between constructor and Begin. - VmaVector< AllocInfo, VmaStlAllocator > m_Allocations; - bool m_AllAllocations; -}; - -struct VmaDefragmentationContext_T -{ -private: - VMA_CLASS_NO_COPY(VmaDefragmentationContext_T) -public: - VmaDefragmentationContext_T( - VmaAllocator hAllocator, - uint32_t currFrameIndex, - uint32_t flags, - VmaDefragmentationStats* pStats); - ~VmaDefragmentationContext_T(); - - void AddPools(uint32_t poolCount, const VmaPool* pPools); - void AddAllocations( - uint32_t allocationCount, - const VmaAllocation* pAllocations, - VkBool32* pAllocationsChanged); - - /* - Returns: - - `VK_SUCCESS` if succeeded and object can be destroyed immediately. - - `VK_NOT_READY` if succeeded but the object must remain alive until vmaDefragmentationEnd(). - - Negative value if error occured and object can be destroyed immediately. 
- */ - VkResult Defragment( - VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, - VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags); - - VkResult DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo); - VkResult DefragmentPassEnd(); - -private: - const VmaAllocator m_hAllocator; - const uint32_t m_CurrFrameIndex; - const uint32_t m_Flags; - VmaDefragmentationStats* const m_pStats; - - VkDeviceSize m_MaxCpuBytesToMove; - uint32_t m_MaxCpuAllocationsToMove; - VkDeviceSize m_MaxGpuBytesToMove; - uint32_t m_MaxGpuAllocationsToMove; - - // Owner of these objects. - VmaBlockVectorDefragmentationContext* m_DefaultPoolContexts[VK_MAX_MEMORY_TYPES]; - // Owner of these objects. - VmaVector< VmaBlockVectorDefragmentationContext*, VmaStlAllocator > m_CustomPoolContexts; -}; - -#if VMA_RECORDING_ENABLED - -class VmaRecorder -{ -public: - VmaRecorder(); - VkResult Init(const VmaRecordSettings& settings, bool useMutex); - void WriteConfiguration( - const VkPhysicalDeviceProperties& devProps, - const VkPhysicalDeviceMemoryProperties& memProps, - uint32_t vulkanApiVersion, - bool dedicatedAllocationExtensionEnabled, - bool bindMemory2ExtensionEnabled, - bool memoryBudgetExtensionEnabled, - bool deviceCoherentMemoryExtensionEnabled); - ~VmaRecorder(); - - void RecordCreateAllocator(uint32_t frameIndex); - void RecordDestroyAllocator(uint32_t frameIndex); - void RecordCreatePool(uint32_t frameIndex, - const VmaPoolCreateInfo& createInfo, - VmaPool pool); - void RecordDestroyPool(uint32_t frameIndex, VmaPool pool); - void RecordAllocateMemory(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - const VmaAllocationCreateInfo& createInfo, - VmaAllocation allocation); - void RecordAllocateMemoryPages(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - const VmaAllocationCreateInfo& createInfo, - uint64_t allocationCount, - const 
VmaAllocation* pAllocations); - void RecordAllocateMemoryForBuffer(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - bool requiresDedicatedAllocation, - bool prefersDedicatedAllocation, - const VmaAllocationCreateInfo& createInfo, - VmaAllocation allocation); - void RecordAllocateMemoryForImage(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - bool requiresDedicatedAllocation, - bool prefersDedicatedAllocation, - const VmaAllocationCreateInfo& createInfo, - VmaAllocation allocation); - void RecordFreeMemory(uint32_t frameIndex, - VmaAllocation allocation); - void RecordFreeMemoryPages(uint32_t frameIndex, - uint64_t allocationCount, - const VmaAllocation* pAllocations); - void RecordSetAllocationUserData(uint32_t frameIndex, - VmaAllocation allocation, - const void* pUserData); - void RecordCreateLostAllocation(uint32_t frameIndex, - VmaAllocation allocation); - void RecordMapMemory(uint32_t frameIndex, - VmaAllocation allocation); - void RecordUnmapMemory(uint32_t frameIndex, - VmaAllocation allocation); - void RecordFlushAllocation(uint32_t frameIndex, - VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size); - void RecordInvalidateAllocation(uint32_t frameIndex, - VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size); - void RecordCreateBuffer(uint32_t frameIndex, - const VkBufferCreateInfo& bufCreateInfo, - const VmaAllocationCreateInfo& allocCreateInfo, - VmaAllocation allocation); - void RecordCreateImage(uint32_t frameIndex, - const VkImageCreateInfo& imageCreateInfo, - const VmaAllocationCreateInfo& allocCreateInfo, - VmaAllocation allocation); - void RecordDestroyBuffer(uint32_t frameIndex, - VmaAllocation allocation); - void RecordDestroyImage(uint32_t frameIndex, - VmaAllocation allocation); - void RecordTouchAllocation(uint32_t frameIndex, - VmaAllocation allocation); - void RecordGetAllocationInfo(uint32_t frameIndex, - VmaAllocation allocation); - void RecordMakePoolAllocationsLost(uint32_t frameIndex, 
- VmaPool pool); - void RecordDefragmentationBegin(uint32_t frameIndex, - const VmaDefragmentationInfo2& info, - VmaDefragmentationContext ctx); - void RecordDefragmentationEnd(uint32_t frameIndex, - VmaDefragmentationContext ctx); - void RecordSetPoolName(uint32_t frameIndex, - VmaPool pool, - const char* name); - -private: - struct CallParams - { - uint32_t threadId; - double time; - }; - - class UserDataString - { - public: - UserDataString(VmaAllocationCreateFlags allocFlags, const void* pUserData); - const char* GetString() const { return m_Str; } - - private: - char m_PtrStr[17]; - const char* m_Str; - }; - - bool m_UseMutex; - VmaRecordFlags m_Flags; - FILE* m_File; - VMA_MUTEX m_FileMutex; - std::chrono::time_point m_RecordingStartTime; - - void GetBasicParams(CallParams& outParams); - - // T must be a pointer type, e.g. VmaAllocation, VmaPool. - template - void PrintPointerList(uint64_t count, const T* pItems) - { - if(count) - { - fprintf(m_File, "%p", pItems[0]); - for(uint64_t i = 1; i < count; ++i) - { - fprintf(m_File, " %p", pItems[i]); - } - } - } - - void PrintPointerList(uint64_t count, const VmaAllocation* pItems); - void Flush(); -}; - -#endif // #if VMA_RECORDING_ENABLED - -/* -Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects. -*/ -class VmaAllocationObjectAllocator -{ - VMA_CLASS_NO_COPY(VmaAllocationObjectAllocator) -public: - VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks); - - template VmaAllocation Allocate(Types... 
args); - void Free(VmaAllocation hAlloc); - -private: - VMA_MUTEX m_Mutex; - VmaPoolAllocator m_Allocator; + static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } + static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } + static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } + static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } }; +#endif // _VMA_POOL_T +#ifndef _VMA_CURRENT_BUDGET_DATA struct VmaCurrentBudgetData { + VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData) +public: + + VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; @@ -7846,61 +9835,250 @@ struct VmaCurrentBudgetData uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS]; uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS]; uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS]; -#endif // #if VMA_MEMORY_BUDGET +#endif // VMA_MEMORY_BUDGET - VmaCurrentBudgetData() - { - for(uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) - { - m_BlockBytes[heapIndex] = 0; - m_AllocationBytes[heapIndex] = 0; -#if VMA_MEMORY_BUDGET - m_VulkanUsage[heapIndex] = 0; - m_VulkanBudget[heapIndex] = 0; - m_BlockBytesAtBudgetFetch[heapIndex] = 0; -#endif - } + VmaCurrentBudgetData(); -#if VMA_MEMORY_BUDGET - m_OperationsSinceBudgetFetch = 0; -#endif - } - - void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) - { - m_AllocationBytes[heapIndex] += allocationSize; -#if VMA_MEMORY_BUDGET - ++m_OperationsSinceBudgetFetch; -#endif - } - - void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) - { - VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); // DELME - m_AllocationBytes[heapIndex] -= allocationSize; -#if VMA_MEMORY_BUDGET - ++m_OperationsSinceBudgetFetch; -#endif - } + void AddAllocation(uint32_t heapIndex, VkDeviceSize 
allocationSize); + void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); }; +#ifndef _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +VmaCurrentBudgetData::VmaCurrentBudgetData() +{ + for (uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) + { + m_BlockCount[heapIndex] = 0; + m_AllocationCount[heapIndex] = 0; + m_BlockBytes[heapIndex] = 0; + m_AllocationBytes[heapIndex] = 0; +#if VMA_MEMORY_BUDGET + m_VulkanUsage[heapIndex] = 0; + m_VulkanBudget[heapIndex] = 0; + m_BlockBytesAtBudgetFetch[heapIndex] = 0; +#endif + } + +#if VMA_MEMORY_BUDGET + m_OperationsSinceBudgetFetch = 0; +#endif +} + +void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + m_AllocationBytes[heapIndex] += allocationSize; + ++m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} + +void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); + m_AllocationBytes[heapIndex] -= allocationSize; + VMA_ASSERT(m_AllocationCount[heapIndex] > 0); + --m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} +#endif // _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +#endif // _VMA_CURRENT_BUDGET_DATA + +#ifndef _VMA_ALLOCATION_OBJECT_ALLOCATOR +/* +Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects. +*/ +class VmaAllocationObjectAllocator +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator) +public: + VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks) + : m_Allocator(pAllocationCallbacks, 1024) {} + + template VmaAllocation Allocate(Types&&... args); + void Free(VmaAllocation hAlloc); + +private: + VMA_MUTEX m_Mutex; + VmaPoolAllocator m_Allocator; +}; + +template +VmaAllocation VmaAllocationObjectAllocator::Allocate(Types&&... 
args) +{ + VmaMutexLock mutexLock(m_Mutex); + return m_Allocator.Alloc(std::forward(args)...); +} + +void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) +{ + VmaMutexLock mutexLock(m_Mutex); + m_Allocator.Free(hAlloc); +} +#endif // _VMA_ALLOCATION_OBJECT_ALLOCATOR + +#ifndef _VMA_VIRTUAL_BLOCK_T +struct VmaVirtualBlock_T +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T) +public: + const bool m_AllocationCallbacksSpecified; + const VkAllocationCallbacks m_AllocationCallbacks; + + VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo); + ~VmaVirtualBlock_T(); + + VkResult Init() { return VK_SUCCESS; } + bool IsEmpty() const { return m_Metadata->IsEmpty(); } + void Free(VmaVirtualAllocation allocation) { m_Metadata->Free((VmaAllocHandle)allocation); } + void SetAllocationUserData(VmaVirtualAllocation allocation, void* userData) { m_Metadata->SetAllocationUserData((VmaAllocHandle)allocation, userData); } + void Clear() { m_Metadata->Clear(); } + + const VkAllocationCallbacks* GetAllocationCallbacks() const; + void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo); + VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, + VkDeviceSize* outOffset); + void GetStatistics(VmaStatistics& outStats) const; + void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const; +#if VMA_STATS_STRING_ENABLED + void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const; +#endif + +private: + VmaBlockMetadata* m_Metadata; +}; + +#ifndef _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo) + : m_AllocationCallbacksSpecified(createInfo.pAllocationCallbacks != VMA_NULL), + m_AllocationCallbacks(createInfo.pAllocationCallbacks != VMA_NULL ? 
*createInfo.pAllocationCallbacks : VmaEmptyAllocationCallbacks) +{ + const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK; + switch (algorithm) + { + case 0: + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); + break; + case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT: + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true); + break; + default: + VMA_ASSERT(0); + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); + } + + m_Metadata->Init(createInfo.size); +} + +VmaVirtualBlock_T::~VmaVirtualBlock_T() +{ + // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT + // to receive the list of the unfreed allocations. + if (!m_Metadata->IsEmpty()) + m_Metadata->DebugLogAllAllocations(); + // This is the most important assert in the entire library. + // Hitting it means you have some memory leak - unreleased virtual allocations. + VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); + + vma_delete(GetAllocationCallbacks(), m_Metadata); +} + +const VkAllocationCallbacks* VmaVirtualBlock_T::GetAllocationCallbacks() const +{ + return m_AllocationCallbacksSpecified ? 
&m_AllocationCallbacks : VMA_NULL; +} + +void VmaVirtualBlock_T::GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo) +{ + m_Metadata->GetAllocationInfo((VmaAllocHandle)allocation, outInfo); +} + +VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, + VkDeviceSize* outOffset) +{ + VmaAllocationRequest request = {}; + if (m_Metadata->CreateAllocationRequest( + createInfo.size, // allocSize + VMA_MAX(createInfo.alignment, (VkDeviceSize)1), // allocAlignment + (createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, // upperAddress + VMA_SUBALLOCATION_TYPE_UNKNOWN, // allocType - unimportant + createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK, // strategy + &request)) + { + m_Metadata->Alloc(request, + VMA_SUBALLOCATION_TYPE_UNKNOWN, // type - unimportant + createInfo.pUserData); + outAllocation = (VmaVirtualAllocation)request.allocHandle; + if(outOffset) + *outOffset = m_Metadata->GetAllocationOffset(request.allocHandle); + return VK_SUCCESS; + } + outAllocation = (VmaVirtualAllocation)VK_NULL_HANDLE; + if (outOffset) + *outOffset = UINT64_MAX; + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const +{ + VmaClearStatistics(outStats); + m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const +{ + VmaClearDetailedStatistics(outStats); + m_Metadata->AddDetailedStatistics(outStats); +} + +#if VMA_STATS_STRING_ENABLED +void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const +{ + VmaJsonWriter json(GetAllocationCallbacks(), sb); + json.BeginObject(); + + VmaDetailedStatistics stats; + CalculateDetailedStatistics(stats); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats); + + if (detailedMap) + { + json.WriteString("Details"); + json.BeginObject(); + 
m_Metadata->PrintDetailedMap(json); + json.EndObject(); + } + + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +#endif // _VMA_VIRTUAL_BLOCK_T + + // Main allocator object. struct VmaAllocator_T { - VMA_CLASS_NO_COPY(VmaAllocator_T) + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T) public: - bool m_UseMutex; - uint32_t m_VulkanApiVersion; + const bool m_UseMutex; + const uint32_t m_VulkanApiVersion; bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). bool m_UseExtMemoryBudget; bool m_UseAmdDeviceCoherentMemory; bool m_UseKhrBufferDeviceAddress; bool m_UseExtMemoryPriority; - VkDevice m_hDevice; - VkInstance m_hInstance; - bool m_AllocationCallbacksSpecified; - VkAllocationCallbacks m_AllocationCallbacks; + bool m_UseKhrMaintenance4; + bool m_UseKhrMaintenance5; + const VkDevice m_hDevice; + const VkInstance m_hInstance; + const bool m_AllocationCallbacksSpecified; + const VkAllocationCallbacks m_AllocationCallbacks; VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; VmaAllocationObjectAllocator m_AllocationObjectAllocator; @@ -7912,13 +10090,10 @@ public: // Default pools. VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES]; - - // Each vector is sorted by memory (handle value). - typedef VmaVector< VmaAllocation, VmaStlAllocator > AllocationVectorType; - AllocationVectorType* m_pDedicatedAllocations[VK_MAX_MEMORY_TYPES]; - VMA_RW_MUTEX m_DedicatedAllocationsMutex[VK_MAX_MEMORY_TYPES]; + VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES]; VmaCurrentBudgetData m_Budget; + VMA_ATOMIC_UINT32 m_DeviceMemoryCount; // Total number of VkDeviceMemory objects. 
VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo); VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo); @@ -7926,7 +10101,7 @@ public: const VkAllocationCallbacks* GetAllocationCallbacks() const { - return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : 0; + return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL; } const VmaVulkanFunctions& GetVulkanFunctions() const { @@ -7960,8 +10135,8 @@ public: VkDeviceSize GetMemoryTypeMinAlignment(uint32_t memTypeIndex) const { return IsMemoryTypeNonCoherent(memTypeIndex) ? - VMA_MAX((VkDeviceSize)VMA_DEBUG_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) : - (VkDeviceSize)VMA_DEBUG_ALIGNMENT; + VMA_MAX((VkDeviceSize)VMA_MIN_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) : + (VkDeviceSize)VMA_MIN_ALIGNMENT; } bool IsIntegratedGpu() const @@ -7971,10 +10146,6 @@ public: uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; } -#if VMA_RECORDING_ENABLED - VmaRecorder* GetRecorder() const { return m_pRecorder; } -#endif - void GetBufferMemoryRequirements( VkBuffer hBuffer, VkMemoryRequirements& memReq, @@ -7985,6 +10156,11 @@ public: VkMemoryRequirements& memReq, bool& requiresDedicatedAllocation, bool& prefersDedicatedAllocation) const; + VkResult FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VmaBufferImageUsage bufImgUsage, + uint32_t* pMemoryTypeIndex) const; // Main allocation function. VkResult AllocateMemory( @@ -7992,8 +10168,8 @@ public: bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, // UINT32_MAX when unknown. 
VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -8004,50 +10180,29 @@ public: size_t allocationCount, const VmaAllocation* pAllocations); - VkResult ResizeAllocation( - const VmaAllocation alloc, - VkDeviceSize newSize); + void CalculateStatistics(VmaTotalStatistics* pStats); - void CalculateStats(VmaStats* pStats); - - void GetBudget( - VmaBudget* outBudget, uint32_t firstHeap, uint32_t heapCount); + void GetHeapBudgets( + VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json); #endif - VkResult DefragmentationBegin( - const VmaDefragmentationInfo2& info, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext* pContext); - VkResult DefragmentationEnd( - VmaDefragmentationContext context); - - VkResult DefragmentationPassBegin( - VmaDefragmentationPassInfo* pInfo, - VmaDefragmentationContext context); - VkResult DefragmentationPassEnd( - VmaDefragmentationContext context); - void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); - bool TouchAllocation(VmaAllocation hAllocation); + void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo); VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); void DestroyPool(VmaPool pool); - void GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats); + void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); + void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); void SetCurrentFrameIndex(uint32_t frameIndex); uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } - void MakePoolAllocationsLost( - VmaPool hPool, - size_t* pLostAllocationCount); VkResult CheckPoolCorruption(VmaPool hPool); VkResult CheckCorruption(uint32_t memoryTypeBits); - void 
CreateLostAllocation(VmaAllocation* pAllocation); - // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping. VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); // Call to Vulkan function vkFreeMemory with accompanying bookkeeping. @@ -8089,6 +10244,17 @@ public: const VkDeviceSize* offsets, const VkDeviceSize* sizes, VMA_CACHE_OPERATION op); + VkResult CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + VkResult CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size); + void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); /* @@ -8097,16 +10263,27 @@ public: */ uint32_t GetGpuDefragmentationMemoryTypeBits(); +#if VMA_EXTERNAL_MEMORY + VkExternalMemoryHandleTypeFlagsKHR GetExternalMemoryHandleTypeFlags(uint32_t memTypeIndex) const + { + return m_TypeExternalMemoryHandleTypes[memTypeIndex]; + } +#endif // #if VMA_EXTERNAL_MEMORY + private: VkDeviceSize m_PreferredLargeHeapBlockSize; VkPhysicalDevice m_PhysicalDevice; VMA_ATOMIC_UINT32 m_CurrentFrameIndex; VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. +#if VMA_EXTERNAL_MEMORY + VkExternalMemoryHandleTypeFlagsKHR m_TypeExternalMemoryHandleTypes[VK_MAX_MEMORY_TYPES]; +#endif // #if VMA_EXTERNAL_MEMORY VMA_RW_MUTEX m_PoolsMutex; - // Protected by m_PoolsMutex. Sorted by pointer value. - VmaVector > m_Pools; + typedef VmaIntrusiveLinkedList PoolList; + // Protected by m_PoolsMutex. + PoolList m_Pools; uint32_t m_NextPoolId; VmaVulkanFunctions m_VulkanFunctions; @@ -8114,10 +10291,6 @@ private: // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types. 
uint32_t m_GlobalMemoryTypeBits; -#if VMA_RECORDING_ENABLED - VmaRecorder* m_pRecorder; -#endif - void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions); #if VMA_STATIC_VULKAN_FUNCTIONS == 1 @@ -8135,53 +10308,71 @@ private: VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex); VkResult AllocateMemoryOfType( + VmaPool pool, VkDeviceSize size, VkDeviceSize alignment, - bool dedicatedAllocation, + bool dedicatedPreferred, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + VmaBlockVector& blockVector, size_t allocationCount, VmaAllocation* pAllocations); // Helper function only to be used inside AllocateDedicatedMemory. VkResult AllocateDedicatedMemoryPage( + VmaPool pool, VkDeviceSize size, VmaSuballocationType suballocType, uint32_t memTypeIndex, const VkMemoryAllocateInfo& allocInfo, bool map, bool isUserDataString, + bool isMappingAllowed, void* pUserData, VmaAllocation* pAllocation); // Allocates and registers new VkDeviceMemory specifically for dedicated allocations. 
VkResult AllocateDedicatedMemory( + VmaPool pool, VkDeviceSize size, VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, uint32_t memTypeIndex, - bool withinBudget, bool map, bool isUserDataString, + bool isMappingAllowed, + bool canAliasMemory, void* pUserData, float priority, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, size_t allocationCount, - VmaAllocation* pAllocations); + VmaAllocation* pAllocations, + const void* pNextChain = VMA_NULL); void FreeDedicatedMemory(const VmaAllocation allocation); + VkResult CalcMemTypeParams( + VmaAllocationCreateInfo& outCreateInfo, + uint32_t memTypeIndex, + VkDeviceSize size, + size_t allocationCount); + VkResult CalcAllocationParams( + VmaAllocationCreateInfo& outCreateInfo, + bool dedicatedRequired, + bool dedicatedPreferred); + /* Calculates and returns bit mask of memory types that can support defragmentation on GPU as they support creation of required buffer for copy operations. 
*/ uint32_t CalculateGpuDefragmentationMemoryTypeBits() const; - uint32_t CalculateGlobalMemoryTypeBits() const; bool GetFlushOrInvalidateRange( @@ -8194,9 +10385,8 @@ private: #endif // #if VMA_MEMORY_BUDGET }; -//////////////////////////////////////////////////////////////////////////////// -// Memory allocation #2 after VmaAllocator_T definition +#ifndef _VMA_MEMORY_FUNCTIONS static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment) { return VmaMalloc(&hAllocator->m_AllocationCallbacks, size, alignment); @@ -8239,421 +10429,385 @@ static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count) VmaFree(hAllocator, ptr); } } +#endif // _VMA_MEMORY_FUNCTIONS -//////////////////////////////////////////////////////////////////////////////// -// VmaStringBuilder +#ifndef _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS +VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) + : m_pMetadata(VMA_NULL), + m_MemoryTypeIndex(UINT32_MAX), + m_Id(0), + m_hMemory(VK_NULL_HANDLE), + m_MapCount(0), + m_pMappedData(VMA_NULL) {} -#if VMA_STATS_STRING_ENABLED - -class VmaStringBuilder +VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock() { -public: - VmaStringBuilder(VmaAllocator alloc) : m_Data(VmaStlAllocator(alloc->GetAllocationCallbacks())) { } - size_t GetLength() const { return m_Data.size(); } - const char* GetData() const { return m_Data.data(); } + VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); + VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE); +} - void Add(char ch) { m_Data.push_back(ch); } - void Add(const char* pStr); - void AddNewLine() { Add('\n'); } - void AddNumber(uint32_t num); - void AddNumber(uint64_t num); - void AddPointer(const void* ptr); - -private: - VmaVector< char, VmaStlAllocator > m_Data; -}; - -void VmaStringBuilder::Add(const char* pStr) +void VmaDeviceMemoryBlock::Init( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t newMemoryTypeIndex, + VkDeviceMemory 
newMemory, + VkDeviceSize newSize, + uint32_t id, + uint32_t algorithm, + VkDeviceSize bufferImageGranularity) { - const size_t strLen = strlen(pStr); - if(strLen > 0) + VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); + + m_hParentPool = hParentPool; + m_MemoryTypeIndex = newMemoryTypeIndex; + m_Id = id; + m_hMemory = newMemory; + + switch (algorithm) { - const size_t oldCount = m_Data.size(); - m_Data.resize(oldCount + strLen); - memcpy(m_Data.data() + oldCount, pStr, strLen); + case 0: + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), + bufferImageGranularity, false); // isVirtual + break; + case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(), + bufferImageGranularity, false); // isVirtual + break; + default: + VMA_ASSERT(0); + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), + bufferImageGranularity, false); // isVirtual } + m_pMetadata->Init(newSize); } -void VmaStringBuilder::AddNumber(uint32_t num) +void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) { - char buf[11]; - buf[10] = '\0'; - char *p = &buf[10]; - do + // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT + // to receive the list of the unfreed allocations. + if (!m_pMetadata->IsEmpty()) + m_pMetadata->DebugLogAllAllocations(); + // This is the most important assert in the entire library. + // Hitting it means you have some memory leak - unreleased VmaAllocation objects. 
+ VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + + VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE); + allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); + m_hMemory = VK_NULL_HANDLE; + + vma_delete(allocator, m_pMetadata); + m_pMetadata = VMA_NULL; +} + +void VmaDeviceMemoryBlock::PostAlloc(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + m_MappingHysteresis.PostAlloc(); +} + +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if(m_MappingHysteresis.PostFree()) { - *--p = '0' + (num % 10); - num /= 10; - } - while(num); - Add(p); -} - -void VmaStringBuilder::AddNumber(uint64_t num) -{ - char buf[21]; - buf[20] = '\0'; - char *p = &buf[20]; - do - { - *--p = '0' + (num % 10); - num /= 10; - } - while(num); - Add(p); -} - -void VmaStringBuilder::AddPointer(const void* ptr) -{ - char buf[21]; - VmaPtrToStr(buf, sizeof(buf), ptr); - Add(buf); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -//////////////////////////////////////////////////////////////////////////////// -// VmaJsonWriter - -#if VMA_STATS_STRING_ENABLED - -class VmaJsonWriter -{ - VMA_CLASS_NO_COPY(VmaJsonWriter) -public: - VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb); - ~VmaJsonWriter(); - - void BeginObject(bool singleLine = false); - void EndObject(); - - void BeginArray(bool singleLine = false); - void EndArray(); - - void WriteString(const char* pStr); - void BeginString(const char* pStr = VMA_NULL); - void ContinueString(const char* pStr); - void ContinueString(uint32_t n); - void ContinueString(uint64_t n); - void ContinueString_Pointer(const void* ptr); - void EndString(const char* pStr = VMA_NULL); - - void WriteNumber(uint32_t n); - void WriteNumber(uint64_t n); - void WriteBool(bool b); - void WriteNull(); - -private: - static 
const char* const INDENT; - - enum COLLECTION_TYPE - { - COLLECTION_TYPE_OBJECT, - COLLECTION_TYPE_ARRAY, - }; - struct StackItem - { - COLLECTION_TYPE type; - uint32_t valueCount; - bool singleLineMode; - }; - - VmaStringBuilder& m_SB; - VmaVector< StackItem, VmaStlAllocator > m_Stack; - bool m_InsideString; - - void BeginValue(bool isString); - void WriteIndent(bool oneLess = false); -}; - -const char* const VmaJsonWriter::INDENT = " "; - -VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) : - m_SB(sb), - m_Stack(VmaStlAllocator(pAllocationCallbacks)), - m_InsideString(false) -{ -} - -VmaJsonWriter::~VmaJsonWriter() -{ - VMA_ASSERT(!m_InsideString); - VMA_ASSERT(m_Stack.empty()); -} - -void VmaJsonWriter::BeginObject(bool singleLine) -{ - VMA_ASSERT(!m_InsideString); - - BeginValue(false); - m_SB.Add('{'); - - StackItem item; - item.type = COLLECTION_TYPE_OBJECT; - item.valueCount = 0; - item.singleLineMode = singleLine; - m_Stack.push_back(item); -} - -void VmaJsonWriter::EndObject() -{ - VMA_ASSERT(!m_InsideString); - - WriteIndent(true); - m_SB.Add('}'); - - VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); - m_Stack.pop_back(); -} - -void VmaJsonWriter::BeginArray(bool singleLine) -{ - VMA_ASSERT(!m_InsideString); - - BeginValue(false); - m_SB.Add('['); - - StackItem item; - item.type = COLLECTION_TYPE_ARRAY; - item.valueCount = 0; - item.singleLineMode = singleLine; - m_Stack.push_back(item); -} - -void VmaJsonWriter::EndArray() -{ - VMA_ASSERT(!m_InsideString); - - WriteIndent(true); - m_SB.Add(']'); - - VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); - m_Stack.pop_back(); -} - -void VmaJsonWriter::WriteString(const char* pStr) -{ - BeginString(pStr); - EndString(); -} - -void VmaJsonWriter::BeginString(const char* pStr) -{ - VMA_ASSERT(!m_InsideString); - - BeginValue(true); - m_SB.Add('"'); - m_InsideString = true; - if(pStr != VMA_NULL && pStr[0] 
!= '\0') - { - ContinueString(pStr); - } -} - -void VmaJsonWriter::ContinueString(const char* pStr) -{ - VMA_ASSERT(m_InsideString); - - const size_t strLen = strlen(pStr); - for(size_t i = 0; i < strLen; ++i) - { - char ch = pStr[i]; - if(ch == '\\') + VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); + if (m_MapCount == 0) { - m_SB.Add("\\\\"); - } - else if(ch == '"') - { - m_SB.Add("\\\""); - } - else if(ch >= 32) - { - m_SB.Add(ch); - } - else switch(ch) - { - case '\b': - m_SB.Add("\\b"); - break; - case '\f': - m_SB.Add("\\f"); - break; - case '\n': - m_SB.Add("\\n"); - break; - case '\r': - m_SB.Add("\\r"); - break; - case '\t': - m_SB.Add("\\t"); - break; - default: - VMA_ASSERT(0 && "Character not currently supported."); - break; + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); } } } -void VmaJsonWriter::ContinueString(uint32_t n) +bool VmaDeviceMemoryBlock::Validate() const { - VMA_ASSERT(m_InsideString); - m_SB.AddNumber(n); + VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && + (m_pMetadata->GetSize() != 0)); + + return m_pMetadata->Validate(); } -void VmaJsonWriter::ContinueString(uint64_t n) +VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) { - VMA_ASSERT(m_InsideString); - m_SB.AddNumber(n); -} - -void VmaJsonWriter::ContinueString_Pointer(const void* ptr) -{ - VMA_ASSERT(m_InsideString); - m_SB.AddPointer(ptr); -} - -void VmaJsonWriter::EndString(const char* pStr) -{ - VMA_ASSERT(m_InsideString); - if(pStr != VMA_NULL && pStr[0] != '\0') + void* pData = VMA_NULL; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) { - ContinueString(pStr); + return res; } - m_SB.Add('"'); - m_InsideString = false; + + res = m_pMetadata->CheckCorruption(pData); + + Unmap(hAllocator, 1); + + return res; } -void VmaJsonWriter::WriteNumber(uint32_t n) +VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData) { - 
VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.AddNumber(n); -} - -void VmaJsonWriter::WriteNumber(uint64_t n) -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.AddNumber(n); -} - -void VmaJsonWriter::WriteBool(bool b) -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.Add(b ? "true" : "false"); -} - -void VmaJsonWriter::WriteNull() -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.Add("null"); -} - -void VmaJsonWriter::BeginValue(bool isString) -{ - if(!m_Stack.empty()) + if (count == 0) { - StackItem& currItem = m_Stack.back(); - if(currItem.type == COLLECTION_TYPE_OBJECT && - currItem.valueCount % 2 == 0) - { - VMA_ASSERT(isString); - } - - if(currItem.type == COLLECTION_TYPE_OBJECT && - currItem.valueCount % 2 != 0) - { - m_SB.Add(": "); - } - else if(currItem.valueCount > 0) - { - m_SB.Add(", "); - WriteIndent(); - } - else - { - WriteIndent(); - } - ++currItem.valueCount; + return VK_SUCCESS; } -} -void VmaJsonWriter::WriteIndent(bool oneLess) -{ - if(!m_Stack.empty() && !m_Stack.back().singleLineMode) + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (oldTotalMapCount != 0) { - m_SB.AddNewLine(); - - size_t count = m_Stack.size(); - if(count > 0 && oneLess) + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount += count; + if (ppData != VMA_NULL) { - --count; - } - for(size_t i = 0; i < count; ++i) - { - m_SB.Add(INDENT); - } - } -} - -#endif // #if VMA_STATS_STRING_ENABLED - -//////////////////////////////////////////////////////////////////////////////// - -void VmaAllocation_T::SetUserData(VmaAllocator hAllocator, void* pUserData) -{ - if(IsUserDataString()) - { - VMA_ASSERT(pUserData == VMA_NULL || pUserData != m_pUserData); - - FreeUserDataString(hAllocator); - - if(pUserData != VMA_NULL) - { - m_pUserData = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), (const 
char*)pUserData); + *ppData = m_pMappedData; } + return VK_SUCCESS; } else { - m_pUserData = pUserData; + VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( + hAllocator->m_hDevice, + m_hMemory, + 0, // offset + VK_WHOLE_SIZE, + 0, // flags + &m_pMappedData); + if (result == VK_SUCCESS) + { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount = count; + if (ppData != VMA_NULL) + { + *ppData = m_pMappedData; + } + } + return result; } } -void VmaAllocation_T::ChangeBlockAllocation( - VmaAllocator hAllocator, - VmaDeviceMemoryBlock* block, - VkDeviceSize offset) +void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) { - VMA_ASSERT(block != VMA_NULL); - VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - - // Move mapping reference counter from old block to new block. - if(block != m_BlockAllocation.m_Block) + if (count == 0) { - uint32_t mapRefCount = m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP; - if(IsPersistentMap()) - ++mapRefCount; - m_BlockAllocation.m_Block->Unmap(hAllocator, mapRefCount); - block->Map(hAllocator, mapRefCount, VMA_NULL); + return; } - m_BlockAllocation.m_Block = block; - m_BlockAllocation.m_Offset = offset; + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if (m_MapCount >= count) + { + m_MapCount -= count; + const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (totalMapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + m_MappingHysteresis.PostUnmap(); + } + else + { + VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); + } } -void VmaAllocation_T::ChangeOffset(VkDeviceSize newOffset) +VkResult VmaDeviceMemoryBlock::WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) { + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && 
VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + VmaWriteMagicValue(pData, allocOffset + allocSize); + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + if (!VmaValidateMagicValue(pData, allocOffset + allocSize)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!"); + } + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::BindBufferMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. 
+ VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); +} + +VkResult VmaDeviceMemoryBlock::BindImageMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); +} +#endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS + +#ifndef _VMA_ALLOCATION_T_FUNCTIONS +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) + : m_Alignment{ 1 }, + m_Size{ 0 }, + m_pUserData{ VMA_NULL }, + m_pName{ VMA_NULL }, + m_MemoryTypeIndex{ 0 }, + m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, + m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, + m_MapCount{ 0 }, + m_Flags{ 0 } +{ + if(mappingAllowed) + m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; +} + +VmaAllocation_T::~VmaAllocation_T() +{ + VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction."); + + // Check if owned string was freed. 
+ VMA_ASSERT(m_pName == VMA_NULL); +} + +void VmaAllocation_T::InitBlockAllocation( + VmaDeviceMemoryBlock* block, + VmaAllocHandle allocHandle, + VkDeviceSize alignment, + VkDeviceSize size, + uint32_t memoryTypeIndex, + VmaSuballocationType suballocationType, + bool mapped) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(block != VMA_NULL); + m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; + m_Alignment = alignment; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + if(mapped) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } + m_SuballocationType = (uint8_t)suballocationType; + m_BlockAllocation.m_Block = block; + m_BlockAllocation.m_AllocHandle = allocHandle; +} + +void VmaAllocation_T::InitDedicatedAllocation( + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceMemory hMemory, + VmaSuballocationType suballocationType, + void* pMappedData, + VkDeviceSize size) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(hMemory != VK_NULL_HANDLE); + m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; + m_Alignment = 0; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + m_SuballocationType = (uint8_t)suballocationType; + if(pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } + m_DedicatedAllocation.m_hParentPool = hParentPool; + m_DedicatedAllocation.m_hMemory = hMemory; + m_DedicatedAllocation.m_pMappedData = pMappedData; + m_DedicatedAllocation.m_Prev = VMA_NULL; + m_DedicatedAllocation.m_Next = VMA_NULL; +} + +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) +{ + VMA_ASSERT(pName == VMA_NULL || pName != m_pName); + + FreeName(hAllocator); + + if (pName != VMA_NULL) + m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); +} + +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) +{ + VMA_ASSERT(allocation != VMA_NULL); VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - m_BlockAllocation.m_Offset = newOffset; + VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); + + if (m_MapCount != 0) + m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); + + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); + std::swap(m_BlockAllocation, allocation->m_BlockAllocation); + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); + +#if VMA_STATS_STRING_ENABLED + std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif + return m_MapCount; +} + +VmaAllocHandle VmaAllocation_T::GetAllocHandle() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_AllocHandle; + case ALLOCATION_TYPE_DEDICATED: + return VK_NULL_HANDLE; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } } VkDeviceSize VmaAllocation_T::GetOffset() const { - switch(m_Type) + switch (m_Type) { case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_Offset; + return m_BlockAllocation.m_Block->m_pMetadata->GetAllocationOffset(m_BlockAllocation.m_AllocHandle); case ALLOCATION_TYPE_DEDICATED: return 0; 
default: @@ -8662,9 +10816,23 @@ VkDeviceSize VmaAllocation_T::GetOffset() const } } +VmaPool VmaAllocation_T::GetParentPool() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->GetParentPool(); + case ALLOCATION_TYPE_DEDICATED: + return m_DedicatedAllocation.m_hParentPool; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + VkDeviceMemory VmaAllocation_T::GetMemory() const { - switch(m_Type) + switch (m_Type) { case ALLOCATION_TYPE_BLOCK: return m_BlockAllocation.m_Block->GetDeviceMemory(); @@ -8678,14 +10846,14 @@ VkDeviceMemory VmaAllocation_T::GetMemory() const void* VmaAllocation_T::GetMappedData() const { - switch(m_Type) + switch (m_Type) { case ALLOCATION_TYPE_BLOCK: - if(m_MapCount != 0) + if (m_MapCount != 0 || IsPersistentMap()) { void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); VMA_ASSERT(pBlockData != VMA_NULL); - return (char*)pBlockData + m_BlockAllocation.m_Offset; + return (char*)pBlockData + GetOffset(); } else { @@ -8693,7 +10861,7 @@ void* VmaAllocation_T::GetMappedData() const } break; case ALLOCATION_TYPE_DEDICATED: - VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0)); + VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0 || IsPersistentMap())); return m_DedicatedAllocation.m_pMappedData; default: VMA_ASSERT(0); @@ -8701,114 +10869,12 @@ void* VmaAllocation_T::GetMappedData() const } } -bool VmaAllocation_T::CanBecomeLost() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_CanBecomeLost; - case ALLOCATION_TYPE_DEDICATED: - return false; - default: - VMA_ASSERT(0); - return false; - } -} - -bool VmaAllocation_T::MakeLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) -{ - VMA_ASSERT(CanBecomeLost()); - - /* - Warning: This is a carefully designed algorithm. 
- Do not modify unless you really know what you're doing :) - */ - uint32_t localLastUseFrameIndex = GetLastUseFrameIndex(); - for(;;) - { - if(localLastUseFrameIndex == VMA_FRAME_INDEX_LOST) - { - VMA_ASSERT(0); - return false; - } - else if(localLastUseFrameIndex + frameInUseCount >= currentFrameIndex) - { - return false; - } - else // Last use time earlier than current time. - { - if(CompareExchangeLastUseFrameIndex(localLastUseFrameIndex, VMA_FRAME_INDEX_LOST)) - { - // Setting hAllocation.LastUseFrameIndex atomic to VMA_FRAME_INDEX_LOST is enough to mark it as LOST. - // Calling code just needs to unregister this allocation in owning VmaDeviceMemoryBlock. - return true; - } - } - } -} - -#if VMA_STATS_STRING_ENABLED - -// Correspond to values of enum VmaSuballocationType. -static const char* VMA_SUBALLOCATION_TYPE_NAMES[] = { - "FREE", - "UNKNOWN", - "BUFFER", - "IMAGE_UNKNOWN", - "IMAGE_LINEAR", - "IMAGE_OPTIMAL", -}; - -void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const -{ - json.WriteString("Type"); - json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]); - - json.WriteString("Size"); - json.WriteNumber(m_Size); - - if(m_pUserData != VMA_NULL) - { - json.WriteString("UserData"); - if(IsUserDataString()) - { - json.WriteString((const char*)m_pUserData); - } - else - { - json.BeginString(); - json.ContinueString_Pointer(m_pUserData); - json.EndString(); - } - } - - json.WriteString("CreationFrameIndex"); - json.WriteNumber(m_CreationFrameIndex); - - json.WriteString("LastUseFrameIndex"); - json.WriteNumber(GetLastUseFrameIndex()); - - if(m_BufferImageUsage != 0) - { - json.WriteString("Usage"); - json.WriteNumber(m_BufferImageUsage); - } -} - -#endif - -void VmaAllocation_T::FreeUserDataString(VmaAllocator hAllocator) -{ - VMA_ASSERT(IsUserDataString()); - VmaFreeString(hAllocator->GetAllocationCallbacks(), (char*)m_pUserData); - m_pUserData = VMA_NULL; -} - void VmaAllocation_T::BlockAllocMap() { VMA_ASSERT(GetType() == 
ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) + if (m_MapCount < 0xFF) { ++m_MapCount; } @@ -8822,7 +10888,7 @@ void VmaAllocation_T::BlockAllocUnmap() { VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) + if (m_MapCount > 0) { --m_MapCount; } @@ -8835,10 +10901,11 @@ void VmaAllocation_T::BlockAllocUnmap() VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) { VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - if(m_MapCount != 0) + if (m_MapCount != 0 || IsPersistentMap()) { - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) + if (m_MapCount < 0xFF) { VMA_ASSERT(m_DedicatedAllocation.m_pMappedData != VMA_NULL); *ppData = m_DedicatedAllocation.m_pMappedData; @@ -8860,7 +10927,7 @@ VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppDa VK_WHOLE_SIZE, 0, // flags ppData); - if(result == VK_SUCCESS) + if (result == VK_SUCCESS) { m_DedicatedAllocation.m_pMappedData = *ppData; m_MapCount = 1; @@ -8873,10 +10940,10 @@ void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) { VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) + if (m_MapCount > 0) { --m_MapCount; - if(m_MapCount == 0) + if (m_MapCount == 0 && !IsPersistentMap()) { m_DedicatedAllocation.m_pMappedData = VMA_NULL; (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( @@ -8891,3750 +10958,42 @@ void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) } #if VMA_STATS_STRING_ENABLED - -static void VmaPrintStatInfo(VmaJsonWriter& json, const 
VmaStatInfo& stat) +void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const { - json.BeginObject(); - - json.WriteString("Blocks"); - json.WriteNumber(stat.blockCount); - - json.WriteString("Allocations"); - json.WriteNumber(stat.allocationCount); - - json.WriteString("UnusedRanges"); - json.WriteNumber(stat.unusedRangeCount); - - json.WriteString("UsedBytes"); - json.WriteNumber(stat.usedBytes); - - json.WriteString("UnusedBytes"); - json.WriteNumber(stat.unusedBytes); - - if(stat.allocationCount > 1) - { - json.WriteString("AllocationSize"); - json.BeginObject(true); - json.WriteString("Min"); - json.WriteNumber(stat.allocationSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.allocationSizeAvg); - json.WriteString("Max"); - json.WriteNumber(stat.allocationSizeMax); - json.EndObject(); - } - - if(stat.unusedRangeCount > 1) - { - json.WriteString("UnusedRangeSize"); - json.BeginObject(true); - json.WriteString("Min"); - json.WriteNumber(stat.unusedRangeSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.unusedRangeSizeAvg); - json.WriteString("Max"); - json.WriteNumber(stat.unusedRangeSizeMax); - json.EndObject(); - } - - json.EndObject(); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -struct VmaSuballocationItemSizeLess -{ - bool operator()( - const VmaSuballocationList::iterator lhs, - const VmaSuballocationList::iterator rhs) const - { - return lhs->size < rhs->size; - } - bool operator()( - const VmaSuballocationList::iterator lhs, - VkDeviceSize rhsSize) const - { - return lhs->size < rhsSize; - } -}; - - -//////////////////////////////////////////////////////////////////////////////// -// class VmaBlockMetadata - -VmaBlockMetadata::VmaBlockMetadata(VmaAllocator hAllocator) : - m_Size(0), - m_pAllocationCallbacks(hAllocator->GetAllocationCallbacks()) -{ -} - -#if VMA_STATS_STRING_ENABLED - -void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, - VkDeviceSize unusedBytes, - size_t allocationCount, - size_t 
unusedRangeCount) const -{ - json.BeginObject(); - - json.WriteString("TotalBytes"); - json.WriteNumber(GetSize()); - - json.WriteString("UnusedBytes"); - json.WriteNumber(unusedBytes); - - json.WriteString("Allocations"); - json.WriteNumber((uint64_t)allocationCount); - - json.WriteString("UnusedRanges"); - json.WriteNumber((uint64_t)unusedRangeCount); - - json.WriteString("Suballocations"); - json.BeginArray(); -} - -void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, - VkDeviceSize offset, - VmaAllocation hAllocation) const -{ - json.BeginObject(true); - - json.WriteString("Offset"); - json.WriteNumber(offset); - - hAllocation->PrintParameters(json); - - json.EndObject(); -} - -void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, - VkDeviceSize offset, - VkDeviceSize size) const -{ - json.BeginObject(true); - - json.WriteString("Offset"); - json.WriteNumber(offset); - json.WriteString("Type"); - json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE]); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]); json.WriteString("Size"); - json.WriteNumber(size); + json.WriteNumber(m_Size); + json.WriteString("Usage"); + json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t. 
- json.EndObject(); -} - -void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const -{ - json.EndArray(); - json.EndObject(); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -//////////////////////////////////////////////////////////////////////////////// -// class VmaBlockMetadata_Generic - -VmaBlockMetadata_Generic::VmaBlockMetadata_Generic(VmaAllocator hAllocator) : - VmaBlockMetadata(hAllocator), - m_FreeCount(0), - m_SumFreeSize(0), - m_Suballocations(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_FreeSuballocationsBySize(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) -{ -} - -VmaBlockMetadata_Generic::~VmaBlockMetadata_Generic() -{ -} - -void VmaBlockMetadata_Generic::Init(VkDeviceSize size) -{ - VmaBlockMetadata::Init(size); - - m_FreeCount = 1; - m_SumFreeSize = size; - - VmaSuballocation suballoc = {}; - suballoc.offset = 0; - suballoc.size = size; - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.hAllocation = VK_NULL_HANDLE; - - VMA_ASSERT(size > VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER); - m_Suballocations.push_back(suballoc); - VmaSuballocationList::iterator suballocItem = m_Suballocations.end(); - --suballocItem; - m_FreeSuballocationsBySize.push_back(suballocItem); -} - -bool VmaBlockMetadata_Generic::Validate() const -{ - VMA_VALIDATE(!m_Suballocations.empty()); - - // Expected offset of new suballocation as calculated from previous ones. - VkDeviceSize calculatedOffset = 0; - // Expected number of free suballocations as calculated from traversing their list. - uint32_t calculatedFreeCount = 0; - // Expected sum size of free suballocations as calculated from traversing their list. - VkDeviceSize calculatedSumFreeSize = 0; - // Expected number of free suballocations that should be registered in - // m_FreeSuballocationsBySize calculated from traversing their list. - size_t freeSuballocationsToRegister = 0; - // True if previous visited suballocation was free. 
- bool prevFree = false; - - for(VmaSuballocationList::const_iterator suballocItem = m_Suballocations.cbegin(); - suballocItem != m_Suballocations.cend(); - ++suballocItem) - { - const VmaSuballocation& subAlloc = *suballocItem; - - // Actual offset of this suballocation doesn't match expected one. - VMA_VALIDATE(subAlloc.offset == calculatedOffset); - - const bool currFree = (subAlloc.type == VMA_SUBALLOCATION_TYPE_FREE); - // Two adjacent free suballocations are invalid. They should be merged. - VMA_VALIDATE(!prevFree || !currFree); - - VMA_VALIDATE(currFree == (subAlloc.hAllocation == VK_NULL_HANDLE)); - - if(currFree) - { - calculatedSumFreeSize += subAlloc.size; - ++calculatedFreeCount; - if(subAlloc.size >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - ++freeSuballocationsToRegister; - } - - // Margin required between allocations - every free space must be at least that large. - VMA_VALIDATE(subAlloc.size >= VMA_DEBUG_MARGIN); - } - else - { - VMA_VALIDATE(subAlloc.hAllocation->GetOffset() == subAlloc.offset); - VMA_VALIDATE(subAlloc.hAllocation->GetSize() == subAlloc.size); - - // Margin required between allocations - previous allocation must be free. - VMA_VALIDATE(VMA_DEBUG_MARGIN == 0 || prevFree); - } - - calculatedOffset += subAlloc.size; - prevFree = currFree; - } - - // Number of free suballocations registered in m_FreeSuballocationsBySize doesn't - // match expected one. - VMA_VALIDATE(m_FreeSuballocationsBySize.size() == freeSuballocationsToRegister); - - VkDeviceSize lastSize = 0; - for(size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i) - { - VmaSuballocationList::iterator suballocItem = m_FreeSuballocationsBySize[i]; - - // Only free suballocations can be registered in m_FreeSuballocationsBySize. - VMA_VALIDATE(suballocItem->type == VMA_SUBALLOCATION_TYPE_FREE); - // They must be sorted by size ascending. 
- VMA_VALIDATE(suballocItem->size >= lastSize); - - lastSize = suballocItem->size; - } - - // Check if totals match calculacted values. - VMA_VALIDATE(ValidateFreeSuballocationList()); - VMA_VALIDATE(calculatedOffset == GetSize()); - VMA_VALIDATE(calculatedSumFreeSize == m_SumFreeSize); - VMA_VALIDATE(calculatedFreeCount == m_FreeCount); - - return true; -} - -VkDeviceSize VmaBlockMetadata_Generic::GetUnusedRangeSizeMax() const -{ - if(!m_FreeSuballocationsBySize.empty()) - { - return m_FreeSuballocationsBySize.back()->size; - } - else - { - return 0; - } -} - -bool VmaBlockMetadata_Generic::IsEmpty() const -{ - return (m_Suballocations.size() == 1) && (m_FreeCount == 1); -} - -void VmaBlockMetadata_Generic::CalcAllocationStatInfo(VmaStatInfo& outInfo) const -{ - outInfo.blockCount = 1; - - const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - outInfo.allocationCount = rangeCount - m_FreeCount; - outInfo.unusedRangeCount = m_FreeCount; - - outInfo.unusedBytes = m_SumFreeSize; - outInfo.usedBytes = GetSize() - outInfo.unusedBytes; - - outInfo.allocationSizeMin = UINT64_MAX; - outInfo.allocationSizeMax = 0; - outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMax = 0; - - for(VmaSuballocationList::const_iterator suballocItem = m_Suballocations.cbegin(); - suballocItem != m_Suballocations.cend(); - ++suballocItem) - { - const VmaSuballocation& suballoc = *suballocItem; - if(suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); - outInfo.allocationSizeMax = VMA_MAX(outInfo.allocationSizeMax, suballoc.size); - } - else - { - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, suballoc.size); - outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, suballoc.size); - } - } -} - -void VmaBlockMetadata_Generic::AddPoolStats(VmaPoolStats& inoutStats) const -{ - const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - - inoutStats.size += 
GetSize(); - inoutStats.unusedSize += m_SumFreeSize; - inoutStats.allocationCount += rangeCount - m_FreeCount; - inoutStats.unusedRangeCount += m_FreeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, GetUnusedRangeSizeMax()); -} - -#if VMA_STATS_STRING_ENABLED - -void VmaBlockMetadata_Generic::PrintDetailedMap(class VmaJsonWriter& json) const -{ - PrintDetailedMap_Begin(json, - m_SumFreeSize, // unusedBytes - m_Suballocations.size() - (size_t)m_FreeCount, // allocationCount - m_FreeCount); // unusedRangeCount - - size_t i = 0; - for(VmaSuballocationList::const_iterator suballocItem = m_Suballocations.cbegin(); - suballocItem != m_Suballocations.cend(); - ++suballocItem, ++i) - { - if(suballocItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - PrintDetailedMap_UnusedRange(json, suballocItem->offset, suballocItem->size); - } - else - { - PrintDetailedMap_Allocation(json, suballocItem->offset, suballocItem->hAllocation); - } - } - - PrintDetailedMap_End(json); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -bool VmaBlockMetadata_Generic::CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - bool canMakeOtherLost, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(allocSize > 0); - VMA_ASSERT(!upperAddress); - VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(pAllocationRequest != VMA_NULL); - VMA_HEAVY_ASSERT(Validate()); - - pAllocationRequest->type = VmaAllocationRequestType::Normal; - - // There is not enough total free space in this block to fullfill the request: Early return. - if(canMakeOtherLost == false && - m_SumFreeSize < allocSize + 2 * VMA_DEBUG_MARGIN) - { - return false; - } - - // New algorithm, efficiently searching freeSuballocationsBySize. 
- const size_t freeSuballocCount = m_FreeSuballocationsBySize.size(); - if(freeSuballocCount > 0) - { - if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT) - { - // Find first free suballocation with size not less than allocSize + 2 * VMA_DEBUG_MARGIN. - VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + freeSuballocCount, - allocSize + 2 * VMA_DEBUG_MARGIN, - VmaSuballocationItemSizeLess()); - size_t index = it - m_FreeSuballocationsBySize.data(); - for(; index < freeSuballocCount; ++index) - { - if(CheckAllocation( - currentFrameIndex, - frameInUseCount, - bufferImageGranularity, - allocSize, - allocAlignment, - allocType, - m_FreeSuballocationsBySize[index], - false, // canMakeOtherLost - &pAllocationRequest->offset, - &pAllocationRequest->itemsToMakeLostCount, - &pAllocationRequest->sumFreeSize, - &pAllocationRequest->sumItemSize)) - { - pAllocationRequest->item = m_FreeSuballocationsBySize[index]; - return true; - } - } - } - else if(strategy == VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET) - { - for(VmaSuballocationList::iterator it = m_Suballocations.begin(); - it != m_Suballocations.end(); - ++it) - { - if(it->type == VMA_SUBALLOCATION_TYPE_FREE && CheckAllocation( - currentFrameIndex, - frameInUseCount, - bufferImageGranularity, - allocSize, - allocAlignment, - allocType, - it, - false, // canMakeOtherLost - &pAllocationRequest->offset, - &pAllocationRequest->itemsToMakeLostCount, - &pAllocationRequest->sumFreeSize, - &pAllocationRequest->sumItemSize)) - { - pAllocationRequest->item = it; - return true; - } - } - } - else // WORST_FIT, FIRST_FIT - { - // Search staring from biggest suballocations. 
- for(size_t index = freeSuballocCount; index--; ) - { - if(CheckAllocation( - currentFrameIndex, - frameInUseCount, - bufferImageGranularity, - allocSize, - allocAlignment, - allocType, - m_FreeSuballocationsBySize[index], - false, // canMakeOtherLost - &pAllocationRequest->offset, - &pAllocationRequest->itemsToMakeLostCount, - &pAllocationRequest->sumFreeSize, - &pAllocationRequest->sumItemSize)) - { - pAllocationRequest->item = m_FreeSuballocationsBySize[index]; - return true; - } - } - } - } - - if(canMakeOtherLost) - { - // Brute-force algorithm. TODO: Come up with something better. - - bool found = false; - VmaAllocationRequest tmpAllocRequest = {}; - tmpAllocRequest.type = VmaAllocationRequestType::Normal; - for(VmaSuballocationList::iterator suballocIt = m_Suballocations.begin(); - suballocIt != m_Suballocations.end(); - ++suballocIt) - { - if(suballocIt->type == VMA_SUBALLOCATION_TYPE_FREE || - suballocIt->hAllocation->CanBecomeLost()) - { - if(CheckAllocation( - currentFrameIndex, - frameInUseCount, - bufferImageGranularity, - allocSize, - allocAlignment, - allocType, - suballocIt, - canMakeOtherLost, - &tmpAllocRequest.offset, - &tmpAllocRequest.itemsToMakeLostCount, - &tmpAllocRequest.sumFreeSize, - &tmpAllocRequest.sumItemSize)) - { - if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT) - { - *pAllocationRequest = tmpAllocRequest; - pAllocationRequest->item = suballocIt; - break; - } - if(!found || tmpAllocRequest.CalcCost() < pAllocationRequest->CalcCost()) - { - *pAllocationRequest = tmpAllocRequest; - pAllocationRequest->item = suballocIt; - found = true; - } - } - } - } - - return found; - } - - return false; -} - -bool VmaBlockMetadata_Generic::MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(pAllocationRequest && pAllocationRequest->type == VmaAllocationRequestType::Normal); - - while(pAllocationRequest->itemsToMakeLostCount > 0) - { - 
if(pAllocationRequest->item->type == VMA_SUBALLOCATION_TYPE_FREE) - { - ++pAllocationRequest->item; - } - VMA_ASSERT(pAllocationRequest->item != m_Suballocations.end()); - VMA_ASSERT(pAllocationRequest->item->hAllocation != VK_NULL_HANDLE); - VMA_ASSERT(pAllocationRequest->item->hAllocation->CanBecomeLost()); - if(pAllocationRequest->item->hAllocation->MakeLost(currentFrameIndex, frameInUseCount)) - { - pAllocationRequest->item = FreeSuballocation(pAllocationRequest->item); - --pAllocationRequest->itemsToMakeLostCount; - } - else - { - return false; - } - } - - VMA_HEAVY_ASSERT(Validate()); - VMA_ASSERT(pAllocationRequest->item != m_Suballocations.end()); - VMA_ASSERT(pAllocationRequest->item->type == VMA_SUBALLOCATION_TYPE_FREE); - - return true; -} - -uint32_t VmaBlockMetadata_Generic::MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) -{ - uint32_t lostAllocationCount = 0; - for(VmaSuballocationList::iterator it = m_Suballocations.begin(); - it != m_Suballocations.end(); - ++it) - { - if(it->type != VMA_SUBALLOCATION_TYPE_FREE && - it->hAllocation->CanBecomeLost() && - it->hAllocation->MakeLost(currentFrameIndex, frameInUseCount)) - { - it = FreeSuballocation(it); - ++lostAllocationCount; - } - } - return lostAllocationCount; -} - -VkResult VmaBlockMetadata_Generic::CheckCorruption(const void* pBlockData) -{ - for(VmaSuballocationList::iterator it = m_Suballocations.begin(); - it != m_Suballocations.end(); - ++it) - { - if(it->type != VMA_SUBALLOCATION_TYPE_FREE) - { - if(!VmaValidateMagicValue(pBlockData, it->offset - VMA_DEBUG_MARGIN)) - { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED BEFORE VALIDATED ALLOCATION!"); - return VK_ERROR_VALIDATION_FAILED_EXT; - } - if(!VmaValidateMagicValue(pBlockData, it->offset + it->size)) - { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_VALIDATION_FAILED_EXT; - } - } - } - - return VK_SUCCESS; -} - -void VmaBlockMetadata_Generic::Alloc( - const 
VmaAllocationRequest& request, - VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation) -{ - VMA_ASSERT(request.type == VmaAllocationRequestType::Normal); - VMA_ASSERT(request.item != m_Suballocations.end()); - VmaSuballocation& suballoc = *request.item; - // Given suballocation is a free block. - VMA_ASSERT(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - // Given offset is inside this suballocation. - VMA_ASSERT(request.offset >= suballoc.offset); - const VkDeviceSize paddingBegin = request.offset - suballoc.offset; - VMA_ASSERT(suballoc.size >= paddingBegin + allocSize); - const VkDeviceSize paddingEnd = suballoc.size - paddingBegin - allocSize; - - // Unregister this free suballocation from m_FreeSuballocationsBySize and update - // it to become used. - UnregisterFreeSuballocation(request.item); - - suballoc.offset = request.offset; - suballoc.size = allocSize; - suballoc.type = type; - suballoc.hAllocation = hAllocation; - - // If there are any free bytes remaining at the end, insert new free suballocation after current one. - if(paddingEnd) - { - VmaSuballocation paddingSuballoc = {}; - paddingSuballoc.offset = request.offset + allocSize; - paddingSuballoc.size = paddingEnd; - paddingSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - VmaSuballocationList::iterator next = request.item; - ++next; - const VmaSuballocationList::iterator paddingEndItem = - m_Suballocations.insert(next, paddingSuballoc); - RegisterFreeSuballocation(paddingEndItem); - } - - // If there are any free bytes remaining at the beginning, insert new free suballocation before current one. 
- if(paddingBegin) - { - VmaSuballocation paddingSuballoc = {}; - paddingSuballoc.offset = request.offset - paddingBegin; - paddingSuballoc.size = paddingBegin; - paddingSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - const VmaSuballocationList::iterator paddingBeginItem = - m_Suballocations.insert(request.item, paddingSuballoc); - RegisterFreeSuballocation(paddingBeginItem); - } - - // Update totals. - m_FreeCount = m_FreeCount - 1; - if(paddingBegin > 0) - { - ++m_FreeCount; - } - if(paddingEnd > 0) - { - ++m_FreeCount; - } - m_SumFreeSize -= allocSize; -} - -void VmaBlockMetadata_Generic::Free(const VmaAllocation allocation) -{ - for(VmaSuballocationList::iterator suballocItem = m_Suballocations.begin(); - suballocItem != m_Suballocations.end(); - ++suballocItem) - { - VmaSuballocation& suballoc = *suballocItem; - if(suballoc.hAllocation == allocation) - { - FreeSuballocation(suballocItem); - VMA_HEAVY_ASSERT(Validate()); - return; - } - } - VMA_ASSERT(0 && "Not found!"); -} - -void VmaBlockMetadata_Generic::FreeAtOffset(VkDeviceSize offset) -{ - for(VmaSuballocationList::iterator suballocItem = m_Suballocations.begin(); - suballocItem != m_Suballocations.end(); - ++suballocItem) - { - VmaSuballocation& suballoc = *suballocItem; - if(suballoc.offset == offset) - { - FreeSuballocation(suballocItem); - return; - } - } - VMA_ASSERT(0 && "Not found!"); -} - -bool VmaBlockMetadata_Generic::ValidateFreeSuballocationList() const -{ - VkDeviceSize lastSize = 0; - for(size_t i = 0, count = m_FreeSuballocationsBySize.size(); i < count; ++i) - { - const VmaSuballocationList::iterator it = m_FreeSuballocationsBySize[i]; - - VMA_VALIDATE(it->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_VALIDATE(it->size >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER); - VMA_VALIDATE(it->size >= lastSize); - lastSize = it->size; - } - return true; -} - -bool VmaBlockMetadata_Generic::CheckAllocation( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize 
bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - VmaSuballocationList::const_iterator suballocItem, - bool canMakeOtherLost, - VkDeviceSize* pOffset, - size_t* itemsToMakeLostCount, - VkDeviceSize* pSumFreeSize, - VkDeviceSize* pSumItemSize) const -{ - VMA_ASSERT(allocSize > 0); - VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(suballocItem != m_Suballocations.cend()); - VMA_ASSERT(pOffset != VMA_NULL); - - *itemsToMakeLostCount = 0; - *pSumFreeSize = 0; - *pSumItemSize = 0; - - if(canMakeOtherLost) - { - if(suballocItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - *pSumFreeSize = suballocItem->size; - } - else - { - if(suballocItem->hAllocation->CanBecomeLost() && - suballocItem->hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++*itemsToMakeLostCount; - *pSumItemSize = suballocItem->size; - } - else - { - return false; - } - } - - // Remaining size is too small for this request: Early return. - if(GetSize() - suballocItem->offset < allocSize) - { - return false; - } - - // Start from offset equal to beginning of this suballocation. - *pOffset = suballocItem->offset; - - // Apply VMA_DEBUG_MARGIN at the beginning. - if(VMA_DEBUG_MARGIN > 0) - { - *pOffset += VMA_DEBUG_MARGIN; - } - - // Apply alignment. - *pOffset = VmaAlignUp(*pOffset, allocAlignment); - - // Check previous suballocations for BufferImageGranularity conflicts. - // Make bigger alignment if necessary. 
- if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment) - { - bool bufferImageGranularityConflict = false; - VmaSuballocationList::const_iterator prevSuballocItem = suballocItem; - while(prevSuballocItem != m_Suballocations.cbegin()) - { - --prevSuballocItem; - const VmaSuballocation& prevSuballoc = *prevSuballocItem; - if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, *pOffset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. - break; - } - if(bufferImageGranularityConflict) - { - *pOffset = VmaAlignUp(*pOffset, bufferImageGranularity); - } - } - - // Now that we have final *pOffset, check if we are past suballocItem. - // If yes, return false - this function should be called for another suballocItem as starting point. - if(*pOffset >= suballocItem->offset + suballocItem->size) - { - return false; - } - - // Calculate padding at the beginning based on current offset. - const VkDeviceSize paddingBegin = *pOffset - suballocItem->offset; - - // Calculate required margin at the end. - const VkDeviceSize requiredEndMargin = VMA_DEBUG_MARGIN; - - const VkDeviceSize totalSize = paddingBegin + allocSize + requiredEndMargin; - // Another early return check. - if(suballocItem->offset + totalSize > GetSize()) - { - return false; - } - - // Advance lastSuballocItem until desired size is reached. - // Update itemsToMakeLostCount. 
- VmaSuballocationList::const_iterator lastSuballocItem = suballocItem; - if(totalSize > suballocItem->size) - { - VkDeviceSize remainingSize = totalSize - suballocItem->size; - while(remainingSize > 0) - { - ++lastSuballocItem; - if(lastSuballocItem == m_Suballocations.cend()) - { - return false; - } - if(lastSuballocItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - *pSumFreeSize += lastSuballocItem->size; - } - else - { - VMA_ASSERT(lastSuballocItem->hAllocation != VK_NULL_HANDLE); - if(lastSuballocItem->hAllocation->CanBecomeLost() && - lastSuballocItem->hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++*itemsToMakeLostCount; - *pSumItemSize += lastSuballocItem->size; - } - else - { - return false; - } - } - remainingSize = (lastSuballocItem->size < remainingSize) ? - remainingSize - lastSuballocItem->size : 0; - } - } - - // Check next suballocations for BufferImageGranularity conflicts. - // If conflict exists, we must mark more allocations lost or fail. - if(allocSize % bufferImageGranularity || *pOffset % bufferImageGranularity) - { - VmaSuballocationList::const_iterator nextSuballocItem = lastSuballocItem; - ++nextSuballocItem; - while(nextSuballocItem != m_Suballocations.cend()) - { - const VmaSuballocation& nextSuballoc = *nextSuballocItem; - if(VmaBlocksOnSamePage(*pOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) - { - VMA_ASSERT(nextSuballoc.hAllocation != VK_NULL_HANDLE); - if(nextSuballoc.hAllocation->CanBecomeLost() && - nextSuballoc.hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++*itemsToMakeLostCount; - } - else - { - return false; - } - } - } - else - { - // Already on next page. 
- break; - } - ++nextSuballocItem; - } - } - } - else - { - const VmaSuballocation& suballoc = *suballocItem; - VMA_ASSERT(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - - *pSumFreeSize = suballoc.size; - - // Size of this suballocation is too small for this request: Early return. - if(suballoc.size < allocSize) - { - return false; - } - - // Start from offset equal to beginning of this suballocation. - *pOffset = suballoc.offset; - - // Apply VMA_DEBUG_MARGIN at the beginning. - if(VMA_DEBUG_MARGIN > 0) - { - *pOffset += VMA_DEBUG_MARGIN; - } - - // Apply alignment. - *pOffset = VmaAlignUp(*pOffset, allocAlignment); - - // Check previous suballocations for BufferImageGranularity conflicts. - // Make bigger alignment if necessary. - if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment) - { - bool bufferImageGranularityConflict = false; - VmaSuballocationList::const_iterator prevSuballocItem = suballocItem; - while(prevSuballocItem != m_Suballocations.cbegin()) - { - --prevSuballocItem; - const VmaSuballocation& prevSuballoc = *prevSuballocItem; - if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, *pOffset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. - break; - } - if(bufferImageGranularityConflict) - { - *pOffset = VmaAlignUp(*pOffset, bufferImageGranularity); - } - } - - // Calculate padding at the beginning based on current offset. - const VkDeviceSize paddingBegin = *pOffset - suballoc.offset; - - // Calculate required margin at the end. - const VkDeviceSize requiredEndMargin = VMA_DEBUG_MARGIN; - - // Fail if requested size plus margin before and after is bigger than size of this suballocation. - if(paddingBegin + allocSize + requiredEndMargin > suballoc.size) - { - return false; - } - - // Check next suballocations for BufferImageGranularity conflicts. 
- // If conflict exists, allocation cannot be made here. - if(allocSize % bufferImageGranularity || *pOffset % bufferImageGranularity) - { - VmaSuballocationList::const_iterator nextSuballocItem = suballocItem; - ++nextSuballocItem; - while(nextSuballocItem != m_Suballocations.cend()) - { - const VmaSuballocation& nextSuballoc = *nextSuballocItem; - if(VmaBlocksOnSamePage(*pOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) - { - return false; - } - } - else - { - // Already on next page. - break; - } - ++nextSuballocItem; - } - } - } - - // All tests passed: Success. pOffset is already filled. - return true; -} - -void VmaBlockMetadata_Generic::MergeFreeWithNext(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item != m_Suballocations.end()); - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - - VmaSuballocationList::iterator nextItem = item; - ++nextItem; - VMA_ASSERT(nextItem != m_Suballocations.end()); - VMA_ASSERT(nextItem->type == VMA_SUBALLOCATION_TYPE_FREE); - - item->size += nextItem->size; - --m_FreeCount; - m_Suballocations.erase(nextItem); -} - -VmaSuballocationList::iterator VmaBlockMetadata_Generic::FreeSuballocation(VmaSuballocationList::iterator suballocItem) -{ - // Change this suballocation to be marked as free. - VmaSuballocation& suballoc = *suballocItem; - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.hAllocation = VK_NULL_HANDLE; - - // Update totals. - ++m_FreeCount; - m_SumFreeSize += suballoc.size; - - // Merge with previous and/or next suballocation if it's also free. 
- bool mergeWithNext = false; - bool mergeWithPrev = false; - - VmaSuballocationList::iterator nextItem = suballocItem; - ++nextItem; - if((nextItem != m_Suballocations.end()) && (nextItem->type == VMA_SUBALLOCATION_TYPE_FREE)) - { - mergeWithNext = true; - } - - VmaSuballocationList::iterator prevItem = suballocItem; - if(suballocItem != m_Suballocations.begin()) - { - --prevItem; - if(prevItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - mergeWithPrev = true; - } - } - - if(mergeWithNext) - { - UnregisterFreeSuballocation(nextItem); - MergeFreeWithNext(suballocItem); - } - - if(mergeWithPrev) - { - UnregisterFreeSuballocation(prevItem); - MergeFreeWithNext(prevItem); - RegisterFreeSuballocation(prevItem); - return prevItem; - } - else - { - RegisterFreeSuballocation(suballocItem); - return suballocItem; - } -} - -void VmaBlockMetadata_Generic::RegisterFreeSuballocation(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. - VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - if(item->size >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - if(m_FreeSuballocationsBySize.empty()) - { - m_FreeSuballocationsBySize.push_back(item); - } - else - { - VmaVectorInsertSorted(m_FreeSuballocationsBySize, item); - } - } - - //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} - - -void VmaBlockMetadata_Generic::UnregisterFreeSuballocation(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. 
- VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - if(item->size >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + m_FreeSuballocationsBySize.size(), - item, - VmaSuballocationItemSizeLess()); - for(size_t index = it - m_FreeSuballocationsBySize.data(); - index < m_FreeSuballocationsBySize.size(); - ++index) - { - if(m_FreeSuballocationsBySize[index] == item) - { - VmaVectorRemove(m_FreeSuballocationsBySize, index); - return; - } - VMA_ASSERT((m_FreeSuballocationsBySize[index]->size == item->size) && "Not found."); - } - VMA_ASSERT(0 && "Not found."); - } - - //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} - -bool VmaBlockMetadata_Generic::IsBufferImageGranularityConflictPossible( - VkDeviceSize bufferImageGranularity, - VmaSuballocationType& inOutPrevSuballocType) const -{ - if(bufferImageGranularity == 1 || IsEmpty()) - { - return false; - } - - VkDeviceSize minAlignment = VK_WHOLE_SIZE; - bool typeConflictFound = false; - for(VmaSuballocationList::const_iterator it = m_Suballocations.cbegin(); - it != m_Suballocations.cend(); - ++it) - { - const VmaSuballocationType suballocType = it->type; - if(suballocType != VMA_SUBALLOCATION_TYPE_FREE) - { - minAlignment = VMA_MIN(minAlignment, it->hAllocation->GetAlignment()); - if(VmaIsBufferImageGranularityConflict(inOutPrevSuballocType, suballocType)) - { - typeConflictFound = true; - } - inOutPrevSuballocType = suballocType; - } - } - - return typeConflictFound || minAlignment >= bufferImageGranularity; -} - -//////////////////////////////////////////////////////////////////////////////// -// class VmaBlockMetadata_Linear - -VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(VmaAllocator hAllocator) : - VmaBlockMetadata(hAllocator), - m_SumFreeSize(0), - m_Suballocations0(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - 
m_Suballocations1(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_1stVectorIndex(0), - m_2ndVectorMode(SECOND_VECTOR_EMPTY), - m_1stNullItemsBeginCount(0), - m_1stNullItemsMiddleCount(0), - m_2ndNullItemsCount(0) -{ -} - -VmaBlockMetadata_Linear::~VmaBlockMetadata_Linear() -{ -} - -void VmaBlockMetadata_Linear::Init(VkDeviceSize size) -{ - VmaBlockMetadata::Init(size); - m_SumFreeSize = size; -} - -bool VmaBlockMetadata_Linear::Validate() const -{ - const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - - VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY)); - VMA_VALIDATE(!suballocations1st.empty() || - suballocations2nd.empty() || - m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER); - - if(!suballocations1st.empty()) - { - // Null item at the beginning should be accounted into m_1stNullItemsBeginCount. - VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].hAllocation != VK_NULL_HANDLE); - // Null item at the end should be just pop_back(). - VMA_VALIDATE(suballocations1st.back().hAllocation != VK_NULL_HANDLE); - } - if(!suballocations2nd.empty()) - { - // Null item at the end should be just pop_back(). 
- VMA_VALIDATE(suballocations2nd.back().hAllocation != VK_NULL_HANDLE); - } - - VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); - VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); - - VkDeviceSize sumUsedSize = 0; - const size_t suballoc1stCount = suballocations1st.size(); - VkDeviceSize offset = VMA_DEBUG_MARGIN; - - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - const size_t suballoc2ndCount = suballocations2nd.size(); - size_t nullItem2ndCount = 0; - for(size_t i = 0; i < suballoc2ndCount; ++i) - { - const VmaSuballocation& suballoc = suballocations2nd[i]; - const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - - VMA_VALIDATE(currFree == (suballoc.hAllocation == VK_NULL_HANDLE)); - VMA_VALIDATE(suballoc.offset >= offset); - - if(!currFree) - { - VMA_VALIDATE(suballoc.hAllocation->GetOffset() == suballoc.offset); - VMA_VALIDATE(suballoc.hAllocation->GetSize() == suballoc.size); - sumUsedSize += suballoc.size; - } - else - { - ++nullItem2ndCount; - } - - offset = suballoc.offset + suballoc.size + VMA_DEBUG_MARGIN; - } - - VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); - } - - for(size_t i = 0; i < m_1stNullItemsBeginCount; ++i) - { - const VmaSuballocation& suballoc = suballocations1st[i]; - VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE && - suballoc.hAllocation == VK_NULL_HANDLE); - } - - size_t nullItem1stCount = m_1stNullItemsBeginCount; - - for(size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) - { - const VmaSuballocation& suballoc = suballocations1st[i]; - const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - - VMA_VALIDATE(currFree == (suballoc.hAllocation == VK_NULL_HANDLE)); - VMA_VALIDATE(suballoc.offset >= offset); - VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); - - if(!currFree) - { - VMA_VALIDATE(suballoc.hAllocation->GetOffset() == suballoc.offset); - VMA_VALIDATE(suballoc.hAllocation->GetSize() == 
suballoc.size); - sumUsedSize += suballoc.size; - } - else - { - ++nullItem1stCount; - } - - offset = suballoc.offset + suballoc.size + VMA_DEBUG_MARGIN; - } - VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); - - if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - const size_t suballoc2ndCount = suballocations2nd.size(); - size_t nullItem2ndCount = 0; - for(size_t i = suballoc2ndCount; i--; ) - { - const VmaSuballocation& suballoc = suballocations2nd[i]; - const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - - VMA_VALIDATE(currFree == (suballoc.hAllocation == VK_NULL_HANDLE)); - VMA_VALIDATE(suballoc.offset >= offset); - - if(!currFree) - { - VMA_VALIDATE(suballoc.hAllocation->GetOffset() == suballoc.offset); - VMA_VALIDATE(suballoc.hAllocation->GetSize() == suballoc.size); - sumUsedSize += suballoc.size; - } - else - { - ++nullItem2ndCount; - } - - offset = suballoc.offset + suballoc.size + VMA_DEBUG_MARGIN; - } - - VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); - } - - VMA_VALIDATE(offset <= GetSize()); - VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); - - return true; -} - -size_t VmaBlockMetadata_Linear::GetAllocationCount() const -{ - return AccessSuballocations1st().size() - (m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount) + - AccessSuballocations2nd().size() - m_2ndNullItemsCount; -} - -VkDeviceSize VmaBlockMetadata_Linear::GetUnusedRangeSizeMax() const -{ - const VkDeviceSize size = GetSize(); - - /* - We don't consider gaps inside allocation vectors with freed allocations because - they are not suitable for reuse in linear allocator. We consider only space that - is available for new allocations. 
- */ - if(IsEmpty()) - { - return size; - } - - const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - - switch(m_2ndVectorMode) - { - case SECOND_VECTOR_EMPTY: - /* - Available space is after end of 1st, as well as before beginning of 1st (which - whould make it a ring buffer). - */ - { - const size_t suballocations1stCount = suballocations1st.size(); - VMA_ASSERT(suballocations1stCount > m_1stNullItemsBeginCount); - const VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; - const VmaSuballocation& lastSuballoc = suballocations1st[suballocations1stCount - 1]; - return VMA_MAX( - firstSuballoc.offset, - size - (lastSuballoc.offset + lastSuballoc.size)); - } - break; - - case SECOND_VECTOR_RING_BUFFER: - /* - Available space is only between end of 2nd and beginning of 1st. - */ - { - const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - const VmaSuballocation& lastSuballoc2nd = suballocations2nd.back(); - const VmaSuballocation& firstSuballoc1st = suballocations1st[m_1stNullItemsBeginCount]; - return firstSuballoc1st.offset - (lastSuballoc2nd.offset + lastSuballoc2nd.size); - } - break; - - case SECOND_VECTOR_DOUBLE_STACK: - /* - Available space is only between end of 1st and top of 2nd. 
- */ - { - const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - const VmaSuballocation& topSuballoc2nd = suballocations2nd.back(); - const VmaSuballocation& lastSuballoc1st = suballocations1st.back(); - return topSuballoc2nd.offset - (lastSuballoc1st.offset + lastSuballoc1st.size); - } - break; - - default: - VMA_ASSERT(0); - return 0; - } -} - -void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const -{ - const VkDeviceSize size = GetSize(); - const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - const size_t suballoc1stCount = suballocations1st.size(); - const size_t suballoc2ndCount = suballocations2nd.size(); - - outInfo.blockCount = 1; - outInfo.allocationCount = (uint32_t)GetAllocationCount(); - outInfo.unusedRangeCount = 0; - outInfo.usedBytes = 0; - outInfo.allocationSizeMin = UINT64_MAX; - outInfo.allocationSizeMax = 0; - outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMax = 0; - - VkDeviceSize lastOffset = 0; - - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; - size_t nextAlloc2ndIndex = 0; - while(lastOffset < freeSpace2ndTo1stEnd) - { - // Find next non-null allocation or move nextAllocIndex to the end. - while(nextAlloc2ndIndex < suballoc2ndCount && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex < suballoc2ndCount) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. 
- const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusedRangeSize; - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); - outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - outInfo.usedBytes += suballoc.size; - outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); - outInfo.allocationSizeMax = VMA_MIN(outInfo.allocationSizeMax, suballoc.size); - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc2ndIndex; - } - // We are at the end. - else - { - // There is free space from lastOffset to freeSpace2ndTo1stEnd. - if(lastOffset < freeSpace2ndTo1stEnd) - { - const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusedRangeSize; - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); - outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); - } - - // End of loop. - lastOffset = freeSpace2ndTo1stEnd; - } - } - } - - size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; - const VkDeviceSize freeSpace1stTo2ndEnd = - m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; - while(lastOffset < freeSpace1stTo2ndEnd) - { - // Find next non-null allocation or move nextAllocIndex to the end. - while(nextAlloc1stIndex < suballoc1stCount && - suballocations1st[nextAlloc1stIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc1stIndex; - } - - // Found non-null allocation. - if(nextAlloc1stIndex < suballoc1stCount) - { - const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - - // 1. Process free space before this allocation. 
- if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusedRangeSize; - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); - outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - outInfo.usedBytes += suballoc.size; - outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); - outInfo.allocationSizeMax = VMA_MIN(outInfo.allocationSizeMax, suballoc.size); - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc1stIndex; - } - // We are at the end. - else - { - // There is free space from lastOffset to freeSpace1stTo2ndEnd. - if(lastOffset < freeSpace1stTo2ndEnd) - { - const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusedRangeSize; - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); - outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); - } - - // End of loop. - lastOffset = freeSpace1stTo2ndEnd; - } - } - - if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; - while(lastOffset < size) - { - // Find next non-null allocation or move nextAllocIndex to the end. - while(nextAlloc2ndIndex != SIZE_MAX && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - --nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex != SIZE_MAX) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. 
- if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusedRangeSize; - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); - outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - outInfo.usedBytes += suballoc.size; - outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); - outInfo.allocationSizeMax = VMA_MIN(outInfo.allocationSizeMax, suballoc.size); - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - --nextAlloc2ndIndex; - } - // We are at the end. - else - { - // There is free space from lastOffset to size. - if(lastOffset < size) - { - const VkDeviceSize unusedRangeSize = size - lastOffset; - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusedRangeSize; - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); - outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); - } - - // End of loop. 
- lastOffset = size; - } - } - } - - outInfo.unusedBytes = size - outInfo.usedBytes; -} - -void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const -{ - const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - const VkDeviceSize size = GetSize(); - const size_t suballoc1stCount = suballocations1st.size(); - const size_t suballoc2ndCount = suballocations2nd.size(); - - inoutStats.size += size; - - VkDeviceSize lastOffset = 0; - - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; - size_t nextAlloc2ndIndex = m_1stNullItemsBeginCount; - while(lastOffset < freeSpace2ndTo1stEnd) - { - // Find next non-null allocation or move nextAlloc2ndIndex to the end. - while(nextAlloc2ndIndex < suballoc2ndCount && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex < suballoc2ndCount) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - ++inoutStats.allocationCount; - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc2ndIndex; - } - // We are at the end. - else - { - if(lastOffset < freeSpace2ndTo1stEnd) - { - // There is free space from lastOffset to freeSpace2ndTo1stEnd. 
- const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); - } - - // End of loop. - lastOffset = freeSpace2ndTo1stEnd; - } - } - } - - size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; - const VkDeviceSize freeSpace1stTo2ndEnd = - m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; - while(lastOffset < freeSpace1stTo2ndEnd) - { - // Find next non-null allocation or move nextAllocIndex to the end. - while(nextAlloc1stIndex < suballoc1stCount && - suballocations1st[nextAlloc1stIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc1stIndex; - } - - // Found non-null allocation. - if(nextAlloc1stIndex < suballoc1stCount) - { - const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - ++inoutStats.allocationCount; - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc1stIndex; - } - // We are at the end. - else - { - if(lastOffset < freeSpace1stTo2ndEnd) - { - // There is free space from lastOffset to freeSpace1stTo2ndEnd. - const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); - } - - // End of loop. 
- lastOffset = freeSpace1stTo2ndEnd; - } - } - - if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; - while(lastOffset < size) - { - // Find next non-null allocation or move nextAlloc2ndIndex to the end. - while(nextAlloc2ndIndex != SIZE_MAX && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - --nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex != SIZE_MAX) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - ++inoutStats.allocationCount; - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - --nextAlloc2ndIndex; - } - // We are at the end. - else - { - if(lastOffset < size) - { - // There is free space from lastOffset to size. - const VkDeviceSize unusedRangeSize = size - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); - } - - // End of loop. 
- lastOffset = size; - } - } - } -} - -#if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const -{ - const VkDeviceSize size = GetSize(); - const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - const size_t suballoc1stCount = suballocations1st.size(); - const size_t suballoc2ndCount = suballocations2nd.size(); - - // FIRST PASS - - size_t unusedRangeCount = 0; - VkDeviceSize usedBytes = 0; - - VkDeviceSize lastOffset = 0; - - size_t alloc2ndCount = 0; - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; - size_t nextAlloc2ndIndex = 0; - while(lastOffset < freeSpace2ndTo1stEnd) - { - // Find next non-null allocation or move nextAlloc2ndIndex to the end. - while(nextAlloc2ndIndex < suballoc2ndCount && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex < suballoc2ndCount) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - ++unusedRangeCount; - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - ++alloc2ndCount; - usedBytes += suballoc.size; - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc2ndIndex; - } - // We are at the end. - else - { - if(lastOffset < freeSpace2ndTo1stEnd) - { - // There is free space from lastOffset to freeSpace2ndTo1stEnd. - ++unusedRangeCount; - } - - // End of loop. 
- lastOffset = freeSpace2ndTo1stEnd; - } - } - } - - size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; - size_t alloc1stCount = 0; - const VkDeviceSize freeSpace1stTo2ndEnd = - m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; - while(lastOffset < freeSpace1stTo2ndEnd) - { - // Find next non-null allocation or move nextAllocIndex to the end. - while(nextAlloc1stIndex < suballoc1stCount && - suballocations1st[nextAlloc1stIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc1stIndex; - } - - // Found non-null allocation. - if(nextAlloc1stIndex < suballoc1stCount) - { - const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - ++unusedRangeCount; - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - ++alloc1stCount; - usedBytes += suballoc.size; - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc1stIndex; - } - // We are at the end. - else - { - if(lastOffset < size) - { - // There is free space from lastOffset to freeSpace1stTo2ndEnd. - ++unusedRangeCount; - } - - // End of loop. - lastOffset = freeSpace1stTo2ndEnd; - } - } - - if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; - while(lastOffset < size) - { - // Find next non-null allocation or move nextAlloc2ndIndex to the end. - while(nextAlloc2ndIndex != SIZE_MAX && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - --nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex != SIZE_MAX) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. 
- if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - ++unusedRangeCount; - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - ++alloc2ndCount; - usedBytes += suballoc.size; - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - --nextAlloc2ndIndex; - } - // We are at the end. - else - { - if(lastOffset < size) - { - // There is free space from lastOffset to size. - ++unusedRangeCount; - } - - // End of loop. - lastOffset = size; - } - } - } - - const VkDeviceSize unusedBytes = size - usedBytes; - PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); - - // SECOND PASS - lastOffset = 0; - - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; - size_t nextAlloc2ndIndex = 0; - while(lastOffset < freeSpace2ndTo1stEnd) - { - // Find next non-null allocation or move nextAlloc2ndIndex to the end. - while(nextAlloc2ndIndex < suballoc2ndCount && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex < suballoc2ndCount) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.hAllocation); - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc2ndIndex; - } - // We are at the end. 
- else - { - if(lastOffset < freeSpace2ndTo1stEnd) - { - // There is free space from lastOffset to freeSpace2ndTo1stEnd. - const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); - } - - // End of loop. - lastOffset = freeSpace2ndTo1stEnd; - } - } - } - - nextAlloc1stIndex = m_1stNullItemsBeginCount; - while(lastOffset < freeSpace1stTo2ndEnd) - { - // Find next non-null allocation or move nextAllocIndex to the end. - while(nextAlloc1stIndex < suballoc1stCount && - suballocations1st[nextAlloc1stIndex].hAllocation == VK_NULL_HANDLE) - { - ++nextAlloc1stIndex; - } - - // Found non-null allocation. - if(nextAlloc1stIndex < suballoc1stCount) - { - const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.hAllocation); - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - ++nextAlloc1stIndex; - } - // We are at the end. - else - { - if(lastOffset < freeSpace1stTo2ndEnd) - { - // There is free space from lastOffset to freeSpace1stTo2ndEnd. - const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); - } - - // End of loop. - lastOffset = freeSpace1stTo2ndEnd; - } - } - - if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; - while(lastOffset < size) - { - // Find next non-null allocation or move nextAlloc2ndIndex to the end. 
- while(nextAlloc2ndIndex != SIZE_MAX && - suballocations2nd[nextAlloc2ndIndex].hAllocation == VK_NULL_HANDLE) - { - --nextAlloc2ndIndex; - } - - // Found non-null allocation. - if(nextAlloc2ndIndex != SIZE_MAX) - { - const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - - // 1. Process free space before this allocation. - if(lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); - } - - // 2. Process this allocation. - // There is allocation with suballoc.offset, suballoc.size. - PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.hAllocation); - - // 3. Prepare for next iteration. - lastOffset = suballoc.offset + suballoc.size; - --nextAlloc2ndIndex; - } - // We are at the end. - else - { - if(lastOffset < size) - { - // There is free space from lastOffset to size. - const VkDeviceSize unusedRangeSize = size - lastOffset; - PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); - } - - // End of loop. - lastOffset = size; - } - } - } - - PrintDetailedMap_End(json); -} -#endif // #if VMA_STATS_STRING_ENABLED - -bool VmaBlockMetadata_Linear::CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - bool canMakeOtherLost, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(allocSize > 0); - VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(pAllocationRequest != VMA_NULL); - VMA_HEAVY_ASSERT(Validate()); - return upperAddress ? 
- CreateAllocationRequest_UpperAddress( - currentFrameIndex, frameInUseCount, bufferImageGranularity, - allocSize, allocAlignment, allocType, canMakeOtherLost, strategy, pAllocationRequest) : - CreateAllocationRequest_LowerAddress( - currentFrameIndex, frameInUseCount, bufferImageGranularity, - allocSize, allocAlignment, allocType, canMakeOtherLost, strategy, pAllocationRequest); -} - -bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - bool canMakeOtherLost, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) -{ - const VkDeviceSize size = GetSize(); - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); - return false; - } - - // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). - if(allocSize > size) - { - return false; - } - VkDeviceSize resultBaseOffset = size - allocSize; - if(!suballocations2nd.empty()) - { - const VmaSuballocation& lastSuballoc = suballocations2nd.back(); - resultBaseOffset = lastSuballoc.offset - allocSize; - if(allocSize > lastSuballoc.offset) - { - return false; - } - } - - // Start from offset equal to end of free space. - VkDeviceSize resultOffset = resultBaseOffset; - - // Apply VMA_DEBUG_MARGIN at the end. - if(VMA_DEBUG_MARGIN > 0) - { - if(resultOffset < VMA_DEBUG_MARGIN) - { - return false; - } - resultOffset -= VMA_DEBUG_MARGIN; - } - - // Apply alignment. - resultOffset = VmaAlignDown(resultOffset, allocAlignment); - - // Check next suballocations from 2nd for BufferImageGranularity conflicts. 
- // Make bigger alignment if necessary. - if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) - { - bool bufferImageGranularityConflict = false; - for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) - { - const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; - if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. - break; - } - if(bufferImageGranularityConflict) - { - resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity); - } - } - - // There is enough free space. - const VkDeviceSize endOf1st = !suballocations1st.empty() ? - suballocations1st.back().offset + suballocations1st.back().size : - 0; - if(endOf1st + VMA_DEBUG_MARGIN <= resultOffset) - { - // Check previous suballocations for BufferImageGranularity conflicts. - // If conflict exists, allocation cannot be made here. - if(bufferImageGranularity > 1) - { - for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) - { - const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; - if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) - { - return false; - } - } - else - { - // Already on next page. - break; - } - } - } - - // All tests passed: Success. - pAllocationRequest->offset = resultOffset; - pAllocationRequest->sumFreeSize = resultBaseOffset + allocSize - endOf1st; - pAllocationRequest->sumItemSize = 0; - // pAllocationRequest->item unused. 
- pAllocationRequest->itemsToMakeLostCount = 0; - pAllocationRequest->type = VmaAllocationRequestType::UpperAddress; - return true; - } - - return false; -} - -bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - bool canMakeOtherLost, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) -{ - const VkDeviceSize size = GetSize(); - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - - if(m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - // Try to allocate at the end of 1st vector. - - VkDeviceSize resultBaseOffset = 0; - if(!suballocations1st.empty()) - { - const VmaSuballocation& lastSuballoc = suballocations1st.back(); - resultBaseOffset = lastSuballoc.offset + lastSuballoc.size; - } - - // Start from offset equal to beginning of free space. - VkDeviceSize resultOffset = resultBaseOffset; - - // Apply VMA_DEBUG_MARGIN at the beginning. - if(VMA_DEBUG_MARGIN > 0) - { - resultOffset += VMA_DEBUG_MARGIN; - } - - // Apply alignment. - resultOffset = VmaAlignUp(resultOffset, allocAlignment); - - // Check previous suballocations for BufferImageGranularity conflicts. - // Make bigger alignment if necessary. 
- if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) - { - bool bufferImageGranularityConflict = false; - for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) - { - const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; - if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. - break; - } - if(bufferImageGranularityConflict) - { - resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); - } - } - - const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? - suballocations2nd.back().offset : size; - - // There is enough free space at the end after alignment. - if(resultOffset + allocSize + VMA_DEBUG_MARGIN <= freeSpaceEnd) - { - // Check next suballocations for BufferImageGranularity conflicts. - // If conflict exists, allocation cannot be made here. - if((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) - { - const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; - if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) - { - return false; - } - } - else - { - // Already on previous page. - break; - } - } - } - - // All tests passed: Success. - pAllocationRequest->offset = resultOffset; - pAllocationRequest->sumFreeSize = freeSpaceEnd - resultBaseOffset; - pAllocationRequest->sumItemSize = 0; - // pAllocationRequest->item, customData unused. 
- pAllocationRequest->type = VmaAllocationRequestType::EndOf1st; - pAllocationRequest->itemsToMakeLostCount = 0; - return true; - } - } - - // Wrap-around to end of 2nd vector. Try to allocate there, watching for the - // beginning of 1st vector as the end of free space. - if(m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - VMA_ASSERT(!suballocations1st.empty()); - - VkDeviceSize resultBaseOffset = 0; - if(!suballocations2nd.empty()) - { - const VmaSuballocation& lastSuballoc = suballocations2nd.back(); - resultBaseOffset = lastSuballoc.offset + lastSuballoc.size; - } - - // Start from offset equal to beginning of free space. - VkDeviceSize resultOffset = resultBaseOffset; - - // Apply VMA_DEBUG_MARGIN at the beginning. - if(VMA_DEBUG_MARGIN > 0) - { - resultOffset += VMA_DEBUG_MARGIN; - } - - // Apply alignment. - resultOffset = VmaAlignUp(resultOffset, allocAlignment); - - // Check previous suballocations for BufferImageGranularity conflicts. - // Make bigger alignment if necessary. - if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) - { - bool bufferImageGranularityConflict = false; - for(size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) - { - const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex]; - if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. 
- break; - } - if(bufferImageGranularityConflict) - { - resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); - } - } - - pAllocationRequest->itemsToMakeLostCount = 0; - pAllocationRequest->sumItemSize = 0; - size_t index1st = m_1stNullItemsBeginCount; - - if(canMakeOtherLost) - { - while(index1st < suballocations1st.size() && - resultOffset + allocSize + VMA_DEBUG_MARGIN > suballocations1st[index1st].offset) - { - // Next colliding allocation at the beginning of 1st vector found. Try to make it lost. - const VmaSuballocation& suballoc = suballocations1st[index1st]; - if(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE) - { - // No problem. - } - else - { - VMA_ASSERT(suballoc.hAllocation != VK_NULL_HANDLE); - if(suballoc.hAllocation->CanBecomeLost() && - suballoc.hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++pAllocationRequest->itemsToMakeLostCount; - pAllocationRequest->sumItemSize += suballoc.size; - } - else - { - return false; - } - } - ++index1st; - } - - // Check next suballocations for BufferImageGranularity conflicts. - // If conflict exists, we must mark more allocations lost or fail. - if(allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) - { - while(index1st < suballocations1st.size()) - { - const VmaSuballocation& suballoc = suballocations1st[index1st]; - if(VmaBlocksOnSamePage(resultOffset, allocSize, suballoc.offset, bufferImageGranularity)) - { - if(suballoc.hAllocation != VK_NULL_HANDLE) - { - // Not checking actual VmaIsBufferImageGranularityConflict(allocType, suballoc.type). - if(suballoc.hAllocation->CanBecomeLost() && - suballoc.hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++pAllocationRequest->itemsToMakeLostCount; - pAllocationRequest->sumItemSize += suballoc.size; - } - else - { - return false; - } - } - } - else - { - // Already on next page. 
- break; - } - ++index1st; - } - } - - // Special case: There is not enough room at the end for this allocation, even after making all from the 1st lost. - if(index1st == suballocations1st.size() && - resultOffset + allocSize + VMA_DEBUG_MARGIN > size) - { - // TODO: This is a known bug that it's not yet implemented and the allocation is failing. - VMA_DEBUG_LOG("Unsupported special case in custom pool with linear allocation algorithm used as ring buffer with allocations that can be lost."); - } - } - - // There is enough free space at the end after alignment. - if((index1st == suballocations1st.size() && resultOffset + allocSize + VMA_DEBUG_MARGIN <= size) || - (index1st < suballocations1st.size() && resultOffset + allocSize + VMA_DEBUG_MARGIN <= suballocations1st[index1st].offset)) - { - // Check next suballocations for BufferImageGranularity conflicts. - // If conflict exists, allocation cannot be made here. - if(allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) - { - for(size_t nextSuballocIndex = index1st; - nextSuballocIndex < suballocations1st.size(); - nextSuballocIndex++) - { - const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex]; - if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) - { - return false; - } - } - else - { - // Already on next page. - break; - } - } - } - - // All tests passed: Success. - pAllocationRequest->offset = resultOffset; - pAllocationRequest->sumFreeSize = - (index1st < suballocations1st.size() ? suballocations1st[index1st].offset : size) - - resultBaseOffset - - pAllocationRequest->sumItemSize; - pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd; - // pAllocationRequest->item, customData unused. 
- return true; - } - } - - return false; -} - -bool VmaBlockMetadata_Linear::MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest) -{ - if(pAllocationRequest->itemsToMakeLostCount == 0) - { - return true; - } - - VMA_ASSERT(m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER); - - // We always start from 1st. - SuballocationVectorType* suballocations = &AccessSuballocations1st(); - size_t index = m_1stNullItemsBeginCount; - size_t madeLostCount = 0; - while(madeLostCount < pAllocationRequest->itemsToMakeLostCount) - { - if(index == suballocations->size()) - { - index = 0; - // If we get to the end of 1st, we wrap around to beginning of 2nd of 1st. - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - suballocations = &AccessSuballocations2nd(); - } - // else: m_2ndVectorMode == SECOND_VECTOR_EMPTY: - // suballocations continues pointing at AccessSuballocations1st(). - VMA_ASSERT(!suballocations->empty()); - } - VmaSuballocation& suballoc = (*suballocations)[index]; - if(suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - VMA_ASSERT(suballoc.hAllocation != VK_NULL_HANDLE); - VMA_ASSERT(suballoc.hAllocation->CanBecomeLost()); - if(suballoc.hAllocation->MakeLost(currentFrameIndex, frameInUseCount)) - { - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.hAllocation = VK_NULL_HANDLE; - m_SumFreeSize += suballoc.size; - if(suballocations == &AccessSuballocations1st()) - { - ++m_1stNullItemsMiddleCount; - } - else - { - ++m_2ndNullItemsCount; - } - ++madeLostCount; - } - else - { - return false; - } - } - ++index; - } - - CleanupAfterFree(); - //VMA_HEAVY_ASSERT(Validate()); // Already called by ClanupAfterFree(). 
- - return true; -} - -uint32_t VmaBlockMetadata_Linear::MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) -{ - uint32_t lostAllocationCount = 0; - - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - for(size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) - { - VmaSuballocation& suballoc = suballocations1st[i]; - if(suballoc.type != VMA_SUBALLOCATION_TYPE_FREE && - suballoc.hAllocation->CanBecomeLost() && - suballoc.hAllocation->MakeLost(currentFrameIndex, frameInUseCount)) - { - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.hAllocation = VK_NULL_HANDLE; - ++m_1stNullItemsMiddleCount; - m_SumFreeSize += suballoc.size; - ++lostAllocationCount; - } - } - - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - for(size_t i = 0, count = suballocations2nd.size(); i < count; ++i) - { - VmaSuballocation& suballoc = suballocations2nd[i]; - if(suballoc.type != VMA_SUBALLOCATION_TYPE_FREE && - suballoc.hAllocation->CanBecomeLost() && - suballoc.hAllocation->MakeLost(currentFrameIndex, frameInUseCount)) - { - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.hAllocation = VK_NULL_HANDLE; - ++m_2ndNullItemsCount; - m_SumFreeSize += suballoc.size; - ++lostAllocationCount; - } - } - - if(lostAllocationCount) - { - CleanupAfterFree(); - } - - return lostAllocationCount; -} - -VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) -{ - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - for(size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) - { - const VmaSuballocation& suballoc = suballocations1st[i]; - if(suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - if(!VmaValidateMagicValue(pBlockData, suballoc.offset - VMA_DEBUG_MARGIN)) - { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED BEFORE VALIDATED ALLOCATION!"); - return VK_ERROR_VALIDATION_FAILED_EXT; - } - 
if(!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) - { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_VALIDATION_FAILED_EXT; - } - } - } - - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - for(size_t i = 0, count = suballocations2nd.size(); i < count; ++i) - { - const VmaSuballocation& suballoc = suballocations2nd[i]; - if(suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - if(!VmaValidateMagicValue(pBlockData, suballoc.offset - VMA_DEBUG_MARGIN)) - { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED BEFORE VALIDATED ALLOCATION!"); - return VK_ERROR_VALIDATION_FAILED_EXT; - } - if(!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) - { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_VALIDATION_FAILED_EXT; - } - } - } - - return VK_SUCCESS; -} - -void VmaBlockMetadata_Linear::Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation) -{ - const VmaSuballocation newSuballoc = { request.offset, allocSize, hAllocation, type }; - - switch(request.type) - { - case VmaAllocationRequestType::UpperAddress: - { - VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && - "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - suballocations2nd.push_back(newSuballoc); - m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; - } - break; - case VmaAllocationRequestType::EndOf1st: - { - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - - VMA_ASSERT(suballocations1st.empty() || - request.offset >= suballocations1st.back().offset + suballocations1st.back().size); - // Check if it fits before the end of the block. 
- VMA_ASSERT(request.offset + allocSize <= GetSize()); - - suballocations1st.push_back(newSuballoc); - } - break; - case VmaAllocationRequestType::EndOf2nd: - { - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. - VMA_ASSERT(!suballocations1st.empty() && - request.offset + allocSize <= suballocations1st[m_1stNullItemsBeginCount].offset); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - - switch(m_2ndVectorMode) - { - case SECOND_VECTOR_EMPTY: - // First allocation from second part ring buffer. - VMA_ASSERT(suballocations2nd.empty()); - m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; - break; - case SECOND_VECTOR_RING_BUFFER: - // 2-part ring buffer is already started. - VMA_ASSERT(!suballocations2nd.empty()); - break; - case SECOND_VECTOR_DOUBLE_STACK: - VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); - break; - default: - VMA_ASSERT(0); - } - - suballocations2nd.push_back(newSuballoc); - } - break; - default: - VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); - } - - m_SumFreeSize -= newSuballoc.size; -} - -void VmaBlockMetadata_Linear::Free(const VmaAllocation allocation) -{ - FreeAtOffset(allocation->GetOffset()); -} - -void VmaBlockMetadata_Linear::FreeAtOffset(VkDeviceSize offset) -{ - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - - if(!suballocations1st.empty()) - { - // First allocation: Mark it as next empty at the beginning. 
- VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; - if(firstSuballoc.offset == offset) - { - firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - firstSuballoc.hAllocation = VK_NULL_HANDLE; - m_SumFreeSize += firstSuballoc.size; - ++m_1stNullItemsBeginCount; - CleanupAfterFree(); - return; - } - } - - // Last allocation in 2-part ring buffer or top of upper stack (same logic). - if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || - m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) - { - VmaSuballocation& lastSuballoc = suballocations2nd.back(); - if(lastSuballoc.offset == offset) - { - m_SumFreeSize += lastSuballoc.size; - suballocations2nd.pop_back(); - CleanupAfterFree(); - return; - } - } - // Last allocation in 1st vector. - else if(m_2ndVectorMode == SECOND_VECTOR_EMPTY) - { - VmaSuballocation& lastSuballoc = suballocations1st.back(); - if(lastSuballoc.offset == offset) - { - m_SumFreeSize += lastSuballoc.size; - suballocations1st.pop_back(); - CleanupAfterFree(); - return; - } - } - - // Item from the middle of 1st vector. - { - VmaSuballocation refSuballoc; - refSuballoc.offset = offset; - // Rest of members stays uninitialized intentionally for better performance. - SuballocationVectorType::iterator it = VmaBinaryFindSorted( - suballocations1st.begin() + m_1stNullItemsBeginCount, - suballocations1st.end(), - refSuballoc, - VmaSuballocationOffsetLess()); - if(it != suballocations1st.end()) - { - it->type = VMA_SUBALLOCATION_TYPE_FREE; - it->hAllocation = VK_NULL_HANDLE; - ++m_1stNullItemsMiddleCount; - m_SumFreeSize += it->size; - CleanupAfterFree(); - return; - } - } - - if(m_2ndVectorMode != SECOND_VECTOR_EMPTY) - { - // Item from the middle of 2nd vector. - VmaSuballocation refSuballoc; - refSuballoc.offset = offset; - // Rest of members stays uninitialized intentionally for better performance. - SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? 
- VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : - VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); - if(it != suballocations2nd.end()) - { - it->type = VMA_SUBALLOCATION_TYPE_FREE; - it->hAllocation = VK_NULL_HANDLE; - ++m_2ndNullItemsCount; - m_SumFreeSize += it->size; - CleanupAfterFree(); - return; - } - } - - VMA_ASSERT(0 && "Allocation to free not found in linear allocator!"); -} - -bool VmaBlockMetadata_Linear::ShouldCompact1st() const -{ - const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; - const size_t suballocCount = AccessSuballocations1st().size(); - return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3; -} - -void VmaBlockMetadata_Linear::CleanupAfterFree() -{ - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); - - if(IsEmpty()) - { - suballocations1st.clear(); - suballocations2nd.clear(); - m_1stNullItemsBeginCount = 0; - m_1stNullItemsMiddleCount = 0; - m_2ndNullItemsCount = 0; - m_2ndVectorMode = SECOND_VECTOR_EMPTY; - } - else - { - const size_t suballoc1stCount = suballocations1st.size(); - const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; - VMA_ASSERT(nullItem1stCount <= suballoc1stCount); - - // Find more null items at the beginning of 1st vector. - while(m_1stNullItemsBeginCount < suballoc1stCount && - suballocations1st[m_1stNullItemsBeginCount].hAllocation == VK_NULL_HANDLE) - { - ++m_1stNullItemsBeginCount; - --m_1stNullItemsMiddleCount; - } - - // Find more null items at the end of 1st vector. 
- while(m_1stNullItemsMiddleCount > 0 && - suballocations1st.back().hAllocation == VK_NULL_HANDLE) - { - --m_1stNullItemsMiddleCount; - suballocations1st.pop_back(); - } - - // Find more null items at the end of 2nd vector. - while(m_2ndNullItemsCount > 0 && - suballocations2nd.back().hAllocation == VK_NULL_HANDLE) - { - --m_2ndNullItemsCount; - suballocations2nd.pop_back(); - } - - // Find more null items at the beginning of 2nd vector. - while(m_2ndNullItemsCount > 0 && - suballocations2nd[0].hAllocation == VK_NULL_HANDLE) - { - --m_2ndNullItemsCount; - VmaVectorRemove(suballocations2nd, 0); - } - - if(ShouldCompact1st()) - { - const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount; - size_t srcIndex = m_1stNullItemsBeginCount; - for(size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex) - { - while(suballocations1st[srcIndex].hAllocation == VK_NULL_HANDLE) - { - ++srcIndex; - } - if(dstIndex != srcIndex) - { - suballocations1st[dstIndex] = suballocations1st[srcIndex]; - } - ++srcIndex; - } - suballocations1st.resize(nonNullItemCount); - m_1stNullItemsBeginCount = 0; - m_1stNullItemsMiddleCount = 0; - } - - // 2nd vector became empty. - if(suballocations2nd.empty()) - { - m_2ndVectorMode = SECOND_VECTOR_EMPTY; - } - - // 1st vector became empty. - if(suballocations1st.size() - m_1stNullItemsBeginCount == 0) - { - suballocations1st.clear(); - m_1stNullItemsBeginCount = 0; - - if(!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) - { - // Swap 1st with 2nd. Now 2nd is empty. 
- m_2ndVectorMode = SECOND_VECTOR_EMPTY; - m_1stNullItemsMiddleCount = m_2ndNullItemsCount; - while(m_1stNullItemsBeginCount < suballocations2nd.size() && - suballocations2nd[m_1stNullItemsBeginCount].hAllocation == VK_NULL_HANDLE) - { - ++m_1stNullItemsBeginCount; - --m_1stNullItemsMiddleCount; - } - m_2ndNullItemsCount = 0; - m_1stVectorIndex ^= 1; - } - } - } - - VMA_HEAVY_ASSERT(Validate()); -} - - -//////////////////////////////////////////////////////////////////////////////// -// class VmaBlockMetadata_Buddy - -VmaBlockMetadata_Buddy::VmaBlockMetadata_Buddy(VmaAllocator hAllocator) : - VmaBlockMetadata(hAllocator), - m_Root(VMA_NULL), - m_AllocationCount(0), - m_FreeCount(1), - m_SumFreeSize(0) -{ - memset(m_FreeList, 0, sizeof(m_FreeList)); -} - -VmaBlockMetadata_Buddy::~VmaBlockMetadata_Buddy() -{ - DeleteNode(m_Root); -} - -void VmaBlockMetadata_Buddy::Init(VkDeviceSize size) -{ - VmaBlockMetadata::Init(size); - - m_UsableSize = VmaPrevPow2(size); - m_SumFreeSize = m_UsableSize; - - // Calculate m_LevelCount. - m_LevelCount = 1; - while(m_LevelCount < MAX_LEVELS && - LevelToNodeSize(m_LevelCount) >= MIN_NODE_SIZE) - { - ++m_LevelCount; - } - - Node* rootNode = vma_new(GetAllocationCallbacks(), Node)(); - rootNode->offset = 0; - rootNode->type = Node::TYPE_FREE; - rootNode->parent = VMA_NULL; - rootNode->buddy = VMA_NULL; - - m_Root = rootNode; - AddToFreeListFront(0, rootNode); -} - -bool VmaBlockMetadata_Buddy::Validate() const -{ - // Validate tree. - ValidationContext ctx; - if(!ValidateNode(ctx, VMA_NULL, m_Root, 0, LevelToNodeSize(0))) - { - VMA_VALIDATE(false && "ValidateNode failed."); - } - VMA_VALIDATE(m_AllocationCount == ctx.calculatedAllocationCount); - VMA_VALIDATE(m_SumFreeSize == ctx.calculatedSumFreeSize); - - // Validate free node lists. 
- for(uint32_t level = 0; level < m_LevelCount; ++level) - { - VMA_VALIDATE(m_FreeList[level].front == VMA_NULL || - m_FreeList[level].front->free.prev == VMA_NULL); - - for(Node* node = m_FreeList[level].front; - node != VMA_NULL; - node = node->free.next) - { - VMA_VALIDATE(node->type == Node::TYPE_FREE); - - if(node->free.next == VMA_NULL) - { - VMA_VALIDATE(m_FreeList[level].back == node); - } - else - { - VMA_VALIDATE(node->free.next->free.prev == node); - } - } - } - - // Validate that free lists ar higher levels are empty. - for(uint32_t level = m_LevelCount; level < MAX_LEVELS; ++level) - { - VMA_VALIDATE(m_FreeList[level].front == VMA_NULL && m_FreeList[level].back == VMA_NULL); - } - - return true; -} - -VkDeviceSize VmaBlockMetadata_Buddy::GetUnusedRangeSizeMax() const -{ - for(uint32_t level = 0; level < m_LevelCount; ++level) - { - if(m_FreeList[level].front != VMA_NULL) - { - return LevelToNodeSize(level); - } - } - return 0; -} - -void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const -{ - const VkDeviceSize unusableSize = GetUnusableSize(); - - outInfo.blockCount = 1; - - outInfo.allocationCount = outInfo.unusedRangeCount = 0; - outInfo.usedBytes = outInfo.unusedBytes = 0; - - outInfo.allocationSizeMax = outInfo.unusedRangeSizeMax = 0; - outInfo.allocationSizeMin = outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.allocationSizeAvg = outInfo.unusedRangeSizeAvg = 0; // Unused. 
- - CalcAllocationStatInfoNode(outInfo, m_Root, LevelToNodeSize(0)); - - if(unusableSize > 0) - { - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusableSize; - outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, unusableSize); - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusableSize); - } -} - -void VmaBlockMetadata_Buddy::AddPoolStats(VmaPoolStats& inoutStats) const -{ - const VkDeviceSize unusableSize = GetUnusableSize(); - - inoutStats.size += GetSize(); - inoutStats.unusedSize += m_SumFreeSize + unusableSize; - inoutStats.allocationCount += m_AllocationCount; - inoutStats.unusedRangeCount += m_FreeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, GetUnusedRangeSizeMax()); - - if(unusableSize > 0) - { - ++inoutStats.unusedRangeCount; - // Not updating inoutStats.unusedRangeSizeMax with unusableSize because this space is not available for allocations. - } -} - -#if VMA_STATS_STRING_ENABLED - -void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json) const -{ - // TODO optimize - VmaStatInfo stat; - CalcAllocationStatInfo(stat); - - PrintDetailedMap_Begin( - json, - stat.unusedBytes, - stat.allocationCount, - stat.unusedRangeCount); - - PrintDetailedMapNode(json, m_Root, LevelToNodeSize(0)); - - const VkDeviceSize unusableSize = GetUnusableSize(); - if(unusableSize > 0) - { - PrintDetailedMap_UnusedRange(json, - m_UsableSize, // offset - unusableSize); // size - } - - PrintDetailedMap_End(json); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -bool VmaBlockMetadata_Buddy::CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - bool canMakeOtherLost, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used 
only with linear algorithm."); - - // Simple way to respect bufferImageGranularity. May be optimized some day. - // Whenever it might be an OPTIMAL image... - if(allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN || - allocType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || - allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL) - { - allocAlignment = VMA_MAX(allocAlignment, bufferImageGranularity); - allocSize = VMA_MAX(allocSize, bufferImageGranularity); - } - - if(allocSize > m_UsableSize) - { - return false; - } - - const uint32_t targetLevel = AllocSizeToLevel(allocSize); - for(uint32_t level = targetLevel + 1; level--; ) - { - for(Node* freeNode = m_FreeList[level].front; - freeNode != VMA_NULL; - freeNode = freeNode->free.next) - { - if(freeNode->offset % allocAlignment == 0) - { - pAllocationRequest->type = VmaAllocationRequestType::Normal; - pAllocationRequest->offset = freeNode->offset; - pAllocationRequest->sumFreeSize = LevelToNodeSize(level); - pAllocationRequest->sumItemSize = 0; - pAllocationRequest->itemsToMakeLostCount = 0; - pAllocationRequest->customData = (void*)(uintptr_t)level; - return true; - } - } - } - - return false; -} - -bool VmaBlockMetadata_Buddy::MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest) -{ - /* - Lost allocations are not supported in buddy allocator at the moment. - Support might be added in the future. - */ - return pAllocationRequest->itemsToMakeLostCount == 0; -} - -uint32_t VmaBlockMetadata_Buddy::MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) -{ - /* - Lost allocations are not supported in buddy allocator at the moment. - Support might be added in the future. 
- */ - return 0; -} - -void VmaBlockMetadata_Buddy::Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation) -{ - VMA_ASSERT(request.type == VmaAllocationRequestType::Normal); - - const uint32_t targetLevel = AllocSizeToLevel(allocSize); - uint32_t currLevel = (uint32_t)(uintptr_t)request.customData; - - Node* currNode = m_FreeList[currLevel].front; - VMA_ASSERT(currNode != VMA_NULL && currNode->type == Node::TYPE_FREE); - while(currNode->offset != request.offset) - { - currNode = currNode->free.next; - VMA_ASSERT(currNode != VMA_NULL && currNode->type == Node::TYPE_FREE); - } - - // Go down, splitting free nodes. - while(currLevel < targetLevel) - { - // currNode is already first free node at currLevel. - // Remove it from list of free nodes at this currLevel. - RemoveFromFreeList(currLevel, currNode); - - const uint32_t childrenLevel = currLevel + 1; - - // Create two free sub-nodes. - Node* leftChild = vma_new(GetAllocationCallbacks(), Node)(); - Node* rightChild = vma_new(GetAllocationCallbacks(), Node)(); - - leftChild->offset = currNode->offset; - leftChild->type = Node::TYPE_FREE; - leftChild->parent = currNode; - leftChild->buddy = rightChild; - - rightChild->offset = currNode->offset + LevelToNodeSize(childrenLevel); - rightChild->type = Node::TYPE_FREE; - rightChild->parent = currNode; - rightChild->buddy = leftChild; - - // Convert current currNode to split type. - currNode->type = Node::TYPE_SPLIT; - currNode->split.leftChild = leftChild; - - // Add child nodes to free list. Order is important! - AddToFreeListFront(childrenLevel, rightChild); - AddToFreeListFront(childrenLevel, leftChild); - - ++m_FreeCount; - //m_SumFreeSize -= LevelToNodeSize(currLevel) % 2; // Useful only when level node sizes can be non power of 2. 
- ++currLevel; - currNode = m_FreeList[currLevel].front; - - /* - We can be sure that currNode, as left child of node previously split, - also fullfills the alignment requirement. - */ - } - - // Remove from free list. - VMA_ASSERT(currLevel == targetLevel && - currNode != VMA_NULL && - currNode->type == Node::TYPE_FREE); - RemoveFromFreeList(currLevel, currNode); - - // Convert to allocation node. - currNode->type = Node::TYPE_ALLOCATION; - currNode->allocation.alloc = hAllocation; - - ++m_AllocationCount; - --m_FreeCount; - m_SumFreeSize -= allocSize; -} - -void VmaBlockMetadata_Buddy::DeleteNode(Node* node) -{ - if(node->type == Node::TYPE_SPLIT) - { - DeleteNode(node->split.leftChild->buddy); - DeleteNode(node->split.leftChild); - } - - vma_delete(GetAllocationCallbacks(), node); -} - -bool VmaBlockMetadata_Buddy::ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const -{ - VMA_VALIDATE(level < m_LevelCount); - VMA_VALIDATE(curr->parent == parent); - VMA_VALIDATE((curr->buddy == VMA_NULL) == (parent == VMA_NULL)); - VMA_VALIDATE(curr->buddy == VMA_NULL || curr->buddy->buddy == curr); - switch(curr->type) - { - case Node::TYPE_FREE: - // curr->free.prev, next are validated separately. 
- ctx.calculatedSumFreeSize += levelNodeSize; - ++ctx.calculatedFreeCount; - break; - case Node::TYPE_ALLOCATION: - ++ctx.calculatedAllocationCount; - ctx.calculatedSumFreeSize += levelNodeSize - curr->allocation.alloc->GetSize(); - VMA_VALIDATE(curr->allocation.alloc != VK_NULL_HANDLE); - break; - case Node::TYPE_SPLIT: - { - const uint32_t childrenLevel = level + 1; - const VkDeviceSize childrenLevelNodeSize = levelNodeSize / 2; - const Node* const leftChild = curr->split.leftChild; - VMA_VALIDATE(leftChild != VMA_NULL); - VMA_VALIDATE(leftChild->offset == curr->offset); - if(!ValidateNode(ctx, curr, leftChild, childrenLevel, childrenLevelNodeSize)) - { - VMA_VALIDATE(false && "ValidateNode for left child failed."); - } - const Node* const rightChild = leftChild->buddy; - VMA_VALIDATE(rightChild->offset == curr->offset + childrenLevelNodeSize); - if(!ValidateNode(ctx, curr, rightChild, childrenLevel, childrenLevelNodeSize)) - { - VMA_VALIDATE(false && "ValidateNode for right child failed."); - } - } - break; - default: - return false; - } - - return true; -} - -uint32_t VmaBlockMetadata_Buddy::AllocSizeToLevel(VkDeviceSize allocSize) const -{ - // I know this could be optimized somehow e.g. by using std::log2p1 from C++20. - uint32_t level = 0; - VkDeviceSize currLevelNodeSize = m_UsableSize; - VkDeviceSize nextLevelNodeSize = currLevelNodeSize >> 1; - while(allocSize <= nextLevelNodeSize && level + 1 < m_LevelCount) - { - ++level; - currLevelNodeSize = nextLevelNodeSize; - nextLevelNodeSize = currLevelNodeSize >> 1; - } - return level; -} - -void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offset) -{ - // Find node and level. 
- Node* node = m_Root; - VkDeviceSize nodeOffset = 0; - uint32_t level = 0; - VkDeviceSize levelNodeSize = LevelToNodeSize(0); - while(node->type == Node::TYPE_SPLIT) - { - const VkDeviceSize nextLevelSize = levelNodeSize >> 1; - if(offset < nodeOffset + nextLevelSize) - { - node = node->split.leftChild; - } - else - { - node = node->split.leftChild->buddy; - nodeOffset += nextLevelSize; - } - ++level; - levelNodeSize = nextLevelSize; - } - - VMA_ASSERT(node != VMA_NULL && node->type == Node::TYPE_ALLOCATION); - VMA_ASSERT(alloc == VK_NULL_HANDLE || node->allocation.alloc == alloc); - - ++m_FreeCount; - --m_AllocationCount; - m_SumFreeSize += alloc->GetSize(); - - node->type = Node::TYPE_FREE; - - // Join free nodes if possible. - while(level > 0 && node->buddy->type == Node::TYPE_FREE) - { - RemoveFromFreeList(level, node->buddy); - Node* const parent = node->parent; - - vma_delete(GetAllocationCallbacks(), node->buddy); - vma_delete(GetAllocationCallbacks(), node); - parent->type = Node::TYPE_FREE; - - node = parent; - --level; - //m_SumFreeSize += LevelToNodeSize(level) % 2; // Useful only when level node sizes can be non power of 2. 
- --m_FreeCount; - } - - AddToFreeListFront(level, node); -} - -void VmaBlockMetadata_Buddy::CalcAllocationStatInfoNode(VmaStatInfo& outInfo, const Node* node, VkDeviceSize levelNodeSize) const -{ - switch(node->type) - { - case Node::TYPE_FREE: - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += levelNodeSize; - outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, levelNodeSize); - outInfo.unusedRangeSizeMin = VMA_MAX(outInfo.unusedRangeSizeMin, levelNodeSize); - break; - case Node::TYPE_ALLOCATION: - { - const VkDeviceSize allocSize = node->allocation.alloc->GetSize(); - ++outInfo.allocationCount; - outInfo.usedBytes += allocSize; - outInfo.allocationSizeMax = VMA_MAX(outInfo.allocationSizeMax, allocSize); - outInfo.allocationSizeMin = VMA_MAX(outInfo.allocationSizeMin, allocSize); - - const VkDeviceSize unusedRangeSize = levelNodeSize - allocSize; - if(unusedRangeSize > 0) - { - ++outInfo.unusedRangeCount; - outInfo.unusedBytes += unusedRangeSize; - outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, unusedRangeSize); - outInfo.unusedRangeSizeMin = VMA_MAX(outInfo.unusedRangeSizeMin, unusedRangeSize); - } - } - break; - case Node::TYPE_SPLIT: - { - const VkDeviceSize childrenNodeSize = levelNodeSize / 2; - const Node* const leftChild = node->split.leftChild; - CalcAllocationStatInfoNode(outInfo, leftChild, childrenNodeSize); - const Node* const rightChild = leftChild->buddy; - CalcAllocationStatInfoNode(outInfo, rightChild, childrenNodeSize); - } - break; - default: - VMA_ASSERT(0); - } -} - -void VmaBlockMetadata_Buddy::AddToFreeListFront(uint32_t level, Node* node) -{ - VMA_ASSERT(node->type == Node::TYPE_FREE); - - // List is empty. 
- Node* const frontNode = m_FreeList[level].front; - if(frontNode == VMA_NULL) - { - VMA_ASSERT(m_FreeList[level].back == VMA_NULL); - node->free.prev = node->free.next = VMA_NULL; - m_FreeList[level].front = m_FreeList[level].back = node; - } - else - { - VMA_ASSERT(frontNode->free.prev == VMA_NULL); - node->free.prev = VMA_NULL; - node->free.next = frontNode; - frontNode->free.prev = node; - m_FreeList[level].front = node; - } -} - -void VmaBlockMetadata_Buddy::RemoveFromFreeList(uint32_t level, Node* node) -{ - VMA_ASSERT(m_FreeList[level].front != VMA_NULL); - - // It is at the front. - if(node->free.prev == VMA_NULL) - { - VMA_ASSERT(m_FreeList[level].front == node); - m_FreeList[level].front = node->free.next; - } - else - { - Node* const prevFreeNode = node->free.prev; - VMA_ASSERT(prevFreeNode->free.next == node); - prevFreeNode->free.next = node->free.next; - } - - // It is at the back. - if(node->free.next == VMA_NULL) - { - VMA_ASSERT(m_FreeList[level].back == node); - m_FreeList[level].back = node->free.prev; - } - else - { - Node* const nextFreeNode = node->free.next; - VMA_ASSERT(nextFreeNode->free.prev == node); - nextFreeNode->free.prev = node->free.prev; - } -} - -#if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Buddy::PrintDetailedMapNode(class VmaJsonWriter& json, const Node* node, VkDeviceSize levelNodeSize) const -{ - switch(node->type) - { - case Node::TYPE_FREE: - PrintDetailedMap_UnusedRange(json, node->offset, levelNodeSize); - break; - case Node::TYPE_ALLOCATION: - { - PrintDetailedMap_Allocation(json, node->offset, node->allocation.alloc); - const VkDeviceSize allocSize = node->allocation.alloc->GetSize(); - if(allocSize < levelNodeSize) - { - PrintDetailedMap_UnusedRange(json, node->offset + allocSize, levelNodeSize - allocSize); - } - } - break; - case Node::TYPE_SPLIT: - { - const VkDeviceSize childrenNodeSize = levelNodeSize / 2; - const Node* const leftChild = node->split.leftChild; - PrintDetailedMapNode(json, leftChild, 
childrenNodeSize); - const Node* const rightChild = leftChild->buddy; - PrintDetailedMapNode(json, rightChild, childrenNodeSize); - } - break; - default: - VMA_ASSERT(0); - } -} -#endif // #if VMA_STATS_STRING_ENABLED - - -//////////////////////////////////////////////////////////////////////////////// -// class VmaDeviceMemoryBlock - -VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) : - m_pMetadata(VMA_NULL), - m_MemoryTypeIndex(UINT32_MAX), - m_Id(0), - m_hMemory(VK_NULL_HANDLE), - m_MapCount(0), - m_pMappedData(VMA_NULL) -{ -} - -void VmaDeviceMemoryBlock::Init( - VmaAllocator hAllocator, - VmaPool hParentPool, - uint32_t newMemoryTypeIndex, - VkDeviceMemory newMemory, - VkDeviceSize newSize, - uint32_t id, - uint32_t algorithm) -{ - VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); - - m_hParentPool = hParentPool; - m_MemoryTypeIndex = newMemoryTypeIndex; - m_Id = id; - m_hMemory = newMemory; - - switch(algorithm) - { - case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator); - break; - case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Buddy)(hAllocator); - break; - default: - VMA_ASSERT(0); - // Fall-through. - case 0: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Generic)(hAllocator); - } - m_pMetadata->Init(newSize); -} - -void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) -{ - // This is the most important assert in the entire library. - // Hitting it means you have some memory leak - unreleased VmaAllocation objects. 
- VMA_ASSERT(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); - - VMA_ASSERT(m_hMemory != VK_NULL_HANDLE); - allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); - m_hMemory = VK_NULL_HANDLE; - - vma_delete(allocator, m_pMetadata); - m_pMetadata = VMA_NULL; -} - -bool VmaDeviceMemoryBlock::Validate() const -{ - VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && - (m_pMetadata->GetSize() != 0)); - - return m_pMetadata->Validate(); -} - -VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) -{ - void* pData = nullptr; - VkResult res = Map(hAllocator, 1, &pData); - if(res != VK_SUCCESS) - { - return res; - } - - res = m_pMetadata->CheckCorruption(pData); - - Unmap(hAllocator, 1); - - return res; -} - -VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData) -{ - if(count == 0) - { - return VK_SUCCESS; - } - - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - if(m_MapCount != 0) - { - m_MapCount += count; - VMA_ASSERT(m_pMappedData != VMA_NULL); - if(ppData != VMA_NULL) - { - *ppData = m_pMappedData; - } - return VK_SUCCESS; - } - else - { - VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( - hAllocator->m_hDevice, - m_hMemory, - 0, // offset - VK_WHOLE_SIZE, - 0, // flags - &m_pMappedData); - if(result == VK_SUCCESS) - { - if(ppData != VMA_NULL) - { - *ppData = m_pMappedData; - } - m_MapCount = count; - } - return result; - } -} - -void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) -{ - if(count == 0) - { - return; - } - - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - if(m_MapCount >= count) - { - m_MapCount -= count; - if(m_MapCount == 0) - { - m_pMappedData = VMA_NULL; - (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); - } - } - else - { - VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); - } -} - -VkResult 
VmaDeviceMemoryBlock::WriteMagicValueAroundAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) -{ - VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); - VMA_ASSERT(allocOffset >= VMA_DEBUG_MARGIN); - - void* pData; - VkResult res = Map(hAllocator, 1, &pData); - if(res != VK_SUCCESS) - { - return res; - } - - VmaWriteMagicValue(pData, allocOffset - VMA_DEBUG_MARGIN); - VmaWriteMagicValue(pData, allocOffset + allocSize); - - Unmap(hAllocator, 1); - - return VK_SUCCESS; -} - -VkResult VmaDeviceMemoryBlock::ValidateMagicValueAroundAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) -{ - VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); - VMA_ASSERT(allocOffset >= VMA_DEBUG_MARGIN); - - void* pData; - VkResult res = Map(hAllocator, 1, &pData); - if(res != VK_SUCCESS) - { - return res; - } - - if(!VmaValidateMagicValue(pData, allocOffset - VMA_DEBUG_MARGIN)) + if (m_pUserData != VMA_NULL) { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED BEFORE FREED ALLOCATION!"); + json.WriteString("CustomData"); + json.BeginString(); + json.ContinueString_Pointer(m_pUserData); + json.EndString(); } - else if(!VmaValidateMagicValue(pData, allocOffset + allocSize)) + if (m_pName != VMA_NULL) { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!"); + json.WriteString("Name"); + json.WriteString(m_pName); } - - Unmap(hAllocator, 1); - - return VK_SUCCESS; -} - -VkResult VmaDeviceMemoryBlock::BindBufferMemory( - const VmaAllocator hAllocator, - const VmaAllocation hAllocation, - VkDeviceSize allocationLocalOffset, - VkBuffer hBuffer, - const void* pNext) -{ - VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && - hAllocation->GetBlock() == this); - VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && - "Invalid allocationLocalOffset. 
Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); - const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; - // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); -} - -VkResult VmaDeviceMemoryBlock::BindImageMemory( - const VmaAllocator hAllocator, - const VmaAllocation hAllocation, - VkDeviceSize allocationLocalOffset, - VkImage hImage, - const void* pNext) -{ - VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && - hAllocation->GetBlock() == this); - VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && - "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); - const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; - // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); -} - -static void InitStatInfo(VmaStatInfo& outInfo) -{ - memset(&outInfo, 0, sizeof(outInfo)); - outInfo.allocationSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMin = UINT64_MAX; -} - -// Adds statistics srcInfo into inoutInfo, like: inoutInfo += srcInfo. 
-static void VmaAddStatInfo(VmaStatInfo& inoutInfo, const VmaStatInfo& srcInfo) -{ - inoutInfo.blockCount += srcInfo.blockCount; - inoutInfo.allocationCount += srcInfo.allocationCount; - inoutInfo.unusedRangeCount += srcInfo.unusedRangeCount; - inoutInfo.usedBytes += srcInfo.usedBytes; - inoutInfo.unusedBytes += srcInfo.unusedBytes; - inoutInfo.allocationSizeMin = VMA_MIN(inoutInfo.allocationSizeMin, srcInfo.allocationSizeMin); - inoutInfo.allocationSizeMax = VMA_MAX(inoutInfo.allocationSizeMax, srcInfo.allocationSizeMax); - inoutInfo.unusedRangeSizeMin = VMA_MIN(inoutInfo.unusedRangeSizeMin, srcInfo.unusedRangeSizeMin); - inoutInfo.unusedRangeSizeMax = VMA_MAX(inoutInfo.unusedRangeSizeMax, srcInfo.unusedRangeSizeMax); -} - -static void VmaPostprocessCalcStatInfo(VmaStatInfo& inoutInfo) -{ - inoutInfo.allocationSizeAvg = (inoutInfo.allocationCount > 0) ? - VmaRoundDiv(inoutInfo.usedBytes, inoutInfo.allocationCount) : 0; - inoutInfo.unusedRangeSizeAvg = (inoutInfo.unusedRangeCount > 0) ? - VmaRoundDiv(inoutInfo.unusedBytes, inoutInfo.unusedRangeCount) : 0; -} - -VmaPool_T::VmaPool_T( - VmaAllocator hAllocator, - const VmaPoolCreateInfo& createInfo, - VkDeviceSize preferredBlockSize) : - m_BlockVector( - hAllocator, - this, // hParentPool - createInfo.memoryTypeIndex, - createInfo.blockSize != 0 ? createInfo.blockSize : preferredBlockSize, - createInfo.minBlockCount, - createInfo.maxBlockCount, - (createInfo.flags & VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 
1 : hAllocator->GetBufferImageGranularity(), - createInfo.frameInUseCount, - createInfo.blockSize != 0, // explicitBlockSize - createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, - createInfo.priority), // algorithm - m_Id(0), - m_Name(VMA_NULL) -{ } +#endif // VMA_STATS_STRING_ENABLED -VmaPool_T::~VmaPool_T() +void VmaAllocation_T::FreeName(VmaAllocator hAllocator) { -} - -void VmaPool_T::SetName(const char* pName) -{ - const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); - VmaFreeString(allocs, m_Name); - - if(pName != VMA_NULL) + if(m_pName) { - m_Name = VmaCreateStringCopy(allocs, pName); - } - else - { - m_Name = VMA_NULL; + VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName); + m_pName = VMA_NULL; } } +#endif // _VMA_ALLOCATION_T_FUNCTIONS -#if VMA_STATS_STRING_ENABLED - -#endif // #if VMA_STATS_STRING_ENABLED - +#ifndef _VMA_BLOCK_VECTOR_FUNCTIONS VmaBlockVector::VmaBlockVector( VmaAllocator hAllocator, VmaPool hParentPool, @@ -12643,30 +11002,29 @@ VmaBlockVector::VmaBlockVector( size_t minBlockCount, size_t maxBlockCount, VkDeviceSize bufferImageGranularity, - uint32_t frameInUseCount, bool explicitBlockSize, uint32_t algorithm, - float priority) : - m_hAllocator(hAllocator), + float priority, + VkDeviceSize minAllocationAlignment, + void* pMemoryAllocateNext) + : m_hAllocator(hAllocator), m_hParentPool(hParentPool), m_MemoryTypeIndex(memoryTypeIndex), m_PreferredBlockSize(preferredBlockSize), m_MinBlockCount(minBlockCount), m_MaxBlockCount(maxBlockCount), m_BufferImageGranularity(bufferImageGranularity), - m_FrameInUseCount(frameInUseCount), m_ExplicitBlockSize(explicitBlockSize), m_Algorithm(algorithm), m_Priority(priority), - m_HasEmptyBlock(false), + m_MinAllocationAlignment(minAllocationAlignment), + m_pMemoryAllocateNext(pMemoryAllocateNext), m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_NextBlockId(0) -{ -} + m_NextBlockId(0) {} VmaBlockVector::~VmaBlockVector() { - for(size_t i 
= m_Blocks.size(); i--; ) + for (size_t i = m_Blocks.size(); i--; ) { m_Blocks[i]->Destroy(m_hAllocator); vma_delete(m_hAllocator, m_Blocks[i]); @@ -12675,10 +11033,10 @@ VmaBlockVector::~VmaBlockVector() VkResult VmaBlockVector::CreateMinBlocks() { - for(size_t i = 0; i < m_MinBlockCount; ++i) + for (size_t i = 0; i < m_MinBlockCount; ++i) { VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL); - if(res != VK_SUCCESS) + if (res != VK_SUCCESS) { return res; } @@ -12686,25 +11044,31 @@ VkResult VmaBlockVector::CreateMinBlocks() return VK_SUCCESS; } -void VmaBlockVector::GetPoolStats(VmaPoolStats* pStats) +void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats) { VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); const size_t blockCount = m_Blocks.size(); - - pStats->size = 0; - pStats->unusedSize = 0; - pStats->allocationCount = 0; - pStats->unusedRangeCount = 0; - pStats->unusedRangeSizeMax = 0; - pStats->blockCount = blockCount; - - for(uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) { const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; VMA_ASSERT(pBlock); VMA_HEAVY_ASSERT(pBlock->Validate()); - pBlock->m_pMetadata->AddPoolStats(*pStats); + pBlock->m_pMetadata->AddStatistics(inoutStats); + } +} + +void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + const size_t blockCount = m_Blocks.size(); + for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + { + const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VMA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); } } @@ -12723,10 +11087,7 @@ bool VmaBlockVector::IsCorruptionDetectionEnabled() const (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & requiredMemFlags) == requiredMemFlags; } -static 
const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; - VkResult VmaBlockVector::Allocate( - uint32_t currentFrameIndex, VkDeviceSize size, VkDeviceSize alignment, const VmaAllocationCreateInfo& createInfo, @@ -12737,7 +11098,9 @@ VkResult VmaBlockVector::Allocate( size_t allocIndex; VkResult res = VK_SUCCESS; - if(IsCorruptionDetectionEnabled()) + alignment = VMA_MAX(alignment, m_MinAllocationAlignment); + + if (IsCorruptionDetectionEnabled()) { size = VmaAlignUp(size, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); alignment = VmaAlignUp(alignment, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); @@ -12745,29 +11108,26 @@ VkResult VmaBlockVector::Allocate( { VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) { res = AllocatePage( - currentFrameIndex, size, alignment, createInfo, suballocType, pAllocations + allocIndex); - if(res != VK_SUCCESS) + if (res != VK_SUCCESS) { break; } } } - if(res != VK_SUCCESS) + if (res != VK_SUCCESS) { // Free all already created allocations. 
- while(allocIndex--) - { + while (allocIndex--) Free(pAllocations[allocIndex]); - } memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); } @@ -12775,7 +11135,6 @@ VkResult VmaBlockVector::Allocate( } VkResult VmaBlockVector::AllocatePage( - uint32_t currentFrameIndex, VkDeviceSize size, VkDeviceSize alignment, const VmaAllocationCreateInfo& createInfo, @@ -12783,377 +11142,203 @@ VkResult VmaBlockVector::AllocatePage( VmaAllocation* pAllocation) { const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; - bool canMakeOtherLost = (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT) != 0; - const bool mapped = (createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - const bool isUserDataString = (createInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; VkDeviceSize freeMemory; { const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); VmaBudget heapBudget = {}; - m_hAllocator->GetBudget(&heapBudget, heapIndex, 1); + m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0; } - const bool canFallbackToDedicated = !IsCustomPool(); + const bool canFallbackToDedicated = !HasExplicitBlockSize() && + (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0; const bool canCreateNewBlock = ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) && (m_Blocks.size() < m_MaxBlockCount) && (freeMemory >= size || !canFallbackToDedicated); uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK; - // If linearAlgorithm is used, canMakeOtherLost is available only when used as ring buffer. - // Which in turn is available only when maxBlockCount = 1. 
- if(m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT && m_MaxBlockCount > 1) - { - canMakeOtherLost = false; - } - // Upper address can only be used with linear allocator and within single memory block. - if(isUpperAddress && + if (isUpperAddress && (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT || m_MaxBlockCount > 1)) { return VK_ERROR_FEATURE_NOT_PRESENT; } - // Validate strategy. - switch(strategy) - { - case 0: - strategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT; - break; - case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT: - case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT: - case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT: - break; - default: - return VK_ERROR_FEATURE_NOT_PRESENT; - } - // Early reject: requested allocation size is larger that maximum block size for this block vector. - if(size + 2 * VMA_DEBUG_MARGIN > m_PreferredBlockSize) + if (size + VMA_DEBUG_MARGIN > m_PreferredBlockSize) { return VK_ERROR_OUT_OF_DEVICE_MEMORY; } - /* - Under certain condition, this whole section can be skipped for optimization, so - we move on directly to trying to allocate with canMakeOtherLost. That's the case - e.g. for custom pools with linear algorithm. - */ - if(!canMakeOtherLost || canCreateNewBlock) + // 1. Search existing allocations. Try to allocate. + if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) { - // 1. Search existing allocations. Try to allocate without making other allocations lost. - VmaAllocationCreateFlags allocFlagsCopy = createInfo.flags; - allocFlagsCopy &= ~VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT; - - if(m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + // Use only last block. + if (!m_Blocks.empty()) { - // Use only last block. 
- if(!m_Blocks.empty()) + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); - VMA_ASSERT(pCurrBlock); - VkResult res = AllocateFromBlock( - pCurrBlock, - currentFrameIndex, - size, - alignment, - allocFlagsCopy, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); - if(res == VK_SUCCESS) - { - VMA_DEBUG_LOG(" Returned from last block #%u", pCurrBlock->GetId()); - return VK_SUCCESS; - } + VMA_DEBUG_LOG_FORMAT(" Returned from last block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; } } - else + } + else + { + if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default { - if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT) + const bool isHostVisible = + (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + if(isHostVisible) + { + const bool isMappingAllowed = (createInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + /* + For non-mappable allocations, check blocks that are not mapped first. + For mappable allocations, check blocks that are already mapped first. + This way, having many blocks, we will separate mappable and non-mappable allocations, + hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. + */ + for(size_t mappingI = 0; mappingI < 2; ++mappingI) + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. 
+ for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; + if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) + { + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + } + else { // Forward order in m_Blocks - prefer blocks with smallest amount of free space. - for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex ) + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) { VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; VMA_ASSERT(pCurrBlock); VkResult res = AllocateFromBlock( - pCurrBlock, - currentFrameIndex, - size, - alignment, - allocFlagsCopy, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); - if(res == VK_SUCCESS) + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); - return VK_SUCCESS; - } - } - } - else // WORST_FIT, FIRST_FIT - { - // Backward order in m_Blocks - prefer blocks with largest amount of free space. 
- for(size_t blockIndex = m_Blocks.size(); blockIndex--; ) - { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pCurrBlock); - VkResult res = AllocateFromBlock( - pCurrBlock, - currentFrameIndex, - size, - alignment, - allocFlagsCopy, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); - if(res == VK_SUCCESS) - { - VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); return VK_SUCCESS; } } } } - - // 2. Try to create new block. - if(canCreateNewBlock) + else // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT { - // Calculate optimal size for new block. - VkDeviceSize newBlockSize = m_PreferredBlockSize; - uint32_t newBlockSizeShift = 0; - const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3; - - if(!m_ExplicitBlockSize) + // Backward order in m_Blocks - prefer blocks with largest amount of free space. + for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) { - // Allocate 1/8, 1/4, 1/2 as first blocks. - const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize(); - for(uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) { - const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; - if(smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) - { - newBlockSize = smallerNewBlockSize; - ++newBlockSizeShift; - } - else - { - break; - } - } - } - - size_t newBlockIndex = 0; - VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? - CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; - // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. 
- if(!m_ExplicitBlockSize) - { - while(res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) - { - const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; - if(smallerNewBlockSize >= size) - { - newBlockSize = smallerNewBlockSize; - ++newBlockSizeShift; - res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? - CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - else - { - break; - } - } - } - - if(res == VK_SUCCESS) - { - VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; - VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); - - res = AllocateFromBlock( - pBlock, - currentFrameIndex, - size, - alignment, - allocFlagsCopy, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); - if(res == VK_SUCCESS) - { - VMA_DEBUG_LOG(" Created new block #%u Size=%llu", pBlock->GetId(), newBlockSize); + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); return VK_SUCCESS; } - else - { - // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment. - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } } } } - // 3. Try to allocate from existing blocks with making other allocations lost. - if(canMakeOtherLost) + // 2. Try to create new block. + if (canCreateNewBlock) { - uint32_t tryIndex = 0; - for(; tryIndex < VMA_ALLOCATION_TRY_COUNT; ++tryIndex) + // Calculate optimal size for new block. + VkDeviceSize newBlockSize = m_PreferredBlockSize; + uint32_t newBlockSizeShift = 0; + const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3; + + if (!m_ExplicitBlockSize) { - VmaDeviceMemoryBlock* pBestRequestBlock = VMA_NULL; - VmaAllocationRequest bestRequest = {}; - VkDeviceSize bestRequestCost = VK_WHOLE_SIZE; - - // 1. Search existing allocations. - if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT) + // Allocate 1/8, 1/4, 1/2 as first blocks. 
+ const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize(); + for (uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) { - // Forward order in m_Blocks - prefer blocks with smallest amount of free space. - for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex ) + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pCurrBlock); - VmaAllocationRequest currRequest = {}; - if(pCurrBlock->m_pMetadata->CreateAllocationRequest( - currentFrameIndex, - m_FrameInUseCount, - m_BufferImageGranularity, - size, - alignment, - (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, - suballocType, - canMakeOtherLost, - strategy, - &currRequest)) - { - const VkDeviceSize currRequestCost = currRequest.CalcCost(); - if(pBestRequestBlock == VMA_NULL || - currRequestCost < bestRequestCost) - { - pBestRequestBlock = pCurrBlock; - bestRequest = currRequest; - bestRequestCost = currRequestCost; - - if(bestRequestCost == 0) - { - break; - } - } - } + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + } + else + { + break; } } - else // WORST_FIT, FIRST_FIT - { - // Backward order in m_Blocks - prefer blocks with largest amount of free space. 
- for(size_t blockIndex = m_Blocks.size(); blockIndex--; ) - { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pCurrBlock); - VmaAllocationRequest currRequest = {}; - if(pCurrBlock->m_pMetadata->CreateAllocationRequest( - currentFrameIndex, - m_FrameInUseCount, - m_BufferImageGranularity, - size, - alignment, - (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, - suballocType, - canMakeOtherLost, - strategy, - &currRequest)) - { - const VkDeviceSize currRequestCost = currRequest.CalcCost(); - if(pBestRequestBlock == VMA_NULL || - currRequestCost < bestRequestCost || - strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT) - { - pBestRequestBlock = pCurrBlock; - bestRequest = currRequest; - bestRequestCost = currRequestCost; + } - if(bestRequestCost == 0 || - strategy == VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT) - { - break; - } - } - } + size_t newBlockIndex = 0; + VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. + if (!m_ExplicitBlockSize) + { + while (res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) + { + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize >= size) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? 
+ CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + else + { + break; } } + } - if(pBestRequestBlock != VMA_NULL) + if (res == VK_SUCCESS) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; + VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + + res = AllocateFromBlock( + pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) { - if(mapped) - { - VkResult res = pBestRequestBlock->Map(m_hAllocator, 1, VMA_NULL); - if(res != VK_SUCCESS) - { - return res; - } - } - - if(pBestRequestBlock->m_pMetadata->MakeRequestedAllocationsLost( - currentFrameIndex, - m_FrameInUseCount, - &bestRequest)) - { - // Allocate from this pBlock. - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(currentFrameIndex, isUserDataString); - pBestRequestBlock->m_pMetadata->Alloc(bestRequest, suballocType, size, *pAllocation); - UpdateHasEmptyBlock(); - (*pAllocation)->InitBlockAllocation( - pBestRequestBlock, - bestRequest.offset, - alignment, - size, - m_MemoryTypeIndex, - suballocType, - mapped, - (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); - VMA_HEAVY_ASSERT(pBestRequestBlock->Validate()); - VMA_DEBUG_LOG(" Returned from existing block"); - (*pAllocation)->SetUserData(m_hAllocator, createInfo.pUserData); - m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), size); - if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) - { - m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); - } - if(IsCorruptionDetectionEnabled()) - { - VkResult res = pBestRequestBlock->WriteMagicValueAroundAllocation(m_hAllocator, bestRequest.offset, size); - VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); - } - return VK_SUCCESS; - } - // else: Some allocations must have been touched while we are here. Next try. 
+ VMA_DEBUG_LOG_FORMAT(" Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize); + IncrementallySortBlocks(); + return VK_SUCCESS; } else { - // Could not find place in any of the blocks - break outer loop. - break; + // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment. + return VK_ERROR_OUT_OF_DEVICE_MEMORY; } } - /* Maximum number of tries exceeded - a very unlike event when many other - threads are simultaneously touching allocations making it impossible to make - lost at the same time as we try to allocate. */ - if(tryIndex == VMA_ALLOCATION_TRY_COUNT) - { - return VK_ERROR_TOO_MANY_OBJECTS; - } } return VK_ERROR_OUT_OF_DEVICE_MEMORY; } -void VmaBlockVector::Free( - const VmaAllocation hAllocation) +void VmaBlockVector::Free(const VmaAllocation hAllocation) { VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; @@ -13161,7 +11346,7 @@ void VmaBlockVector::Free( { const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); VmaBudget heapBudget = {}; - m_hAllocator->GetBudget(&heapBudget, heapIndex, 1); + m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); budgetExceeded = heapBudget.usage >= heapBudget.budget; } @@ -13171,67 +11356,71 @@ void VmaBlockVector::Free( VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); - if(IsCorruptionDetectionEnabled()) + if (IsCorruptionDetectionEnabled()) { - VkResult res = pBlock->ValidateMagicValueAroundAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize()); + VkResult res = pBlock->ValidateMagicValueAfterAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize()); VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to validate magic value."); } - if(hAllocation->IsPersistentMap()) + if (hAllocation->IsPersistentMap()) { pBlock->Unmap(m_hAllocator, 1); } - pBlock->m_pMetadata->Free(hAllocation); + const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); + 
pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + pBlock->PostFree(m_hAllocator); VMA_HEAVY_ASSERT(pBlock->Validate()); - VMA_DEBUG_LOG(" Freed from MemoryTypeIndex=%u", m_MemoryTypeIndex); + VMA_DEBUG_LOG_FORMAT(" Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex); const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; // pBlock became empty after this deallocation. - if(pBlock->m_pMetadata->IsEmpty()) + if (pBlock->m_pMetadata->IsEmpty()) { - // Already has empty block. We don't want to have two, so delete this one. - if((m_HasEmptyBlock || budgetExceeded) && canDeleteBlock) + // Already had empty block. We don't want to have two, so delete this one. + if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) { pBlockToDelete = pBlock; Remove(pBlock); } - // else: We now have an empty block - leave it. + // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. } // pBlock didn't become empty, but we have another empty block - find and free that one. // (This is optional, heuristics.) - else if(m_HasEmptyBlock && canDeleteBlock) + else if (hadEmptyBlockBeforeFree && canDeleteBlock) { VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); - if(pLastBlock->m_pMetadata->IsEmpty()) + if (pLastBlock->m_pMetadata->IsEmpty()) { pBlockToDelete = pLastBlock; m_Blocks.pop_back(); } } - UpdateHasEmptyBlock(); IncrementallySortBlocks(); } // Destruction of a free block. Deferred until this point, outside of mutex // lock, for performance reason. 
- if(pBlockToDelete != VMA_NULL) + if (pBlockToDelete != VMA_NULL) { - VMA_DEBUG_LOG(" Deleted empty block"); + VMA_DEBUG_LOG_FORMAT(" Deleted empty block #%" PRIu32, pBlockToDelete->GetId()); pBlockToDelete->Destroy(m_hAllocator); vma_delete(m_hAllocator, pBlockToDelete); } + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); } VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const { VkDeviceSize result = 0; - for(size_t i = m_Blocks.size(); i--; ) + for (size_t i = m_Blocks.size(); i--; ) { result = VMA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); - if(result >= m_PreferredBlockSize) + if (result >= m_PreferredBlockSize) { break; } @@ -13241,9 +11430,9 @@ VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) { - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) { - if(m_Blocks[blockIndex] == pBlock) + if (m_Blocks[blockIndex] == pBlock) { VmaVectorRemove(m_Blocks, blockIndex); return; @@ -13254,23 +11443,33 @@ void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) void VmaBlockVector::IncrementallySortBlocks() { - if(m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + if (!m_IncrementalSort) + return; + if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) { // Bubble sort only until first swap. 
- for(size_t i = 1; i < m_Blocks.size(); ++i) + for (size_t i = 1; i < m_Blocks.size(); ++i) { - if(m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) + if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) { - VMA_SWAP(m_Blocks[i - 1], m_Blocks[i]); + std::swap(m_Blocks[i - 1], m_Blocks[i]); return; } } } } +void VmaBlockVector::SortByFreeSize() +{ + VMA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + VkResult VmaBlockVector::AllocateFromBlock( VmaDeviceMemoryBlock* pBlock, - uint32_t currentFrameIndex, VkDeviceSize size, VkDeviceSize alignment, VmaAllocationCreateFlags allocFlags, @@ -13279,93 +11478,115 @@ VkResult VmaBlockVector::AllocateFromBlock( uint32_t strategy, VmaAllocation* pAllocation) { - VMA_ASSERT((allocFlags & VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT) == 0); const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; - const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; VmaAllocationRequest currRequest = {}; - if(pBlock->m_pMetadata->CreateAllocationRequest( - currentFrameIndex, - m_FrameInUseCount, - m_BufferImageGranularity, + if (pBlock->m_pMetadata->CreateAllocationRequest( size, alignment, isUpperAddress, suballocType, - false, // canMakeOtherLost strategy, &currRequest)) { - // Allocate from pCurrBlock. 
- VMA_ASSERT(currRequest.itemsToMakeLostCount == 0); - - if(mapped) - { - VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); - if(res != VK_SUCCESS) - { - return res; - } - } - - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(currentFrameIndex, isUserDataString); - pBlock->m_pMetadata->Alloc(currRequest, suballocType, size, *pAllocation); - UpdateHasEmptyBlock(); - (*pAllocation)->InitBlockAllocation( - pBlock, - currRequest.offset, - alignment, - size, - m_MemoryTypeIndex, - suballocType, - mapped, - (allocFlags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); - VMA_HEAVY_ASSERT(pBlock->Validate()); - (*pAllocation)->SetUserData(m_hAllocator, pUserData); - m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), size); - if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) - { - m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); - } - if(IsCorruptionDetectionEnabled()) - { - VkResult res = pBlock->WriteMagicValueAroundAllocation(m_hAllocator, currRequest.offset, size); - VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); - } - return VK_SUCCESS; + return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); } return VK_ERROR_OUT_OF_DEVICE_MEMORY; } +VkResult VmaBlockVector::CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation) +{ + const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; + const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + const bool isMappingAllowed = (allocFlags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + + pBlock->PostAlloc(m_hAllocator); + // Allocate 
from pCurrBlock. + if (mapped) + { + VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); + if (res != VK_SUCCESS) + { + return res; + } + } + + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed); + pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); + (*pAllocation)->InitBlockAllocation( + pBlock, + allocRequest.allocHandle, + alignment, + allocRequest.size, // Not size, as actual allocation size may be larger than requested! + m_MemoryTypeIndex, + suballocType, + mapped); + VMA_HEAVY_ASSERT(pBlock->Validate()); + if (isUserDataString) + (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData); + else + (*pAllocation)->SetUserData(m_hAllocator, pUserData); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); + if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); + } + return VK_SUCCESS; +} + VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) { VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + allocInfo.pNext = m_pMemoryAllocateNext; allocInfo.memoryTypeIndex = m_MemoryTypeIndex; allocInfo.allocationSize = blockSize; #if VMA_BUFFER_DEVICE_ADDRESS // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature. 
VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; - if(m_hAllocator->m_UseKhrBufferDeviceAddress) + if (m_hAllocator->m_UseKhrBufferDeviceAddress) { allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); } -#endif // #if VMA_BUFFER_DEVICE_ADDRESS +#endif // VMA_BUFFER_DEVICE_ADDRESS #if VMA_MEMORY_PRIORITY VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; - if(m_hAllocator->m_UseExtMemoryPriority) + if (m_hAllocator->m_UseExtMemoryPriority) { + VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f); priorityInfo.priority = m_Priority; VmaPnextChainPushFront(&allocInfo, &priorityInfo); } -#endif // #if VMA_MEMORY_PRIORITY +#endif // VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY + // Attach VkExportMemoryAllocateInfoKHR if necessary. + VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; + exportMemoryAllocInfo.handleTypes = m_hAllocator->GetExternalMemoryHandleTypeFlags(m_MemoryTypeIndex); + if (exportMemoryAllocInfo.handleTypes != 0) + { + VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); + } +#endif // VMA_EXTERNAL_MEMORY VkDeviceMemory mem = VK_NULL_HANDLE; VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem); - if(res < 0) + if (res < 0) { return res; } @@ -13381,10 +11602,11 @@ VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIn mem, allocInfo.allocationSize, m_NextBlockId++, - m_Algorithm); + m_Algorithm, + m_BufferImageGranularity); m_Blocks.push_back(pBlock); - if(pNewBlockIndex != VMA_NULL) + if (pNewBlockIndex != VMA_NULL) { *pNewBlockIndex = m_Blocks.size() - 1; } @@ -13392,532 +11614,12 @@ VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIn return VK_SUCCESS; } -void VmaBlockVector::ApplyDefragmentationMovesCpu( - class 
VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves) +bool VmaBlockVector::HasEmptyBlock() { - const size_t blockCount = m_Blocks.size(); - const bool isNonCoherent = m_hAllocator->IsMemoryTypeNonCoherent(m_MemoryTypeIndex); - - enum BLOCK_FLAG - { - BLOCK_FLAG_USED = 0x00000001, - BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION = 0x00000002, - }; - - struct BlockInfo - { - uint32_t flags; - void* pMappedData; - }; - VmaVector< BlockInfo, VmaStlAllocator > - blockInfo(blockCount, BlockInfo(), VmaStlAllocator(m_hAllocator->GetAllocationCallbacks())); - memset(blockInfo.data(), 0, blockCount * sizeof(BlockInfo)); - - // Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED. - const size_t moveCount = moves.size(); - for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - blockInfo[move.srcBlockIndex].flags |= BLOCK_FLAG_USED; - blockInfo[move.dstBlockIndex].flags |= BLOCK_FLAG_USED; - } - - VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); - - // Go over all blocks. Get mapped pointer or map if necessary. - for(size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) - { - BlockInfo& currBlockInfo = blockInfo[blockIndex]; - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if((currBlockInfo.flags & BLOCK_FLAG_USED) != 0) - { - currBlockInfo.pMappedData = pBlock->GetMappedData(); - // It is not originally mapped - map it. - if(currBlockInfo.pMappedData == VMA_NULL) - { - pDefragCtx->res = pBlock->Map(m_hAllocator, 1, &currBlockInfo.pMappedData); - if(pDefragCtx->res == VK_SUCCESS) - { - currBlockInfo.flags |= BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION; - } - } - } - } - - // Go over all moves. Do actual data transfer. 
- if(pDefragCtx->res == VK_SUCCESS) - { - const VkDeviceSize nonCoherentAtomSize = m_hAllocator->m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; - VkMappedMemoryRange memRange = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE }; - - for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - const BlockInfo& srcBlockInfo = blockInfo[move.srcBlockIndex]; - const BlockInfo& dstBlockInfo = blockInfo[move.dstBlockIndex]; - - VMA_ASSERT(srcBlockInfo.pMappedData && dstBlockInfo.pMappedData); - - // Invalidate source. - if(isNonCoherent) - { - VmaDeviceMemoryBlock* const pSrcBlock = m_Blocks[move.srcBlockIndex]; - memRange.memory = pSrcBlock->GetDeviceMemory(); - memRange.offset = VmaAlignDown(move.srcOffset, nonCoherentAtomSize); - memRange.size = VMA_MIN( - VmaAlignUp(move.size + (move.srcOffset - memRange.offset), nonCoherentAtomSize), - pSrcBlock->m_pMetadata->GetSize() - memRange.offset); - (*m_hAllocator->GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hAllocator->m_hDevice, 1, &memRange); - } - - // THE PLACE WHERE ACTUAL DATA COPY HAPPENS. - memmove( - reinterpret_cast(dstBlockInfo.pMappedData) + move.dstOffset, - reinterpret_cast(srcBlockInfo.pMappedData) + move.srcOffset, - static_cast(move.size)); - - if(IsCorruptionDetectionEnabled()) - { - VmaWriteMagicValue(dstBlockInfo.pMappedData, move.dstOffset - VMA_DEBUG_MARGIN); - VmaWriteMagicValue(dstBlockInfo.pMappedData, move.dstOffset + move.size); - } - - // Flush destination. 
- if(isNonCoherent) - { - VmaDeviceMemoryBlock* const pDstBlock = m_Blocks[move.dstBlockIndex]; - memRange.memory = pDstBlock->GetDeviceMemory(); - memRange.offset = VmaAlignDown(move.dstOffset, nonCoherentAtomSize); - memRange.size = VMA_MIN( - VmaAlignUp(move.size + (move.dstOffset - memRange.offset), nonCoherentAtomSize), - pDstBlock->m_pMetadata->GetSize() - memRange.offset); - (*m_hAllocator->GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hAllocator->m_hDevice, 1, &memRange); - } - } - } - - // Go over all blocks in reverse order. Unmap those that were mapped just for defragmentation. - // Regardless of pCtx->res == VK_SUCCESS. - for(size_t blockIndex = blockCount; blockIndex--; ) - { - const BlockInfo& currBlockInfo = blockInfo[blockIndex]; - if((currBlockInfo.flags & BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION) != 0) - { - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - pBlock->Unmap(m_hAllocator, 1); - } - } -} - -void VmaBlockVector::ApplyDefragmentationMovesGpu( - class VmaBlockVectorDefragmentationContext* pDefragCtx, - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkCommandBuffer commandBuffer) -{ - const size_t blockCount = m_Blocks.size(); - - pDefragCtx->blockContexts.resize(blockCount); - memset(pDefragCtx->blockContexts.data(), 0, blockCount * sizeof(VmaBlockDefragmentationContext)); - - // Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED. - const size_t moveCount = moves.size(); - for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - //if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN) - { - // Old school move still require us to map the whole block - pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - } - } - - VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); - - // Go over all blocks. 
Create and bind buffer for whole block if necessary. - { - VkBufferCreateInfo bufCreateInfo; - VmaFillGpuDefragmentationBufferCreateInfo(bufCreateInfo); - - for(size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) - { - VmaBlockDefragmentationContext& currBlockCtx = pDefragCtx->blockContexts[blockIndex]; - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if((currBlockCtx.flags & VmaBlockDefragmentationContext::BLOCK_FLAG_USED) != 0) - { - bufCreateInfo.size = pBlock->m_pMetadata->GetSize(); - pDefragCtx->res = (*m_hAllocator->GetVulkanFunctions().vkCreateBuffer)( - m_hAllocator->m_hDevice, &bufCreateInfo, m_hAllocator->GetAllocationCallbacks(), &currBlockCtx.hBuffer); - if(pDefragCtx->res == VK_SUCCESS) - { - pDefragCtx->res = (*m_hAllocator->GetVulkanFunctions().vkBindBufferMemory)( - m_hAllocator->m_hDevice, currBlockCtx.hBuffer, pBlock->GetDeviceMemory(), 0); - } - } - } - } - - // Go over all moves. Post data transfer commands to command buffer. - if(pDefragCtx->res == VK_SUCCESS) - { - for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - const VmaBlockDefragmentationContext& srcBlockCtx = pDefragCtx->blockContexts[move.srcBlockIndex]; - const VmaBlockDefragmentationContext& dstBlockCtx = pDefragCtx->blockContexts[move.dstBlockIndex]; - - VMA_ASSERT(srcBlockCtx.hBuffer && dstBlockCtx.hBuffer); - - VkBufferCopy region = { - move.srcOffset, - move.dstOffset, - move.size }; - (*m_hAllocator->GetVulkanFunctions().vkCmdCopyBuffer)( - commandBuffer, srcBlockCtx.hBuffer, dstBlockCtx.hBuffer, 1, ®ion); - } - } - - // Save buffers to defrag context for later destruction. 
- if(pDefragCtx->res == VK_SUCCESS && moveCount > 0) - { - pDefragCtx->res = VK_NOT_READY; - } -} - -void VmaBlockVector::FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats) -{ - for(size_t blockIndex = m_Blocks.size(); blockIndex--; ) - { - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if(pBlock->m_pMetadata->IsEmpty()) - { - if(m_Blocks.size() > m_MinBlockCount) - { - if(pDefragmentationStats != VMA_NULL) - { - ++pDefragmentationStats->deviceMemoryBlocksFreed; - pDefragmentationStats->bytesFreed += pBlock->m_pMetadata->GetSize(); - } - - VmaVectorRemove(m_Blocks, blockIndex); - pBlock->Destroy(m_hAllocator); - vma_delete(m_hAllocator, pBlock); - } - else - { - break; - } - } - } - UpdateHasEmptyBlock(); -} - -void VmaBlockVector::UpdateHasEmptyBlock() -{ - m_HasEmptyBlock = false; - for(size_t index = 0, count = m_Blocks.size(); index < count; ++index) + for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) { VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; - if(pBlock->m_pMetadata->IsEmpty()) - { - m_HasEmptyBlock = true; - break; - } - } -} - -#if VMA_STATS_STRING_ENABLED - -void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) -{ - VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); - - json.BeginObject(); - - if(IsCustomPool()) - { - const char* poolName = m_hParentPool->GetName(); - if(poolName != VMA_NULL && poolName[0] != '\0') - { - json.WriteString("Name"); - json.WriteString(poolName); - } - - json.WriteString("MemoryTypeIndex"); - json.WriteNumber(m_MemoryTypeIndex); - - json.WriteString("BlockSize"); - json.WriteNumber(m_PreferredBlockSize); - - json.WriteString("BlockCount"); - json.BeginObject(true); - if(m_MinBlockCount > 0) - { - json.WriteString("Min"); - json.WriteNumber((uint64_t)m_MinBlockCount); - } - if(m_MaxBlockCount < SIZE_MAX) - { - json.WriteString("Max"); - json.WriteNumber((uint64_t)m_MaxBlockCount); - } - json.WriteString("Cur"); - 
json.WriteNumber((uint64_t)m_Blocks.size()); - json.EndObject(); - - if(m_FrameInUseCount > 0) - { - json.WriteString("FrameInUseCount"); - json.WriteNumber(m_FrameInUseCount); - } - - if(m_Algorithm != 0) - { - json.WriteString("Algorithm"); - json.WriteString(VmaAlgorithmToStr(m_Algorithm)); - } - } - else - { - json.WriteString("PreferredBlockSize"); - json.WriteNumber(m_PreferredBlockSize); - } - - json.WriteString("Blocks"); - json.BeginObject(); - for(size_t i = 0; i < m_Blocks.size(); ++i) - { - json.BeginString(); - json.ContinueString(m_Blocks[i]->GetId()); - json.EndString(); - - m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); - } - json.EndObject(); - - json.EndObject(); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -void VmaBlockVector::Defragment( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, - VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, - VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer) -{ - pCtx->res = VK_SUCCESS; - - const VkMemoryPropertyFlags memPropFlags = - m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags; - const bool isHostVisible = (memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; - - const bool canDefragmentOnCpu = maxCpuBytesToMove > 0 && maxCpuAllocationsToMove > 0 && - isHostVisible; - const bool canDefragmentOnGpu = maxGpuBytesToMove > 0 && maxGpuAllocationsToMove > 0 && - !IsCorruptionDetectionEnabled() && - ((1u << m_MemoryTypeIndex) & m_hAllocator->GetGpuDefragmentationMemoryTypeBits()) != 0; - - // There are options to defragment this memory type. - if(canDefragmentOnCpu || canDefragmentOnGpu) - { - bool defragmentOnGpu; - // There is only one option to defragment this memory type. - if(canDefragmentOnGpu != canDefragmentOnCpu) - { - defragmentOnGpu = canDefragmentOnGpu; - } - // Both options are available: Heuristics to choose the best one. 
- else - { - defragmentOnGpu = (memPropFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0 || - m_hAllocator->IsIntegratedGpu(); - } - - bool overlappingMoveSupported = !defragmentOnGpu; - - if(m_hAllocator->m_UseMutex) - { - if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - if(!m_Mutex.TryLockWrite()) - { - pCtx->res = VK_ERROR_INITIALIZATION_FAILED; - return; - } - } - else - { - m_Mutex.LockWrite(); - pCtx->mutexLocked = true; - } - } - - pCtx->Begin(overlappingMoveSupported, flags); - - // Defragment. - - const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove; - const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove; - pCtx->res = pCtx->GetAlgorithm()->Defragment(pCtx->defragmentationMoves, maxBytesToMove, maxAllocationsToMove, flags); - - // Accumulate statistics. - if(pStats != VMA_NULL) - { - const VkDeviceSize bytesMoved = pCtx->GetAlgorithm()->GetBytesMoved(); - const uint32_t allocationsMoved = pCtx->GetAlgorithm()->GetAllocationsMoved(); - pStats->bytesMoved += bytesMoved; - pStats->allocationsMoved += allocationsMoved; - VMA_ASSERT(bytesMoved <= maxBytesToMove); - VMA_ASSERT(allocationsMoved <= maxAllocationsToMove); - if(defragmentOnGpu) - { - maxGpuBytesToMove -= bytesMoved; - maxGpuAllocationsToMove -= allocationsMoved; - } - else - { - maxCpuBytesToMove -= bytesMoved; - maxCpuAllocationsToMove -= allocationsMoved; - } - } - - if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - if(m_hAllocator->m_UseMutex) - m_Mutex.UnlockWrite(); - - if(pCtx->res >= VK_SUCCESS && !pCtx->defragmentationMoves.empty()) - pCtx->res = VK_NOT_READY; - - return; - } - - if(pCtx->res >= VK_SUCCESS) - { - if(defragmentOnGpu) - { - ApplyDefragmentationMovesGpu(pCtx, pCtx->defragmentationMoves, commandBuffer); - } - else - { - ApplyDefragmentationMovesCpu(pCtx, pCtx->defragmentationMoves); - } - } - } -} - -void VmaBlockVector::DefragmentationEnd( - class 
VmaBlockVectorDefragmentationContext* pCtx, - uint32_t flags, - VmaDefragmentationStats* pStats) -{ - if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL && m_hAllocator->m_UseMutex) - { - VMA_ASSERT(pCtx->mutexLocked == false); - - // Incremental defragmentation doesn't hold the lock, so when we enter here we don't actually have any - // lock protecting us. Since we mutate state here, we have to take the lock out now - m_Mutex.LockWrite(); - pCtx->mutexLocked = true; - } - - // If the mutex isn't locked we didn't do any work and there is nothing to delete. - if(pCtx->mutexLocked || !m_hAllocator->m_UseMutex) - { - // Destroy buffers. - for(size_t blockIndex = pCtx->blockContexts.size(); blockIndex--;) - { - VmaBlockDefragmentationContext &blockCtx = pCtx->blockContexts[blockIndex]; - if(blockCtx.hBuffer) - { - (*m_hAllocator->GetVulkanFunctions().vkDestroyBuffer)(m_hAllocator->m_hDevice, blockCtx.hBuffer, m_hAllocator->GetAllocationCallbacks()); - } - } - - if(pCtx->res >= VK_SUCCESS) - { - FreeEmptyBlocks(pStats); - } - } - - if(pCtx->mutexLocked) - { - VMA_ASSERT(m_hAllocator->m_UseMutex); - m_Mutex.UnlockWrite(); - } -} - -uint32_t VmaBlockVector::ProcessDefragmentations( - class VmaBlockVectorDefragmentationContext *pCtx, - VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves) -{ - VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - - const uint32_t moveCount = VMA_MIN(uint32_t(pCtx->defragmentationMoves.size()) - pCtx->defragmentationMovesProcessed, maxMoves); - - for(uint32_t i = 0; i < moveCount; ++ i) - { - VmaDefragmentationMove& move = pCtx->defragmentationMoves[pCtx->defragmentationMovesProcessed + i]; - - pMove->allocation = move.hAllocation; - pMove->memory = move.pDstBlock->GetDeviceMemory(); - pMove->offset = move.dstOffset; - - ++ pMove; - } - - pCtx->defragmentationMovesProcessed += moveCount; - - return moveCount; -} - -void VmaBlockVector::CommitDefragmentations( - class VmaBlockVectorDefragmentationContext *pCtx, - 
VmaDefragmentationStats* pStats) -{ - VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - - for(uint32_t i = pCtx->defragmentationMovesCommitted; i < pCtx->defragmentationMovesProcessed; ++ i) - { - const VmaDefragmentationMove &move = pCtx->defragmentationMoves[i]; - - move.pSrcBlock->m_pMetadata->FreeAtOffset(move.srcOffset); - move.hAllocation->ChangeBlockAllocation(m_hAllocator, move.pDstBlock, move.dstOffset); - } - - pCtx->defragmentationMovesCommitted = pCtx->defragmentationMovesProcessed; - FreeEmptyBlocks(pStats); -} - -size_t VmaBlockVector::CalcAllocationCount() const -{ - size_t result = 0; - for(size_t i = 0; i < m_Blocks.size(); ++i) - { - result += m_Blocks[i]->m_pMetadata->GetAllocationCount(); - } - return result; -} - -bool VmaBlockVector::IsBufferImageGranularityConflictPossible() const -{ - if(m_BufferImageGranularity == 1) - { - return false; - } - VmaSuballocationType lastSuballocType = VMA_SUBALLOCATION_TYPE_FREE; - for(size_t i = 0, count = m_Blocks.size(); i < count; ++i) - { - VmaDeviceMemoryBlock* const pBlock = m_Blocks[i]; - VMA_ASSERT(m_Algorithm == 0); - VmaBlockMetadata_Generic* const pMetadata = (VmaBlockMetadata_Generic*)pBlock->m_pMetadata; - if(pMetadata->IsBufferImageGranularityConflictPossible(m_BufferImageGranularity, lastSuballocType)) + if (pBlock->m_pMetadata->IsEmpty()) { return true; } @@ -13925,38 +11627,44 @@ bool VmaBlockVector::IsBufferImageGranularityConflictPossible() const return false; } -void VmaBlockVector::MakePoolAllocationsLost( - uint32_t currentFrameIndex, - size_t* pLostAllocationCount) +#if VMA_STATS_STRING_ENABLED +void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) { - VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - size_t lostAllocationCount = 0; - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + + json.BeginObject(); + for (size_t i = 0; i < m_Blocks.size(); ++i) { - 
VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pBlock); - lostAllocationCount += pBlock->m_pMetadata->MakeAllocationsLost(currentFrameIndex, m_FrameInUseCount); - } - if(pLostAllocationCount != VMA_NULL) - { - *pLostAllocationCount = lostAllocationCount; + json.BeginString(); + json.ContinueString(m_Blocks[i]->GetId()); + json.EndString(); + + json.BeginObject(); + json.WriteString("MapRefCount"); + json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + json.EndObject(); } + json.EndObject(); } +#endif // VMA_STATS_STRING_ENABLED VkResult VmaBlockVector::CheckCorruption() { - if(!IsCorruptionDetectionEnabled()) + if (!IsCorruptionDetectionEnabled()) { return VK_ERROR_FEATURE_NOT_PRESENT; } VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) { VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; VMA_ASSERT(pBlock); VkResult res = pBlock->CheckCorruption(m_hAllocator); - if(res != VK_SUCCESS) + if (res != VK_SUCCESS) { return res; } @@ -13964,1755 +11672,1034 @@ VkResult VmaBlockVector::CheckCorruption() return VK_SUCCESS; } -void VmaBlockVector::AddStats(VmaStats* pStats) -{ - const uint32_t memTypeIndex = m_MemoryTypeIndex; - const uint32_t memHeapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(memTypeIndex); +#endif // _VMA_BLOCK_VECTOR_FUNCTIONS - VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); - - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pBlock); - VMA_HEAVY_ASSERT(pBlock->Validate()); - VmaStatInfo allocationStatInfo; - pBlock->m_pMetadata->CalcAllocationStatInfo(allocationStatInfo); - VmaAddStatInfo(pStats->total, allocationStatInfo); - VmaAddStatInfo(pStats->memoryType[memTypeIndex], 
allocationStatInfo); - VmaAddStatInfo(pStats->memoryHeap[memHeapIndex], allocationStatInfo); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaDefragmentationAlgorithm_Generic members definition - -VmaDefragmentationAlgorithm_Generic::VmaDefragmentationAlgorithm_Generic( +#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +VmaDefragmentationContext_T::VmaDefragmentationContext_T( VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - uint32_t currentFrameIndex, - bool overlappingMoveSupported) : - VmaDefragmentationAlgorithm(hAllocator, pBlockVector, currentFrameIndex), - m_AllocationCount(0), - m_AllAllocations(false), - m_BytesMoved(0), - m_AllocationsMoved(0), - m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) + const VmaDefragmentationInfo& info) + : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), + m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? UINT32_MAX : info.maxAllocationsPerPass), + m_BreakCallback(info.pfnBreakCallback), + m_BreakCallbackUserData(info.pBreakCallbackUserData), + m_MoveAllocator(hAllocator->GetAllocationCallbacks()), + m_Moves(m_MoveAllocator) { - // Create block info for each block. - const size_t blockCount = m_pBlockVector->m_Blocks.size(); - for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK; + + if (info.pool != VMA_NULL) { - BlockInfo* pBlockInfo = vma_new(m_hAllocator, BlockInfo)(m_hAllocator->GetAllocationCallbacks()); - pBlockInfo->m_OriginalBlockIndex = blockIndex; - pBlockInfo->m_pBlock = m_pBlockVector->m_Blocks[blockIndex]; - m_Blocks.push_back(pBlockInfo); + m_BlockVectorCount = 1; + m_PoolBlockVector = &info.pool->m_BlockVector; + m_pBlockVectors = &m_PoolBlockVector; + m_PoolBlockVector->SetIncrementalSort(false); + m_PoolBlockVector->SortByFreeSize(); } - - // Sort them by m_pBlock pointer value. 
- VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockPointerLess()); -} - -VmaDefragmentationAlgorithm_Generic::~VmaDefragmentationAlgorithm_Generic() -{ - for(size_t i = m_Blocks.size(); i--; ) + else { - vma_delete(m_hAllocator, m_Blocks[i]); - } -} - -void VmaDefragmentationAlgorithm_Generic::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) -{ - // Now as we are inside VmaBlockVector::m_Mutex, we can make final check if this allocation was not lost. - if(hAlloc->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST) - { - VmaDeviceMemoryBlock* pBlock = hAlloc->GetBlock(); - BlockInfoVector::iterator it = VmaBinaryFindFirstNotLess(m_Blocks.begin(), m_Blocks.end(), pBlock, BlockPointerLess()); - if(it != m_Blocks.end() && (*it)->m_pBlock == pBlock) + m_BlockVectorCount = hAllocator->GetMemoryTypeCount(); + m_PoolBlockVector = VMA_NULL; + m_pBlockVectors = hAllocator->m_pBlockVectors; + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) { - AllocationInfo allocInfo = AllocationInfo(hAlloc, pChanged); - (*it)->m_Allocations.push_back(allocInfo); + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + { + vector->SetIncrementalSort(false); + vector->SortByFreeSize(); + } + } + } + + switch (m_Algorithm) + { + case 0: // Default algorithm + m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + if (hAllocator->GetBufferImageGranularity() > 1) + { + m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); + } + break; + } +} + +VmaDefragmentationContext_T::~VmaDefragmentationContext_T() +{ + if (m_PoolBlockVector != VMA_NULL) + { + m_PoolBlockVector->SetIncrementalSort(true); + } + else + { + for (uint32_t i = 0; i < 
m_BlockVectorCount; ++i) + { + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + vector->SetIncrementalSort(true); + } + } + + if (m_AlgorithmState) + { + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast(m_AlgorithmState), m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast(m_AlgorithmState), m_BlockVectorCount); + break; + default: + VMA_ASSERT(0); + } + } +} + +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo) +{ + if (m_PoolBlockVector != VMA_NULL) + { + VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex); + + if (m_PoolBlockVector->GetBlockCount() > 1) + ComputeDefragmentation(*m_PoolBlockVector, 0); + else if (m_PoolBlockVector->GetBlockCount() == 1) + ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); + } + else + { + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + if (m_pBlockVectors[i] != VMA_NULL) + { + VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->GetAllocator()->m_UseMutex); + + if (m_pBlockVectors[i]->GetBlockCount() > 1) + { + if (ComputeDefragmentation(*m_pBlockVectors[i], i)) + break; + } + else if (m_pBlockVectors[i]->GetBlockCount() == 1) + { + if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0))) + break; + } + } + } + } + + moveInfo.moveCount = static_cast(m_Moves.size()); + if (moveInfo.moveCount > 0) + { + moveInfo.pMoves = m_Moves.data(); + return VK_INCOMPLETE; + } + + moveInfo.pMoves = VMA_NULL; + return VK_SUCCESS; +} + +VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo) +{ + VMA_ASSERT(moveInfo.moveCount > 0 ? 
moveInfo.pMoves != VMA_NULL : true); + + VkResult result = VK_SUCCESS; + VmaStlAllocator blockAllocator(m_MoveAllocator.m_pCallbacks); + VmaVector> immovableBlocks(blockAllocator); + VmaVector> mappedBlocks(blockAllocator); + + VmaAllocator allocator = VMA_NULL; + for (uint32_t i = 0; i < moveInfo.moveCount; ++i) + { + VmaDefragmentationMove& move = moveInfo.pMoves[i]; + size_t prevCount = 0, currentCount = 0; + VkDeviceSize freedBlockSize = 0; + + uint32_t vectorIndex; + VmaBlockVector* vector; + if (m_PoolBlockVector != VMA_NULL) + { + vectorIndex = 0; + vector = m_PoolBlockVector; } else { + vectorIndex = move.srcAllocation->GetMemoryTypeIndex(); + vector = m_pBlockVectors[vectorIndex]; + VMA_ASSERT(vector != VMA_NULL); + } + + switch (move.operation) + { + case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY: + { + uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation); + if (mapCount > 0) + { + allocator = vector->m_hAllocator; + VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock(); + bool notPresent = true; + for (FragmentedBlock& block : mappedBlocks) + { + if (block.block == newMapBlock) + { + notPresent = false; + block.data += mapCount; + break; + } + } + if (notPresent) + mappedBlocks.push_back({ mapCount, newMapBlock }); + } + + // Scope for locks, Free have it's own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } + + result = VK_INCOMPLETE; + break; + } + case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE: + { + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + vector->Free(move.dstTmpAllocation); + + VmaDeviceMemoryBlock* newBlock = 
move.srcAllocation->GetBlock(); + bool notPresent = true; + for (const FragmentedBlock& block : immovableBlocks) + { + if (block.block == newBlock) + { + notPresent = false; + break; + } + } + if (notPresent) + immovableBlocks.push_back({ vectorIndex, newBlock }); + break; + } + case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY: + { + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + // Scope for locks, Free have it's own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.srcAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } + freedBlockSize *= prevCount - currentCount; + + VkDeviceSize dstBlockSize; + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); + currentCount = vector->GetBlockCount(); + } + + result = VK_INCOMPLETE; + break; + } + default: VMA_ASSERT(0); } - ++m_AllocationCount; - } -} - -VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - bool freeOldAllocations) -{ - if(m_Blocks.empty()) - { - return VK_SUCCESS; - } - - // This is a choice based on research. 
- // Option 1: - uint32_t strategy = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT; - // Option 2: - //uint32_t strategy = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT; - // Option 3: - //uint32_t strategy = VMA_ALLOCATION_CREATE_STRATEGY_MIN_FRAGMENTATION_BIT; - - size_t srcBlockMinIndex = 0; - // When FAST_ALGORITHM, move allocations from only last out of blocks that contain non-movable allocations. - /* - if(m_AlgorithmFlags & VMA_DEFRAGMENTATION_FAST_ALGORITHM_BIT) - { - const size_t blocksWithNonMovableCount = CalcBlocksWithNonMovableCount(); - if(blocksWithNonMovableCount > 0) + if (prevCount > currentCount) { - srcBlockMinIndex = blocksWithNonMovableCount - 1; + size_t freedBlocks = prevCount - currentCount; + m_PassStats.deviceMemoryBlocksFreed += static_cast(freedBlocks); + m_PassStats.bytesFreed += freedBlockSize; } - } - */ - size_t srcBlockIndex = m_Blocks.size() - 1; - size_t srcAllocIndex = SIZE_MAX; - for(;;) - { - // 1. Find next allocation to move. - // 1.1. Start from last to first m_Blocks - they are sorted from most "destination" to most "source". - // 1.2. Then start from last to first m_Allocations. - while(srcAllocIndex >= m_Blocks[srcBlockIndex]->m_Allocations.size()) + if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT && + m_AlgorithmState != VMA_NULL) { - if(m_Blocks[srcBlockIndex]->m_Allocations.empty()) + // Avoid unnecessary tries to allocate when new free block is available + StateExtensive& state = reinterpret_cast(m_AlgorithmState)[vectorIndex]; + if (state.firstFreeBlock != SIZE_MAX) { - // Finished: no more allocations to process. 
- if(srcBlockIndex == srcBlockMinIndex) + const size_t diff = prevCount - currentCount; + if (state.firstFreeBlock >= diff) { - return VK_SUCCESS; + state.firstFreeBlock -= diff; + if (state.firstFreeBlock != 0) + state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); } else - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - } - else - { - srcAllocIndex = m_Blocks[srcBlockIndex]->m_Allocations.size() - 1; + state.firstFreeBlock = 0; } } + } + moveInfo.moveCount = 0; + moveInfo.pMoves = VMA_NULL; + m_Moves.clear(); - BlockInfo* pSrcBlockInfo = m_Blocks[srcBlockIndex]; - AllocationInfo& allocInfo = pSrcBlockInfo->m_Allocations[srcAllocIndex]; + // Update stats + m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved; + m_GlobalStats.bytesFreed += m_PassStats.bytesFreed; + m_GlobalStats.bytesMoved += m_PassStats.bytesMoved; + m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed; + m_PassStats = { 0 }; - const VkDeviceSize size = allocInfo.m_hAllocation->GetSize(); - const VkDeviceSize srcOffset = allocInfo.m_hAllocation->GetOffset(); - const VkDeviceSize alignment = allocInfo.m_hAllocation->GetAlignment(); - const VmaSuballocationType suballocType = allocInfo.m_hAllocation->GetSuballocationType(); - - // 2. Try to find new place for this allocation in preceding or current block. 
- for(size_t dstBlockIndex = 0; dstBlockIndex <= srcBlockIndex; ++dstBlockIndex) + // Move blocks with immovable allocations according to algorithm + if (immovableBlocks.size() > 0) + { + do { - BlockInfo* pDstBlockInfo = m_Blocks[dstBlockIndex]; - VmaAllocationRequest dstAllocRequest; - if(pDstBlockInfo->m_pBlock->m_pMetadata->CreateAllocationRequest( - m_CurrentFrameIndex, - m_pBlockVector->GetFrameInUseCount(), - m_pBlockVector->GetBufferImageGranularity(), - size, - alignment, - false, // upperAddress - suballocType, - false, // canMakeOtherLost - strategy, - &dstAllocRequest) && - MoveMakesSense( - dstBlockIndex, dstAllocRequest.offset, srcBlockIndex, srcOffset)) + if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT) { - VMA_ASSERT(dstAllocRequest.itemsToMakeLostCount == 0); - - // Reached limit on number of allocations or bytes to move. - if((m_AllocationsMoved + 1 > maxAllocationsToMove) || - (m_BytesMoved + size > maxBytesToMove)) + if (m_AlgorithmState != VMA_NULL) { - return VK_SUCCESS; + bool swapped = false; + // Move to the start of free blocks range + for (const FragmentedBlock& block : immovableBlocks) + { + StateExtensive& state = reinterpret_cast(m_AlgorithmState)[block.data]; + if (state.operation != StateExtensive::Operation::Cleanup) + { + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + + for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) + { + if (vector->GetBlock(i) == block.block) + { + std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); + if (state.firstFreeBlock != SIZE_MAX) + { + if (i + 1 < state.firstFreeBlock) + { + if (state.firstFreeBlock > 1) + std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); + else + --state.firstFreeBlock; + } + } + swapped = true; + break; + } + } + } + } + if (swapped) + result = VK_INCOMPLETE; + 
break; } - - VmaDefragmentationMove move = {}; - move.srcBlockIndex = pSrcBlockInfo->m_OriginalBlockIndex; - move.dstBlockIndex = pDstBlockInfo->m_OriginalBlockIndex; - move.srcOffset = srcOffset; - move.dstOffset = dstAllocRequest.offset; - move.size = size; - move.hAllocation = allocInfo.m_hAllocation; - move.pSrcBlock = pSrcBlockInfo->m_pBlock; - move.pDstBlock = pDstBlockInfo->m_pBlock; - - moves.push_back(move); - - pDstBlockInfo->m_pBlock->m_pMetadata->Alloc( - dstAllocRequest, - suballocType, - size, - allocInfo.m_hAllocation); - - if(freeOldAllocations) - { - pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset); - allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset); - } - - if(allocInfo.m_pChanged != VMA_NULL) - { - *allocInfo.m_pChanged = VK_TRUE; - } - - ++m_AllocationsMoved; - m_BytesMoved += size; - - VmaVectorRemove(pSrcBlockInfo->m_Allocations, srcAllocIndex); - - break; } - } - // If not processed, this allocInfo remains in pBlockInfo->m_Allocations for next round. 
+ // Move to the beginning + for (const FragmentedBlock& block : immovableBlocks) + { + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); - if(srcAllocIndex > 0) - { - --srcAllocIndex; - } + for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) + { + if (vector->GetBlock(i) == block.block) + { + std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); + break; + } + } + } + } while (false); + } + + // Bulk-map destination blocks + for (const FragmentedBlock& block : mappedBlocks) + { + VkResult res = block.block->Map(allocator, block.data, VMA_NULL); + VMA_ASSERT(res == VK_SUCCESS); + } + return result; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index) +{ + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: + return ComputeDefragmentation_Fast(vector); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + return ComputeDefragmentation_Balanced(vector, index, true); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: + return ComputeDefragmentation_Full(vector); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + return ComputeDefragmentation_Extensive(vector, index); + default: + VMA_ASSERT(0); + return ComputeDefragmentation_Balanced(vector, index, true); + } +} + +VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData( + VmaAllocHandle handle, VmaBlockMetadata* metadata) +{ + MoveAllocationData moveData; + moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle); + moveData.size = moveData.move.srcAllocation->GetSize(); + moveData.alignment = moveData.move.srcAllocation->GetAlignment(); + moveData.type = moveData.move.srcAllocation->GetSuballocationType(); + moveData.flags = 0; + + if (moveData.move.srcAllocation->IsPersistentMap()) + moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + if 
(moveData.move.srcAllocation->IsMappingAllowed()) + moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + + return moveData; +} + +VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes) +{ + // Check custom criteria if exists + if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData)) + return CounterStatus::End; + + // Ignore allocation if will exceed max size for copy + if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes) + { + if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) + return CounterStatus::Ignore; else - { - if(srcBlockIndex > 0) - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - else - { - return VK_SUCCESS; - } - } + return CounterStatus::End; } + else + m_IgnoredAllocs = 0; + return CounterStatus::Pass; } -size_t VmaDefragmentationAlgorithm_Generic::CalcBlocksWithNonMovableCount() const +bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes) { - size_t result = 0; - for(size_t i = 0; i < m_Blocks.size(); ++i) - { - if(m_Blocks[i]->m_HasNonMovableAllocations) - { - ++result; - } - } - return result; -} - -VkResult VmaDefragmentationAlgorithm_Generic::Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) -{ - if(!m_AllAllocations && m_AllocationCount == 0) - { - return VK_SUCCESS; - } - - const size_t blockCount = m_Blocks.size(); - for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - BlockInfo* pBlockInfo = m_Blocks[blockIndex]; - - if(m_AllAllocations) - { - VmaBlockMetadata_Generic* pMetadata = (VmaBlockMetadata_Generic*)pBlockInfo->m_pBlock->m_pMetadata; - for(VmaSuballocationList::const_iterator it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); - ++it) - { - if(it->type != VMA_SUBALLOCATION_TYPE_FREE) - { - AllocationInfo allocInfo = 
AllocationInfo(it->hAllocation, VMA_NULL); - pBlockInfo->m_Allocations.push_back(allocInfo); - } - } - } - - pBlockInfo->CalcHasNonMovableAllocations(); - - // This is a choice based on research. - // Option 1: - pBlockInfo->SortAllocationsByOffsetDescending(); - // Option 2: - //pBlockInfo->SortAllocationsBySizeDescending(); - } - - // Sort m_Blocks this time by the main criterium, from most "destination" to most "source" blocks. - VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockInfoCompareMoveDestination()); - - // This is a choice based on research. - const uint32_t roundCount = 2; - - // Execute defragmentation rounds (the main part). - VkResult result = VK_SUCCESS; - for(uint32_t round = 0; (round < roundCount) && (result == VK_SUCCESS); ++round) - { - result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove, !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)); - } - - return result; -} - -bool VmaDefragmentationAlgorithm_Generic::MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset) -{ - if(dstBlockIndex < srcBlockIndex) - { - return true; - } - if(dstBlockIndex > srcBlockIndex) - { - return false; - } - if(dstOffset < srcOffset) + m_PassStats.bytesMoved += bytes; + // Early return when max found + if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes) { + VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations || + m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!"); return true; } return false; } -//////////////////////////////////////////////////////////////////////////////// -// VmaDefragmentationAlgorithm_Fast - -VmaDefragmentationAlgorithm_Fast::VmaDefragmentationAlgorithm_Fast( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - uint32_t currentFrameIndex, - bool overlappingMoveSupported) : - VmaDefragmentationAlgorithm(hAllocator, pBlockVector, currentFrameIndex), - 
m_OverlappingMoveSupported(overlappingMoveSupported), - m_AllocationCount(0), - m_AllAllocations(false), - m_BytesMoved(0), - m_AllocationsMoved(0), - m_BlockInfos(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) +bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block) { - VMA_ASSERT(VMA_DEBUG_MARGIN == 0); + VmaBlockMetadata* metadata = block->m_pMetadata; -} - -VmaDefragmentationAlgorithm_Fast::~VmaDefragmentationAlgorithm_Fast() -{ -} - -VkResult VmaDefragmentationAlgorithm_Fast::Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) -{ - VMA_ASSERT(m_AllAllocations || m_pBlockVector->CalcAllocationCount() == m_AllocationCount); - - const size_t blockCount = m_pBlockVector->GetBlockCount(); - if(blockCount == 0 || maxBytesToMove == 0 || maxAllocationsToMove == 0) + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) { - return VK_SUCCESS; - } - - PreprocessMetadata(); - - // Sort blocks in order from most destination. 
- - m_BlockInfos.resize(blockCount); - for(size_t i = 0; i < blockCount; ++i) - { - m_BlockInfos[i].origBlockIndex = i; - } - - VMA_SORT(m_BlockInfos.begin(), m_BlockInfos.end(), [this](const BlockInfo& lhs, const BlockInfo& rhs) -> bool { - return m_pBlockVector->GetBlock(lhs.origBlockIndex)->m_pMetadata->GetSumFreeSize() < - m_pBlockVector->GetBlock(rhs.origBlockIndex)->m_pMetadata->GetSumFreeSize(); - }); - - // THE MAIN ALGORITHM - - FreeSpaceDatabase freeSpaceDb; - - size_t dstBlockInfoIndex = 0; - size_t dstOrigBlockIndex = m_BlockInfos[dstBlockInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* pDstBlock = m_pBlockVector->GetBlock(dstOrigBlockIndex); - VmaBlockMetadata_Generic* pDstMetadata = (VmaBlockMetadata_Generic*)pDstBlock->m_pMetadata; - VkDeviceSize dstBlockSize = pDstMetadata->GetSize(); - VkDeviceSize dstOffset = 0; - - bool end = false; - for(size_t srcBlockInfoIndex = 0; !end && srcBlockInfoIndex < blockCount; ++srcBlockInfoIndex) - { - const size_t srcOrigBlockIndex = m_BlockInfos[srcBlockInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* const pSrcBlock = m_pBlockVector->GetBlock(srcOrigBlockIndex); - VmaBlockMetadata_Generic* const pSrcMetadata = (VmaBlockMetadata_Generic*)pSrcBlock->m_pMetadata; - for(VmaSuballocationList::iterator srcSuballocIt = pSrcMetadata->m_Suballocations.begin(); - !end && srcSuballocIt != pSrcMetadata->m_Suballocations.end(); ) + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) { - VmaAllocation_T* const pAlloc = srcSuballocIt->hAllocation; - const VkDeviceSize srcAllocAlignment = pAlloc->GetAlignment(); - const VkDeviceSize srcAllocSize = srcSuballocIt->size; - if(m_AllocationsMoved == maxAllocationsToMove || - m_BytesMoved + srcAllocSize > maxBytesToMove) + case CounterStatus::Ignore: + continue; + case 
CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) { - end = true; + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector) +{ + for (; start < end; ++start) + { + VmaDeviceMemoryBlock* dstBlock = vector.GetBlock(start); + if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) + { + if (vector.AllocateFromBlock(dstBlock, + data.size, + data.alignment, + data.flags, + this, + data.type, + 0, + &data.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(data.move); + if (IncrementCounters(data.size)) + return true; break; } - const VkDeviceSize srcAllocOffset = srcSuballocIt->offset; + } + } + return false; +} - VmaDefragmentationMove move = {}; - // Try to place it in one of free spaces from the database. 
- size_t freeSpaceInfoIndex; - VkDeviceSize dstAllocOffset; - if(freeSpaceDb.Fetch(srcAllocAlignment, srcAllocSize, - freeSpaceInfoIndex, dstAllocOffset)) +bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector) +{ + // Move only between blocks + + // Go through allocations in last blocks and try to fit them inside first ones + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) { - size_t freeSpaceOrigBlockIndex = m_BlockInfos[freeSpaceInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* pFreeSpaceBlock = m_pBlockVector->GetBlock(freeSpaceOrigBlockIndex); - VmaBlockMetadata_Generic* pFreeSpaceMetadata = (VmaBlockMetadata_Generic*)pFreeSpaceBlock->m_pMetadata; - - // Same block - if(freeSpaceInfoIndex == srcBlockInfoIndex) - { - VMA_ASSERT(dstAllocOffset <= srcAllocOffset); - - // MOVE OPTION 1: Move the allocation inside the same block by decreasing offset. 
- - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - suballoc.hAllocation->ChangeOffset(dstAllocOffset); - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - InsertSuballoc(pFreeSpaceMetadata, suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = freeSpaceOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.size = srcAllocSize; - - moves.push_back(move); - } - // Different block - else - { - // MOVE OPTION 2: Move the allocation to a different block. - - VMA_ASSERT(freeSpaceInfoIndex < srcBlockInfoIndex); - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - suballoc.hAllocation->ChangeBlockAllocation(m_hAllocator, pFreeSpaceBlock, dstAllocOffset); - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - InsertSuballoc(pFreeSpaceMetadata, suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = freeSpaceOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.size = srcAllocSize; - - moves.push_back(move); - } + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); } - else + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: 
realloc within single block to minimize offset (exclude offset == 0), + // but only if there are noticeable gaps between them (some heuristic, ex. average size of allocation in block) + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateBalanced& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + if (update && vectorState.avgAllocSize == UINT64_MAX) + UpdateVectorStatistics(vector, vectorState); + + const size_t startMoveCount = m_Moves.size(); + VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + VkDeviceSize prevFreeRegionSize = 0; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) { - dstAllocOffset = VmaAlignUp(dstOffset, srcAllocAlignment); + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } - // If the allocation doesn't fit before the end of dstBlock, forward to next block. 
- while(dstBlockInfoIndex < srcBlockInfoIndex && - dstAllocOffset + srcAllocSize > dstBlockSize) + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + // Check if realloc will make sense + if (prevFreeRegionSize >= minimalFreeRegion || + nextFreeRegionSize >= minimalFreeRegion || + moveData.size <= vectorState.avgFreeSize || + moveData.size <= vectorState.avgAllocSize) { - // But before that, register remaining free space at the end of dst block. - freeSpaceDb.Register(dstBlockInfoIndex, dstOffset, dstBlockSize - dstOffset); - - ++dstBlockInfoIndex; - dstOrigBlockIndex = m_BlockInfos[dstBlockInfoIndex].origBlockIndex; - pDstBlock = m_pBlockVector->GetBlock(dstOrigBlockIndex); - pDstMetadata = (VmaBlockMetadata_Generic*)pDstBlock->m_pMetadata; - dstBlockSize = pDstMetadata->GetSize(); - dstOffset = 0; - dstAllocOffset = 0; - } - - // Same block - if(dstBlockInfoIndex == srcBlockInfoIndex) - { - VMA_ASSERT(dstAllocOffset <= srcAllocOffset); - - const bool overlap = dstAllocOffset + srcAllocSize > srcAllocOffset; - - bool skipOver = overlap; - if(overlap && m_OverlappingMoveSupported && dstAllocOffset < srcAllocOffset) + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) { - // If destination and source place overlap, skip if it would move it - // by only < 1/64 of its size. 
- skipOver = (srcAllocOffset - dstAllocOffset) * 64 < srcAllocSize; - } - - if(skipOver) - { - freeSpaceDb.Register(dstBlockInfoIndex, dstOffset, srcAllocOffset - dstOffset); - - dstOffset = srcAllocOffset + srcAllocSize; - ++srcSuballocIt; - } - // MOVE OPTION 1: Move the allocation inside the same block by decreasing offset. - else - { - srcSuballocIt->offset = dstAllocOffset; - srcSuballocIt->hAllocation->ChangeOffset(dstAllocOffset); - dstOffset = dstAllocOffset + srcAllocSize; - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - ++srcSuballocIt; - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = dstOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.size = srcAllocSize; - - moves.push_back(move); - } - } - // Different block - else - { - // MOVE OPTION 2: Move the allocation to a different block. - - VMA_ASSERT(dstBlockInfoIndex < srcBlockInfoIndex); - VMA_ASSERT(dstAllocOffset + srcAllocSize <= dstBlockSize); - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - suballoc.hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlock, dstAllocOffset); - dstOffset = dstAllocOffset + srcAllocSize; - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - pDstMetadata->m_Suballocations.push_back(suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = dstOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.size = srcAllocSize; - - moves.push_back(move); - } - } - } - } - - m_BlockInfos.clear(); - - PostprocessMetadata(); - - return VK_SUCCESS; -} - -void VmaDefragmentationAlgorithm_Fast::PreprocessMetadata() -{ - const size_t blockCount = m_pBlockVector->GetBlockCount(); - for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - 
{ - VmaBlockMetadata_Generic* const pMetadata = - (VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata; - pMetadata->m_FreeCount = 0; - pMetadata->m_SumFreeSize = pMetadata->GetSize(); - pMetadata->m_FreeSuballocationsBySize.clear(); - for(VmaSuballocationList::iterator it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); ) - { - if(it->type == VMA_SUBALLOCATION_TYPE_FREE) - { - VmaSuballocationList::iterator nextIt = it; - ++nextIt; - pMetadata->m_Suballocations.erase(it); - it = nextIt; - } - else - { - ++it; - } - } - } -} - -void VmaDefragmentationAlgorithm_Fast::PostprocessMetadata() -{ - const size_t blockCount = m_pBlockVector->GetBlockCount(); - for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - VmaBlockMetadata_Generic* const pMetadata = - (VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata; - const VkDeviceSize blockSize = pMetadata->GetSize(); - - // No allocations in this block - entire area is free. - if(pMetadata->m_Suballocations.empty()) - { - pMetadata->m_FreeCount = 1; - //pMetadata->m_SumFreeSize is already set to blockSize. - VmaSuballocation suballoc = { - 0, // offset - blockSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - pMetadata->m_Suballocations.push_back(suballoc); - pMetadata->RegisterFreeSuballocation(pMetadata->m_Suballocations.begin()); - } - // There are some allocations in this block. - else - { - VkDeviceSize offset = 0; - VmaSuballocationList::iterator it; - for(it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); - ++it) - { - VMA_ASSERT(it->type != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(it->offset >= offset); - - // Need to insert preceding free space. 
- if(it->offset > offset) - { - ++pMetadata->m_FreeCount; - const VkDeviceSize freeSize = it->offset - offset; - VmaSuballocation suballoc = { - offset, // offset - freeSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - VmaSuballocationList::iterator precedingFreeIt = pMetadata->m_Suballocations.insert(it, suballoc); - if(freeSize >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - pMetadata->m_FreeSuballocationsBySize.push_back(precedingFreeIt); - } - } - - pMetadata->m_SumFreeSize -= it->size; - offset = it->offset + it->size; - } - - // Need to insert trailing free space. - if(offset < blockSize) - { - ++pMetadata->m_FreeCount; - const VkDeviceSize freeSize = blockSize - offset; - VmaSuballocation suballoc = { - offset, // offset - freeSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - VMA_ASSERT(it == pMetadata->m_Suballocations.end()); - VmaSuballocationList::iterator trailingFreeIt = pMetadata->m_Suballocations.insert(it, suballoc); - if(freeSize > VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - pMetadata->m_FreeSuballocationsBySize.push_back(trailingFreeIt); - } - } - - VMA_SORT( - pMetadata->m_FreeSuballocationsBySize.begin(), - pMetadata->m_FreeSuballocationsBySize.end(), - VmaSuballocationItemSizeLess()); - } - - VMA_HEAVY_ASSERT(pMetadata->Validate()); - } -} - -void VmaDefragmentationAlgorithm_Fast::InsertSuballoc(VmaBlockMetadata_Generic* pMetadata, const VmaSuballocation& suballoc) -{ - // TODO: Optimize somehow. Remember iterator instead of searching for it linearly. 
- VmaSuballocationList::iterator it = pMetadata->m_Suballocations.begin(); - while(it != pMetadata->m_Suballocations.end()) - { - if(it->offset < suballoc.offset) - { - ++it; - } - } - pMetadata->m_Suballocations.insert(it, suballoc); -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaBlockVectorDefragmentationContext - -VmaBlockVectorDefragmentationContext::VmaBlockVectorDefragmentationContext( - VmaAllocator hAllocator, - VmaPool hCustomPool, - VmaBlockVector* pBlockVector, - uint32_t currFrameIndex) : - res(VK_SUCCESS), - mutexLocked(false), - blockContexts(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - defragmentationMoves(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - defragmentationMovesProcessed(0), - defragmentationMovesCommitted(0), - hasDefragmentationPlan(0), - m_hAllocator(hAllocator), - m_hCustomPool(hCustomPool), - m_pBlockVector(pBlockVector), - m_CurrFrameIndex(currFrameIndex), - m_pAlgorithm(VMA_NULL), - m_Allocations(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_AllAllocations(false) -{ -} - -VmaBlockVectorDefragmentationContext::~VmaBlockVectorDefragmentationContext() -{ - vma_delete(m_hAllocator, m_pAlgorithm); -} - -void VmaBlockVectorDefragmentationContext::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) -{ - AllocInfo info = { hAlloc, pChanged }; - m_Allocations.push_back(info); -} - -void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags) -{ - const bool allAllocations = m_AllAllocations || - m_Allocations.size() == m_pBlockVector->CalcAllocationCount(); - - /******************************** - HERE IS THE CHOICE OF DEFRAGMENTATION ALGORITHM. - ********************************/ - - /* - Fast algorithm is supported only when certain criteria are met: - - VMA_DEBUG_MARGIN is 0. - - All allocations in this block vector are moveable. - - There is no possibility of image/buffer granularity conflict. 
- - The defragmentation is not incremental - */ - if(VMA_DEBUG_MARGIN == 0 && - allAllocations && - !m_pBlockVector->IsBufferImageGranularityConflictPossible() && - !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)) - { - m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Fast)( - m_hAllocator, m_pBlockVector, m_CurrFrameIndex, overlappingMoveSupported); - } - else - { - m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Generic)( - m_hAllocator, m_pBlockVector, m_CurrFrameIndex, overlappingMoveSupported); - } - - if(allAllocations) - { - m_pAlgorithm->AddAll(); - } - else - { - for(size_t i = 0, count = m_Allocations.size(); i < count; ++i) - { - m_pAlgorithm->AddAllocation(m_Allocations[i].hAlloc, m_Allocations[i].pChanged); - } - } -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaDefragmentationContext - -VmaDefragmentationContext_T::VmaDefragmentationContext_T( - VmaAllocator hAllocator, - uint32_t currFrameIndex, - uint32_t flags, - VmaDefragmentationStats* pStats) : - m_hAllocator(hAllocator), - m_CurrFrameIndex(currFrameIndex), - m_Flags(flags), - m_pStats(pStats), - m_CustomPoolContexts(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) -{ - memset(m_DefaultPoolContexts, 0, sizeof(m_DefaultPoolContexts)); -} - -VmaDefragmentationContext_T::~VmaDefragmentationContext_T() -{ - for(size_t i = m_CustomPoolContexts.size(); i--; ) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[i]; - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); - vma_delete(m_hAllocator, pBlockVectorCtx); - } - for(size_t i = m_hAllocator->m_MemProps.memoryTypeCount; i--; ) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[i]; - if(pBlockVectorCtx) - { - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); - vma_delete(m_hAllocator, pBlockVectorCtx); - } - } -} - -void 
VmaDefragmentationContext_T::AddPools(uint32_t poolCount, const VmaPool* pPools) -{ - for(uint32_t poolIndex = 0; poolIndex < poolCount; ++poolIndex) - { - VmaPool pool = pPools[poolIndex]; - VMA_ASSERT(pool); - // Pools with algorithm other than default are not defragmented. - if(pool->m_BlockVector.GetAlgorithm() == 0) - { - VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL; - - for(size_t i = m_CustomPoolContexts.size(); i--; ) - { - if(m_CustomPoolContexts[i]->GetCustomPool() == pool) - { - pBlockVectorDefragCtx = m_CustomPoolContexts[i]; - break; - } - } - - if(!pBlockVectorDefragCtx) - { - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - pool, - &pool->m_BlockVector, - m_CurrFrameIndex); - m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); - } - - pBlockVectorDefragCtx->AddAll(); - } - } -} - -void VmaDefragmentationContext_T::AddAllocations( - uint32_t allocationCount, - const VmaAllocation* pAllocations, - VkBool32* pAllocationsChanged) -{ - // Dispatch pAllocations among defragmentators. Create them when necessary. - for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) - { - const VmaAllocation hAlloc = pAllocations[allocIndex]; - VMA_ASSERT(hAlloc); - // DedicatedAlloc cannot be defragmented. - if((hAlloc->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK) && - // Lost allocation cannot be defragmented. - (hAlloc->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST)) - { - VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL; - - const VmaPool hAllocPool = hAlloc->GetBlock()->GetParentPool(); - // This allocation belongs to custom pool. - if(hAllocPool != VK_NULL_HANDLE) - { - // Pools with algorithm other than default are not defragmented. 
- if(hAllocPool->m_BlockVector.GetAlgorithm() == 0) - { - for(size_t i = m_CustomPoolContexts.size(); i--; ) - { - if(m_CustomPoolContexts[i]->GetCustomPool() == hAllocPool) + if (metadata->GetAllocationOffset(request.allocHandle) < offset) { - pBlockVectorDefragCtx = m_CustomPoolContexts[i]; - break; + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } } } - if(!pBlockVectorDefragCtx) + } + } + prevFreeRegionSize = nextFreeRegionSize; + } + } + + // No moves performed, update statistics to current vector state + if (startMoveCount == m_Moves.size() && !update) + { + vectorState.avgAllocSize = UINT64_MAX; + return ComputeDefragmentation_Balanced(vector, index, false); + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0) + + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + 
const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) { - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - hAllocPool, - &hAllocPool->m_BlockVector, - m_CurrFrameIndex); - m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } } } } - // This allocation belongs to default pool. 
- else + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index) +{ + // First free single block, then populate it to the brim, then free another block, and so on + + // Fallback to previous algorithm since without granularity conflicts it can achieve max packing + if (vector.m_BufferImageGranularity == 1) + return ComputeDefragmentation_Full(vector); + + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateExtensive& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + + bool texturePresent = false, bufferPresent = false, otherPresent = false; + switch (vectorState.operation) + { + case StateExtensive::Operation::Done: // Vector defragmented + return false; + case StateExtensive::Operation::FindFreeBlockBuffer: + case StateExtensive::Operation::FindFreeBlockTexture: + case StateExtensive::Operation::FindFreeBlockAll: + { + // No more blocks to free, just perform fast realloc and move to cleanup + if (vectorState.firstFreeBlock == 0) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + return ComputeDefragmentation_Fast(vector); + } + + // No free blocks, have to clear last one + size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? 
vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; + VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; + + const size_t prevMoveCount = m_Moves.size(); + for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = freeMetadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, freeMetadata); + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) { - const uint32_t memTypeIndex = hAlloc->GetMemoryTypeIndex(); - pBlockVectorDefragCtx = m_DefaultPoolContexts[memTypeIndex]; - if(!pBlockVectorDefragCtx) + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, last, moveData, vector)) + { + // Full clear performed already + if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) + vectorState.firstFreeBlock = last; + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // Cannot perform full clear, have to move data in other blocks around + if (last != 0) + { + for (size_t i = last - 1; i; --i) { - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - VMA_NULL, // hCustomPool - m_hAllocator->m_pBlockVectors[memTypeIndex], - m_CurrFrameIndex); - m_DefaultPoolContexts[memTypeIndex] = pBlockVectorDefragCtx; + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; } } - if(pBlockVectorDefragCtx) + if (prevMoveCount == m_Moves.size()) { - VkBool32* const pChanged = (pAllocationsChanged != VMA_NULL) ? 
- &pAllocationsChanged[allocIndex] : VMA_NULL; - pBlockVectorDefragCtx->AddAllocation(hAlloc, pChanged); + // No possible reallocs within blocks, try to move them around fast + return ComputeDefragmentation_Fast(vector); } } - } -} - -VkResult VmaDefragmentationContext_T::Defragment( - VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, - VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags) -{ - if(pStats) - { - memset(pStats, 0, sizeof(VmaDefragmentationStats)); - } - - if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - // For incremental defragmetnations, we just earmark how much we can move - // The real meat is in the defragmentation steps - m_MaxCpuBytesToMove = maxCpuBytesToMove; - m_MaxCpuAllocationsToMove = maxCpuAllocationsToMove; - - m_MaxGpuBytesToMove = maxGpuBytesToMove; - m_MaxGpuAllocationsToMove = maxGpuAllocationsToMove; - - if(m_MaxCpuBytesToMove == 0 && m_MaxCpuAllocationsToMove == 0 && - m_MaxGpuBytesToMove == 0 && m_MaxGpuAllocationsToMove == 0) - return VK_SUCCESS; - - return VK_NOT_READY; - } - - if(commandBuffer == VK_NULL_HANDLE) - { - maxGpuBytesToMove = 0; - maxGpuAllocationsToMove = 0; - } - - VkResult res = VK_SUCCESS; - - // Process default pools. - for(uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount() && res >= VK_SUCCESS; - ++memTypeIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if(pBlockVectorCtx) - { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - pStats, flags, - maxCpuBytesToMove, maxCpuAllocationsToMove, - maxGpuBytesToMove, maxGpuAllocationsToMove, - commandBuffer); - if(pBlockVectorCtx->res != VK_SUCCESS) - { - res = pBlockVectorCtx->res; - } - } - } - - // Process custom pools. 
- for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount && res >= VK_SUCCESS; - ++customCtxIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - pStats, flags, - maxCpuBytesToMove, maxCpuAllocationsToMove, - maxGpuBytesToMove, maxGpuAllocationsToMove, - commandBuffer); - if(pBlockVectorCtx->res != VK_SUCCESS) - { - res = pBlockVectorCtx->res; - } - } - - return res; -} - -VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo) -{ - VmaDefragmentationPassMoveInfo* pCurrentMove = pInfo->pMoves; - uint32_t movesLeft = pInfo->moveCount; - - // Process default pools. - for(uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount(); - ++memTypeIndex) - { - VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if(pBlockVectorCtx) - { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - - if(!pBlockVectorCtx->hasDefragmentationPlan) - { - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - m_pStats, m_Flags, - m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, - m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, - VK_NULL_HANDLE); - - if(pBlockVectorCtx->res < VK_SUCCESS) - continue; - - pBlockVectorCtx->hasDefragmentationPlan = true; - } - - const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( - pBlockVectorCtx, - pCurrentMove, movesLeft); - - movesLeft -= processed; - pCurrentMove += processed; - } - } - - // Process custom pools. 
- for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount; - ++customCtxIndex) - { - VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - - if(!pBlockVectorCtx->hasDefragmentationPlan) - { - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - m_pStats, m_Flags, - m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, - m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, - VK_NULL_HANDLE); - - if(pBlockVectorCtx->res < VK_SUCCESS) - continue; - - pBlockVectorCtx->hasDefragmentationPlan = true; - } - - const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( - pBlockVectorCtx, - pCurrentMove, movesLeft); - - movesLeft -= processed; - pCurrentMove += processed; - } - - pInfo->moveCount = pInfo->moveCount - movesLeft; - - return VK_SUCCESS; -} -VkResult VmaDefragmentationContext_T::DefragmentPassEnd() -{ - VkResult res = VK_SUCCESS; - - // Process default pools. - for(uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount(); - ++memTypeIndex) - { - VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if(pBlockVectorCtx) - { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - - if(!pBlockVectorCtx->hasDefragmentationPlan) - { - res = VK_NOT_READY; - continue; - } - - pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( - pBlockVectorCtx, m_pStats); - - if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) - res = VK_NOT_READY; - } - } - - // Process custom pools. 
- for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount; - ++customCtxIndex) - { - VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - - if(!pBlockVectorCtx->hasDefragmentationPlan) - { - res = VK_NOT_READY; - continue; - } - - pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( - pBlockVectorCtx, m_pStats); - - if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) - res = VK_NOT_READY; - } - - return res; -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaRecorder - -#if VMA_RECORDING_ENABLED - -VmaRecorder::VmaRecorder() : - m_UseMutex(true), - m_Flags(0), - m_File(VMA_NULL), - m_RecordingStartTime(std::chrono::high_resolution_clock::now()) -{ -} - -VkResult VmaRecorder::Init(const VmaRecordSettings& settings, bool useMutex) -{ - m_UseMutex = useMutex; - m_Flags = settings.flags; - -#if defined(_WIN32) - // Open file for writing. - errno_t err = fopen_s(&m_File, settings.pFilePath, "wb"); - - if(err != 0) - { - return VK_ERROR_INITIALIZATION_FAILED; - } -#else - // Open file for writing. - m_File = fopen(settings.pFilePath, "wb"); - - if(m_File == 0) - { - return VK_ERROR_INITIALIZATION_FAILED; - } -#endif - - // Write header. 
- fprintf(m_File, "%s\n", "Vulkan Memory Allocator,Calls recording"); - fprintf(m_File, "%s\n", "1,8"); - - return VK_SUCCESS; -} - -VmaRecorder::~VmaRecorder() -{ - if(m_File != VMA_NULL) - { - fclose(m_File); - } -} - -void VmaRecorder::RecordCreateAllocator(uint32_t frameIndex) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaCreateAllocator\n", callParams.threadId, callParams.time, frameIndex); - Flush(); -} - -void VmaRecorder::RecordDestroyAllocator(uint32_t frameIndex) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaDestroyAllocator\n", callParams.threadId, callParams.time, frameIndex); - Flush(); -} - -void VmaRecorder::RecordCreatePool(uint32_t frameIndex, const VmaPoolCreateInfo& createInfo, VmaPool pool) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaCreatePool,%u,%u,%llu,%llu,%llu,%u,%p\n", callParams.threadId, callParams.time, frameIndex, - createInfo.memoryTypeIndex, - createInfo.flags, - createInfo.blockSize, - (uint64_t)createInfo.minBlockCount, - (uint64_t)createInfo.maxBlockCount, - createInfo.frameInUseCount, - pool); - Flush(); -} - -void VmaRecorder::RecordDestroyPool(uint32_t frameIndex, VmaPool pool) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaDestroyPool,%p\n", callParams.threadId, callParams.time, frameIndex, - pool); - Flush(); -} - -void VmaRecorder::RecordAllocateMemory(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - const VmaAllocationCreateInfo& createInfo, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - UserDataString userDataStr(createInfo.flags, createInfo.pUserData); - 
fprintf(m_File, "%u,%.3f,%u,vmaAllocateMemory,%llu,%llu,%u,%u,%u,%u,%u,%u,%p,%p,%s\n", callParams.threadId, callParams.time, frameIndex, - vkMemReq.size, - vkMemReq.alignment, - vkMemReq.memoryTypeBits, - createInfo.flags, - createInfo.usage, - createInfo.requiredFlags, - createInfo.preferredFlags, - createInfo.memoryTypeBits, - createInfo.pool, - allocation, - userDataStr.GetString()); - Flush(); -} - -void VmaRecorder::RecordAllocateMemoryPages(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - const VmaAllocationCreateInfo& createInfo, - uint64_t allocationCount, - const VmaAllocation* pAllocations) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - UserDataString userDataStr(createInfo.flags, createInfo.pUserData); - fprintf(m_File, "%u,%.3f,%u,vmaAllocateMemoryPages,%llu,%llu,%u,%u,%u,%u,%u,%u,%p,", callParams.threadId, callParams.time, frameIndex, - vkMemReq.size, - vkMemReq.alignment, - vkMemReq.memoryTypeBits, - createInfo.flags, - createInfo.usage, - createInfo.requiredFlags, - createInfo.preferredFlags, - createInfo.memoryTypeBits, - createInfo.pool); - PrintPointerList(allocationCount, pAllocations); - fprintf(m_File, ",%s\n", userDataStr.GetString()); - Flush(); -} - -void VmaRecorder::RecordAllocateMemoryForBuffer(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - bool requiresDedicatedAllocation, - bool prefersDedicatedAllocation, - const VmaAllocationCreateInfo& createInfo, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - UserDataString userDataStr(createInfo.flags, createInfo.pUserData); - fprintf(m_File, "%u,%.3f,%u,vmaAllocateMemoryForBuffer,%llu,%llu,%u,%u,%u,%u,%u,%u,%u,%u,%p,%p,%s\n", callParams.threadId, callParams.time, frameIndex, - vkMemReq.size, - vkMemReq.alignment, - vkMemReq.memoryTypeBits, - requiresDedicatedAllocation ? 1 : 0, - prefersDedicatedAllocation ? 
1 : 0, - createInfo.flags, - createInfo.usage, - createInfo.requiredFlags, - createInfo.preferredFlags, - createInfo.memoryTypeBits, - createInfo.pool, - allocation, - userDataStr.GetString()); - Flush(); -} - -void VmaRecorder::RecordAllocateMemoryForImage(uint32_t frameIndex, - const VkMemoryRequirements& vkMemReq, - bool requiresDedicatedAllocation, - bool prefersDedicatedAllocation, - const VmaAllocationCreateInfo& createInfo, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - UserDataString userDataStr(createInfo.flags, createInfo.pUserData); - fprintf(m_File, "%u,%.3f,%u,vmaAllocateMemoryForImage,%llu,%llu,%u,%u,%u,%u,%u,%u,%u,%u,%p,%p,%s\n", callParams.threadId, callParams.time, frameIndex, - vkMemReq.size, - vkMemReq.alignment, - vkMemReq.memoryTypeBits, - requiresDedicatedAllocation ? 1 : 0, - prefersDedicatedAllocation ? 1 : 0, - createInfo.flags, - createInfo.usage, - createInfo.requiredFlags, - createInfo.preferredFlags, - createInfo.memoryTypeBits, - createInfo.pool, - allocation, - userDataStr.GetString()); - Flush(); -} - -void VmaRecorder::RecordFreeMemory(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaFreeMemory,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordFreeMemoryPages(uint32_t frameIndex, - uint64_t allocationCount, - const VmaAllocation* pAllocations) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaFreeMemoryPages,", callParams.threadId, callParams.time, frameIndex); - PrintPointerList(allocationCount, pAllocations); - fprintf(m_File, "\n"); - Flush(); -} - -void VmaRecorder::RecordSetAllocationUserData(uint32_t frameIndex, - VmaAllocation allocation, - const void* 
pUserData) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - UserDataString userDataStr( - allocation->IsUserDataString() ? VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT : 0, - pUserData); - fprintf(m_File, "%u,%.3f,%u,vmaSetAllocationUserData,%p,%s\n", callParams.threadId, callParams.time, frameIndex, - allocation, - userDataStr.GetString()); - Flush(); -} - -void VmaRecorder::RecordCreateLostAllocation(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaCreateLostAllocation,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordMapMemory(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaMapMemory,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordUnmapMemory(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaUnmapMemory,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordFlushAllocation(uint32_t frameIndex, - VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaFlushAllocation,%p,%llu,%llu\n", callParams.threadId, callParams.time, frameIndex, - allocation, - offset, - size); - Flush(); -} - -void VmaRecorder::RecordInvalidateAllocation(uint32_t frameIndex, - VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) -{ - CallParams callParams; - 
GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaInvalidateAllocation,%p,%llu,%llu\n", callParams.threadId, callParams.time, frameIndex, - allocation, - offset, - size); - Flush(); -} - -void VmaRecorder::RecordCreateBuffer(uint32_t frameIndex, - const VkBufferCreateInfo& bufCreateInfo, - const VmaAllocationCreateInfo& allocCreateInfo, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - UserDataString userDataStr(allocCreateInfo.flags, allocCreateInfo.pUserData); - fprintf(m_File, "%u,%.3f,%u,vmaCreateBuffer,%u,%llu,%u,%u,%u,%u,%u,%u,%u,%p,%p,%s\n", callParams.threadId, callParams.time, frameIndex, - bufCreateInfo.flags, - bufCreateInfo.size, - bufCreateInfo.usage, - bufCreateInfo.sharingMode, - allocCreateInfo.flags, - allocCreateInfo.usage, - allocCreateInfo.requiredFlags, - allocCreateInfo.preferredFlags, - allocCreateInfo.memoryTypeBits, - allocCreateInfo.pool, - allocation, - userDataStr.GetString()); - Flush(); -} - -void VmaRecorder::RecordCreateImage(uint32_t frameIndex, - const VkImageCreateInfo& imageCreateInfo, - const VmaAllocationCreateInfo& allocCreateInfo, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - UserDataString userDataStr(allocCreateInfo.flags, allocCreateInfo.pUserData); - fprintf(m_File, "%u,%.3f,%u,vmaCreateImage,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%p,%p,%s\n", callParams.threadId, callParams.time, frameIndex, - imageCreateInfo.flags, - imageCreateInfo.imageType, - imageCreateInfo.format, - imageCreateInfo.extent.width, - imageCreateInfo.extent.height, - imageCreateInfo.extent.depth, - imageCreateInfo.mipLevels, - imageCreateInfo.arrayLayers, - imageCreateInfo.samples, - imageCreateInfo.tiling, - imageCreateInfo.usage, - imageCreateInfo.sharingMode, - imageCreateInfo.initialLayout, - 
allocCreateInfo.flags, - allocCreateInfo.usage, - allocCreateInfo.requiredFlags, - allocCreateInfo.preferredFlags, - allocCreateInfo.memoryTypeBits, - allocCreateInfo.pool, - allocation, - userDataStr.GetString()); - Flush(); -} - -void VmaRecorder::RecordDestroyBuffer(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaDestroyBuffer,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordDestroyImage(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaDestroyImage,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordTouchAllocation(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaTouchAllocation,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordGetAllocationInfo(uint32_t frameIndex, - VmaAllocation allocation) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaGetAllocationInfo,%p\n", callParams.threadId, callParams.time, frameIndex, - allocation); - Flush(); -} - -void VmaRecorder::RecordMakePoolAllocationsLost(uint32_t frameIndex, - VmaPool pool) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaMakePoolAllocationsLost,%p\n", callParams.threadId, callParams.time, frameIndex, - pool); - Flush(); -} - -void VmaRecorder::RecordDefragmentationBegin(uint32_t frameIndex, - const VmaDefragmentationInfo2& info, - 
VmaDefragmentationContext ctx) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaDefragmentationBegin,%u,", callParams.threadId, callParams.time, frameIndex, - info.flags); - PrintPointerList(info.allocationCount, info.pAllocations); - fprintf(m_File, ","); - PrintPointerList(info.poolCount, info.pPools); - fprintf(m_File, ",%llu,%u,%llu,%u,%p,%p\n", - info.maxCpuBytesToMove, - info.maxCpuAllocationsToMove, - info.maxGpuBytesToMove, - info.maxGpuAllocationsToMove, - info.commandBuffer, - ctx); - Flush(); -} - -void VmaRecorder::RecordDefragmentationEnd(uint32_t frameIndex, - VmaDefragmentationContext ctx) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaDefragmentationEnd,%p\n", callParams.threadId, callParams.time, frameIndex, - ctx); - Flush(); -} - -void VmaRecorder::RecordSetPoolName(uint32_t frameIndex, - VmaPool pool, - const char* name) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaSetPoolName,%p,%s\n", callParams.threadId, callParams.time, frameIndex, - pool, name != VMA_NULL ? name : ""); - Flush(); -} - -VmaRecorder::UserDataString::UserDataString(VmaAllocationCreateFlags allocFlags, const void* pUserData) -{ - if(pUserData != VMA_NULL) - { - if((allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0) - { - m_Str = (const char*)pUserData; - } else { - // If VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is not specified, convert the string's memory address to a string and store it. 
- snprintf(m_PtrStr, 17, "%p", pUserData); - m_Str = m_PtrStr; + switch (vectorState.operation) + { + case StateExtensive::Operation::FindFreeBlockBuffer: + vectorState.operation = StateExtensive::Operation::MoveBuffers; + break; + case StateExtensive::Operation::FindFreeBlockTexture: + vectorState.operation = StateExtensive::Operation::MoveTextures; + break; + case StateExtensive::Operation::FindFreeBlockAll: + vectorState.operation = StateExtensive::Operation::MoveAll; + break; + default: + VMA_ASSERT(0); + vectorState.operation = StateExtensive::Operation::MoveTextures; + } + vectorState.firstFreeBlock = last; + // Nothing done, block found without reallocations, can perform another reallocs in same pass + return ComputeDefragmentation_Extensive(vector, index); } + break; + } + case StateExtensive::Operation::MoveTextures: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (texturePresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!bufferPresent && !otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more textures to move, check buffers + vectorState.operation = StateExtensive::Operation::MoveBuffers; + bufferPresent = false; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case StateExtensive::Operation::MoveBuffers: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (bufferPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more buffers to move, check all 
others + vectorState.operation = StateExtensive::Operation::MoveAll; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case StateExtensive::Operation::MoveAll: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (otherPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + // Everything moved + vectorState.operation = StateExtensive::Operation::Cleanup; + } + break; + } + case StateExtensive::Operation::Cleanup: + // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062). + break; + } + + if (vectorState.operation == StateExtensive::Operation::Cleanup) + { + // All other work done, pack data in blocks even tighter if possible + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + + if (prevMoveCount == m_Moves.size()) + vectorState.operation = StateExtensive::Operation::Done; + } + return false; +} + +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} + +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, + 
VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = firstFreeBlock ; i;) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(--i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Move only single type of resources at once + if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) + { + // Try to fit allocation into free blocks + if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) + return false; + } + + if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) + texturePresent = true; + else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) + bufferPresent = true; + else + otherPresent = true; + } + } + return prevMoveCount == m_Moves.size(); +} +#endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS + +#ifndef _VMA_POOL_T_FUNCTIONS +VmaPool_T::VmaPool_T( + VmaAllocator hAllocator, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize) + : m_BlockVector( + hAllocator, + this, // hParentPool + createInfo.memoryTypeIndex, + createInfo.blockSize != 0 ? createInfo.blockSize : preferredBlockSize, + createInfo.minBlockCount, + createInfo.maxBlockCount, + (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 
1 : hAllocator->GetBufferImageGranularity(), + createInfo.blockSize != 0, // explicitBlockSize + createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm + createInfo.priority, + VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment), + createInfo.pMemoryAllocateNext), + m_Id(0), + m_Name(VMA_NULL) {} + +VmaPool_T::~VmaPool_T() +{ + VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL); + + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); +} + +void VmaPool_T::SetName(const char* pName) +{ + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); + + if (pName != VMA_NULL) + { + m_Name = VmaCreateStringCopy(allocs, pName); } else { - m_Str = ""; + m_Name = VMA_NULL; } } +#endif // _VMA_POOL_T_FUNCTIONS -void VmaRecorder::WriteConfiguration( - const VkPhysicalDeviceProperties& devProps, - const VkPhysicalDeviceMemoryProperties& memProps, - uint32_t vulkanApiVersion, - bool dedicatedAllocationExtensionEnabled, - bool bindMemory2ExtensionEnabled, - bool memoryBudgetExtensionEnabled, - bool deviceCoherentMemoryExtensionEnabled) -{ - fprintf(m_File, "Config,Begin\n"); - - fprintf(m_File, "VulkanApiVersion,%u,%u\n", VK_VERSION_MAJOR(vulkanApiVersion), VK_VERSION_MINOR(vulkanApiVersion)); - - fprintf(m_File, "PhysicalDevice,apiVersion,%u\n", devProps.apiVersion); - fprintf(m_File, "PhysicalDevice,driverVersion,%u\n", devProps.driverVersion); - fprintf(m_File, "PhysicalDevice,vendorID,%u\n", devProps.vendorID); - fprintf(m_File, "PhysicalDevice,deviceID,%u\n", devProps.deviceID); - fprintf(m_File, "PhysicalDevice,deviceType,%u\n", devProps.deviceType); - fprintf(m_File, "PhysicalDevice,deviceName,%s\n", devProps.deviceName); - - fprintf(m_File, "PhysicalDeviceLimits,maxMemoryAllocationCount,%u\n", devProps.limits.maxMemoryAllocationCount); - fprintf(m_File, 
"PhysicalDeviceLimits,bufferImageGranularity,%llu\n", devProps.limits.bufferImageGranularity); - fprintf(m_File, "PhysicalDeviceLimits,nonCoherentAtomSize,%llu\n", devProps.limits.nonCoherentAtomSize); - - fprintf(m_File, "PhysicalDeviceMemory,HeapCount,%u\n", memProps.memoryHeapCount); - for(uint32_t i = 0; i < memProps.memoryHeapCount; ++i) - { - fprintf(m_File, "PhysicalDeviceMemory,Heap,%u,size,%llu\n", i, memProps.memoryHeaps[i].size); - fprintf(m_File, "PhysicalDeviceMemory,Heap,%u,flags,%u\n", i, memProps.memoryHeaps[i].flags); - } - fprintf(m_File, "PhysicalDeviceMemory,TypeCount,%u\n", memProps.memoryTypeCount); - for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i) - { - fprintf(m_File, "PhysicalDeviceMemory,Type,%u,heapIndex,%u\n", i, memProps.memoryTypes[i].heapIndex); - fprintf(m_File, "PhysicalDeviceMemory,Type,%u,propertyFlags,%u\n", i, memProps.memoryTypes[i].propertyFlags); - } - - fprintf(m_File, "Extension,VK_KHR_dedicated_allocation,%u\n", dedicatedAllocationExtensionEnabled ? 1 : 0); - fprintf(m_File, "Extension,VK_KHR_bind_memory2,%u\n", bindMemory2ExtensionEnabled ? 1 : 0); - fprintf(m_File, "Extension,VK_EXT_memory_budget,%u\n", memoryBudgetExtensionEnabled ? 1 : 0); - fprintf(m_File, "Extension,VK_AMD_device_coherent_memory,%u\n", deviceCoherentMemoryExtensionEnabled ? 1 : 0); - - fprintf(m_File, "Macro,VMA_DEBUG_ALWAYS_DEDICATED_MEMORY,%u\n", VMA_DEBUG_ALWAYS_DEDICATED_MEMORY ? 1 : 0); - fprintf(m_File, "Macro,VMA_DEBUG_ALIGNMENT,%llu\n", (VkDeviceSize)VMA_DEBUG_ALIGNMENT); - fprintf(m_File, "Macro,VMA_DEBUG_MARGIN,%llu\n", (VkDeviceSize)VMA_DEBUG_MARGIN); - fprintf(m_File, "Macro,VMA_DEBUG_INITIALIZE_ALLOCATIONS,%u\n", VMA_DEBUG_INITIALIZE_ALLOCATIONS ? 1 : 0); - fprintf(m_File, "Macro,VMA_DEBUG_DETECT_CORRUPTION,%u\n", VMA_DEBUG_DETECT_CORRUPTION ? 1 : 0); - fprintf(m_File, "Macro,VMA_DEBUG_GLOBAL_MUTEX,%u\n", VMA_DEBUG_GLOBAL_MUTEX ? 
1 : 0); - fprintf(m_File, "Macro,VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY,%llu\n", (VkDeviceSize)VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY); - fprintf(m_File, "Macro,VMA_SMALL_HEAP_MAX_SIZE,%llu\n", (VkDeviceSize)VMA_SMALL_HEAP_MAX_SIZE); - fprintf(m_File, "Macro,VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE,%llu\n", (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); - - fprintf(m_File, "Config,End\n"); -} - -void VmaRecorder::GetBasicParams(CallParams& outParams) -{ - #if defined(_WIN32) - outParams.threadId = GetCurrentThreadId(); - #else - // Use C++11 features to get thread id and convert it to uint32_t. - // There is room for optimization since sstream is quite slow. - // Is there a better way to convert std::this_thread::get_id() to uint32_t? - std::thread::id thread_id = std::this_thread::get_id(); - std::stringstream thread_id_to_string_converter; - thread_id_to_string_converter << thread_id; - std::string thread_id_as_string = thread_id_to_string_converter.str(); - outParams.threadId = static_cast(std::stoi(thread_id_as_string.c_str())); - #endif - - auto current_time = std::chrono::high_resolution_clock::now(); - - outParams.time = std::chrono::duration(current_time - m_RecordingStartTime).count(); -} - -void VmaRecorder::PrintPointerList(uint64_t count, const VmaAllocation* pItems) -{ - if(count) - { - fprintf(m_File, "%p", pItems[0]); - for(uint64_t i = 1; i < count; ++i) - { - fprintf(m_File, " %p", pItems[i]); - } - } -} - -void VmaRecorder::Flush() -{ - if((m_Flags & VMA_RECORD_FLUSH_AFTER_CALL_BIT) != 0) - { - fflush(m_File); - } -} - -#endif // #if VMA_RECORDING_ENABLED - -//////////////////////////////////////////////////////////////////////////////// -// VmaAllocationObjectAllocator - -VmaAllocationObjectAllocator::VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks) : - m_Allocator(pAllocationCallbacks, 1024) -{ -} - -template VmaAllocation VmaAllocationObjectAllocator::Allocate(Types... 
args) -{ - VmaMutexLock mutexLock(m_Mutex); - return m_Allocator.Alloc(std::forward(args)...); -} - -void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) -{ - VmaMutexLock mutexLock(m_Mutex); - m_Allocator.Free(hAlloc); -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaAllocator_T - +#ifndef _VMA_ALLOCATOR_T_FUNCTIONS VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0), m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0), @@ -15722,6 +12709,8 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0), m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), + m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), + m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), m_hDevice(pCreateInfo->device), m_hInstance(pCreateInfo->instance), m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), @@ -15729,16 +12718,12 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks), m_AllocationObjectAllocator(&m_AllocationCallbacks), m_HeapSizeLimitMask(0), + m_DeviceMemoryCount(0), m_PreferredLargeHeapBlockSize(0), m_PhysicalDevice(pCreateInfo->physicalDevice), - m_CurrentFrameIndex(0), m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), - m_Pools(VmaStlAllocator(GetAllocationCallbacks())), m_NextPoolId(0), m_GlobalMemoryTypeBits(UINT32_MAX) -#if VMA_RECORDING_ENABLED - ,m_pRecorder(VMA_NULL) -#endif { 
if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) { @@ -15781,6 +12766,12 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); } #endif +#if VMA_VULKAN_VERSION < 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_ASSERT(0 && "vulkanApiVersion >= VK_API_VERSION_1_3 but required Vulkan version is disabled by preprocessor macros."); + } +#endif #if VMA_VULKAN_VERSION < 1002000 if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 2, 0)) { @@ -15799,15 +12790,30 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); } #endif +#if !(VMA_KHR_MAINTENANCE4) + if(m_UseKhrMaintenance4) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if !(VMA_KHR_MAINTENANCE5) + if(m_UseKhrMaintenance5) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); memset(&m_MemProps, 0, sizeof(m_MemProps)); memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors)); - memset(&m_pDedicatedAllocations, 0, sizeof(m_pDedicatedAllocations)); memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions)); +#if VMA_EXTERNAL_MEMORY + memset(&m_TypeExternalMemoryHandleTypes, 0, 
sizeof(m_TypeExternalMemoryHandleTypes)); +#endif // #if VMA_EXTERNAL_MEMORY + if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL) { m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData; @@ -15820,7 +12826,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties); (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps); - VMA_ASSERT(VmaIsPow2(VMA_DEBUG_ALIGNMENT)); + VMA_ASSERT(VmaIsPow2(VMA_MIN_ALIGNMENT)); VMA_ASSERT(VmaIsPow2(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY)); VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.bufferImageGranularity)); VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.nonCoherentAtomSize)); @@ -15830,6 +12836,14 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits(); +#if VMA_EXTERNAL_MEMORY + if(pCreateInfo->pTypeExternalMemoryHandleTypes != VMA_NULL) + { + memcpy(m_TypeExternalMemoryHandleTypes, pCreateInfo->pTypeExternalMemoryHandleTypes, + sizeof(VkExternalMemoryHandleTypeFlagsKHR) * GetMemoryTypeCount()); + } +#endif // #if VMA_EXTERNAL_MEMORY + if(pCreateInfo->pHeapSizeLimit != VMA_NULL) { for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) @@ -15848,24 +12862,26 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex); - - m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)( - this, - VK_NULL_HANDLE, // hParentPool - memTypeIndex, - preferredBlockSize, - 0, - SIZE_MAX, - GetBufferImageGranularity(), - pCreateInfo->frameInUseCount, - false, // explicitBlockSize - false, // linearAlgorithm - 0.5f); // priority (0.5 is the default per Vulkan spec) - 
// No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, - // becase minBlockCount is 0. - m_pDedicatedAllocations[memTypeIndex] = vma_new(this, AllocationVectorType)(VmaStlAllocator(GetAllocationCallbacks())); - + // Create only supported types + if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) + { + const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex); + m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)( + this, + VK_NULL_HANDLE, // hParentPool + memTypeIndex, + preferredBlockSize, + 0, + SIZE_MAX, + GetBufferImageGranularity(), + false, // explicitBlockSize + 0, // algorithm + 0.5f, // priority (0.5 is the default per Vulkan spec) + GetMemoryTypeMinAlignment(memTypeIndex), // minAllocationAlignment + VMA_NULL); // // pMemoryAllocateNext + // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, + // because minBlockCount is 0. + } } } @@ -15873,31 +12889,6 @@ VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) { VkResult res = VK_SUCCESS; - if(pCreateInfo->pRecordSettings != VMA_NULL && - !VmaStrIsEmpty(pCreateInfo->pRecordSettings->pFilePath)) - { -#if VMA_RECORDING_ENABLED - m_pRecorder = vma_new(this, VmaRecorder)(); - res = m_pRecorder->Init(*pCreateInfo->pRecordSettings, m_UseMutex); - if(res != VK_SUCCESS) - { - return res; - } - m_pRecorder->WriteConfiguration( - m_PhysicalDeviceProperties, - m_MemProps, - m_VulkanApiVersion, - m_UseKhrDedicatedAllocation, - m_UseKhrBindMemory2, - m_UseExtMemoryBudget, - m_UseAmdDeviceCoherentMemory); - m_pRecorder->RecordCreateAllocator(GetCurrentFrameIndex()); -#else - VMA_ASSERT(0 && "VmaAllocatorCreateInfo::pRecordSettings used, but not supported due to VMA_RECORDING_ENABLED not defined to 1."); - return VK_ERROR_FEATURE_NOT_PRESENT; -#endif - } - #if VMA_MEMORY_BUDGET if(m_UseExtMemoryBudget) { @@ -15910,25 +12901,11 @@ VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* 
pCreateInfo) VmaAllocator_T::~VmaAllocator_T() { -#if VMA_RECORDING_ENABLED - if(m_pRecorder != VMA_NULL) + VMA_ASSERT(m_Pools.IsEmpty()); + + for(size_t memTypeIndex = GetMemoryTypeCount(); memTypeIndex--; ) { - m_pRecorder->RecordDestroyAllocator(GetCurrentFrameIndex()); - vma_delete(this, m_pRecorder); - } -#endif - - VMA_ASSERT(m_Pools.empty()); - - for(size_t i = GetMemoryTypeCount(); i--; ) - { - if(m_pDedicatedAllocations[i] != VMA_NULL && !m_pDedicatedAllocations[i]->empty()) - { - VMA_ASSERT(0 && "Unfreed dedicated allocations found."); - } - - vma_delete(this, m_pDedicatedAllocations[i]); - vma_delete(this, m_pBlockVectors[i]); + vma_delete(this, m_pBlockVectors[memTypeIndex]); } } @@ -15955,6 +12932,8 @@ void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunc void VmaAllocator_T::ImportVulkanFunctions_Static() { // Vulkan 1.0 + m_VulkanFunctions.vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr; + m_VulkanFunctions.vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetDeviceProcAddr; m_VulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties; m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties; m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; @@ -15981,12 +12960,26 @@ void VmaAllocator_T::ImportVulkanFunctions_Static() m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = 
(PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; } #endif + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements; + m_VulkanFunctions.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements; + } +#endif } -#endif // #if VMA_STATIC_VULKAN_FUNCTIONS == 1 +#endif // VMA_STATIC_VULKAN_FUNCTIONS == 1 void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) { @@ -15995,6 +12988,8 @@ void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVul #define VMA_COPY_IF_NOT_NULL(funcName) \ if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; + VMA_COPY_IF_NOT_NULL(vkGetInstanceProcAddr); + VMA_COPY_IF_NOT_NULL(vkGetDeviceProcAddr); VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); VMA_COPY_IF_NOT_NULL(vkAllocateMemory); @@ -16023,10 +13018,15 @@ void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVul VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); #endif -#if VMA_MEMORY_BUDGET +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); #endif +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); +#endif + #undef VMA_COPY_IF_NOT_NULL } @@ -16034,14 +13034,19 @@ void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVul void VmaAllocator_T::ImportVulkanFunctions_Dynamic() { + VMA_ASSERT(m_VulkanFunctions.vkGetInstanceProcAddr && m_VulkanFunctions.vkGetDeviceProcAddr && + "To use VMA_DYNAMIC_VULKAN_FUNCTIONS in new versions of VMA you 
now have to pass " + "VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as VmaAllocatorCreateInfo::pVulkanFunctions. " + "Other members can be null."); + #define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \ if(m_VulkanFunctions.memberName == VMA_NULL) \ m_VulkanFunctions.memberName = \ - (functionPointerType)vkGetInstanceProcAddr(m_hInstance, functionNameString); + (functionPointerType)m_VulkanFunctions.vkGetInstanceProcAddr(m_hInstance, functionNameString); #define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \ if(m_VulkanFunctions.memberName == VMA_NULL) \ m_VulkanFunctions.memberName = \ - (functionPointerType)vkGetDeviceProcAddr(m_hDevice, functionNameString); + (functionPointerType)m_VulkanFunctions.vkGetDeviceProcAddr(m_hDevice, functionNameString); VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties"); VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); @@ -16068,7 +13073,17 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); - VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2, "vkGetPhysicalDeviceMemoryProperties2"); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) + { + 
VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); } #endif @@ -16088,18 +13103,37 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() } #endif // #if VMA_BIND_MEMORY2 -#if VMA_MEMORY_BUDGET - if(m_UseExtMemoryBudget) +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) { VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); } #endif // #if VMA_MEMORY_BUDGET +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, "vkGetDeviceBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); + } +#endif +#if VMA_KHR_MAINTENANCE4 + if(m_UseKhrMaintenance4) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); + } +#endif + #undef VMA_FETCH_DEVICE_FUNC #undef VMA_FETCH_INSTANCE_FUNC } -#endif // #if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 +#endif // VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 void VmaAllocator_T::ValidateVulkanFunctions() { @@ -16143,6 +13177,13 @@ void VmaAllocator_T::ValidateVulkanFunctions() VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); } #endif + + // Not validating these due to suspected driver bugs with 
these function + // pointers being null despite correct extension or Vulkan version is enabled. + // See issue #397. Their usage in VMA is optional anyway. + // + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); } VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) @@ -16154,80 +13195,108 @@ VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) } VkResult VmaAllocator_T::AllocateMemoryOfType( + VmaPool pool, VkDeviceSize size, VkDeviceSize alignment, - bool dedicatedAllocation, + bool dedicatedPreferred, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + VmaBlockVector& blockVector, size_t allocationCount, VmaAllocation* pAllocations) { VMA_ASSERT(pAllocations != VMA_NULL); - VMA_DEBUG_LOG(" AllocateMemory: MemoryTypeIndex=%u, AllocationCount=%zu, Size=%llu", memTypeIndex, allocationCount, size); + VMA_DEBUG_LOG_FORMAT(" AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size); VmaAllocationCreateInfo finalCreateInfo = createInfo; - - // If memory type is not HOST_VISIBLE, disable MAPPED. - if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && - (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - finalCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; - } - // If memory is lazily allocated, it should be always dedicated. 
- if(finalCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) - { - finalCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; - } - - VmaBlockVector* const blockVector = m_pBlockVectors[memTypeIndex]; - VMA_ASSERT(blockVector); - - const VkDeviceSize preferredBlockSize = blockVector->GetPreferredBlockSize(); - bool preferDedicatedMemory = - VMA_DEBUG_ALWAYS_DEDICATED_MEMORY || - dedicatedAllocation || - // Heuristics: Allocate dedicated memory if requested size if greater than half of preferred block size. - size > preferredBlockSize / 2; - - if(preferDedicatedMemory && - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 && - finalCreateInfo.pool == VK_NULL_HANDLE) - { - finalCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; - } + VkResult res = CalcMemTypeParams( + finalCreateInfo, + memTypeIndex, + size, + allocationCount); + if(res != VK_SUCCESS) + return res; if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) { - if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - else - { - return AllocateDedicatedMemory( - size, - suballocType, - memTypeIndex, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, - finalCreateInfo.pUserData, - finalCreateInfo.priority, - dedicatedBuffer, - dedicatedBufferUsage, - dedicatedImage, - allocationCount, - pAllocations); - } + return AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + 
(finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); } else { - VkResult res = blockVector->Allocate( - m_CurrentFrameIndex.load(), + const bool canAllocateDedicated = + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 && + (pool == VK_NULL_HANDLE || !blockVector.HasExplicitBlockSize()); + + if(canAllocateDedicated) + { + // Heuristics: Allocate dedicated memory if requested size if greater than half of preferred block size. + if(size > blockVector.GetPreferredBlockSize() / 2) + { + dedicatedPreferred = true; + } + // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget, + // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above + // 3/4 of the maximum allocation count. 
+ if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 && + m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4) + { + dedicatedPreferred = false; + } + + if(dedicatedPreferred) + { + res = AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + if(res == VK_SUCCESS) + { + // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. + VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); + return VK_SUCCESS; + } + } + } + + res = blockVector.Allocate( size, alignment, finalCreateInfo, @@ -16235,93 +13304,86 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( allocationCount, pAllocations); if(res == VK_SUCCESS) - { - return res; - } + return VK_SUCCESS; - // 5. Try dedicated memory. - if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - else + // Try dedicated memory. 
+ if(canAllocateDedicated && !dedicatedPreferred) { res = AllocateDedicatedMemory( + pool, size, suballocType, + dedicatedAllocations, memTypeIndex, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, finalCreateInfo.pUserData, finalCreateInfo.priority, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, allocationCount, - pAllocations); + pAllocations, + blockVector.GetAllocationNextPtr()); if(res == VK_SUCCESS) { - // Succeeded: AllocateDedicatedMemory function already filld pMemory, nothing more to do here. + // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); return VK_SUCCESS; } - else - { - // Everything failed: Return error code. - VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); - return res; - } } + // Everything failed: Return error code. 
+ VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); + return res; } } VkResult VmaAllocator_T::AllocateDedicatedMemory( + VmaPool pool, VkDeviceSize size, VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, uint32_t memTypeIndex, - bool withinBudget, bool map, bool isUserDataString, + bool isMappingAllowed, + bool canAliasMemory, void* pUserData, float priority, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, size_t allocationCount, - VmaAllocation* pAllocations) + VmaAllocation* pAllocations, + const void* pNextChain) { VMA_ASSERT(allocationCount > 0 && pAllocations); - if(withinBudget) - { - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - VmaBudget heapBudget = {}; - GetBudget(&heapBudget, heapIndex, 1); - if(heapBudget.usage + size * allocationCount > heapBudget.budget) - { - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - } - VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; allocInfo.memoryTypeIndex = memTypeIndex; allocInfo.allocationSize = size; + allocInfo.pNext = pNextChain; #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; - if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + if(!canAliasMemory) { - if(dedicatedBuffer != VK_NULL_HANDLE) + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) { - VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); - dedicatedAllocInfo.buffer = dedicatedBuffer; - VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); - } - else if(dedicatedImage != VK_NULL_HANDLE) - { - dedicatedAllocInfo.image = dedicatedImage; - VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + if(dedicatedBuffer != VK_NULL_HANDLE) + { + VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); + 
dedicatedAllocInfo.buffer = dedicatedBuffer; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + dedicatedAllocInfo.image = dedicatedImage; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } } } #endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 @@ -16333,8 +13395,8 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( bool canContainBufferWithDeviceAddress = true; if(dedicatedBuffer != VK_NULL_HANDLE) { - canContainBufferWithDeviceAddress = dedicatedBufferUsage == UINT32_MAX || // Usage flags unknown - (dedicatedBufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) != 0; + canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || + dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT); } else if(dedicatedImage != VK_NULL_HANDLE) { @@ -16352,22 +13414,35 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; if(m_UseExtMemoryPriority) { + VMA_ASSERT(priority >= 0.f && priority <= 1.f); priorityInfo.priority = priority; VmaPnextChainPushFront(&allocInfo, &priorityInfo); } #endif // #if VMA_MEMORY_PRIORITY +#if VMA_EXTERNAL_MEMORY + // Attach VkExportMemoryAllocateInfoKHR if necessary. 
+ VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; + exportMemoryAllocInfo.handleTypes = GetExternalMemoryHandleTypeFlags(memTypeIndex); + if(exportMemoryAllocInfo.handleTypes != 0) + { + VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); + } +#endif // #if VMA_EXTERNAL_MEMORY + size_t allocIndex; VkResult res = VK_SUCCESS; for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) { res = AllocateDedicatedMemoryPage( + pool, size, suballocType, memTypeIndex, allocInfo, map, isUserDataString, + isMappingAllowed, pUserData, pAllocations + allocIndex); if(res != VK_SUCCESS) @@ -16378,18 +13453,11 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( if(res == VK_SUCCESS) { - // Register them in m_pDedicatedAllocations. + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) { - VmaMutexLockWrite lock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* pDedicatedAllocations = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocations); - for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) - { - VmaVectorInsertSorted(*pDedicatedAllocations, pAllocations[allocIndex]); - } + dedicatedAllocations.Register(pAllocations[allocIndex]); } - - VMA_DEBUG_LOG(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%u", allocationCount, memTypeIndex); + VMA_DEBUG_LOG_FORMAT(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex); } else { @@ -16411,7 +13479,6 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); - currAlloc->SetUserData(this, VMA_NULL); m_AllocationObjectAllocator.Free(currAlloc); } @@ -16422,12 +13489,14 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( } VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( + VmaPool pool, 
VkDeviceSize size, VmaSuballocationType suballocType, uint32_t memTypeIndex, const VkMemoryAllocateInfo& allocInfo, bool map, bool isUserDataString, + bool isMappingAllowed, void* pUserData, VmaAllocation* pAllocation) { @@ -16457,9 +13526,12 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } } - *pAllocation = m_AllocationObjectAllocator.Allocate(m_CurrentFrameIndex.load(), isUserDataString); - (*pAllocation)->InitDedicatedAllocation(memTypeIndex, hMemory, suballocType, pMappedData, size); - (*pAllocation)->SetUserData(this, pUserData); + *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); + (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); + if (isUserDataString) + (*pAllocation)->SetName(this, (const char*)pUserData); + else + (*pAllocation)->SetUserData(this, pUserData); m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { @@ -16533,13 +13605,165 @@ void VmaAllocator_T::GetImageMemoryRequirements( } } +VkResult VmaAllocator_T::FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VmaBufferImageUsage bufImgUsage, + uint32_t* pMemoryTypeIndex) const +{ + memoryTypeBits &= GetGlobalMemoryTypeBits(); + + if(pAllocationCreateInfo->memoryTypeBits != 0) + { + memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; + } + + VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; + if(!FindMemoryPreferences( + IsIntegratedGpu(), + *pAllocationCreateInfo, + bufImgUsage, + requiredFlags, preferredFlags, notPreferredFlags)) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + *pMemoryTypeIndex = UINT32_MAX; + uint32_t minCost = UINT32_MAX; + for(uint32_t memTypeIndex = 0, memTypeBit = 1; + memTypeIndex < GetMemoryTypeCount(); + ++memTypeIndex, memTypeBit <<= 1) + { + // This memory type is acceptable according to memoryTypeBits bitmask. 
+ if((memTypeBit & memoryTypeBits) != 0) + { + const VkMemoryPropertyFlags currFlags = + m_MemProps.memoryTypes[memTypeIndex].propertyFlags; + // This memory type contains requiredFlags. + if((requiredFlags & ~currFlags) == 0) + { + // Calculate cost as number of bits from preferredFlags not present in this memory type. + uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + + VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); + // Remember memory type with lowest cost. + if(currCost < minCost) + { + *pMemoryTypeIndex = memTypeIndex; + if(currCost == 0) + { + return VK_SUCCESS; + } + minCost = currCost; + } + } + } + } + return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + +VkResult VmaAllocator_T::CalcMemTypeParams( + VmaAllocationCreateInfo& inoutCreateInfo, + uint32_t memTypeIndex, + VkDeviceSize size, + size_t allocationCount) +{ + // If memory type is not HOST_VISIBLE, disable MAPPED. + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && + (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + inoutCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0) + { + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + VmaBudget heapBudget = {}; + GetHeapBudgets(&heapBudget, heapIndex, 1); + if(heapBudget.usage + size * allocationCount > heapBudget.budget) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + return VK_SUCCESS; +} + +VkResult VmaAllocator_T::CalcAllocationParams( + VmaAllocationCreateInfo& inoutCreateInfo, + bool dedicatedRequired, + bool dedicatedPreferred) +{ + VMA_ASSERT((inoutCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != + 
(VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && + "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); + VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || + (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && + "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) + { + VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && + "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + } + } + + // If memory is lazily allocated, it should be always dedicated. 
+ if(dedicatedRequired || + inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + if(inoutCreateInfo.pool != VK_NULL_HANDLE) + { + if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority(); + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + if(VMA_DEBUG_ALWAYS_DEDICATED_MEMORY && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + // Non-auto USAGE values imply HOST_ACCESS flags. + // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. + // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. + // Otherwise they just protect from assert on mapping. 
+ if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + } + } + + return VK_SUCCESS; +} + VkResult VmaAllocator_T::AllocateMemory( const VkMemoryRequirements& vkMemReq, bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -16551,60 +13775,30 @@ VkResult VmaAllocator_T::AllocateMemory( if(vkMemReq.size == 0) { - return VK_ERROR_VALIDATION_FAILED_EXT; - } - if((createInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && - (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - if((createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && - (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0) - { - VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_MAPPED_BIT together with VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT is invalid."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - if(requiresDedicatedAllocation) - { - if((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - VMA_ASSERT(0 && "VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT specified while dedicated allocation is required."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - if(createInfo.pool != VK_NULL_HANDLE) - { - VMA_ASSERT(0 && "Pool specified while dedicated 
allocation is required."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - } - if((createInfo.pool != VK_NULL_HANDLE) && - ((createInfo.flags & (VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT)) != 0)) - { - VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT when pool != null is invalid."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + return VK_ERROR_INITIALIZATION_FAILED; } - if(createInfo.pool != VK_NULL_HANDLE) + VmaAllocationCreateInfo createInfoFinal = createInfo; + VkResult res = CalcAllocationParams(createInfoFinal, requiresDedicatedAllocation, prefersDedicatedAllocation); + if(res != VK_SUCCESS) + return res; + + if(createInfoFinal.pool != VK_NULL_HANDLE) { - const VkDeviceSize alignmentForPool = VMA_MAX( - vkMemReq.alignment, - GetMemoryTypeMinAlignment(createInfo.pool->m_BlockVector.GetMemoryTypeIndex())); - - VmaAllocationCreateInfo createInfoForPool = createInfo; - // If memory type is not HOST_VISIBLE, disable MAPPED. - if((createInfoForPool.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && - (m_MemProps.memoryTypes[createInfo.pool->m_BlockVector.GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - createInfoForPool.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; - } - - return createInfo.pool->m_BlockVector.Allocate( - m_CurrentFrameIndex.load(), + VmaBlockVector& blockVector = createInfoFinal.pool->m_BlockVector; + return AllocateMemoryOfType( + createInfoFinal.pool, vkMemReq.size, - alignmentForPool, - createInfoForPool, + vkMemReq.alignment, + prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + blockVector.GetMemoryTypeIndex(), suballocType, + createInfoFinal.pool->m_DedicatedAllocations, + blockVector, allocationCount, pAllocations); } @@ -16613,76 +13807,42 @@ VkResult VmaAllocator_T::AllocateMemory( // Bit mask of memory Vulkan types acceptable for this allocation. 
uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; uint32_t memTypeIndex = UINT32_MAX; - VkResult res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfo, &memTypeIndex); - if(res == VK_SUCCESS) + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + // Can't find any single memory type matching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. + if(res != VK_SUCCESS) + return res; + do { - VkDeviceSize alignmentForMemType = VMA_MAX( - vkMemReq.alignment, - GetMemoryTypeMinAlignment(memTypeIndex)); - + VmaBlockVector* blockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); res = AllocateMemoryOfType( + VK_NULL_HANDLE, vkMemReq.size, - alignmentForMemType, + vkMemReq.alignment, requiresDedicatedAllocation || prefersDedicatedAllocation, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, - createInfo, + dedicatedBufferImageUsage, + createInfoFinal, memTypeIndex, suballocType, + m_DedicatedAllocations[memTypeIndex], + *blockVector, allocationCount, pAllocations); - // Succeeded on first try. + // Allocation succeeded if(res == VK_SUCCESS) - { - return res; - } - // Allocation from this memory type failed. Try other compatible memory types. - else - { - for(;;) - { - // Remove old memTypeIndex from list of possibilities. - memoryTypeBits &= ~(1u << memTypeIndex); - // Find alternative memTypeIndex. 
- res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfo, &memTypeIndex); - if(res == VK_SUCCESS) - { - alignmentForMemType = VMA_MAX( - vkMemReq.alignment, - GetMemoryTypeMinAlignment(memTypeIndex)); + return VK_SUCCESS; - res = AllocateMemoryOfType( - vkMemReq.size, - alignmentForMemType, - requiresDedicatedAllocation || prefersDedicatedAllocation, - dedicatedBuffer, - dedicatedBufferUsage, - dedicatedImage, - createInfo, - memTypeIndex, - suballocType, - allocationCount, - pAllocations); - // Allocation from this alternative memory type succeeded. - if(res == VK_SUCCESS) - { - return res; - } - // else: Allocation from this memory type failed. Try next one - next loop iteration. - } - // No other matching memory type index could be found. - else - { - // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - } - } - } - // Can't find any single memory type maching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. - else - return res; + // Remove old memTypeIndex from list of possibilities. + memoryTypeBits &= ~(1u << memTypeIndex); + // Find alternative memTypeIndex. + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + } while(res == VK_SUCCESS); + + // No other matching memory type index could be found. + // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. 
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY; } } @@ -16698,115 +13858,95 @@ void VmaAllocator_T::FreeMemory( if(allocation != VK_NULL_HANDLE) { - if(TouchAllocation(allocation)) + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { - if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) - { - FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); - } - - switch(allocation->GetType()) - { - case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: - { - VmaBlockVector* pBlockVector = VMA_NULL; - VmaPool hPool = allocation->GetBlock()->GetParentPool(); - if(hPool != VK_NULL_HANDLE) - { - pBlockVector = &hPool->m_BlockVector; - } - else - { - const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); - pBlockVector = m_pBlockVectors[memTypeIndex]; - } - pBlockVector->Free(allocation); - } - break; - case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - FreeDedicatedMemory(allocation); - break; - default: - VMA_ASSERT(0); - } + FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); } - // Do this regardless of whether the allocation is lost. Lost allocations still account to Budget.AllocationBytes. 
- m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); - allocation->SetUserData(this, VMA_NULL); - m_AllocationObjectAllocator.Free(allocation); + allocation->FreeName(this); + + switch(allocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaBlockVector* pBlockVector = VMA_NULL; + VmaPool hPool = allocation->GetParentPool(); + if(hPool != VK_NULL_HANDLE) + { + pBlockVector = &hPool->m_BlockVector; + } + else + { + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + pBlockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); + } + pBlockVector->Free(allocation); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + FreeDedicatedMemory(allocation); + break; + default: + VMA_ASSERT(0); + } } } } -VkResult VmaAllocator_T::ResizeAllocation( - const VmaAllocation alloc, - VkDeviceSize newSize) -{ - // This function is deprecated and so it does nothing. It's left for backward compatibility. - if(newSize == 0 || alloc->GetLastUseFrameIndex() == VMA_FRAME_INDEX_LOST) - { - return VK_ERROR_VALIDATION_FAILED_EXT; - } - if(newSize == alloc->GetSize()) - { - return VK_SUCCESS; - } - return VK_ERROR_OUT_OF_POOL_MEMORY; -} - -void VmaAllocator_T::CalculateStats(VmaStats* pStats) +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) { // Initialize. - InitStatInfo(pStats->total); - for(size_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) - InitStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) - InitStatInfo(pStats->memoryHeap[i]); + VmaClearDetailedStatistics(pStats->total); + for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) + VmaClearDetailedStatistics(pStats->memoryType[i]); + for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) + VmaClearDetailedStatistics(pStats->memoryHeap[i]); // Process default pools. 
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; - VMA_ASSERT(pBlockVector); - pBlockVector->AddStats(pStats); + if (pBlockVector != VMA_NULL) + pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } // Process custom pools. { VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); - for(size_t poolIndex = 0, poolCount = m_Pools.size(); poolIndex < poolCount; ++poolIndex) + for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) { - m_Pools[poolIndex]->m_BlockVector.AddStats(pStats); + VmaBlockVector& blockVector = pool->m_BlockVector; + const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); + blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } } // Process dedicated allocations. for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - const uint32_t memHeapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - VmaMutexLockRead dedicatedAllocationsLock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* const pDedicatedAllocVector = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocVector); - for(size_t allocIndex = 0, allocCount = pDedicatedAllocVector->size(); allocIndex < allocCount; ++allocIndex) - { - VmaStatInfo allocationStatInfo; - (*pDedicatedAllocVector)[allocIndex]->DedicatedAllocCalcStatsInfo(allocationStatInfo); - VmaAddStatInfo(pStats->total, allocationStatInfo); - VmaAddStatInfo(pStats->memoryType[memTypeIndex], allocationStatInfo); - VmaAddStatInfo(pStats->memoryHeap[memHeapIndex], allocationStatInfo); - } + m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } - // Postprocess. 
- VmaPostprocessCalcStatInfo(pStats->total); - for(size_t i = 0; i < GetMemoryTypeCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < GetMemoryHeapCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]); + // Sum from memory types to memory heaps. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; + VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); + } + + // Sum from memory heaps to total. + for(uint32_t memHeapIndex = 0; memHeapIndex < GetMemoryHeapCount(); ++memHeapIndex) + VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[memHeapIndex]); + + VMA_ASSERT(pStats->total.statistics.allocationCount == 0 || + pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin); + VMA_ASSERT(pStats->total.unusedRangeCount == 0 || + pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin); } -void VmaAllocator_T::GetBudget(VmaBudget* outBudget, uint32_t firstHeap, uint32_t heapCount) +void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount) { #if VMA_MEMORY_BUDGET if(m_UseExtMemoryBudget) @@ -16814,233 +13954,96 @@ void VmaAllocator_T::GetBudget(VmaBudget* outBudget, uint32_t firstHeap, uint32_ if(m_Budget.m_OperationsSinceBudgetFetch < 30) { VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex); - for(uint32_t i = 0; i < heapCount; ++i, ++outBudget) + for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) { const uint32_t heapIndex = firstHeap + i; - outBudget->blockBytes = m_Budget.m_BlockBytes[heapIndex]; - outBudget->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = 
m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; - if(m_Budget.m_VulkanUsage[heapIndex] + outBudget->blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) + if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) { - outBudget->usage = m_Budget.m_VulkanUsage[heapIndex] + - outBudget->blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] + + outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; } else { - outBudget->usage = 0; + outBudgets->usage = 0; } // Have to take MIN with heap size because explicit HeapSizeLimit is included in it. - outBudget->budget = VMA_MIN( + outBudgets->budget = VMA_MIN( m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size); } } else { UpdateVulkanBudget(); // Outside of mutex lock - GetBudget(outBudget, firstHeap, heapCount); // Recursion + GetHeapBudgets(outBudgets, firstHeap, heapCount); // Recursion } } else #endif { - for(uint32_t i = 0; i < heapCount; ++i, ++outBudget) + for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) { const uint32_t heapIndex = firstHeap + i; - outBudget->blockBytes = m_Budget.m_BlockBytes[heapIndex]; - outBudget->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; - outBudget->usage = outBudget->blockBytes; - outBudget->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. + outBudgets->usage = outBudgets->statistics.blockBytes; + outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. 
} } } -static const uint32_t VMA_VENDOR_ID_AMD = 4098; - -VkResult VmaAllocator_T::DefragmentationBegin( - const VmaDefragmentationInfo2& info, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext* pContext) -{ - if(info.pAllocationsChanged != VMA_NULL) - { - memset(info.pAllocationsChanged, 0, info.allocationCount * sizeof(VkBool32)); - } - - *pContext = vma_new(this, VmaDefragmentationContext_T)( - this, m_CurrentFrameIndex.load(), info.flags, pStats); - - (*pContext)->AddPools(info.poolCount, info.pPools); - (*pContext)->AddAllocations( - info.allocationCount, info.pAllocations, info.pAllocationsChanged); - - VkResult res = (*pContext)->Defragment( - info.maxCpuBytesToMove, info.maxCpuAllocationsToMove, - info.maxGpuBytesToMove, info.maxGpuAllocationsToMove, - info.commandBuffer, pStats, info.flags); - - if(res != VK_NOT_READY) - { - vma_delete(this, *pContext); - *pContext = VMA_NULL; - } - - return res; -} - -VkResult VmaAllocator_T::DefragmentationEnd( - VmaDefragmentationContext context) -{ - vma_delete(this, context); - return VK_SUCCESS; -} - -VkResult VmaAllocator_T::DefragmentationPassBegin( - VmaDefragmentationPassInfo* pInfo, - VmaDefragmentationContext context) -{ - return context->DefragmentPassBegin(pInfo); -} -VkResult VmaAllocator_T::DefragmentationPassEnd( - VmaDefragmentationContext context) -{ - return context->DefragmentPassEnd(); - -} - void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) { - if(hAllocation->CanBecomeLost()) - { - /* - Warning: This is a carefully designed algorithm. 
- Do not modify unless you really know what you're doing :) - */ - const uint32_t localCurrFrameIndex = m_CurrentFrameIndex.load(); - uint32_t localLastUseFrameIndex = hAllocation->GetLastUseFrameIndex(); - for(;;) - { - if(localLastUseFrameIndex == VMA_FRAME_INDEX_LOST) - { - pAllocationInfo->memoryType = UINT32_MAX; - pAllocationInfo->deviceMemory = VK_NULL_HANDLE; - pAllocationInfo->offset = 0; - pAllocationInfo->size = hAllocation->GetSize(); - pAllocationInfo->pMappedData = VMA_NULL; - pAllocationInfo->pUserData = hAllocation->GetUserData(); - return; - } - else if(localLastUseFrameIndex == localCurrFrameIndex) - { - pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); - pAllocationInfo->deviceMemory = hAllocation->GetMemory(); - pAllocationInfo->offset = hAllocation->GetOffset(); - pAllocationInfo->size = hAllocation->GetSize(); - pAllocationInfo->pMappedData = VMA_NULL; - pAllocationInfo->pUserData = hAllocation->GetUserData(); - return; - } - else // Last use time earlier than current time. - { - if(hAllocation->CompareExchangeLastUseFrameIndex(localLastUseFrameIndex, localCurrFrameIndex)) - { - localLastUseFrameIndex = localCurrFrameIndex; - } - } - } - } - else - { -#if VMA_STATS_STRING_ENABLED - uint32_t localCurrFrameIndex = m_CurrentFrameIndex.load(); - uint32_t localLastUseFrameIndex = hAllocation->GetLastUseFrameIndex(); - for(;;) - { - VMA_ASSERT(localLastUseFrameIndex != VMA_FRAME_INDEX_LOST); - if(localLastUseFrameIndex == localCurrFrameIndex) - { - break; - } - else // Last use time earlier than current time. 
- { - if(hAllocation->CompareExchangeLastUseFrameIndex(localLastUseFrameIndex, localCurrFrameIndex)) - { - localLastUseFrameIndex = localCurrFrameIndex; - } - } - } -#endif - - pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); - pAllocationInfo->deviceMemory = hAllocation->GetMemory(); - pAllocationInfo->offset = hAllocation->GetOffset(); - pAllocationInfo->size = hAllocation->GetSize(); - pAllocationInfo->pMappedData = hAllocation->GetMappedData(); - pAllocationInfo->pUserData = hAllocation->GetUserData(); - } + pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); + pAllocationInfo->deviceMemory = hAllocation->GetMemory(); + pAllocationInfo->offset = hAllocation->GetOffset(); + pAllocationInfo->size = hAllocation->GetSize(); + pAllocationInfo->pMappedData = hAllocation->GetMappedData(); + pAllocationInfo->pUserData = hAllocation->GetUserData(); + pAllocationInfo->pName = hAllocation->GetName(); } -bool VmaAllocator_T::TouchAllocation(VmaAllocation hAllocation) +void VmaAllocator_T::GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo) { - // This is a stripped-down version of VmaAllocator_T::GetAllocationInfo. - if(hAllocation->CanBecomeLost()) - { - uint32_t localCurrFrameIndex = m_CurrentFrameIndex.load(); - uint32_t localLastUseFrameIndex = hAllocation->GetLastUseFrameIndex(); - for(;;) - { - if(localLastUseFrameIndex == VMA_FRAME_INDEX_LOST) - { - return false; - } - else if(localLastUseFrameIndex == localCurrFrameIndex) - { - return true; - } - else // Last use time earlier than current time. 
- { - if(hAllocation->CompareExchangeLastUseFrameIndex(localLastUseFrameIndex, localCurrFrameIndex)) - { - localLastUseFrameIndex = localCurrFrameIndex; - } - } - } - } - else - { -#if VMA_STATS_STRING_ENABLED - uint32_t localCurrFrameIndex = m_CurrentFrameIndex.load(); - uint32_t localLastUseFrameIndex = hAllocation->GetLastUseFrameIndex(); - for(;;) - { - VMA_ASSERT(localLastUseFrameIndex != VMA_FRAME_INDEX_LOST); - if(localLastUseFrameIndex == localCurrFrameIndex) - { - break; - } - else // Last use time earlier than current time. - { - if(hAllocation->CompareExchangeLastUseFrameIndex(localLastUseFrameIndex, localCurrFrameIndex)) - { - localLastUseFrameIndex = localCurrFrameIndex; - } - } - } -#endif + GetAllocationInfo(hAllocation, &pAllocationInfo->allocationInfo); - return true; + switch (hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + pAllocationInfo->blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize(); + pAllocationInfo->dedicatedMemory = VK_FALSE; + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + pAllocationInfo->blockSize = pAllocationInfo->allocationInfo.size; + pAllocationInfo->dedicatedMemory = VK_TRUE; + break; + default: + VMA_ASSERT(0); } } VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) { - VMA_DEBUG_LOG(" CreatePool: MemoryTypeIndex=%u, flags=%u", pCreateInfo->memoryTypeIndex, pCreateInfo->flags); + VMA_DEBUG_LOG_FORMAT(" CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags); VmaPoolCreateInfo newCreateInfo = *pCreateInfo; + // Protection against uninitialized new structure member. If garbage data are left there, this pointer dereference would crash. 
+ if(pCreateInfo->pMemoryAllocateNext) + { + VMA_ASSERT(((const VkBaseInStructure*)pCreateInfo->pMemoryAllocateNext)->sType != 0); + } + if(newCreateInfo.maxBlockCount == 0) { newCreateInfo.maxBlockCount = SIZE_MAX; @@ -17055,6 +14058,10 @@ VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPoo { return VK_ERROR_FEATURE_NOT_PRESENT; } + if(newCreateInfo.minAllocationAlignment > 0) + { + VMA_ASSERT(VmaIsPow2(newCreateInfo.minAllocationAlignment)); + } const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); @@ -17072,7 +14079,7 @@ VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPoo { VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); (*pPool)->SetId(m_NextPoolId++); - VmaVectorInsertSorted(m_Pools, *pPool); + m_Pools.PushBack(*pPool); } return VK_SUCCESS; @@ -17083,16 +14090,24 @@ void VmaAllocator_T::DestroyPool(VmaPool pool) // Remove from m_Pools. { VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); - bool success = VmaVectorRemoveSorted(m_Pools, pool); - VMA_ASSERT(success && "Pool not found in Allocator."); + m_Pools.Remove(pool); } vma_delete(this, pool); } -void VmaAllocator_T::GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats) +void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats) { - pool->m_BlockVector.GetPoolStats(pPoolStats); + VmaClearStatistics(*pPoolStats); + pool->m_BlockVector.AddStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddStatistics(*pPoolStats); +} + +void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats) +{ + VmaClearDetailedStatistics(*pPoolStats); + pool->m_BlockVector.AddDetailedStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats); } void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) @@ -17107,15 +14122,6 @@ void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) #endif // #if VMA_MEMORY_BUDGET } -void 
VmaAllocator_T::MakePoolAllocationsLost( - VmaPool hPool, - size_t* pLostAllocationCount) -{ - hPool->m_BlockVector.MakePoolAllocationsLost( - m_CurrentFrameIndex.load(), - pLostAllocationCount); -} - VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool) { return hPool->m_BlockVector.CheckCorruption(); @@ -17128,10 +14134,9 @@ VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) // Process default pools. for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - if(((1u << memTypeIndex) & memoryTypeBits) != 0) + VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; + if(pBlockVector != VMA_NULL) { - VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; - VMA_ASSERT(pBlockVector); VkResult localRes = pBlockVector->CheckCorruption(); switch(localRes) { @@ -17149,11 +14154,11 @@ VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) // Process custom pools. { VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); - for(size_t poolIndex = 0, poolCount = m_Pools.size(); poolIndex < poolCount; ++poolIndex) + for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) { - if(((1u << m_Pools[poolIndex]->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0) + if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0) { - VkResult localRes = m_Pools[poolIndex]->m_BlockVector.CheckCorruption(); + VkResult localRes = pool->m_BlockVector.CheckCorruption(); switch(localRes) { case VK_ERROR_FEATURE_NOT_PRESENT: @@ -17171,14 +14176,17 @@ VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) return finalRes; } -void VmaAllocator_T::CreateLostAllocation(VmaAllocation* pAllocation) -{ - *pAllocation = m_AllocationObjectAllocator.Allocate(VMA_FRAME_INDEX_LOST, false); - (*pAllocation)->InitLost(); -} - VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory) { + AtomicTransactionalIncrement 
deviceMemoryCountIncrement; + const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount); +#if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT + if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount) + { + return VK_ERROR_TOO_MANY_OBJECTS; + } +#endif + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); // HeapSizeLimit is in effect for this heap. @@ -17203,6 +14211,7 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc { m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; } + ++m_Budget.m_BlockCount[heapIndex]; // VULKAN CALL vkAllocateMemory. VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); @@ -17218,9 +14227,12 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc { (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData); } + + deviceMemoryCountIncrement.Commit(); } else { + --m_Budget.m_BlockCount[heapIndex]; m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; } @@ -17238,7 +14250,11 @@ void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, Vk // VULKAN CALL vkFreeMemory. 
(*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); - m_Budget.m_BlockBytes[MemoryTypeIndexToHeapIndex(memoryType)] -= size; + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= size; + + --m_DeviceMemoryCount; } VkResult VmaAllocator_T::BindVulkanBuffer( @@ -17305,11 +14321,6 @@ VkResult VmaAllocator_T::BindVulkanImage( VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) { - if(hAllocation->CanBecomeLost()) - { - return VK_ERROR_MEMORY_MAP_FAILED; - } - switch(hAllocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: @@ -17324,6 +14335,7 @@ VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) } return res; } + VMA_FALLTHROUGH; // Fallthrough case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: return hAllocation->DedicatedAllocMap(this, ppData); default: @@ -17357,7 +14369,7 @@ VkResult VmaAllocator_T::BindBufferMemory( VkBuffer hBuffer, const void* pNext) { - VkResult res = VK_SUCCESS; + VkResult res = VK_ERROR_UNKNOWN_COPY; switch(hAllocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: @@ -17366,7 +14378,7 @@ VkResult VmaAllocator_T::BindBufferMemory( case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: { VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); - VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block. 
Is the allocation lost?"); + VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block."); res = pBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext); break; } @@ -17382,7 +14394,7 @@ VkResult VmaAllocator_T::BindImageMemory( VkImage hImage, const void* pNext) { - VkResult res = VK_SUCCESS; + VkResult res = VK_ERROR_UNKNOWN_COPY; switch(hAllocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: @@ -17391,7 +14403,7 @@ VkResult VmaAllocator_T::BindImageMemory( case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: { VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); - VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block. Is the allocation lost?"); + VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block."); res = pBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext); break; } @@ -17468,17 +14480,58 @@ VkResult VmaAllocator_T::FlushOrInvalidateAllocations( return res; } +VkResult VmaAllocator_T::CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + void* dstMappedData = VMA_NULL; + VkResult res = Map(dstAllocation, &dstMappedData); + if(res == VK_SUCCESS) + { + memcpy((char*)dstMappedData + dstAllocationLocalOffset, pSrcHostPointer, (size_t)size); + Unmap(dstAllocation); + res = FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH); + } + return res; +} + +VkResult VmaAllocator_T::CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + void* srcMappedData = VMA_NULL; + VkResult res = Map(srcAllocation, &srcMappedData); + if(res == VK_SUCCESS) + { + res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE); + if(res == VK_SUCCESS) + { + 
memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size); + Unmap(srcAllocation); + } + } + return res; +} + void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) { VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + VmaPool parentPool = allocation->GetParentPool(); + if(parentPool == VK_NULL_HANDLE) { - VmaMutexLockWrite lock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* const pDedicatedAllocations = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocations); - bool success = VmaVectorRemoveSorted(*pDedicatedAllocations, allocation); - VMA_ASSERT(success); + // Default pool + m_DedicatedAllocations[memTypeIndex].Unregister(allocation); + } + else + { + // Custom pool + parentPool->m_DedicatedAllocations.Unregister(allocation); } VkDeviceMemory hMemory = allocation->GetMemory(); @@ -17495,7 +14548,10 @@ void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); - VMA_DEBUG_LOG(" Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex); + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + m_AllocationObjectAllocator.Free(allocation); + + VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); } uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const @@ -17609,7 +14665,6 @@ bool VmaAllocator_T::GetFlushOrInvalidateRange( } #if VMA_MEMORY_BUDGET - void VmaAllocator_T::UpdateVulkanBudget() { VMA_ASSERT(m_UseExtMemoryBudget); @@ -17647,13 +14702,12 @@ void VmaAllocator_T::UpdateVulkanBudget() m_Budget.m_OperationsSinceBudgetFetch = 0; } } - -#endif // #if VMA_MEMORY_BUDGET +#endif // VMA_MEMORY_BUDGET void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, 
uint8_t pattern) { if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && - !hAllocation->CanBecomeLost() && + hAllocation->IsMappingAllowed() && (m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) { void* pData = VMA_NULL; @@ -17683,108 +14737,114 @@ uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() } #if VMA_STATS_STRING_ENABLED - void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) { - bool dedicatedAllocationsStarted = false; - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + json.WriteString("DefaultPools"); + json.BeginObject(); { - VmaMutexLockRead dedicatedAllocationsLock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* const pDedicatedAllocVector = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocVector); - if(pDedicatedAllocVector->empty() == false) + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - if(dedicatedAllocationsStarted == false) + VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; + VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; + if (pBlockVector != VMA_NULL) { - dedicatedAllocationsStarted = true; - json.WriteString("DedicatedAllocations"); - json.BeginObject(); - } - - json.BeginString("Type "); - json.ContinueString(memTypeIndex); - json.EndString(); - - json.BeginArray(); - - for(size_t i = 0; i < pDedicatedAllocVector->size(); ++i) - { - json.BeginObject(true); - const VmaAllocation hAlloc = (*pDedicatedAllocVector)[i]; - hAlloc->PrintParameters(json); - json.EndObject(); - } - - json.EndArray(); - } - } - if(dedicatedAllocationsStarted) - { - json.EndObject(); - } - - { - bool allocationsStarted = false; - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - if(m_pBlockVectors[memTypeIndex]->IsEmpty() == false) - { - if(allocationsStarted == false) - { - 
allocationsStarted = true; - json.WriteString("DefaultPools"); - json.BeginObject(); - } - json.BeginString("Type "); json.ContinueString(memTypeIndex); json.EndString(); + json.BeginObject(); + { + json.WriteString("PreferredBlockSize"); + json.WriteNumber(pBlockVector->GetPreferredBlockSize()); - m_pBlockVectors[memTypeIndex]->PrintDetailedMap(json); + json.WriteString("Blocks"); + pBlockVector->PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + dedicatedAllocList.BuildStatsString(json); + } + json.EndObject(); } } - if(allocationsStarted) - { - json.EndObject(); - } } + json.EndObject(); - // Custom pools + json.WriteString("CustomPools"); + json.BeginObject(); { VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); - const size_t poolCount = m_Pools.size(); - if(poolCount > 0) + if (!m_Pools.IsEmpty()) { - json.WriteString("Pools"); - json.BeginObject(); - for(size_t poolIndex = 0; poolIndex < poolCount; ++poolIndex) + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - json.BeginString(); - json.ContinueString(m_Pools[poolIndex]->GetId()); - json.EndString(); + bool displayType = true; + size_t index = 0; + for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + if (blockVector.GetMemoryTypeIndex() == memTypeIndex) + { + if (displayType) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + json.BeginArray(); + displayType = false; + } - m_Pools[poolIndex]->m_BlockVector.PrintDetailedMap(json); + json.BeginObject(); + { + json.WriteString("Name"); + json.BeginString(); + json.ContinueString((uint64_t)index++); + if (pool->GetName()) + { + json.ContinueString(" - "); + json.ContinueString(pool->GetName()); + } + json.EndString(); + + json.WriteString("PreferredBlockSize"); + json.WriteNumber(blockVector.GetPreferredBlockSize()); + + json.WriteString("Blocks"); + 
blockVector.PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + pool->m_DedicatedAllocations.BuildStatsString(json); + } + json.EndObject(); + } + } + + if (!displayType) + json.EndArray(); } - json.EndObject(); } } + json.EndObject(); } +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_ALLOCATOR_T_FUNCTIONS -#endif // #if VMA_STATS_STRING_ENABLED - -//////////////////////////////////////////////////////////////////////////////// -// Public interface +#ifndef _VMA_PUBLIC_INTERFACE VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( const VmaAllocatorCreateInfo* pCreateInfo, VmaAllocator* pAllocator) { VMA_ASSERT(pCreateInfo && pAllocator); VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 || - (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 2)); + (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 3)); VMA_DEBUG_LOG("vmaCreateAllocator"); *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo); - return (*pAllocator)->Init(pCreateInfo); + VkResult result = (*pAllocator)->Init(pCreateInfo); + if(result < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pAllocator); + *pAllocator = VK_NULL_HANDLE; + } + return result; } VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( @@ -17793,7 +14853,7 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( if(allocator != VK_NULL_HANDLE) { VMA_DEBUG_LOG("vmaDestroyAllocator"); - VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; + VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; // Have to copy the callbacks when destroying. 
vma_delete(&allocationCallbacks, allocator); } } @@ -17837,29 +14897,28 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( uint32_t frameIndex) { VMA_ASSERT(allocator); - VMA_ASSERT(frameIndex != VMA_FRAME_INDEX_LOST); VMA_DEBUG_GLOBAL_MUTEX_LOCK allocator->SetCurrentFrameIndex(frameIndex); } -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( VmaAllocator allocator, - VmaStats* pStats) + VmaTotalStatistics* pStats) { VMA_ASSERT(allocator && pStats); VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->CalculateStats(pStats); + allocator->CalculateStatistics(pStats); } -VMA_CALL_PRE void VMA_CALL_POST vmaGetBudget( +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( VmaAllocator allocator, - VmaBudget* pBudget) + VmaBudget* pBudgets) { - VMA_ASSERT(allocator && pBudget); + VMA_ASSERT(allocator && pBudgets); VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->GetBudget(pBudget, 0, allocator->GetMemoryHeapCount()); + allocator->GetHeapBudgets(pBudgets, 0, allocator->GetMemoryHeapCount()); } #if VMA_STATS_STRING_ENABLED @@ -17872,133 +14931,183 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( VMA_ASSERT(allocator && ppStatsString); VMA_DEBUG_GLOBAL_MUTEX_LOCK - VmaStringBuilder sb(allocator); + VmaStringBuilder sb(allocator->GetAllocationCallbacks()); { + VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; + allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount()); + + VmaTotalStatistics stats; + allocator->CalculateStatistics(&stats); + VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); json.BeginObject(); - - VmaBudget budget[VK_MAX_MEMORY_HEAPS]; - allocator->GetBudget(budget, 0, allocator->GetMemoryHeapCount()); - - VmaStats stats; - allocator->CalculateStats(&stats); - - json.WriteString("Total"); - VmaPrintStatInfo(json, stats.total); - - for(uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) { - json.BeginString("Heap "); - json.ContinueString(heapIndex); - 
json.EndString(); - json.BeginObject(); - - json.WriteString("Size"); - json.WriteNumber(allocator->m_MemProps.memoryHeaps[heapIndex].size); - - json.WriteString("Flags"); - json.BeginArray(true); - if((allocator->m_MemProps.memoryHeaps[heapIndex].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - json.EndArray(); - - json.WriteString("Budget"); + json.WriteString("General"); json.BeginObject(); { - json.WriteString("BlockBytes"); - json.WriteNumber(budget[heapIndex].blockBytes); - json.WriteString("AllocationBytes"); - json.WriteNumber(budget[heapIndex].allocationBytes); - json.WriteString("Usage"); - json.WriteNumber(budget[heapIndex].usage); - json.WriteString("Budget"); - json.WriteNumber(budget[heapIndex].budget); + const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties; + const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps; + + json.WriteString("API"); + json.WriteString("Vulkan"); + + json.WriteString("apiVersion"); + json.BeginString(); + json.ContinueString(VK_VERSION_MAJOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_VERSION_MINOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_VERSION_PATCH(deviceProperties.apiVersion)); + json.EndString(); + + json.WriteString("GPU"); + json.WriteString(deviceProperties.deviceName); + json.WriteString("deviceType"); + json.WriteNumber(static_cast(deviceProperties.deviceType)); + + json.WriteString("maxMemoryAllocationCount"); + json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount); + json.WriteString("bufferImageGranularity"); + json.WriteNumber(deviceProperties.limits.bufferImageGranularity); + json.WriteString("nonCoherentAtomSize"); + json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + + json.WriteString("memoryHeapCount"); + json.WriteNumber(memoryProperties.memoryHeapCount); + 
json.WriteString("memoryTypeCount"); + json.WriteNumber(memoryProperties.memoryTypeCount); } json.EndObject(); - - if(stats.memoryHeap[heapIndex].blockCount > 0) + } + { + json.WriteString("Total"); + VmaPrintDetailedStatistics(json, stats.total); + } + { + json.WriteString("MemoryInfo"); + json.BeginObject(); { - json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryHeap[heapIndex]); - } - - for(uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) - { - if(allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) { - json.BeginString("Type "); - json.ContinueString(typeIndex); + json.BeginString("Heap "); + json.ContinueString(heapIndex); json.EndString(); - json.BeginObject(); + { + const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; + json.WriteString("Flags"); + json.BeginArray(true); + { + if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + #if VMA_VULKAN_VERSION >= 1001000 + if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) + json.WriteString("MULTI_INSTANCE"); + #endif - json.WriteString("Flags"); - json.BeginArray(true); - VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; - if((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) - { - json.WriteString("HOST_VISIBLE"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0) - { - json.WriteString("HOST_COHERENT"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) - { - json.WriteString("HOST_CACHED"); - } - if((flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) != 0) - { - json.WriteString("LAZILY_ALLOCATED"); - } - if((flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) != 0) - { - json.WriteString(" PROTECTED"); - } - if((flags & 
VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) - { - json.WriteString(" DEVICE_COHERENT"); - } - if((flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) != 0) - { - json.WriteString(" DEVICE_UNCACHED"); - } - json.EndArray(); + VkMemoryHeapFlags flags = heapInfo.flags & + ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT + #endif + ); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Size"); + json.WriteNumber(heapInfo.size); + + json.WriteString("Budget"); + json.BeginObject(); + { + json.WriteString("BudgetBytes"); + json.WriteNumber(budgets[heapIndex].budget); + json.WriteString("UsageBytes"); + json.WriteNumber(budgets[heapIndex].usage); + } + json.EndObject(); - if(stats.memoryType[typeIndex].blockCount > 0) - { json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryType[typeIndex]); - } + VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + json.WriteString("MemoryPools"); + json.BeginObject(); + { + for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) + { + if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + { + json.BeginString("Type "); + json.ContinueString(typeIndex); + json.EndString(); + json.BeginObject(); + { + json.WriteString("Flags"); + json.BeginArray(true); + { + VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + json.WriteString("HOST_VISIBLE"); + if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + json.WriteString("HOST_COHERENT"); + if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + json.WriteString("HOST_CACHED"); + if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) + json.WriteString("LAZILY_ALLOCATED"); + #if VMA_VULKAN_VERSION >= 1001000 + if (flags & 
VK_MEMORY_PROPERTY_PROTECTED_BIT) + json.WriteString("PROTECTED"); + #endif + #if VK_AMD_device_coherent_memory + if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) + json.WriteString("DEVICE_COHERENT_AMD"); + if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) + json.WriteString("DEVICE_UNCACHED_AMD"); + #endif + + flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT + #endif + #if VK_AMD_device_coherent_memory + | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY + | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY + #endif + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); + } + json.EndObject(); + } + } + + } + json.EndObject(); + } json.EndObject(); } } - json.EndObject(); } - if(detailedMap == VK_TRUE) - { + + if (detailedMap == VK_TRUE) allocator->PrintDetailedMap(json); - } json.EndObject(); } - const size_t len = sb.GetLength(); - char* const pChars = vma_new_array(allocator, char, len + 1); - if(len > 0) - { - memcpy(pChars, sb.GetData(), len); - } - pChars[len] = '\0'; - *ppStatsString = pChars; + *ppStatsString = VmaCreateStringCopy(allocator->GetAllocationCallbacks(), sb.GetData(), sb.GetLength()); } VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( @@ -18008,12 +15117,11 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( if(pStatsString != VMA_NULL) { VMA_ASSERT(allocator); - size_t len = strlen(pStatsString); - vma_delete_array(allocator, pStatsString, len + 1); + VmaFreeString(allocator->GetAllocationCallbacks(), pStatsString); } } -#endif // #if VMA_STATS_STRING_ENABLED +#endif // VMA_STATS_STRING_ENABLED /* This function is not protected by any mutex because it just reads immutable data. 
@@ -18028,91 +15136,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); - memoryTypeBits &= allocator->GetGlobalMemoryTypeBits(); - - if(pAllocationCreateInfo->memoryTypeBits != 0) - { - memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; - } - - uint32_t requiredFlags = pAllocationCreateInfo->requiredFlags; - uint32_t preferredFlags = pAllocationCreateInfo->preferredFlags; - uint32_t notPreferredFlags = 0; - - // Convert usage to requiredFlags and preferredFlags. - switch(pAllocationCreateInfo->usage) - { - case VMA_MEMORY_USAGE_UNKNOWN: - break; - case VMA_MEMORY_USAGE_GPU_ONLY: - if(!allocator->IsIntegratedGpu() || (preferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } - break; - case VMA_MEMORY_USAGE_CPU_ONLY: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - break; - case VMA_MEMORY_USAGE_CPU_TO_GPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - if(!allocator->IsIntegratedGpu() || (preferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } - break; - case VMA_MEMORY_USAGE_GPU_TO_CPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - preferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - break; - case VMA_MEMORY_USAGE_CPU_COPY: - notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - break; - case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: - requiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; - break; - default: - VMA_ASSERT(0); - break; - } - - // Avoid DEVICE_COHERENT unless explicitly requested. 
- if(((pAllocationCreateInfo->requiredFlags | pAllocationCreateInfo->preferredFlags) & - (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) - { - notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY; - } - - *pMemoryTypeIndex = UINT32_MAX; - uint32_t minCost = UINT32_MAX; - for(uint32_t memTypeIndex = 0, memTypeBit = 1; - memTypeIndex < allocator->GetMemoryTypeCount(); - ++memTypeIndex, memTypeBit <<= 1) - { - // This memory type is acceptable according to memoryTypeBits bitmask. - if((memTypeBit & memoryTypeBits) != 0) - { - const VkMemoryPropertyFlags currFlags = - allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; - // This memory type contains requiredFlags. - if((requiredFlags & ~currFlags) == 0) - { - // Calculate cost as number of bits from preferredFlags not present in this memory type. - uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + - VmaCountBitsSet(currFlags & notPreferredFlags); - // Remember memory type with lowest cost. - if(currCost < minCost) - { - *pMemoryTypeIndex = memTypeIndex; - if(currCost == 0) - { - return VK_SUCCESS; - } - minCost = currCost; - } - } - } - } - return (*pMemoryTypeIndex != UINT32_MAX) ? 
VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; + return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, VmaBufferImageUsage::UNKNOWN, pMemoryTypeIndex); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( @@ -18127,23 +15151,42 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); const VkDevice hDev = allocator->m_hDevice; - VkBuffer hBuffer = VK_NULL_HANDLE; - VkResult res = allocator->GetVulkanFunctions().vkCreateBuffer( - hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); - if(res == VK_SUCCESS) + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceBufferMemoryRequirements) { - VkMemoryRequirements memReq = {}; - allocator->GetVulkanFunctions().vkGetBufferMemoryRequirements( - hDev, hBuffer, &memReq); + // Can query straight from VkBufferCreateInfo :) + VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR}; + devBufMemReq.pCreateInfo = pBufferCreateInfo; - res = vmaFindMemoryTypeIndex( - allocator, - memReq.memoryTypeBits, - pAllocationCreateInfo, - pMemoryTypeIndex); + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); - allocator->GetVulkanFunctions().vkDestroyBuffer( - hDev, hBuffer, allocator->GetAllocationCallbacks()); + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy buffer to query :( + VkBuffer hBuffer = VK_NULL_HANDLE; + res = funcs->vkCreateBuffer( + hDev, pBufferCreateInfo, 
allocator->GetAllocationCallbacks(), &hBuffer); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + + funcs->vkDestroyBuffer( + hDev, hBuffer, allocator->GetAllocationCallbacks()); + } } return res; } @@ -18160,23 +15203,44 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); const VkDevice hDev = allocator->m_hDevice; - VkImage hImage = VK_NULL_HANDLE; - VkResult res = allocator->GetVulkanFunctions().vkCreateImage( - hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); - if(res == VK_SUCCESS) + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceImageMemoryRequirements) { - VkMemoryRequirements memReq = {}; - allocator->GetVulkanFunctions().vkGetImageMemoryRequirements( - hDev, hImage, &memReq); + // Can query straight from VkImageCreateInfo :) + VkDeviceImageMemoryRequirementsKHR devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR}; + devImgMemReq.pCreateInfo = pImageCreateInfo; + VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && + "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); - res = vmaFindMemoryTypeIndex( - allocator, - memReq.memoryTypeBits, - pAllocationCreateInfo, - pMemoryTypeIndex); + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); - 
allocator->GetVulkanFunctions().vkDestroyImage( - hDev, hImage, allocator->GetAllocationCallbacks()); + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy image to query :( + VkImage hImage = VK_NULL_HANDLE; + res = funcs->vkCreateImage( + hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + + funcs->vkDestroyImage( + hDev, hImage, allocator->GetAllocationCallbacks()); + } } return res; } @@ -18192,16 +15256,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( VMA_DEBUG_GLOBAL_MUTEX_LOCK - VkResult res = allocator->CreatePool(pCreateInfo, pPool); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordCreatePool(allocator->GetCurrentFrameIndex(), *pCreateInfo, *pPool); - } -#endif - - return res; + return allocator->CreatePool(pCreateInfo, pPool); } VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( @@ -18219,45 +15274,31 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordDestroyPool(allocator->GetCurrentFrameIndex(), pool); - } -#endif - allocator->DestroyPool(pool); } -VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( VmaAllocator allocator, VmaPool pool, - VmaPoolStats* pPoolStats) + VmaStatistics* pPoolStats) { VMA_ASSERT(allocator && pool && pPoolStats); VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->GetPoolStats(pool, 
pPoolStats); + allocator->GetPoolStatistics(pool, pPoolStats); } -VMA_CALL_PRE void VMA_CALL_POST vmaMakePoolAllocationsLost( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( VmaAllocator allocator, VmaPool pool, - size_t* pLostAllocationCount) + VmaDetailedStatistics* pPoolStats) { - VMA_ASSERT(allocator && pool); + VMA_ASSERT(allocator && pool && pPoolStats); VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordMakePoolAllocationsLost(allocator->GetCurrentFrameIndex(), pool); - } -#endif - - allocator->MakePoolAllocationsLost(pool, pLostAllocationCount); + allocator->CalculatePoolStatistics(pool, pPoolStats); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) @@ -18297,13 +15338,6 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( VMA_DEBUG_GLOBAL_MUTEX_LOCK pool->SetName(pName); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordSetPoolName(allocator->GetCurrentFrameIndex(), pool, pName); - } -#endif } VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( @@ -18324,24 +15358,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, 1, // allocationCount pAllocation); -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordAllocateMemory( - allocator->GetCurrentFrameIndex(), - *pVkMemoryRequirements, - *pCreateInfo, - *pAllocation); - } -#endif - if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) { allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); @@ -18374,25 +15397,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST 
vmaAllocateMemoryPages( false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, allocationCount, pAllocations); -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordAllocateMemoryPages( - allocator->GetCurrentFrameIndex(), - *pVkMemoryRequirements, - *pCreateInfo, - (uint64_t)allocationCount, - pAllocations); - } -#endif - if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) { for(size_t i = 0; i < allocationCount; ++i) @@ -18429,26 +15440,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( requiresDedicatedAllocation, prefersDedicatedAllocation, buffer, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount pAllocation); -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordAllocateMemoryForBuffer( - allocator->GetCurrentFrameIndex(), - vkMemReq, - requiresDedicatedAllocation, - prefersDedicatedAllocation, - *pCreateInfo, - *pAllocation); - } -#endif - if(pAllocationInfo && result == VK_SUCCESS) { allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); @@ -18481,26 +15479,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage image, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, 1, // allocationCount pAllocation); -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - 
allocator->GetRecorder()->RecordAllocateMemoryForImage( - allocator->GetCurrentFrameIndex(), - vkMemReq, - requiresDedicatedAllocation, - prefersDedicatedAllocation, - *pCreateInfo, - *pAllocation); - } -#endif - if(pAllocationInfo && result == VK_SUCCESS) { allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); @@ -18524,15 +15509,6 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordFreeMemory( - allocator->GetCurrentFrameIndex(), - allocation); - } -#endif - allocator->FreeMemory( 1, // allocationCount &allocation); @@ -18554,33 +15530,9 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordFreeMemoryPages( - allocator->GetCurrentFrameIndex(), - (uint64_t)allocationCount, - pAllocations); - } -#endif - allocator->FreeMemory(allocationCount, pAllocations); } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaResizeAllocation( - VmaAllocator allocator, - VmaAllocation allocation, - VkDeviceSize newSize) -{ - VMA_ASSERT(allocator && allocation); - - VMA_DEBUG_LOG("vmaResizeAllocation"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - return allocator->ResizeAllocation(allocation, newSize); -} - VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( VmaAllocator allocator, VmaAllocation allocation, @@ -18590,36 +15542,19 @@ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordGetAllocationInfo( - allocator->GetCurrentFrameIndex(), - allocation); - } -#endif - allocator->GetAllocationInfo(allocation, pAllocationInfo); } -VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaTouchAllocation( +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( VmaAllocator allocator, - VmaAllocation allocation) + 
VmaAllocation allocation, + VmaAllocationInfo2* pAllocationInfo) { - VMA_ASSERT(allocator && allocation); + VMA_ASSERT(allocator && allocation && pAllocationInfo); VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordTouchAllocation( - allocator->GetCurrentFrameIndex(), - allocation); - } -#endif - - return allocator->TouchAllocation(allocation); + allocator->GetAllocationInfo2(allocation, pAllocationInfo); } VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( @@ -18632,36 +15567,24 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( VMA_DEBUG_GLOBAL_MUTEX_LOCK allocation->SetUserData(allocator, pUserData); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordSetAllocationUserData( - allocator->GetCurrentFrameIndex(), - allocation, - pUserData); - } -#endif } -VMA_CALL_PRE void VMA_CALL_POST vmaCreateLostAllocation( - VmaAllocator allocator, - VmaAllocation* pAllocation) +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName) { - VMA_ASSERT(allocator && pAllocation); + allocation->SetName(allocator, pName); +} - VMA_DEBUG_GLOBAL_MUTEX_LOCK; - - allocator->CreateLostAllocation(pAllocation); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordCreateLostAllocation( - allocator->GetCurrentFrameIndex(), - *pAllocation); - } -#endif +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags) +{ + VMA_ASSERT(allocator && allocation && pFlags); + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + *pFlags = allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; } VMA_CALL_PRE VkResult VMA_CALL_POST 
vmaMapMemory( @@ -18673,18 +15596,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( VMA_DEBUG_GLOBAL_MUTEX_LOCK - VkResult res = allocator->Map(allocation, ppData); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordMapMemory( - allocator->GetCurrentFrameIndex(), - allocation); - } -#endif - - return res; + return allocator->Map(allocation, ppData); } VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( @@ -18695,19 +15607,14 @@ VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordUnmapMemory( - allocator->GetCurrentFrameIndex(), - allocation); - } -#endif - allocator->Unmap(allocation); } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) { VMA_ASSERT(allocator && allocation); @@ -18715,21 +15622,14 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(VmaAllocator allocator, V VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordFlushAllocation( - allocator->GetCurrentFrameIndex(), - allocation, offset, size); - } -#endif - - return res; + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) { 
VMA_ASSERT(allocator && allocation); @@ -18737,18 +15637,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(VmaAllocator allocat VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordInvalidateAllocation( - allocator->GetCurrentFrameIndex(), - allocation, offset, size); - } -#endif - - return res; + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( @@ -18771,16 +15660,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - //TODO - } -#endif - - return res; + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( @@ -18803,19 +15683,54 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - //TODO - } -#endif - - return res; + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption(VmaAllocator allocator, uint32_t memoryTypeBits) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( + VmaAllocator allocator, + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + 
VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyMemoryToAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator allocator, + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && srcAllocation && pDstHostPointer); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyAllocationToMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator allocator, + uint32_t memoryTypeBits) { VMA_ASSERT(allocator); @@ -18826,139 +15741,70 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption(VmaAllocator allocator, u return allocator->CheckCorruption(memoryTypeBits); } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( VmaAllocator allocator, - const VmaAllocation* pAllocations, - size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo *pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats) -{ - // Deprecated interface, reimplemented using new one. 
- - VmaDefragmentationInfo2 info2 = {}; - info2.allocationCount = (uint32_t)allocationCount; - info2.pAllocations = pAllocations; - info2.pAllocationsChanged = pAllocationsChanged; - if(pDefragmentationInfo != VMA_NULL) - { - info2.maxCpuAllocationsToMove = pDefragmentationInfo->maxAllocationsToMove; - info2.maxCpuBytesToMove = pDefragmentationInfo->maxBytesToMove; - } - else - { - info2.maxCpuAllocationsToMove = UINT32_MAX; - info2.maxCpuBytesToMove = VK_WHOLE_SIZE; - } - // info2.flags, maxGpuAllocationsToMove, maxGpuBytesToMove, commandBuffer deliberately left zero. - - VmaDefragmentationContext ctx; - VkResult res = vmaDefragmentationBegin(allocator, &info2, pDefragmentationStats, &ctx); - if(res == VK_NOT_READY) - { - res = vmaDefragmentationEnd( allocator, ctx); - } - return res; -} - -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( - VmaAllocator allocator, - const VmaDefragmentationInfo2* pInfo, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext *pContext) + const VmaDefragmentationInfo* pInfo, + VmaDefragmentationContext* pContext) { VMA_ASSERT(allocator && pInfo && pContext); - // Degenerate case: Nothing to defragment. 
- if(pInfo->allocationCount == 0 && pInfo->poolCount == 0) + VMA_DEBUG_LOG("vmaBeginDefragmentation"); + + if (pInfo->pool != VMA_NULL) { - return VK_SUCCESS; + // Check if run on supported algorithms + if (pInfo->pool->m_BlockVector.GetAlgorithm() & VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + return VK_ERROR_FEATURE_NOT_PRESENT; } - VMA_ASSERT(pInfo->allocationCount == 0 || pInfo->pAllocations != VMA_NULL); - VMA_ASSERT(pInfo->poolCount == 0 || pInfo->pPools != VMA_NULL); - VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->allocationCount, pInfo->pAllocations)); - VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->poolCount, pInfo->pPools)); - - VMA_DEBUG_LOG("vmaDefragmentationBegin"); - VMA_DEBUG_GLOBAL_MUTEX_LOCK - VkResult res = allocator->DefragmentationBegin(*pInfo, pStats, pContext); - -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordDefragmentationBegin( - allocator->GetCurrentFrameIndex(), *pInfo, *pContext); - } -#endif - - return res; + *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); + return VK_SUCCESS; } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( VmaAllocator allocator, - VmaDefragmentationContext context) + VmaDefragmentationContext context, + VmaDefragmentationStats* pStats) { - VMA_ASSERT(allocator); + VMA_ASSERT(allocator && context); - VMA_DEBUG_LOG("vmaDefragmentationEnd"); + VMA_DEBUG_LOG("vmaEndDefragmentation"); - if(context != VK_NULL_HANDLE) - { - VMA_DEBUG_GLOBAL_MUTEX_LOCK + VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordDefragmentationEnd( - allocator->GetCurrentFrameIndex(), context); - } -#endif - - return allocator->DefragmentationEnd(context); - } - else - { - return VK_SUCCESS; - } + if (pStats) + context->GetStats(*pStats); + vma_delete(allocator, context); } VMA_CALL_PRE VkResult 
VMA_CALL_POST vmaBeginDefragmentationPass( - VmaAllocator allocator, - VmaDefragmentationContext context, - VmaDefragmentationPassInfo* pInfo - ) + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) { - VMA_ASSERT(allocator); - VMA_ASSERT(pInfo); + VMA_ASSERT(context && pPassInfo); VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); VMA_DEBUG_GLOBAL_MUTEX_LOCK - if(context == VK_NULL_HANDLE) - { - pInfo->moveCount = 0; - return VK_SUCCESS; - } - - return allocator->DefragmentationPassBegin(pInfo, context); + return context->DefragmentPassBegin(*pPassInfo); } + VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( - VmaAllocator allocator, - VmaDefragmentationContext context) + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) { - VMA_ASSERT(allocator); + VMA_ASSERT(context && pPassInfo); VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK - if(context == VK_NULL_HANDLE) - return VK_SUCCESS; - - return allocator->DefragmentationPassEnd(context); + return context->DefragmentPassEnd(*pPassInfo); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( @@ -19033,13 +15879,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( if(pBufferCreateInfo->size == 0) { - return VK_ERROR_VALIDATION_FAILED_EXT; + return VK_ERROR_INITIALIZATION_FAILED; } if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && !allocator->m_UseKhrBufferDeviceAddress) { VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); - return VK_ERROR_VALIDATION_FAILED_EXT; + return VK_ERROR_INITIALIZATION_FAILED; } VMA_DEBUG_LOG("vmaCreateBuffer"); @@ -19070,24 +15916,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( requiresDedicatedAllocation, 
prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer - pBufferCreateInfo->usage, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount pAllocation); -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordCreateBuffer( - allocator->GetCurrentFrameIndex(), - *pBufferCreateInfo, - *pAllocationCreateInfo, - *pAllocation); - } -#endif - if(res >= 0) { // 3. Bind buffer with memory. @@ -19099,7 +15934,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( { // All steps succeeded. #if VMA_STATS_STRING_ENABLED - (*pAllocation)->InitBufferImageUsage(pBufferCreateInfo->usage); + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); #endif if(pAllocationInfo != VMA_NULL) { @@ -19123,6 +15958,156 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( return res; } +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer* pBuffer, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && VmaIsPow2(minAlignment) && pBuffer && pAllocation); + + if(pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateBufferWithAlignment"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + 
+ *pBuffer = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if(res >= 0) + { + // 2. vkGetBufferMemoryRequirements. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + // 2a. Include minAlignment + vkMemReq.alignment = VMA_MAX(vkMemReq.alignment, minAlignment); + + // 3. Allocate memory using allocator. + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + *pBuffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind buffer with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. 
+ #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + return vmaCreateAliasingBuffer2(allocator, allocation, 0, pBufferCreateInfo, pBuffer); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); + VMA_ASSERT(allocationLocalOffset + pBufferCreateInfo->size <= allocation->GetSize()); + + VMA_DEBUG_LOG("vmaCreateAliasingBuffer2"); + + *pBuffer = VK_NULL_HANDLE; + + if (pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was 
not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if (res >= 0) + { + // 2. Bind buffer with memory. + res = allocator->BindBufferMemory(allocation, allocationLocalOffset, *pBuffer, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + } + return res; +} + VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( VmaAllocator allocator, VkBuffer buffer, @@ -19139,15 +16124,6 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordDestroyBuffer( - allocator->GetCurrentFrameIndex(), - allocation); - } -#endif - if(buffer != VK_NULL_HANDLE) { (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks()); @@ -19177,7 +16153,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( pImageCreateInfo->mipLevels == 0 || pImageCreateInfo->arrayLayers == 0) { - return VK_ERROR_VALIDATION_FAILED_EXT; + return VK_ERROR_INITIALIZATION_FAILED; } VMA_DEBUG_LOG("vmaCreateImage"); @@ -19211,24 +16187,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage *pImage, // dedicatedImage + VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage *pAllocationCreateInfo, suballocType, 1, // allocationCount pAllocation); -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordCreateImage( - allocator->GetCurrentFrameIndex(), - *pImageCreateInfo, - 
*pAllocationCreateInfo, - *pAllocation); - } -#endif - if(res >= 0) { // 3. Bind image with memory. @@ -19240,7 +16205,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( { // All steps succeeded. #if VMA_STATS_STRING_ENABLED - (*pAllocation)->InitBufferImageUsage(pImageCreateInfo->usage); + (*pAllocation)->InitImageUsage(*pImageCreateInfo); #endif if(pAllocationInfo != VMA_NULL) { @@ -19264,10 +16229,62 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( return res; } +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + return vmaCreateAliasingImage2(allocator, allocation, 0, pImageCreateInfo, pImage); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation); + + *pImage = VK_NULL_HANDLE; + + VMA_DEBUG_LOG("vmaCreateImage2"); + + if (pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkImage. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if (res >= 0) + { + // 2. Bind image with memory. 
+ res = allocator->BindImageMemory(allocation, allocationLocalOffset, *pImage, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + } + return res; +} + VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( - VmaAllocator allocator, - VkImage image, - VmaAllocation allocation) + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation) { VMA_ASSERT(allocator); @@ -19280,15 +16297,6 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordDestroyImage( - allocator->GetCurrentFrameIndex(), - allocation); - } -#endif - if(image != VK_NULL_HANDLE) { (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks()); @@ -19301,4 +16309,2368 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( } } -#endif // #ifdef VMA_IMPLEMENTATION +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( + const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualBlock VMA_NULLABLE * VMA_NOT_NULL pVirtualBlock) +{ + VMA_ASSERT(pCreateInfo && pVirtualBlock); + VMA_ASSERT(pCreateInfo->size > 0); + VMA_DEBUG_LOG("vmaCreateVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + *pVirtualBlock = vma_new(pCreateInfo->pAllocationCallbacks, VmaVirtualBlock_T)(*pCreateInfo); + VkResult res = (*pVirtualBlock)->Init(); + if(res < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pVirtualBlock); + *pVirtualBlock = VK_NULL_HANDLE; + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(VmaVirtualBlock VMA_NULLABLE virtualBlock) +{ + if(virtualBlock != VK_NULL_HANDLE) + { + VMA_DEBUG_LOG("vmaDestroyVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + VkAllocationCallbacks allocationCallbacks = 
virtualBlock->m_AllocationCallbacks; // Have to copy the callbacks when destroying. + vma_delete(&allocationCallbacks, virtualBlock); + } +} + +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaIsVirtualBlockEmpty"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->IsEmpty() ? VK_TRUE : VK_FALSE; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pVirtualAllocInfo != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualAllocationInfo"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetAllocationInfo(allocation, *pVirtualAllocInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, + VkDeviceSize* VMA_NULLABLE pOffset) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pCreateInfo != VMA_NULL && pAllocation != VMA_NULL); + VMA_DEBUG_LOG("vmaVirtualAllocate"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(VmaVirtualBlock VMA_NOT_NULL virtualBlock, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation) +{ + if(allocation != VK_NULL_HANDLE) + { + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaVirtualFree"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->Free(allocation); + } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaClearVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + 
virtualBlock->Clear(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, void* VMA_NULLABLE pUserData) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaSetVirtualAllocationUserData"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->SetAllocationUserData(allocation, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->CalculateDetailedStatistics(*pStats); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && ppStatsString != VMA_NULL); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + const VkAllocationCallbacks* allocationCallbacks = virtualBlock->GetAllocationCallbacks(); + VmaStringBuilder sb(allocationCallbacks); + virtualBlock->BuildStatsString(detailedMap != VK_FALSE, sb); + *ppStatsString = VmaCreateStringCopy(allocationCallbacks, sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE pStatsString) +{ + if(pStatsString != VMA_NULL) + { + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + 
VMA_DEBUG_GLOBAL_MUTEX_LOCK; + VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString); + } +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_PUBLIC_INTERFACE +#endif // VMA_IMPLEMENTATION + +/** +\page quick_start Quick start + +\section quick_start_project_setup Project setup + +Vulkan Memory Allocator comes in form of a "stb-style" single header file. +While you can pull the entire repository, e.g. as a Git submodule, and a CMake script is also provided, +you don't need to build it as a separate library project. +You can add file "vk_mem_alloc.h" directly to your project and submit it to code repository next to your other source files. + +"Single header" doesn't mean that everything is contained in C/C++ declarations, +like it tends to be in case of inline functions or C++ templates. +It means that implementation is bundled with interface in a single file and needs to be extracted using preprocessor macro. +If you don't do it properly, it will result in linker errors. + +To do it properly: + +-# Include "vk_mem_alloc.h" file in each CPP file where you want to use the library. + This includes declarations of all members of the library. +-# In exactly one CPP file define following macro before this include. + It enables also internal definitions. + +\code +#define VMA_IMPLEMENTATION +#include "vk_mem_alloc.h" +\endcode + +It may be a good idea to create dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp". + +This library includes header `<vulkan/vulkan.h>`, which in turn +includes `<windows.h>` on Windows. If you need some specific macros defined +before including these headers (like `WIN32_LEAN_AND_MEAN` or +`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define +them before every `#include` of this library. +It may be a good idea to create a dedicated header file for this purpose, e.g. "VmaUsage.h", +that will be included in other source files instead of VMA header directly. + +This library is written in C++, but has C-compatible interface. 
+Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but full +implementation with `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C. +Some features of C++14 are used and required. Features of C++20 are used optionally when available. +Some headers of standard C and C++ library are used, but STL containers, RTTI, or C++ exceptions are not used. + + +\section quick_start_initialization Initialization + +VMA offers library interface in a style similar to Vulkan, with object handles like #VmaAllocation, +structures describing parameters of objects to be created like #VmaAllocationCreateInfo, +and error codes returned from functions using `VkResult` type. + +The first and the main object that needs to be created is #VmaAllocator. +It represents the initialization of the entire library. +Only one such object should be created per `VkDevice`. +You should create it at program startup, after `VkDevice` was created, and before any device memory allocation needs to be made. +It must be destroyed before `VkDevice` is destroyed. + +At program startup: + +-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, `VkDevice` objects. +-# Fill VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create #VmaAllocator object. + +Only members `physicalDevice`, `device`, `instance` are required. +However, you should inform the library which Vulkan version you use by setting +VmaAllocatorCreateInfo::vulkanApiVersion and which extensions you enabled +by setting VmaAllocatorCreateInfo::flags. +Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions. +See below for details. + +\subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version + +VMA supports Vulkan version down to 1.0, for backward compatibility. +If you want to use higher version, you need to inform the library about it. +This is a two-step process. + +Step 1: Compile time. 
By default, VMA compiles with code supporting the highest +Vulkan version found in the included `<vulkan/vulkan.h>` that is also supported by the library. +If this is OK, you don't need to do anything. +However, if you want to compile VMA as if only some lower Vulkan version was available, +define macro `VMA_VULKAN_VERSION` before every `#include "vk_mem_alloc.h"`. +It should have decimal numeric value in form of ABBBCCC, where A = major, BBB = minor, CCC = patch Vulkan version. +For example, to compile against Vulkan 1.2: + +\code +#define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2 +#include "vk_mem_alloc.h" +\endcode + +Step 2: Runtime. Even when compiled with higher Vulkan version available, +VMA can use only features of a lower version, which is configurable during creation of the #VmaAllocator object. +By default, only Vulkan 1.0 is used. +To initialize the allocator with support for higher Vulkan version, you need to set member +VmaAllocatorCreateInfo::vulkanApiVersion to an appropriate value, e.g. using constants like `VK_API_VERSION_1_2`. +See code sample below. + +\subsection quick_start_initialization_importing_vulkan_functions Importing Vulkan functions + +You may need to configure importing Vulkan functions. There are 3 ways to do this: + +-# **If you link with Vulkan static library** (e.g. "vulkan-1.lib" on Windows): + - You don't need to do anything. + - VMA will use these, as macro `VMA_STATIC_VULKAN_FUNCTIONS` is defined to 1 by default. +-# **If you want VMA to fetch pointers to Vulkan functions dynamically** using `vkGetInstanceProcAddr`, + `vkGetDeviceProcAddr` (this is the option presented in the example below): + - Define `VMA_STATIC_VULKAN_FUNCTIONS` to 0, `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 1. + - Provide pointers to these two functions via VmaVulkanFunctions::vkGetInstanceProcAddr, + VmaVulkanFunctions::vkGetDeviceProcAddr. + - The library will fetch pointers to all other functions it needs internally. 
+-# **If you fetch pointers to all Vulkan functions in a custom way**, e.g. using some loader like + [Volk](https://github.com/zeux/volk): + - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0. + - Pass these pointers via structure #VmaVulkanFunctions. + +\subsection quick_start_initialization_enabling_extensions Enabling extensions + +VMA can automatically use following Vulkan extensions. +If you found them available on the selected physical device and you enabled them +while creating `VkInstance` / `VkDevice` object, inform VMA about their availability +by setting appropriate flags in VmaAllocatorCreateInfo::flags. + +Vulkan extension | VMA flag +------------------------------|----------------------------------------------------- +VK_KHR_dedicated_allocation | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT +VK_KHR_bind_memory2 | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT +VK_KHR_maintenance4 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT +VK_KHR_maintenance5 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT +VK_EXT_memory_budget | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT +VK_KHR_buffer_device_address | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +VK_EXT_memory_priority | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT + +Example with fetching pointers to Vulkan functions dynamically: + +\code +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#include "vk_mem_alloc.h" + +... 
+ +VmaVulkanFunctions vulkanFunctions = {}; +vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr; +vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr; + +VmaAllocatorCreateInfo allocatorCreateInfo = {}; +allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; +allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2; +allocatorCreateInfo.physicalDevice = physicalDevice; +allocatorCreateInfo.device = device; +allocatorCreateInfo.instance = instance; +allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions; + +VmaAllocator allocator; +vmaCreateAllocator(&allocatorCreateInfo, &allocator); + +// Entire program... + +// At the end, don't forget to: +vmaDestroyAllocator(allocator); +\endcode + + +\subsection quick_start_initialization_other_config Other configuration options + +There are additional configuration options available through preprocessor macros that you can define +before including VMA header and through parameters passed in #VmaAllocatorCreateInfo. +They include a possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`), +callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`), +or your custom `VMA_ASSERT` macro, among others. +For more information, see: @ref configuration. + + +\section quick_start_resource_allocation Resource allocation + +When you want to create a buffer or image: + +-# Fill `VkBufferCreateInfo` / `VkImageCreateInfo` structure. +-# Fill VmaAllocationCreateInfo structure. +-# Call vmaCreateBuffer() / vmaCreateImage() to get `VkBuffer`/`VkImage` with memory + already allocated and bound to it, plus #VmaAllocation objects that represents its underlying memory. 
+ +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +Don't forget to destroy your buffer and allocation objects when no longer needed: + +\code +vmaDestroyBuffer(allocator, buffer, allocation); +\endcode + +If you need to map the buffer, you must set flag +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. +There are many additional parameters that can control the choice of memory type to be used for the allocation +and other features. +For more information, see documentation chapters: @ref choosing_memory_type, @ref memory_mapping. + + +\page choosing_memory_type Choosing memory type + +Physical devices in Vulkan support various combinations of memory heaps and +types. Help with choosing correct and optimal memory type for your specific +resource is one of the key features of this library. You can use it by filling +appropriate members of VmaAllocationCreateInfo structure, as described below. +You can also combine multiple methods. + +-# If you just want to find memory type index that meets your requirements, you + can use function: vmaFindMemoryTypeIndexForBufferInfo(), + vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex(). +-# If you want to allocate a region of device memory without association with any + specific image or buffer, you can use function vmaAllocateMemory(). Usage of + this function is not recommended and usually not needed. + vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once, + which may be useful for sparse binding. 
+-# If you already have a buffer or an image created, you want to allocate memory + for it and then you will bind it yourself, you can use function + vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). + For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() + or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). +-# If you want to create a buffer or an image, allocate memory for it, and bind + them together, all in one call, you can use function vmaCreateBuffer(), + vmaCreateImage(). + This is the easiest and recommended way to use this library! + +When using 3. or 4., the library internally queries Vulkan for memory types +supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) +and uses only one of these types. + +If no memory type can be found that meets all the requirements, these functions +return `VK_ERROR_FEATURE_NOT_PRESENT`. + +You can leave VmaAllocationCreateInfo structure completely filled with zeros. +It means no requirements are specified for memory type. +It is valid, although not very useful. + +\section choosing_memory_type_usage Usage + +The easiest way to specify memory requirements is to fill member +VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage. +It defines high level, common usage types. +Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select best memory type for your resource automatically. + +For example, if you want to create a uniform buffer that will be filled using +transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can +do it using following code. The buffer will most likely end up in a memory type with +`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device. 
+ +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory +on systems with discrete graphics card that have the memories separate, you can use +#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST. + +When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory, +you also need to specify one of the host access flags: +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +This will help the library decide about preferred memory type to ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +so you can map it. + +For example, a staging buffer that will be filled via mapped pointer and then +used as a source of transfer to the buffer described previously can be created like this. +It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT` +but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM). 
+ +\code +VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +stagingBufferInfo.size = 65536; +stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo stagingAllocInfo = {}; +stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO; +stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer stagingBuffer; +VmaAllocation stagingAllocation; +vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr); +\endcode + +For more examples of creating different kinds of resources, see chapter \ref usage_patterns. +See also: @ref memory_mapping. + +Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows +about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed, +so they work with functions like: vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc. +If you allocate raw memory using function vmaAllocateMemory(), you have to use other means of selecting +memory type, as described below. + +\note +Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`, +`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`) +are still available and work same way as in previous versions of the library +for backward compatibility, but they are deprecated. + +\section choosing_memory_type_required_preferred_flags Required and preferred flags + +You can specify more detailed requirements by filling members +VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags +with a combination of bits from enum `VkMemoryPropertyFlags`. 
For example, +if you want to create a buffer that will be persistently mapped on host (so it +must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`, +use following code: + +\code +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; +allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +A memory type is chosen that has all the required flags and as many preferred +flags set as possible. + +Value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags, +plus some extra "magic" (heuristics). + +\section choosing_memory_type_explicit_memory_types Explicit memory types + +If you inspected memory types available on the physical device and you have +a preference for memory types that you want to use, you can fill member +VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set +means that a memory type with that index is allowed to be used for the +allocation. Special value 0, just like `UINT32_MAX`, means there are no +restrictions to memory type index. + +Please note that this member is NOT just a memory type index. +Still you can use it to choose just one, specific memory type. +For example, if you already determined that your buffer should be created in +memory type 2, use following code: + +\code +uint32_t memoryTypeIndex = 2; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.memoryTypeBits = 1u << memoryTypeIndex; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +You can also use this parameter to exclude some memory types. 
+If you inspect memory heaps and types available on the current physical device and +you determine that for some reason you don't want to use a specific memory type for the allocation, +you can enable automatic memory type selection but exclude certain memory type or types +by setting all bits of `memoryTypeBits` to 1 except the ones you choose. + +\code +// ... +uint32_t excludedMemoryTypeIndex = 2; +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex); +// ... +\endcode + + +\section choosing_memory_type_custom_memory_pools Custom memory pools + +If you allocate from custom memory pool, all the ways of specifying memory +requirements described above are not applicable and the aforementioned members +of VmaAllocationCreateInfo structure are ignored. Memory type is selected +explicitly when creating the pool and then used to make all the allocations from +that pool. For further details, see \ref custom_memory_pools. + +\section choosing_memory_type_dedicated_allocations Dedicated allocations + +Memory for allocations is reserved out of larger block of `VkDeviceMemory` +allocated from Vulkan internally. That is the main feature of this whole library. +You can still request a separate memory block to be created for an allocation, +just like you would do in a trivial solution without using any allocator. +In that case, a buffer or image is always bound to that memory at offset 0. +This is called a "dedicated allocation". +You can explicitly request it by using flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +The library can also internally decide to use dedicated allocation in some cases, e.g.: + +- When the size of the allocation is large. +- When [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled + and it reports that dedicated allocation is required or recommended for the resource. 
+- When allocation of next big memory block fails due to not enough device memory, + but allocation with the exact requested size succeeds. + + +\page memory_mapping Memory mapping + +To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`, +to be able to read from it or write to it in CPU code. +Mapping is possible only of memory allocated from a memory type that has +`VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +Functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose. +You can use them directly with memory allocated by this library, +but it is not recommended because of following issue: +Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed. +This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan. +It is also not thread-safe. +Because of this, Vulkan Memory Allocator provides following facilities: + +\note If you want to be able to map an allocation, you need to specify one of the flags +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable +when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values. +For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable, +but these flags can still be used for consistency. + +\section memory_mapping_copy_functions Copy functions + +The easiest way to copy data from a host pointer to an allocation is to use convenience function vmaCopyMemoryToAllocation(). +It automatically maps the Vulkan memory temporarily (if not already mapped), performs `memcpy`, +and calls `vkFlushMappedMemoryRanges` (if required - if memory type is not `HOST_COHERENT`). + +It is also the safest one, because using `memcpy` avoids a risk of accidentally introducing memory reads +(e.g. 
by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`. + +\code +struct ConstantBuffer +{ + ... +}; +ConstantBuffer constantBufferData = ... + +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer buf; +VmaAllocation alloc; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); + +vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer)); +\endcode + +Copy in the other direction - from an allocation to a host pointer can be performed the same way using function vmaCopyAllocationToMemory(). + +\section memory_mapping_mapping_functions Mapping functions + +The library provides following functions for mapping of a specific allocation: vmaMapMemory(), vmaUnmapMemory(). +They are safer and more convenient to use than standard Vulkan functions. +You can map an allocation multiple times simultaneously - mapping is reference-counted internally. +You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block. +The way it is implemented is that the library always maps entire memory block, not just region of the allocation. +For further details, see description of vmaMapMemory() function. +Example: + +\code +// Having these objects initialized: +struct ConstantBuffer +{ + ... +}; +ConstantBuffer constantBufferData = ... + +VmaAllocator allocator = ... +VkBuffer constantBuffer = ... +VmaAllocation constantBufferAllocation = ... 
+ +// You can map and fill your buffer using following code: + +void* mappedData; +vmaMapMemory(allocator, constantBufferAllocation, &mappedData); +memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); +vmaUnmapMemory(allocator, constantBufferAllocation); +\endcode + +When mapping, you may see a warning from Vulkan validation layer similar to this one: + +Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used. + +It happens because the library maps entire `VkDeviceMemory` block, where different +types of images and buffers may end up together, especially on GPUs with unified memory like Intel. +You can safely ignore it if you are sure you access only memory of the intended +object that you wanted to map. + + +\section memory_mapping_persistently_mapped_memory Persistently mapped memory + +Keeping your memory persistently mapped is generally OK in Vulkan. +You don't need to unmap it before using its data on the GPU. +The library provides a special feature designed for that: +Allocations made with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in +VmaAllocationCreateInfo::flags stay mapped all the time, +so you can just access CPU pointer to it any time +without a need to call any "map" or "unmap" function. +Example: + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +// Buffer is already mapped. 
You can access its memory. +memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); +\endcode + +\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up +in a mappable memory type. +For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation. +For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading. + +\section memory_mapping_cache_control Cache flush and invalidate + +Memory in Vulkan doesn't need to be unmapped before using it on GPU, +but unless a memory type has `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set, +you need to manually **invalidate** cache before reading of mapped pointer +and **flush** cache after writing to mapped pointer. +Map/unmap operations don't do that automatically. +Vulkan provides following functions for this purpose `vkFlushMappedMemoryRanges()`, +`vkInvalidateMappedMemoryRanges()`, but this library provides more convenient +functions that refer to given allocation object: vmaFlushAllocation(), +vmaInvalidateAllocation(), +or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations(). + +Regions of memory specified for flush/invalidate must be aligned to +`VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library. +In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocations +within blocks are aligned to this value, so their offsets are always a multiple of +`nonCoherentAtomSize` and two different allocations never share the same "line" of this size. + +Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA) +currently provide `HOST_COHERENT` flag on all memory types that are +`HOST_VISIBLE`, so on PC you may not need to bother.
+ + +\page staying_within_budget Staying within budget + +When developing a graphics-intensive game or program, it is important to avoid allocating +more GPU memory than is physically available. When the memory is over-committed, +various bad things can happen, depending on the specific GPU, graphics driver, and +operating system: + +- It may just work without any problems. +- The application may slow down because some memory blocks are moved to system RAM + and the GPU has to access them through PCI Express bus. +- A new allocation may take very long time to complete, even a few seconds, and possibly + freeze entire system. +- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +- It may even result in GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST` + returned somewhere later. + +\section staying_within_budget_querying_for_budget Querying for budget + +To query for current memory usage and available budget, use function vmaGetHeapBudgets(). +Returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap. + +Please note that this function returns different information and works faster than +vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every +allocation, while vmaCalculateStatistics() is intended to be used rarely, +only to obtain statistical information, e.g. for debugging purposes. + +It is recommended to use VK_EXT_memory_budget device extension to obtain information +about the budget from Vulkan device. VMA is able to use this extension automatically. +When not enabled, the allocator behaves the same way, but then it estimates current usage +and available budget based on its internal information and Vulkan memory heap sizes, +which may be less precise. In order to use this extension: + +1. Make sure extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2 + required by it are available and enable them.
Please note that the first is a device + extension and the second is instance extension! +2. Use flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating #VmaAllocator object. +3. Make sure to call vmaSetCurrentFrameIndex() every frame. Budget is queried from + Vulkan inside of it to avoid overhead of querying it with every allocation. + +\section staying_within_budget_controlling_memory_usage Controlling memory usage + +There are many ways in which you can try to stay within the budget. + +First, when making new allocation requires allocating a new memory block, the library +tries not to exceed the budget automatically. If a block with default recommended size +(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even +dedicated memory for just this resource. + +If the size of the requested resource plus current memory usage is more than the +budget, by default the library still tries to create it, leaving it to the Vulkan +implementation whether the allocation succeeds or fails. You can change this behavior +by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is +not made if it would exceed the budget or if the budget is already exceeded. +VMA then tries to make the allocation from the next eligible Vulkan memory type. +If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag +when creating resources that are not essential for the application (e.g. the texture +of a specific object) and not to pass it when creating critically important resources +(e.g. render targets). + +On AMD graphics cards there is a custom vendor extension available: VK_AMD_memory_overallocation_behavior +that allows to control the behavior of the Vulkan implementation in out-of-memory cases - +whether it should fail with an error code or still allow the allocation.
+Usage of this extension involves only passing extra structure on Vulkan device creation, +so it is out of scope of this library. + +Finally, you can also use #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure +a new allocation is created only when it fits inside one of the existing memory blocks. +If it would require allocating a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +This also ensures that the function call is very fast because it never goes to Vulkan +to obtain a new block. + +\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount +set to more than 0 will currently try to allocate memory blocks without checking whether they +fit within budget. + + +\page resource_aliasing Resource aliasing (overlap) + +New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory +management, give an opportunity to alias (overlap) multiple resources in the +same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL). +It can be useful to save video memory, but it must be used with caution. + +For example, if you know the flow of your whole render frame in advance, you +are going to use some intermediate textures or buffers only during a small range of render passes, +and you know these ranges don't overlap in time, you can bind these resources to +the same place in memory, even if they have completely different parameters (width, height, format etc.). + +![Resource aliasing (overlap)](../gfx/Aliasing.png) + +Such scenario is possible using VMA, but you need to create your images manually. +Then you need to calculate parameters of an allocation to be made using formula: + +- allocation size = max(size of each image) +- allocation alignment = max(alignment of each image) +- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image) + +Following example shows two different images bound to the same place in memory, +allocated to fit largest of them.
+ +\code +// A 512x512 texture to be sampled. +VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +img1CreateInfo.imageType = VK_IMAGE_TYPE_2D; +img1CreateInfo.extent.width = 512; +img1CreateInfo.extent.height = 512; +img1CreateInfo.extent.depth = 1; +img1CreateInfo.mipLevels = 10; +img1CreateInfo.arrayLayers = 1; +img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB; +img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; +img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +// A full screen texture to be used as color attachment. +VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +img2CreateInfo.imageType = VK_IMAGE_TYPE_2D; +img2CreateInfo.extent.width = 1920; +img2CreateInfo.extent.height = 1080; +img2CreateInfo.extent.depth = 1; +img2CreateInfo.mipLevels = 1; +img2CreateInfo.arrayLayers = 1; +img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VkImage img1; +res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1); +VkImage img2; +res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2); + +VkMemoryRequirements img1MemReq; +vkGetImageMemoryRequirements(device, img1, &img1MemReq); +VkMemoryRequirements img2MemReq; +vkGetImageMemoryRequirements(device, img2, &img2MemReq); + +VkMemoryRequirements finalMemReq = {}; +finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size); +finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment); +finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits; +// Validate if(finalMemReq.memoryTypeBits != 0) + +VmaAllocationCreateInfo allocCreateInfo = 
{}; +allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + +VmaAllocation alloc; +res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr); + +res = vmaBindImageMemory(allocator, alloc, img1); +res = vmaBindImageMemory(allocator, alloc, img2); + +// You can use img1, img2 here, but not at the same time! + +vmaFreeMemory(allocator, alloc); +vkDestroyImage(device, img2, nullptr); +vkDestroyImage(device, img1, nullptr); +\endcode + +VMA also provides convenience functions that create a buffer or image and bind it to memory +represented by an existing #VmaAllocation: +vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(), +vmaCreateAliasingImage(), vmaCreateAliasingImage2(). +Versions with "2" offer additional parameter `allocationLocalOffset`. + +Remember that using resources that alias in memory requires proper synchronization. +You need to issue a memory barrier to make sure commands that use `img1` and `img2` +don't overlap on GPU timeline. +You also need to treat a resource after aliasing as uninitialized - containing garbage data. +For example, if you use `img1` and then want to use `img2`, you need to issue +an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`. + +Additional considerations: + +- Vulkan also allows to interpret contents of memory between aliasing resources consistently in some cases. +See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ALIAS_BIT` flag. +- You can create more complex layout where different images and buffers are bound +at different offsets inside one large allocation. For example, one can imagine +a big texture used in some render passes, aliasing with a set of many small buffers +used in some further passes. To bind a resource at non-zero offset in an allocation, +use vmaBindBufferMemory2() / vmaBindImageMemory2().
+- Before allocating memory for the resources you want to alias, check `memoryTypeBits` +returned in memory requirements of each resource to make sure the bits overlap. +Some GPUs may expose multiple memory types suitable e.g. only for buffers or +images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your +resources may be disjoint. Aliasing them is not possible in that case. + + +\page custom_memory_pools Custom memory pools + +A memory pool contains a number of `VkDeviceMemory` blocks. +The library automatically creates and manages default pool for each memory type available on the device. +Default memory pool automatically grows in size. +Size of allocated blocks is also variable and managed automatically. +You are using default pools whenever you leave VmaAllocationCreateInfo::pool = null. + +You can create custom pool and allocate memory out of it. +It can be useful if you want to: + +- Keep certain kind of allocations separate from others. +- Enforce particular, fixed size of Vulkan memory blocks. +- Limit maximum amount of Vulkan memory allocated for that pool. +- Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool. +- Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in + #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain. +- Perform defragmentation on a specific subset of your allocations. + +To use custom memory pools: + +-# Fill VmaPoolCreateInfo structure. +-# Call vmaCreatePool() to obtain #VmaPool handle. +-# When making an allocation, set VmaAllocationCreateInfo::pool to this handle. + You don't need to specify any other parameters of this structure, like `usage`. + +Example: + +\code +// Find memoryTypeIndex for the pool. +VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +sampleBufCreateInfo.size = 0x10000; // Doesn't matter. 
+sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo sampleAllocCreateInfo = {}; +sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator, + &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex); +// Check res... + +// Create a pool that can have at most 2 blocks, 128 MiB each. +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +poolCreateInfo.blockSize = 128ull * 1024 * 1024; +poolCreateInfo.maxBlockCount = 2; + +VmaPool pool; +res = vmaCreatePool(allocator, &poolCreateInfo, &pool); +// Check res... + +// Allocate a buffer out of it. +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 1024; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.pool = pool; + +VkBuffer buf; +VmaAllocation alloc; +res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); +// Check res... +\endcode + +You have to free all allocations made from this pool before destroying it. + +\code +vmaDestroyBuffer(allocator, buf, alloc); +vmaDestroyPool(allocator, pool); +\endcode + +New versions of this library support creating dedicated allocations in custom pools. +It is supported only when VmaPoolCreateInfo::blockSize = 0. +To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and +VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + + +\section custom_memory_pools_MemTypeIndex Choosing memory type index + +When creating a pool, you must explicitly specify memory type index. +To find the one suitable for your buffers or images, you can use helper functions +vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo(). 
+You need to provide structures with example parameters of buffers or images +that you are going to create in that pool. + +\code +VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +exampleBufCreateInfo.size = 1024; // Doesn't matter +exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex); + +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +// ... +\endcode + +When creating buffers/images allocated in that pool, provide following parameters: + +- `VkBufferCreateInfo`: Prefer to pass same parameters as above. + Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior. + Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers + or the other way around. +- VmaAllocationCreateInfo: You don't need to pass same parameters. Fill only `pool` member. + Other members are ignored anyway. + + +\section custom_memory_pools_when_not_use When not to use custom pools + +Custom pools are commonly overused by VMA users. +While it may feel natural to keep some logical groups of resources separate in memory, +in most cases it does more harm than good. +Using custom pool shouldn't be your first choice. +Instead, please make all allocations from default pools first and only use custom pools +if you can prove and measure that it is beneficial in some way, +e.g. it results in lower memory usage, better performance, etc. + +Using custom pools has disadvantages: + +- Each pool has its own collection of `VkDeviceMemory` blocks. + Some of them may be partially or even completely empty. 
+ Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory. +- You must manually choose specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex). + When using default pools, best memory type for each of your allocations can be selected automatically + using a carefully designed algorithm that works across all kinds of GPUs. +- If an allocation from a custom pool at specific memory type fails, entire allocation operation returns failure. + When using default pools, VMA tries another compatible memory type. +- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size, + while default pools start from small blocks and only allocate next blocks larger and larger + up to the preferred block size. + +Many of the common concerns can be addressed in a different way than using custom pools: + +- If you want to keep your allocations of certain size (small versus large) or certain lifetime (transient versus long lived) + separate, you likely don't need to. + VMA uses a high quality allocation algorithm that manages memory well in various cases. + Please measure and check if using custom pools provides a benefit. +- If you want to keep your images and buffers separate, you don't need to. + VMA respects `bufferImageGranularity` limit automatically. +- If you want to keep your mapped and not mapped allocations separate, you don't need to. + VMA respects `nonCoherentAtomSize` limit automatically. + It also maps only those `VkDeviceMemory` blocks that need to map any allocation. + It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory. +- If you want to choose a custom size for the default memory block, you can set it globally instead + using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize.
+- If you want to select specific memory type for your allocation, + you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead. +- If you need to create a buffer with certain minimum alignment, you can still do it + using default pools with dedicated function vmaCreateBufferWithAlignment(). + + +\section linear_algorithm Linear allocation algorithm + +Each Vulkan memory block managed by this library has accompanying metadata that +keeps track of used and unused regions. By default, the metadata structure and +algorithm tries to find best place for new allocations among free regions to +optimize memory usage. This way you can allocate and free objects in any order. + +![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png) + +Sometimes there is a need to use simpler, linear allocation algorithm. You can +create custom pool that uses such algorithm by adding flag +#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating +#VmaPool object. Then an alternative metadata management is used. It always +creates new allocations after last one and doesn't reuse free regions after +allocations freed in the middle. It results in better allocation performance and +less memory consumed by metadata. + +![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png) + +With this one flag, you can create a custom pool that can be used in many ways: +free-at-once, stack, double stack, and ring buffer. See below for details. +You don't need to specify explicitly which of these options you are going to use - it is detected automatically. + +\subsection linear_algorithm_free_at_once Free-at-once + +In a pool that uses linear algorithm, you still need to free all the allocations +individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free +them in any order. New allocations are always made after last one - free space +in the middle is not reused. 
However, when you release all the allocations and +the pool becomes empty, allocation starts from the beginning again. This way you +can use linear algorithm to speed up creation of allocations that you are going +to release all at once. + +![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png) + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_stack Stack + +When you free an allocation that was created last, its space can be reused. +Thanks to this, if you always release allocations in the order opposite to their +creation (LIFO - Last In First Out), you can achieve behavior of a stack. + +![Stack](../gfx/Linear_allocator_4_stack.png) + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_double_stack Double stack + +The space reserved by a custom pool with linear algorithm may be used by two +stacks: + +- First, default one, growing up from offset 0. +- Second, "upper" one, growing down from the end towards lower offsets. + +To make allocation from the upper stack, add flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT +to VmaAllocationCreateInfo::flags. + +![Double stack](../gfx/Linear_allocator_7_double_stack.png) + +Double stack is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +When the two stacks' ends meet so there is not enough space between them for a +new allocation, such allocation fails with usual +`VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + +\subsection linear_algorithm_ring_buffer Ring buffer + +When you free some allocations from the beginning and there is not enough free space +for a new one at the end of a pool, allocator's "cursor" wraps around to the +beginning and starts allocation there.
Thanks to this, if you always release +allocations in the same order as you created them (FIFO - First In First Out), +you can achieve behavior of a ring buffer / queue. + +![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png) + +Ring buffer is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page defragmentation Defragmentation + +Interleaved allocations and deallocations of many objects of varying size can +cause fragmentation over time, which can lead to a situation where the library is unable +to find a continuous range of free memory for a new allocation despite there is +enough free space, just scattered across many small free ranges between existing +allocations. + +To mitigate this problem, you can use defragmentation feature. +It doesn't happen automatically though and needs your cooperation, +because VMA is a low level library that only allocates memory. +It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures. +It cannot copy their contents as it doesn't record any commands to a command buffer. + +Example: + +\code +VmaDefragmentationInfo defragInfo = {}; +defragInfo.pool = myPool; +defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT; + +VmaDefragmentationContext defragCtx; +VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx); +// Check res... + +for(;;) +{ + VmaDefragmentationPassMoveInfo pass; + res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) + // Handle error... + + for(uint32_t i = 0; i < pass.moveCount; ++i) + { + // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents. 
+ VmaAllocationInfo allocInfo; + vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo); + MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData; + + // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset. + VkImageCreateInfo imgCreateInfo = ... + VkImage newImg; + res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg); + // Check res... + res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg); + // Check res... + + // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place. + vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...); + } + + // Make sure the copy commands finished executing. + vkWaitForFences(...); + + // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation. + for(uint32_t i = 0; i < pass.moveCount; ++i) + { + // ... + vkDestroyImage(device, resData->img, nullptr); + } + + // Update appropriate descriptors to point to the new places... + + res = vmaEndDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) + // Handle error... +} + +vmaEndDefragmentation(allocator, defragCtx, nullptr); +\endcode + +Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage() +create/destroy an allocation and a buffer/image at once, these are just a shortcut for +creating the resource, allocating memory, and binding them together. +Defragmentation works on memory allocations only. You must handle the rest manually. +Defragmentation is an iterative process that should repeat "passes" as long as related functions +return `VK_INCOMPLETE` not `VK_SUCCESS`. +In each pass: + +1. vmaBeginDefragmentationPass() function call: + - Calculates and returns the list of allocations to be moved in this pass. + Note this can be a time-consuming process.
+ - Reserves destination memory for them by creating temporary destination allocations + that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo(). +2. Inside the pass, **you should**: + - Inspect the returned list of allocations to be moved. + - Create new buffers/images and bind them at the returned destination temporary allocations. + - Copy data from source to destination resources if necessary. + - Destroy the source buffers/images, but NOT their allocations. +3. vmaEndDefragmentationPass() function call: + - Frees the source memory reserved for the allocations that are moved. + - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory. + - Frees `VkDeviceMemory` blocks that became empty. + +Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter. +Defragmentation algorithm tries to move all suitable allocations. +You can, however, refuse to move some of them inside a defragmentation pass, by setting +`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. +This is not recommended and may result in suboptimal packing of the allocations after defragmentation. +If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. + +Inside a pass, for each allocation that should be moved: + +- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`. + - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass(). +- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared, + filled, and used temporarily in each rendering frame, you can just recreate this image + without copying its data. 
+- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU + using `memcpy()`. +- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + This will cancel the move. + - vmaEndDefragmentationPass() will then free the destination memory + not the source memory of the allocation, leaving it unchanged. +- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time), + you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object. + +You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool +(like in the example above) or all the default pools by setting this member to null. + +Defragmentation is always performed in each pool separately. +Allocations are never moved between different Vulkan memory types. +The size of the destination memory reserved for a moved allocation is the same as the original one. +Alignment of an allocation as it was determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation. +Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones. + +You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved +in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. +See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass. 
+ +It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA +usage, possibly from multiple threads, with the exception that allocations +returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended. + +Mapping is preserved on allocations that are moved during defragmentation. +Whether through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations +are mapped at their new place. Of course, pointer to the mapped data changes, so it needs to be queried +using VmaAllocationInfo::pMappedData. + +\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page statistics Statistics + +This library contains several functions that return information about its internal state, +especially the amount of memory allocated from Vulkan. + +\section statistics_numeric_statistics Numeric statistics + +If you need to obtain basic statistics about memory usage per heap, together with current budget, +you can call function vmaGetHeapBudgets() and inspect structure #VmaBudget. +This is useful to keep track of memory usage and stay within budget +(see also \ref staying_within_budget). +Example: + +\code +uint32_t heapIndex = ... 
+ +VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +vmaGetHeapBudgets(allocator, budgets); + +printf("My heap currently has %u allocations taking %llu B,\n", + budgets[heapIndex].statistics.allocationCount, + budgets[heapIndex].statistics.allocationBytes); +printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n", + budgets[heapIndex].statistics.blockCount, + budgets[heapIndex].statistics.blockBytes); +printf("Vulkan reports total usage %llu B with budget %llu B.\n", + budgets[heapIndex].usage, + budgets[heapIndex].budget); +\endcode + +You can query for more detailed statistics per memory heap, type, and totals, +including minimum and maximum allocation size and unused range size, +by calling function vmaCalculateStatistics() and inspecting structure #VmaTotalStatistics. +This function is slower though, as it has to traverse all the internal data structures, +so it should be used only for debugging purposes. + +You can query for statistics of a custom pool using function vmaGetPoolStatistics() +or vmaCalculatePoolStatistics(). + +You can query for information about a specific allocation using function vmaGetAllocationInfo(). +It fills structure #VmaAllocationInfo. + +\section statistics_json_dump JSON dump + +You can dump internal state of the allocator to a string in JSON format using function vmaBuildStatsString(). +The result is guaranteed to be correct JSON. +It uses ANSI encoding. +Any strings provided by user (see [Allocation names](@ref allocation_names)) +are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding, +this JSON string can be treated as using this encoding. +It must be freed using function vmaFreeStatsString(). + +The format of this JSON string is not part of official documentation of the library, +but it will not change in backward-incompatible way without increasing library major version number +and appropriate mention in changelog.
+ +The JSON string contains all the data that can be obtained using vmaCalculateStatistics(). +It can also contain detailed map of allocated memory blocks and their regions - +free and occupied by allocations. +This allows e.g. to visualize the memory or assess fragmentation. + + +\page allocation_annotation Allocation names and user data + +\section allocation_user_data Allocation user data + +You can annotate allocations with your own information, e.g. for debugging purposes. +To do that, fill VmaAllocationCreateInfo::pUserData field when creating +an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer, +some handle, index, key, ordinal number or any other value that would associate +the allocation with your custom metadata. +It is useful to identify appropriate data structures in your engine given #VmaAllocation, +e.g. when doing \ref defragmentation. + +\code +VkBufferCreateInfo bufCreateInfo = ... + +MyBufferMetadata* pMetadata = CreateBufferMetadata(); + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.pUserData = pMetadata; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr); +\endcode + +The pointer may be later retrieved as VmaAllocationInfo::pUserData: + +\code +VmaAllocationInfo allocInfo; +vmaGetAllocationInfo(allocator, allocation, &allocInfo); +MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; +\endcode + +It can also be changed using function vmaSetAllocationUserData(). + +Values of (non-zero) allocations' `pUserData` are printed in JSON report created by +vmaBuildStatsString() in hexadecimal form. + +\section allocation_names Allocation names + +An allocation can also carry a null-terminated string, giving a name to the allocation. +To set it, call vmaSetAllocationName(). 
+The library creates internal copy of the string, so the pointer you pass doesn't need +to be valid for whole lifetime of the allocation. You can free it after the call. + +\code +std::string imageName = "Texture: "; +imageName += fileName; +vmaSetAllocationName(allocator, allocation, imageName.c_str()); +\endcode + +The string can be later retrieved by inspecting VmaAllocationInfo::pName. +It is also printed in JSON report created by vmaBuildStatsString(). + +\note Setting string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. +You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. + + +\page virtual_allocator Virtual allocator + +As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of "virtual allocator". +It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block". +You can use it to allocate your own memory or other objects, even completely unrelated to Vulkan. +A common use case is sub-allocation of pieces of one large GPU buffer. + +\section virtual_allocator_creating_virtual_block Creating virtual block + +To use this functionality, there is no main "allocator" object. +You don't need to have #VmaAllocator object created. +All you need to do is to create a separate #VmaVirtualBlock object for each block of memory you want to be managed by the allocator: + +-# Fill in #VmaVirtualBlockCreateInfo structure. +-# Call vmaCreateVirtualBlock(). Get new #VmaVirtualBlock object. 
+ +Example: + +\code +VmaVirtualBlockCreateInfo blockCreateInfo = {}; +blockCreateInfo.size = 1048576; // 1 MB + +VmaVirtualBlock block; +VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block); +\endcode + +\section virtual_allocator_making_virtual_allocations Making virtual allocations + +#VmaVirtualBlock object contains internal data structure that keeps track of free and occupied regions +using the same code as the main Vulkan memory allocator. +Similarly to #VmaAllocation for standard GPU allocations, there is #VmaVirtualAllocation type +that represents an opaque handle to an allocation within the virtual block. + +In order to make such allocation: + +-# Fill in #VmaVirtualAllocationCreateInfo structure. +-# Call vmaVirtualAllocate(). Get new #VmaVirtualAllocation object that represents the allocation. + You can also receive `VkDeviceSize offset` that was assigned to the allocation. + +Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB + +VmaVirtualAllocation alloc; +VkDeviceSize offset; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset); +if(res == VK_SUCCESS) +{ + // Use the 4 KB of your memory starting at offset. +} +else +{ + // Allocation failed - no space for it could be found. Handle this error! +} +\endcode + +\section virtual_allocator_deallocation Deallocation + +When no longer needed, an allocation can be freed by calling vmaVirtualFree(). +You can only pass to this function an allocation that was previously returned by vmaVirtualAllocate() +called for the same #VmaVirtualBlock. + +When whole block is no longer needed, the block object can be released by calling vmaDestroyVirtualBlock(). +All allocations must be freed before the block is destroyed, which is checked internally by an assert. 
+However, if you don't want to call vmaVirtualFree() for each allocation, you can use vmaClearVirtualBlock() to free them all at once - +a feature not available in normal Vulkan memory allocator. Example: + +\code +vmaVirtualFree(block, alloc); +vmaDestroyVirtualBlock(block); +\endcode + +\section virtual_allocator_allocation_parameters Allocation parameters + +You can attach a custom pointer to each allocation by using vmaSetVirtualAllocationUserData(). +Its default value is null. +It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some +larger data structure containing more information. Example: + +\code +struct CustomAllocData +{ + std::string m_AllocName; +}; +CustomAllocData* allocData = new CustomAllocData(); +allocData->m_AllocName = "My allocation 1"; +vmaSetVirtualAllocationUserData(block, alloc, allocData); +\endcode + +The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to function +vmaGetVirtualAllocationInfo() and inspecting returned structure #VmaVirtualAllocationInfo. +If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation! +Example: + +\code +VmaVirtualAllocationInfo allocInfo; +vmaGetVirtualAllocationInfo(block, alloc, &allocInfo); +delete (CustomAllocData*)allocInfo.pUserData; + +vmaVirtualFree(block, alloc); +\endcode + +\section virtual_allocator_alignment_and_units Alignment and units + +It feels natural to express sizes and offsets in bytes. +If an offset of an allocation needs to be aligned to a multiple of some number (e.g. 4 bytes), you can fill optional member +VmaVirtualAllocationCreateInfo::alignment to request it.
Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB +allocCreateInfo.alignment = 4; // Returned offset must be a multiple of 4 B + +VmaVirtualAllocation alloc; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, nullptr); +\endcode + +Alignments of different allocations made from one block may vary. +However, if all alignments and sizes are always a multiple of some size e.g. 4 B or `sizeof(MyDataStruct)`, +you can express all sizes, alignments, and offsets in multiples of that size instead of individual bytes. +It might be more convenient, but you need to make sure to use this new unit consistently in all the places: + +- VmaVirtualBlockCreateInfo::size +- VmaVirtualAllocationCreateInfo::size and VmaVirtualAllocationCreateInfo::alignment +- Using offset returned by vmaVirtualAllocate() or in VmaVirtualAllocationInfo::offset + +\section virtual_allocator_statistics Statistics + +You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics() +(to get brief statistics that are fast to calculate) +or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate). +The functions fill structures #VmaStatistics, #VmaDetailedStatistics respectively - same as used by the normal Vulkan memory allocator. +Example: + +\code +VmaStatistics stats; +vmaGetVirtualBlockStatistics(block, &stats); +printf("My virtual block has %llu bytes used by %u virtual allocations\n", + stats.allocationBytes, stats.allocationCount); +\endcode + +You can also request a full list of allocations and free regions as a string in JSON format by calling +vmaBuildVirtualBlockStatsString(). +Returned string must be later freed using vmaFreeVirtualBlockStatsString(). +The format of this string differs from the one returned by the main Vulkan allocator, but it is similar.
+ +\section virtual_allocator_additional_considerations Additional considerations + +The "virtual allocator" functionality is implemented on a level of individual memory blocks. +Keeping track of a whole collection of blocks, allocating new ones when out of free space, +deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user. + +Alternative allocation algorithms are supported, just like in custom pools of the real GPU memory. +See enum #VmaVirtualBlockCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT). +You can find their description in chapter \ref custom_memory_pools. +Allocation strategies are also supported. +See enum #VmaVirtualAllocationCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT). + +Following features are supported only by the allocator of the real GPU memory and not by virtual allocations: +buffer-image granularity, `VMA_DEBUG_MARGIN`, `VMA_MIN_ALIGNMENT`. + + +\page debugging_memory_usage Debugging incorrect memory usage + +If you suspect a bug with memory usage, like usage of uninitialized memory or +memory being overwritten out of bounds of an allocation, +you can use debug features of this library to verify this. + +\section debugging_memory_usage_initialization Memory initialization + +If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used, +you can enable automatic memory initialization to verify this. +To do it, define macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1. + +\code +#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1 +#include "vk_mem_alloc.h" +\endcode + +It makes memory of new allocations initialized to bit pattern `0xDCDCDCDC`. +Before an allocation is destroyed, its memory is filled with bit pattern `0xEFEFEFEF`. +Memory is automatically mapped and unmapped if necessary. 
+ +If you find these values while debugging your program, good chances are that you incorrectly +read Vulkan memory that is allocated but not initialized, or already freed, respectively. + +Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped. +It works also with dedicated allocations. + +\section debugging_memory_usage_margins Margins + +By default, allocations are laid out in memory blocks next to each other if possible +(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`). + +![Allocations without margin](../gfx/Margins_1.png) + +Define macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce specified +number of bytes as a margin after every allocation. + +\code +#define VMA_DEBUG_MARGIN 16 +#include "vk_mem_alloc.h" +\endcode + +![Allocations with margin](../gfx/Margins_2.png) + +If your bug goes away after enabling margins, it means it may be caused by memory +being overwritten outside of allocation boundaries. It is not 100% certain though. +Change in application behavior may also be caused by different order and distribution +of allocations across memory blocks after margins are applied. + +Margins work with all types of memory. + +Margin is applied only to allocations made out of memory blocks and not to dedicated +allocations, which have their own memory block of specific size. +It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag +or those automatically decided to put into dedicated allocations, e.g. due to its +large size or recommended by VK_KHR_dedicated_allocation extension. + +Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. + +Note that enabling margins increases memory usage and fragmentation. + +Margins do not apply to \ref virtual_allocator. 
+ +\section debugging_memory_usage_corruption_detection Corruption detection + +You can additionally define macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation +of contents of the margins. + +\code +#define VMA_DEBUG_MARGIN 16 +#define VMA_DEBUG_DETECT_CORRUPTION 1 +#include "vk_mem_alloc.h" +\endcode + +When this feature is enabled, number of bytes specified as `VMA_DEBUG_MARGIN` +(it must be a multiple of 4) after every allocation is filled with a magic number. +This idea is also known as "canary". +Memory is automatically mapped and unmapped if necessary. + +This number is validated automatically when the allocation is destroyed. +If it is not equal to the expected value, `VMA_ASSERT()` is executed. +It clearly means that either the CPU or the GPU has overwritten the memory outside of boundaries of the allocation, +which indicates a serious bug. + +You can also explicitly request checking margins of all allocations in all memory blocks +that belong to specified memory types by using function vmaCheckCorruption(), +or in memory blocks that belong to specified custom pool, by using function +vmaCheckPoolCorruption(). + +Margin validation (corruption detection) works only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. + + +\section debugging_memory_usage_leak_detection Leak detection features + +At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using +`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug +builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()` +to change this behavior. + +At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()` +macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op.
+If you're having trouble with leaks - for example, the aforementioned assertion triggers, but you don't +quite know \em why -, overriding this macro to print out the leaking blocks, combined with assigning +individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them. + +\page other_api_interop Interop with other graphics APIs + +VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL. + +\section opengl_interop_exporting_memory Exporting memory + +If you want to attach `VkExportMemoryAllocateInfoKHR` or other structure to `pNext` chain of memory allocations made by the library: + +You can create \ref custom_memory_pools for such allocations. +Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext +while creating the custom pool. +Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool, +not only while creating it, as no copy of the structure is made, +but its original pointer is used for each allocation instead. + +If you want to export all memory allocated by VMA from certain memory types, +also dedicated allocations or other allocations made from default pools, +an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes. +It should point to an array with `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library +through `VkExportMemoryAllocateInfoKHR` on each allocation made from a specific memory type. +Please note that new versions of the library also support dedicated allocations created in custom pools. + +You should not mix these two methods in a way that allows to apply both to the same memory type. +Otherwise, `VkExportMemoryAllocateInfoKHR` structure would be attached twice to the `pNext` chain of `VkMemoryAllocateInfo`.
+ + +\section opengl_interop_custom_alignment Custom alignment + +Buffers or images exported to a different API like OpenGL may require a different alignment, +higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`. +To impose such alignment: + +You can create \ref custom_memory_pools for such allocations. +Set VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation +to be made out of this pool. +The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image +from a function like `vkGetBufferMemoryRequirements`, which is called by VMA automatically. + +If you want to create a buffer with a specific minimum alignment out of default pools, +use special function vmaCreateBufferWithAlignment(), which takes additional parameter `minAlignment`. + +Note the problem of alignment affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated +allocations, as these, by definition, always have alignment = 0 because the resource is bound to the beginning of its dedicated block. +You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation. + +\section opengl_interop_extended_allocation_information Extended allocation information + +If you want to rely on VMA to allocate your buffers and images inside larger memory blocks, +but you need to know the size of the entire block and whether the allocation was made +with its own dedicated memory, use function vmaGetAllocationInfo2() to retrieve +extended allocation information in structure #VmaAllocationInfo2. + + + +\page usage_patterns Recommended usage patterns + +Vulkan gives great flexibility in memory allocation. +This chapter shows the most common patterns. 
+ +See also slides from talk: +[Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) + + +\section usage_patterns_gpu_only GPU-only resource + +When: +Any resources that you frequently write and read on GPU, +e.g. images used as color attachments (aka "render targets"), depth-stencil attachments, +images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)"). + +What to do: +Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +Also consider: +Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, +especially if they are large or if you plan to destroy and recreate them with different sizes +e.g. when display resolution changes. +Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. 
+When VK_EXT_memory_priority extension is enabled, it is also worth setting high priority to such allocation +to decrease chances to be evicted to system memory by the operating system. + +\section usage_patterns_staging_copy_upload Staging copy for upload + +When: +A "staging" buffer that you want to map and fill from CPU code, then use as a source of transfer +to some GPU resource. + +What to do: +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`. + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +memcpy(allocInfo.pMappedData, myData, myDataSize); +\endcode + +Also consider: +You can map the allocation using vmaMapMemory() or you can create it as persistently mapped +using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above. + + +\section usage_patterns_readback Readback + +When: +Buffers for data written by or transferred from the GPU that you want to read back on the CPU, +e.g. results of some computations. + +What to do: +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`.
+ +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +const float* downloadedData = (const float*)allocInfo.pMappedData; +\endcode + + +\section usage_patterns_advanced_data_uploading Advanced data uploading + +For resources that you frequently write on CPU via mapped pointer and +frequently read on GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible: + +-# Easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory, + even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card, + and make the device reach out to that resource directly. + - Reads performed by the device will then go through PCI Express bus. + The performance of this access may be limited, but it may be fine depending on the size + of this resource (whether it is small enough to quickly end up in GPU cache) and the sparsity + of access. +-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips), + a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL` + (fast to access from the GPU). Then, it is likely the best choice for such type of resource. +-# Systems with a discrete graphics card and separate video memory may or may not expose + a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR). 
+ If they do, it represents a piece of VRAM (or entire VRAM, if ReBAR is enabled in the motherboard BIOS) + that is available to CPU for mapping. + - Writes performed by the host to that memory go through PCI Express bus. + The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0, + as long as rules of using uncached and write-combined memory are followed - only sequential writes and no reads. +-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory, + a separate "staging" copy in `HOST_VISIBLE` memory and perform an explicit transfer command between them. + +Thankfully, VMA offers an aid to create and use such resources in the way optimal +for the current Vulkan device. To help the library make the best choice, +use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with +#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT. +It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR), +but if no such memory type is available or allocation from it fails +(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in BIOS), +it will fall back to `DEVICE_LOCAL` memory for fast GPU access. +It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`, +so you need to create another "staging" allocation and perform explicit transfers.
+ +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +VkMemoryPropertyFlags memPropFlags; +vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); + +if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +{ + // Allocation ended up in a mappable memory and is already mapped - write to it directly. + + // [Executed in runtime]: + memcpy(allocInfo.pMappedData, myData, myDataSize); +} +else +{ + // Allocation ended up in a non-mappable memory - need to transfer. 
+ VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + stagingBufCreateInfo.size = 65536; + stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + VmaAllocationCreateInfo stagingAllocCreateInfo = {}; + stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + + VkBuffer stagingBuf; + VmaAllocation stagingAlloc; + VmaAllocationInfo stagingAllocInfo; + vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, + &stagingBuf, &stagingAlloc, &stagingAllocInfo); + + // [Executed in runtime]: + memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); + vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); + //vkCmdPipelineBarrier: VK_ACCESS_HOST_WRITE_BIT --> VK_ACCESS_TRANSFER_READ_BIT + VkBufferCopy bufCopy = { + 0, // srcOffset + 0, // dstOffset, + myDataSize }; // size + vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); +} +\endcode + +\section usage_patterns_other_use_cases Other use cases + +Here are some other, less obvious use cases and their recommended settings: + +- An image that is used only as transfer source and destination, but it should stay on the device, + as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame, + for temporal antialiasing or other temporal effects. + - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO +- An image that is used only as transfer source and destination, but it should be placed + in the system RAM despite it doesn't need to be mapped, because it serves as a "swap" copy to evict + least recently used textures from VRAM.
+ - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + as VMA needs a hint here to differentiate from the previous case. +- A buffer that you want to map and write from the CPU, directly read from the GPU + (e.g. as a uniform or vertex buffer), but you have a clear preference to place it in device or + host memory due to its large size. + - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST + - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT + + +\page configuration Configuration + +Please check "CONFIGURATION SECTION" in the code to find macros that you can define +before each include of this file or change directly in this file to provide +your own implementation of basic facilities like assert, `min()` and `max()` functions, +mutex, atomic etc. +The library uses its own implementation of containers by default, but you can switch to using +STL containers instead. + +For example, define `VMA_ASSERT(expr)` before including the library to provide +custom implementation of the assertion, compatible with your project. +By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration +and empty otherwise. + +\section config_Vulkan_functions Pointers to Vulkan functions + +There are multiple ways to import pointers to Vulkan functions in the library. +In the simplest case you don't need to do anything. +If the compilation or linking of your program or the initialization of the #VmaAllocator +doesn't work for you, you can try to reconfigure it. 
+ +First, the allocator tries to fetch pointers to Vulkan functions linked statically, +like this: + +\code +m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; +\endcode + +If you want to disable this feature, set configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`. + +Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions. +You can fetch them e.g. using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or +by using a helper library like [volk](https://github.com/zeux/volk). + +Third, VMA tries to fetch remaining pointers that are still null by calling +`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. +You need to only fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr. +Other pointers will be fetched automatically. +If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. + +Finally, all the function pointers required by the library (considering selected +Vulkan version and enabled extensions) are checked with `VMA_ASSERT` if they are not null. + + +\section custom_memory_allocator Custom host memory allocator + +If you use custom allocator for CPU memory rather than default operator `new` +and `delete` from C++, you can make this library using your allocator as well +by filling optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These +functions will be passed to Vulkan, as well as used by the library itself to +make any CPU-side allocations. + +\section allocation_callbacks Device memory allocation callbacks + +The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally. +You can setup callbacks to be informed about these calls, e.g. for the purpose +of gathering some statistics. To do it, fill optional member +VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. 
+ +\section heap_memory_limit Device heap memory limit + +When device memory of certain heap runs out of free space, new allocations may +fail (returning error code) or they may succeed, silently pushing some existing +memory blocks from GPU VRAM to system RAM (which degrades performance). This +behavior is implementation-dependent - it depends on GPU vendor and graphics +driver. + +On AMD cards it can be controlled while creating Vulkan device object by using +VK_AMD_memory_overallocation_behavior extension, if available. + +Alternatively, if you want to test how your program behaves with limited amount of Vulkan device +memory available without switching your graphics card to one that really has +smaller VRAM, you can use a feature of this library intended for this purpose. +To do it, fill optional member VmaAllocatorCreateInfo::pHeapSizeLimit. + + + +\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation + +VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve +performance on some GPUs. It augments Vulkan API with possibility to query +driver whether it prefers particular buffer or image to have its own, dedicated +allocation (separate `VkDeviceMemory` block) for better efficiency - to be able +to do some internal optimizations. The extension is supported by this library. +It will be used automatically when enabled. + +It has been promoted to core Vulkan 1.1, so if you use eligible Vulkan version +and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion, +you are all set. + +Otherwise, if you want to use it as an extension: + +1 . When creating Vulkan device, check if following 2 device extensions are +supported (call `vkEnumerateDeviceExtensionProperties()`). +If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`). + +- VK_KHR_get_memory_requirements2 +- VK_KHR_dedicated_allocation + +If you enabled these extensions: + +2 . 
Use #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating +your #VmaAllocator to inform the library that you enabled required extensions +and you want the library to use them. + +\code +allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; + +vmaCreateAllocator(&allocatorInfo, &allocator); +\endcode + +That is all. The extension will be automatically used whenever you create a +buffer using vmaCreateBuffer() or image using vmaCreateImage(). + +When using the extension together with Vulkan Validation Layer, you will receive +warnings like this: + +_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._ + +It is OK, you should just ignore it. It happens because you use function +`vkGetBufferMemoryRequirements2KHR()` instead of standard +`vkGetBufferMemoryRequirements()`, while the validation layer seems to be +unaware of it. + +To learn more about this extension, see: + +- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap50.html#VK_KHR_dedicated_allocation) +- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5) + + + +\page vk_ext_memory_priority VK_EXT_memory_priority + +VK_EXT_memory_priority is a device extension that allows passing an additional "priority" +value to Vulkan memory allocations that the implementation may use to prefer certain +buffers and images that are critical for performance to stay in device-local memory +in cases when the memory is over-subscribed, while some others may be moved to the system memory. + +VMA offers convenient usage of this extension. +If you enable it, you can pass "priority" parameter when creating allocations or custom pools +and the library automatically passes the value to Vulkan using this extension. 
+ +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_ext_memory_priority_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to +`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +to VmaAllocatorCreateInfo::flags. + +\section vk_ext_memory_priority_usage Usage + +When using this extension, you should initialize following member: + +- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +- VmaPoolCreateInfo::priority when creating a custom pool. + +It should be a floating-point value between `0.0f` and `1.0f`, where recommended default is `0.5f`. 
+Memory allocated with higher value can be treated by the Vulkan implementation as higher priority +and so it can have lower chances of being pushed out to system memory, experiencing degraded performance. + +It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images +as dedicated and set high priority to them. For example: + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +`priority` member is ignored in the following situations: + +- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters + from the parameters passed in #VmaPoolCreateInfo when the pool was created. +- Allocations created in default pools: They inherit the priority from the parameters + VMA used when creating default pools, which means `priority == 0.5f`. + + +\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory + +VK_AMD_device_coherent_memory is a device extension that enables access to +additional memory types with `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and +`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flag. 
It is useful mostly for +allocation of buffers intended for writing "breadcrumb markers" in between passes +or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases. + +When the extension is available but has not been enabled, Vulkan physical device +still exposes those memory types, but their usage is forbidden. VMA automatically +takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt +to allocate memory of such type is made. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_amd_device_coherent_memory_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +to VmaAllocatorCreateInfo::flags. 
+ +\section vk_amd_device_coherent_memory_usage Usage + +After following steps described above, you can create VMA allocations and custom pools +out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible +devices. There are multiple ways to do it, for example: + +- You can request or prefer to allocate out of such memory types by adding + `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags + or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with + other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage. +- If you manually found memory type index to use for this purpose, force allocation + from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`. + +\section vk_amd_device_coherent_memory_more_information More information + +To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_AMD_device_coherent_memory.html) + +Example use of this extension can be found in the code of the sample and test suite +accompanying this library. + + +\page enabling_buffer_device_address Enabling buffer device address + +Device extension VK_KHR_buffer_device_address +allows fetching a raw GPU pointer to a buffer and passing it for usage in shader code. +It has been promoted to core Vulkan 1.2. + +If you want to use this feature in connection with VMA, follow these steps: + +\section enabling_buffer_device_address_initialization Initialization + +1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains +"VK_KHR_buffer_device_address". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. 
+Attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress` is true. + +3) (For Vulkan version < 1.2) While creating device with `vkCreateDevice`, enable this extension - add +"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +to VmaAllocatorCreateInfo::flags. + +\section enabling_buffer_device_address_usage Usage + +After following steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA. +The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to +allocated memory blocks wherever it might be needed. + +Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`. +The second part of this functionality related to "capture and replay" is not supported, +as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage. 
+ +\section enabling_buffer_device_address_more_information More information + +To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address) + +Example use of this extension can be found in the code of the sample and test suite +accompanying this library. + +\page general_considerations General considerations + +\section general_considerations_thread_safety Thread safety + +- The library has no global state, so separate #VmaAllocator objects can be used + independently. + There should be no need to create multiple such objects though - one per `VkDevice` is enough. +- By default, all calls to functions that take #VmaAllocator as first parameter + are safe to call from multiple threads simultaneously because they are + synchronized internally when needed. + This includes allocation and deallocation from default memory pool, as well as custom #VmaPool. +- When the allocator is created with #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT + flag, calls to functions that take such #VmaAllocator object must be + synchronized externally. +- Access to a #VmaAllocation object must be externally synchronized. For example, + you must not call vmaGetAllocationInfo() and vmaMapMemory() from different + threads at the same time if you pass the same #VmaAllocation object to these + functions. +- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. + +\section general_considerations_versioning_and_compatibility Versioning and compatibility + +The library uses [**Semantic Versioning**](https://semver.org/), +which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where: + +- Incremented Patch version means a release is backward- and forward-compatible, + introducing only some internal improvements, bug fixes, optimizations etc. 
+ or changes that are out of scope of the official API described in this documentation. +- Incremented Minor version means a release is backward-compatible, + so existing code that uses the library should continue to work, while some new + symbols could have been added: new structures, functions, new values in existing + enums and bit flags, new structure members, but not new function parameters. +- Incrementing Major version means a release could break some backward compatibility. + +All changes between official releases are documented in file "CHANGELOG.md". + +\warning Backward compatibility is considered on the level of C++ source code, not binary linkage. +Adding new members to existing structures is treated as backward compatible if initializing +the new members to binary zero results in the old behavior. +You should always fully initialize all library structures to zeros and not rely on their +exact binary size. + +\section general_considerations_validation_layer_warnings Validation layer warnings + +When using this library, you can meet following types of warnings issued by +Vulkan validation layer. They don't necessarily indicate a bug, so you may need +to just ignore them. + +- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.* + - It happens when VK_KHR_dedicated_allocation extension is enabled. + `vkGetBufferMemoryRequirements2KHR` function is used instead, while validation layer seems to be unaware of it. +- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.* + - It happens when you map a buffer or image, because the library maps entire + `VkDeviceMemory` block, where different types of images and buffers may end + up together, especially on GPUs with unified memory like Intel. 
+- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.* + - It may happen when you use [defragmentation](@ref defragmentation). + +\section general_considerations_allocation_algorithm Allocation algorithm + +The library uses following algorithm for allocation, in order: + +-# Try to find free range of memory in existing blocks. +-# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. +-# If failed, try to create such block with size / 2, size / 4, size / 8. +-# If failed, try to allocate separate `VkDeviceMemory` for this allocation, + just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +-# If failed, choose other memory type that meets the requirements specified in + VmaAllocationCreateInfo and go to point 1. +-# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + +\section general_considerations_features_not_supported Features not supported + +Features deliberately excluded from the scope of this library: + +-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images + between CPU and GPU memory and related synchronization is responsibility of the user. + Defining some "texture" object that would automatically stream its data from a + staging copy in CPU memory to GPU memory would rather be a feature of another, + higher-level library implemented on top of VMA. + VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory. +-# **Recreation of buffers and images.** Although the library has functions for + buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to + recreate these objects yourself after defragmentation. That is because the big + structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in + #VmaAllocation object. 
+-# **Handling CPU memory allocation failures.** When dynamically creating small C++ + objects in CPU memory (not Vulkan memory), allocation failures are not checked + and handled gracefully, because that would complicate code significantly and + is usually not needed in desktop PC applications anyway. + Success of an allocation is just checked with an assert. +-# **Code free of any compiler warnings.** Maintaining the library to compile and + work correctly on so many different platforms is hard enough. Being free of + any warnings, on any version of any compiler, is simply not feasible. + There are many preprocessor macros that make some variables unused, function parameters unreferenced, + or conditional expressions constant in some configurations. + The code of this library should not be bigger or more complicated just to silence these warnings. + It is recommended to disable such warnings instead. +-# This is a C++ library with C interface. **Bindings or ports to any other programming languages** are welcome as external projects but + are not going to be included into this repository. 
+*/ diff --git a/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.natvis b/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.natvis index 85c75335f..92215d86b 100644 --- a/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.natvis +++ b/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.natvis @@ -1,40 +1,71 @@ - - {{ Count={m_Count} }} - - m_Count - - m_Count - m_pFront - pNext - Value - - - + + {{ Count={m_Count} }} + + m_Count + + m_Count + m_pFront + pNext + Value + + + - - {{ Count={m_RawList.m_Count} }} - - m_RawList.m_Count - - m_RawList.m_Count - m_RawList.m_pFront - pNext - Value - - - + + {{ Count={m_RawList.m_Count} }} + + m_RawList.m_Count + + m_RawList.m_Count + m_RawList.m_pFront + pNext + Value + + + - - {{ Count={m_Count} }} - - m_Count - m_Capacity - - m_Count - m_pArray - - - + + {{ Count={m_Count} }} + + m_Count + m_Capacity + + m_Count + m_pArray + + + + + + + {{ Count={m_Count} }} + + m_Count + + m_Count + m_Front + m_DedicatedAllocation.m_Next + *this + + + + + {{ Count={m_Count} }} + + m_Count + + m_Count + m_Front + m_NextPool + *this + + + \ No newline at end of file From a8220147cef12992b66cd4618d8afaa5e4082173 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 15:00:03 +0200 Subject: [PATCH 101/292] Improve SDF mip generation to use min-filter --- Source/Engine/Tools/ModelTool/ModelTool.cpp | 8 +++---- Source/Shaders/SDF.shader | 26 --------------------- 2 files changed, 3 insertions(+), 31 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index 99c98bb0e..c5a703726 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -572,9 +572,8 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float const int32 yAddress = resolutionMip.X * y + zAddress; for (int32 x = 0; x < resolutionMip.X; x++) { - // Linear box filter around the voxel - // TODO: use min distance for nearby 
texels (texel distance + distance to texel) - float distance = 0; + // Min-filter around the voxel + float distance = MAX_float; for (int32 dz = 0; dz < 2; dz++) { const int32 dzAddress = (z * 2 + dz) * (resolution.Y * resolution.X); @@ -585,11 +584,10 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float { const int32 dxAddress = (x * 2 + dx) + dyAddress; const float d = formatRead((byte*)voxelsMipSrc + dxAddress * formatStride) * decodeMAD.X + decodeMAD.Y; - distance += d; + distance = Math::Min(distance, d); } } } - distance *= 1.0f / 8.0f; const int32 xAddress = x + yAddress; formatWrite((byte*)voxelsMip + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y); diff --git a/Source/Shaders/SDF.shader b/Source/Shaders/SDF.shader index a19e6d346..7976558c7 100644 --- a/Source/Shaders/SDF.shader +++ b/Source/Shaders/SDF.shader @@ -218,32 +218,6 @@ void CS_FloodFill(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex) sdf = CombineSDF(sdf, voxelCoord + offset.xyy, nearbyDistance); sdf = CombineSDF(sdf, voxelCoord + offset.yxy, nearbyDistance); sdf = CombineSDF(sdf, voxelCoord + offset.yyx, nearbyDistance); -#if 0 - nearbyDistance = WorldUnitsPerVoxel * 1.41421f; - sdf = CombineSDF(sdf, voxelCoord + offset.xxy, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.xzy, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zzy, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zxy, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.xyx, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.xyz, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zyz, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zyx, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.yxx, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.yxz, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.yzz, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.yzx, 
nearbyDistance); -#endif -#if 0 - nearbyDistance = WorldUnitsPerVoxel * 1.73205f; - sdf = CombineSDF(sdf, voxelCoord + offset.xxx, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.xxz, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.xzx, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.xzz, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zxx, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zxz, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zzx, nearbyDistance); - sdf = CombineSDF(sdf, voxelCoord + offset.zzz, nearbyDistance); -#endif } SDF[voxelIndex] = asuint(sdf); From f1debd6cb53627bea716a0fcf0973c5968e69986 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 17:52:48 +0200 Subject: [PATCH 102/292] Add names to SDF gpu resources --- Source/Engine/Tools/ModelTool/ModelTool.cpp | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index c5a703726..f8e972b71 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -120,6 +120,10 @@ public: , _xyzToLocalMul(xyzToLocalMul) , _xyzToLocalAdd(xyzToLocalAdd) { +#if GPU_ENABLE_RESOURCE_NAMING + _sdfSrc->SetName(TEXT("SDFSrc")); + _sdfDst->SetName(TEXT("SDFDst")); +#endif } ~GPUModelSDFTask() @@ -202,7 +206,12 @@ public: desc = GPUBufferDescription::Raw(vb->GetSize(), GPUBufferFlags::ShaderResource); // TODO: use transient buffer (single frame) if (!vbTemp) + { vbTemp = GPUBuffer::New(); +#if GPU_ENABLE_RESOURCE_NAMING + vbTemp->SetName(TEXT("SDFvb")); +#endif + } vbTemp->Init(desc); context->CopyBuffer(vbTemp, vb, desc.Size); vb = vbTemp; @@ -212,7 +221,12 @@ public: desc = GPUBufferDescription::Raw(ib->GetSize(), GPUBufferFlags::ShaderResource); // TODO: use transient buffer (single frame) if (!ibTemp) + { ibTemp = GPUBuffer::New(); +#if 
GPU_ENABLE_RESOURCE_NAMING + ibTemp->SetName(TEXT("SDFib")); +#endif + } ibTemp->Init(desc); context->CopyBuffer(ibTemp, ib, desc.Size); ib = ibTemp; @@ -230,6 +244,10 @@ public: const ModelLodData& lod = _modelData->LODs[Math::Clamp(_lodIndex, 0, _modelData->LODs.Count() - 1)]; auto vb = GPUBuffer::New(); auto ib = GPUBuffer::New(); +#if GPU_ENABLE_RESOURCE_NAMING + vb->SetName(TEXT("SDFvb")); + ib->SetName(TEXT("SDFib")); +#endif for (int32 i = 0; i < lod.Meshes.Count(); i++) { const MeshData* mesh = lod.Meshes[i]; @@ -294,6 +312,9 @@ public: auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess | GPUTextureFlags::RenderTarget); // TODO: use transient texture (single frame) auto sdfTexture = GPUTexture::New(); +#if GPU_ENABLE_RESOURCE_NAMING + sdfTexture->SetName(TEXT("SDFTexture")); +#endif sdfTexture->Init(sdfTextureDesc); context->BindUA(1, sdfTexture->ViewVolume()); context->Dispatch(shader->GetCS("CS_Encode"), threadGroups.X, threadGroups.Y, threadGroups.Z); @@ -397,7 +418,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float SAFE_DELETE_GPU_RESOURCE(outputSDF->Texture); return true; } -#if !BUILD_RELEASE +#if GPU_ENABLE_RESOURCE_NAMING outputSDF->Texture->SetName(TEXT("ModelSDF")); #endif } @@ -432,6 +453,9 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float // TODO: skip using sdfResult and downloading SDF from GPU when updating virtual model auto sdfResult = GPUTexture::New(); +#if GPU_ENABLE_RESOURCE_NAMING + sdfResult->SetName(TEXT("SDFResult")); +#endif // Run SDF generation via GPU async task ConditionVariable signal; From 4dd331d5462237ecccb63a61d7392ea97fdb20c3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 18:45:32 +0200 Subject: [PATCH 103/292] Add half-texel margin to SDF around the mesh --- Source/Engine/Tools/ModelTool/ModelTool.cpp | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index f8e972b71..d9b8d0bf3 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -364,9 +364,12 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float bounds = modelData->LODs[lodIndex].GetBox(); else return true; - Float3 size = bounds.GetSize(); ModelBase::SDFData sdf; sdf.WorldUnitsPerVoxel = METERS_TO_UNITS(0.1f) / Math::Max(resolutionScale, 0.0001f); // 1 voxel per 10 centimeters + const float boundsMargin = sdf.WorldUnitsPerVoxel * 0.5f; // Add half-texel margin around the mesh + bounds.Minimum -= boundsMargin; + bounds.Maximum += boundsMargin; + const Float3 size = bounds.GetSize(); Int3 resolution(Float3::Ceil(Float3::Clamp(size / sdf.WorldUnitsPerVoxel, 4, 256))); Float3 uvwToLocalMul = size; Float3 uvwToLocalAdd = bounds.Minimum; From 62dcfe2caebaf214cd58a32aa474c6a740fdc3bc Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 29 May 2024 18:45:45 +0200 Subject: [PATCH 104/292] Update in-built meshes sdf --- Content/Editor/Primitives/Capsule.flax | 4 ++-- Content/Editor/Primitives/Cube.flax | 4 ++-- Content/Editor/Primitives/Cylinder.flax | 4 ++-- Content/Editor/Primitives/Sphere.flax | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Content/Editor/Primitives/Capsule.flax b/Content/Editor/Primitives/Capsule.flax index aac4785b9..6ff0daed9 100644 --- a/Content/Editor/Primitives/Capsule.flax +++ b/Content/Editor/Primitives/Capsule.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:100fe101c4f754cc1f21041d7ab44a41607cafd65ce5f8588a341740f7498f24 -size 30396 +oid sha256:071d1e65b4bbaea07f9d0b7b681c7c43ff141704f98c78927af656917aba6551 +size 31478 diff --git a/Content/Editor/Primitives/Cube.flax b/Content/Editor/Primitives/Cube.flax index ea492f271..2aafb70ee 100644 --- 
a/Content/Editor/Primitives/Cube.flax +++ b/Content/Editor/Primitives/Cube.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9032ba0a951268901b3b711b840c22108b2db6063330bf597ad309101324e5e6 -size 4981 +oid sha256:3f54c6aa9d56964baf70273a37d88258b3ff21abd62876f74eb54de295221aef +size 5643 diff --git a/Content/Editor/Primitives/Cylinder.flax b/Content/Editor/Primitives/Cylinder.flax index 224177090..037a78d41 100644 --- a/Content/Editor/Primitives/Cylinder.flax +++ b/Content/Editor/Primitives/Cylinder.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d744a628567cf3ca1598183a3d7fbf2b23c3b23816ca80103cd8026638f7eddc -size 15609 +oid sha256:3220e2e081ba3219cb3b3f35eb295b912df77c407cc9e385dc8fc71d9a0f1724 +size 16271 diff --git a/Content/Editor/Primitives/Sphere.flax b/Content/Editor/Primitives/Sphere.flax index 651bc7afc..99b5cac11 100644 --- a/Content/Editor/Primitives/Sphere.flax +++ b/Content/Editor/Primitives/Sphere.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bd78fb9c7b970d7661cff626568cd5fada5a5071740b7771241288d9bcb7995 -size 40605 +oid sha256:3c240cd5211bd02fc4dc2c50e15c34644ecb80330839ac8b00ef7b77f72a88f9 +size 43127 From 7e316a130593b3f8080488bb4448ab9ebfb7c4c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 31 May 2024 11:35:38 +0200 Subject: [PATCH 105/292] Fix SSAO flickering artifacts in cooked game (disable depth mips usage) --- Content/Shaders/SSAO.flax | 4 ++-- Source/Engine/Renderer/AmbientOcclusionPass.cpp | 9 +++++++-- Source/Engine/Renderer/AmbientOcclusionPass.h | 1 + Source/Shaders/SSAO.shader | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Content/Shaders/SSAO.flax b/Content/Shaders/SSAO.flax index 632ee01e3..10eb267c4 100644 --- a/Content/Shaders/SSAO.flax +++ b/Content/Shaders/SSAO.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2c81b8a794402bd456de39ad3322b21377190a346ba601ce3ba10c762259542 -size 36841 
+oid sha256:dc490e550c1aa601647b2caf46a77043ba806f0ced7566777edac2dba1d3a247 +size 36842 diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.cpp b/Source/Engine/Renderer/AmbientOcclusionPass.cpp index 06c4f73f2..f176b2ac2 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.cpp +++ b/Source/Engine/Renderer/AmbientOcclusionPass.cpp @@ -214,6 +214,7 @@ void AmbientOcclusionPass::Render(RenderContext& renderContext) return; PROFILE_GPU_CPU("Ambient Occlusion"); + settings = ASSAO_Settings(); settings.Radius = aoSettings.Radius * 0.006f; settings.ShadowMultiplier = aoSettings.Intensity; settings.ShadowPower = aoSettings.Power; @@ -296,8 +297,11 @@ void AmbientOcclusionPass::InitRTs(const RenderContext& renderContext) GPUTextureDescription tempDesc; for (int i = 0; i < 4; i++) { - // TODO: maybe instead of using whole mip chain request only SSAO_DEPTH_MIP_LEVELS? +#if SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET < 99 tempDesc = GPUTextureDescription::New2D((int32)m_halfSizeX, (int32)m_halfSizeY, 0, SSAO_DEPTH_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews); +#else + tempDesc = GPUTextureDescription::New2D((int32)m_halfSizeX, (int32)m_halfSizeY, SSAO_DEPTH_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget); +#endif m_halfDepths[i] = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(m_halfDepths[i], "SSAO.HalfDepth"); } @@ -334,6 +338,7 @@ void AmbientOcclusionPass::UpdateCB(const RenderContext& renderContext, GPUConte const float farPlane = view.Far; const Matrix& proj = view.Projection; + Platform::MemoryClear(&_constantsBufferData, sizeof(_constantsBufferData)); GBufferPass::SetInputs(view, _constantsBufferData.GBuffer); Matrix::Transpose(view.View, _constantsBufferData.ViewMatrix); @@ -447,7 +452,7 @@ void AmbientOcclusionPass::PrepareDepths(const RenderContext& renderContext) } // Only do mipmaps for higher quality levels (not beneficial on quality level 1, and detrimental 
on quality level 0) - if (settings.QualityLevel > 1) + if (settings.QualityLevel > 1 && SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET < 99) { for (int i = 1; i < SSAO_DEPTH_MIP_LEVELS; i++) { diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.h b/Source/Engine/Renderer/AmbientOcclusionPass.h index d89ad38b4..69fcd211c 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.h +++ b/Source/Engine/Renderer/AmbientOcclusionPass.h @@ -6,6 +6,7 @@ // Config #define SSAO_DEPTH_MIP_LEVELS 4 // <- must match shader define +#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (99) // <- must match shader define #define SSAO_DEPTH_FORMAT PixelFormat::R16_Float #define SSAO_AO_RESULT_FORMAT PixelFormat::R8G8_UNorm #define SSAO_MAX_BLUR_PASS_COUNT 6 diff --git a/Source/Shaders/SSAO.shader b/Source/Shaders/SSAO.shader index e72d9a30c..206da2bff 100644 --- a/Source/Shaders/SSAO.shader +++ b/Source/Shaders/SSAO.shader @@ -58,7 +58,7 @@ static const uint g_numTaps[4] = { 3, 5, 8, 12 }; // #define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use DetailAOStrength; to disable simply set to 99 or similar // -#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too +#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (99) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too #define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically // // !!warning!! 
the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for From f78bbc6b70a25529470be4246d25b25b389b1f18 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 31 May 2024 23:21:07 +0200 Subject: [PATCH 106/292] Add `GlobalSDFDistance` to graphics settings for default GlobalSDF range #2664 --- Source/Engine/Core/Config/GraphicsSettings.h | 6 ++++++ Source/Engine/Graphics/PostProcessSettings.h | 2 +- Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 6 +++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Core/Config/GraphicsSettings.h b/Source/Engine/Core/Config/GraphicsSettings.h index a3a5e7ded..9fef78f74 100644 --- a/Source/Engine/Core/Config/GraphicsSettings.h +++ b/Source/Engine/Core/Config/GraphicsSettings.h @@ -84,6 +84,12 @@ public: API_FIELD(Attributes="EditorOrder(2000), EditorDisplay(\"Global SDF\")") bool EnableGlobalSDF = false; + /// + /// Draw distance of the Global SDF. Actual value can be large when using DDGI. + /// + API_FIELD(Attributes="EditorOrder(2001), EditorDisplay(\"Global SDF\"), Limit(1000), ValueCategory(Utils.ValueCategory.Distance)") + float GlobalSDFDistance = 15000.0f; + /// /// The Global SDF quality. Controls the volume texture resolution and amount of cascades to use. /// diff --git a/Source/Engine/Graphics/PostProcessSettings.h b/Source/Engine/Graphics/PostProcessSettings.h index 525da3f36..19b3cec93 100644 --- a/Source/Engine/Graphics/PostProcessSettings.h +++ b/Source/Engine/Graphics/PostProcessSettings.h @@ -358,7 +358,7 @@ API_STRUCT() struct FLAXENGINE_API GlobalIlluminationSettings : ISerializable float BounceIntensity = 1.0f; /// - /// Defines how quickly GI blends between the the current frame and the history buffer. Lower values update GI faster, but with more jittering and noise. If the camera in your game doesn't move much, we recommend values closer to 1. 
+ /// Defines how quickly GI blends between the current frame and the history buffer. Lower values update GI faster, but with more jittering and noise. If the camera in your game doesn't move much, we recommend values closer to 1. /// API_FIELD(Attributes="EditorOrder(20), Limit(0, 1), PostProcessSetting((int)GlobalIlluminationSettingsOverride.TemporalResponse)") float TemporalResponse = 0.9f; diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index b9830c6ee..27eed44b8 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -5,6 +5,7 @@ #include "Engine/Core/Math/Vector3.h" #include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Core/Collections/HashSet.h" +#include "Engine/Core/Config/GraphicsSettings.h" #include "Engine/Engine/Engine.h" #include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" @@ -411,7 +412,10 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } const int32 resolutionMip = Math::DivideAndRoundUp(resolution, GLOBAL_SDF_RASTERIZE_MIP_FACTOR); auto& giSettings = renderContext.List->Settings.GlobalIllumination; - const float distance = Math::Min(giSettings.Mode == GlobalIlluminationMode::DDGI ? 
giSettings.Distance : 15000.0f, renderContext.View.Far); + float distance = GraphicsSettings::Get()->GlobalSDFDistance; + if (giSettings.Mode == GlobalIlluminationMode::DDGI) + distance = Math::Max(distance, giSettings.Distance); + distance = Math::Min(distance, renderContext.View.Far); const float cascadesDistanceScales[] = { 1.0f, 2.5f, 5.0f, 10.0f }; const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1]; From 6dacf9e1f1e49fa5fed2b35356871179b0a34571 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 31 May 2024 23:36:35 +0200 Subject: [PATCH 107/292] Add caching Model SDF options in the project cache #2075 --- Source/Editor/Windows/Assets/ModelWindow.cs | 35 ++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/Source/Editor/Windows/Assets/ModelWindow.cs b/Source/Editor/Windows/Assets/ModelWindow.cs index 4a0df7255..c3e285973 100644 --- a/Source/Editor/Windows/Assets/ModelWindow.cs +++ b/Source/Editor/Windows/Assets/ModelWindow.cs @@ -13,6 +13,7 @@ using FlaxEditor.Viewport.Cameras; using FlaxEditor.Viewport.Previews; using FlaxEngine; using FlaxEngine.GUI; +using FlaxEngine.Json; using FlaxEngine.Tools; using FlaxEngine.Utilities; using Object = FlaxEngine.Object; @@ -187,6 +188,7 @@ namespace FlaxEditor.Windows.Assets // SDF { var group = layout.Group("SDF"); + var sdfOptions = proxy.Window._sdfOptions; var sdf = proxy.Asset.SDF; if (sdf.Texture != null) @@ -207,20 +209,20 @@ namespace FlaxEditor.Windows.Assets proxy.Window._importSettings.Settings.SDFResolution = sdf.ResolutionScale; var gpu = group.Checkbox("Bake on GPU", "If checked, SDF generation will be calculated using GPU on Compute Shader, otherwise CPU will use Job System. GPU generation is fast but result in artifacts in various meshes (eg. 
foliage)."); - gpu.CheckBox.Checked = proxy.Window._gpuSDF; + gpu.CheckBox.Checked = sdfOptions.GPU; var backfacesThresholdProp = group.AddPropertyItem("Backfaces Threshold", "Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. Use lower value for more dense mesh."); var backfacesThreshold = backfacesThresholdProp.FloatValue(); var backfacesThresholdLabel = backfacesThresholdProp.Labels.Last(); backfacesThreshold.ValueBox.MinValue = 0.001f; backfacesThreshold.ValueBox.MaxValue = 1.0f; - backfacesThreshold.ValueBox.Value = proxy.Window._backfacesThreshold; - backfacesThreshold.ValueBox.BoxValueChanged += b => { proxy.Window._backfacesThreshold = b.Value; }; + backfacesThreshold.ValueBox.Value = sdfOptions.BackfacesThreshold; + backfacesThreshold.ValueBox.BoxValueChanged += b => { proxy.Window._sdfOptions.BackfacesThreshold = b.Value; }; // Toggle Backfaces Threshold visibility (CPU-only option) gpu.CheckBox.StateChanged += c => { - proxy.Window._gpuSDF = c.Checked; + proxy.Window._sdfOptions.GPU = c.Checked; backfacesThresholdLabel.Visible = !c.Checked; backfacesThreshold.ValueBox.Visible = !c.Checked; }; @@ -314,13 +316,17 @@ namespace FlaxEditor.Windows.Assets proxy.Window.Enabled = false; Task.Run(() => { - bool failed = proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold, proxy.Window._gpuSDF); + var sdfOptions = proxy.Window._sdfOptions; + bool failed = proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, sdfOptions.BackfacesThreshold, sdfOptions.GPU); FlaxEngine.Scripting.InvokeOnUpdate(() => { proxy.Window.Enabled = true; if (!failed) proxy.Window.MarkAsEdited(); Presenter.BuildLayoutOnUpdate(); + + // Save some SDF options locally in the project cache + 
proxy.Window.Editor.ProjectCache.SetCustomData(JsonSerializer.GetStringID(proxy.Window.Item.ID) + ".SDF", JsonSerializer.Serialize(sdfOptions)); }); }); } @@ -800,18 +806,33 @@ namespace FlaxEditor.Windows.Assets } } + private struct ModelSdfOptions + { + public bool GPU; + public float BackfacesThreshold; + } + private readonly ModelPreview _preview; private StaticModel _highlightActor; private MeshDataCache _meshData; private ModelImportSettings _importSettings = new ModelImportSettings(); - private float _backfacesThreshold = 0.6f; - private bool _gpuSDF = true; + private ModelSdfOptions _sdfOptions; private ToolStripButton _showCurrentLODButton; /// public ModelWindow(Editor editor, AssetItem item) : base(editor, item) { + // Try to restore SDF options from project cache (not saved in the asset) + if (Editor.ProjectCache.TryGetCustomData(JsonSerializer.GetStringID(Item.ID) + ".SDF", out string sdOptionsStr)) + _sdfOptions = JsonSerializer.Deserialize(sdOptionsStr); + else + _sdfOptions = new ModelSdfOptions + { + GPU = true, + BackfacesThreshold = 0.6f, + }; + // Toolstrip _toolstrip.AddSeparator(); _showCurrentLODButton = (ToolStripButton)_toolstrip.AddButton(editor.Icons.Info64, () => _preview.ShowCurrentLOD = !_preview.ShowCurrentLOD).LinkTooltip("Show LOD statistics"); From d2d2297dc7531f7873ddfe1f10c03aed2ca58d33 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:05:49 +0300 Subject: [PATCH 108/292] Support ARM64 architecture under Windows --- Source/Engine/Platform/Win32/Win32Defines.h | 6 +- .../Engine/Platform/Win32/Win32Platform.cpp | 12 +++- .../Platform/Windows/WindowsPlatform.cpp | 8 +++ Source/Engine/Scripting/Runtime/DotNet.cpp | 11 +++- Source/Engine/Scripting/ScriptingType.h | 60 +++++++++++++++++++ Source/FlaxEditor.Build.cs | 3 + Source/Tools/Flax.Build/Build/Platform.cs | 19 +++++- .../Platforms/Windows/WindowsPlatform.cs | 5 +- .../Platforms/Windows/WindowsToolchain.cs | 6 ++ .../Platforms/Windows/WindowsToolchainBase.cs | 6 
+- .../VisualStudio/VCProjectGenerator.cs | 54 +++++++++-------- 11 files changed, 154 insertions(+), 36 deletions(-) diff --git a/Source/Engine/Platform/Win32/Win32Defines.h b/Source/Engine/Platform/Win32/Win32Defines.h index 891e86839..4571dc419 100644 --- a/Source/Engine/Platform/Win32/Win32Defines.h +++ b/Source/Engine/Platform/Win32/Win32Defines.h @@ -6,10 +6,14 @@ // Platform description #define PLATFORM_DESKTOP 1 -#if defined(WIN64) +#if defined(WIN64) && defined(_M_X64) #define PLATFORM_64BITS 1 #define PLATFORM_ARCH_X64 1 #define PLATFORM_ARCH ArchitectureType::x64 +#elif defined(WIN64) && defined(_M_ARM64) +#define PLATFORM_64BITS 1 +#define PLATFORM_ARCH_ARM64 1 +#define PLATFORM_ARCH ArchitectureType::ARM64 #else #define PLATFORM_64BITS 0 #define PLATFORM_ARCH_X86 1 diff --git a/Source/Engine/Platform/Win32/Win32Platform.cpp b/Source/Engine/Platform/Win32/Win32Platform.cpp index d46706621..a9aa784c2 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.cpp +++ b/Source/Engine/Platform/Win32/Win32Platform.cpp @@ -157,10 +157,14 @@ bool Win32Platform::Init() CpuInfo.PageSize = siSysInfo.dwPageSize; CpuInfo.ClockSpeed = ClockFrequency; { +#ifdef _M_ARM64 + CpuInfo.CacheLineSize = 128; +#else int args[4]; __cpuid(args, 0x80000006); CpuInfo.CacheLineSize = args[2] & 0xFF; ASSERT(CpuInfo.CacheLineSize && Math::IsPowerOfTwo(CpuInfo.CacheLineSize)); +#endif } // Setup unique device ID @@ -226,10 +230,12 @@ void Win32Platform::MemoryBarrier() { _ReadWriteBarrier(); #if PLATFORM_64BITS -#ifdef _AMD64_ +#if defined(_AMD64_) __faststorefence(); #elif defined(_IA64_) __mf(); +#elif defined(_ARM64_) + __dmb(_ARM64_BARRIER_ISH); #else #error "Invalid platform." 
#endif @@ -243,7 +249,11 @@ void Win32Platform::MemoryBarrier() void Win32Platform::Prefetch(void const* ptr) { +#if _M_ARM64 + __prefetch((char const*)ptr); +#else _mm_prefetch((char const*)ptr, _MM_HINT_T0); +#endif } void* Win32Platform::Allocate(uint64 size, uint64 alignment) diff --git a/Source/Engine/Platform/Windows/WindowsPlatform.cpp b/Source/Engine/Platform/Windows/WindowsPlatform.cpp index ba450d7ee..0f1159fc5 100644 --- a/Source/Engine/Platform/Windows/WindowsPlatform.cpp +++ b/Source/Engine/Platform/Windows/WindowsPlatform.cpp @@ -1312,6 +1312,14 @@ Array WindowsPlatform::GetStackFrames(int32 skipCount, stack.AddrBStore.Mode = AddrModeFlat; stack.AddrStack.Offset = ctx.IntSp; stack.AddrStack.Mode = AddrModeFlat; +#elif _M_ARM64 + imageType = IMAGE_FILE_MACHINE_ARM64; + stack.AddrPC.Offset = ctx.Pc; + stack.AddrPC.Mode = AddrModeFlat; + stack.AddrFrame.Offset = ctx.Fp; + stack.AddrFrame.Mode = AddrModeFlat; + stack.AddrStack.Offset = ctx.Sp; + stack.AddrStack.Mode = AddrModeFlat; #else #error "Platform not supported!" #endif diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index a3ec1321f..25213ec46 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -1777,13 +1777,18 @@ bool InitHostfxr() { case PlatformType::Windows: case PlatformType::UWP: - platformStr = PLATFORM_64BITS ? "Windows x64" : "Windows x86"; + if (PLATFORM_ARCH == ArchitectureType::x64) + platformStr = "Windows x64"; + else if (PLATFORM_ARCH == ArchitectureType::ARM64) + platformStr = "Windows ARM64"; + else + platformStr = "Windows x86"; break; case PlatformType::Linux: - platformStr = PLATFORM_ARCH_ARM64 ? "Linux Arm64" : PLATFORM_ARCH_ARM ? "Linux Arm32" : PLATFORM_64BITS ? "Linux x64" : "Linux x86"; + platformStr = PLATFORM_ARCH_ARM64 ? "Linux ARM64" : PLATFORM_ARCH_ARM ? "Linux Arm32" : PLATFORM_64BITS ? 
"Linux x64" : "Linux x86"; break; case PlatformType::Mac: - platformStr = PLATFORM_ARCH_ARM || PLATFORM_ARCH_ARM64 ? "macOS Arm64" : PLATFORM_64BITS ? "macOS x64" : "macOS x86"; + platformStr = PLATFORM_ARCH_ARM || PLATFORM_ARCH_ARM64 ? "macOS ARM64" : PLATFORM_64BITS ? "macOS x64" : "macOS x86"; break; default:; platformStr = ""; diff --git a/Source/Engine/Scripting/ScriptingType.h b/Source/Engine/Scripting/ScriptingType.h index 17a844097..01335b6cb 100644 --- a/Source/Engine/Scripting/ScriptingType.h +++ b/Source/Engine/Scripting/ScriptingType.h @@ -493,6 +493,66 @@ FORCE_INLINE int32 GetVTableIndex(void** vtable, int32 entriesCount, void* func) if (op == 0x20) return 0; return *(byte*)funcJmp / sizeof(void*); +#elif defined(_MSC_VER) && PLATFORM_ARCH_ARM64 + // For MSVC ARM64, the following thunk takes a relative jump from the function pointer to the next thunk: + // adrp xip0, offset_high + // add xip0, xip0, offset_low + // br xip0 + // The last thunk contains the offset to the vtable: + // ldr xip0, [x0] + // ldr xip0, [xip0, XXX] + uint32_t* op = (uint32_t*)func; + + uint32_t def = *op; + if ((*op & 0x9F000000) == 0x90000000) + { + // adrp + uint32_t imm20 = (((*op & 0x60000000) >> 29) + ((*op & 0xFFFFE0) >> 3)) << 12; + op++; + + // add + def = *op; + uint32_t imm12 = (*op & 0x3FFC00) >> 10; + imm12 = (*op & 0x400000) != 0 ? (imm12 << 12) : imm12; + + // br + op = (uint32_t*)(((uintptr)func & ((uintptr)-1 << 12)) + imm20 + imm12) + 1; + + // ldr + offset + def = *op; + uint32_t offset = ((*op & 0x3FFC00) >> 10) * ((*op & 0x40000000) != 0 ? 8 : 4); + return offset / sizeof(void*); + } + else if ((*op & 0xBFC00000) == 0xB9400000) + { + // ldr + offset + uint32_t offset = ((*op & 0x3FFC00) >> 10) * ((*op & 0x40000000) != 0 ? 
8 : 4); + op++; + + // ldr + offset + def = *op; + if ((*op & 0xBFE00C00) == 0xB8400400) + { + // offset is stored in the register as is + uint32_t postindex = (*op & 0x1FF000) >> 12; + offset = postindex; + return offset / sizeof(void*); + } + else if ((*op & 0xBFE00C00) == 0xB8400C00) + { + // offset is added to the value in base register... updated to the same register + uint32_t preindex = (*op & 0x1FF000) >> 12; + offset += preindex; + return offset / sizeof(void*); + } + else if ((*op & 0xBFC00000) == 0xB9400000) + { + // 20-bit offset + offset = ((*op & 0x3FFC00) >> 10) * ((*op & 0x40000000) != 0 ? 8 : 4); + return offset / sizeof(void*); + } + CRASH; + } #elif defined(__clang__) // On Clang member function pointer represents the offset from the vtable begin. return (int32)(intptr)func / sizeof(void*); diff --git a/Source/FlaxEditor.Build.cs b/Source/FlaxEditor.Build.cs index b6074e264..e77fcaade 100644 --- a/Source/FlaxEditor.Build.cs +++ b/Source/FlaxEditor.Build.cs @@ -56,6 +56,9 @@ public class FlaxEditor : EngineTarget case TargetArchitecture.x86: options.OutputFolder = Path.Combine(options.WorkingDirectory, "Binaries", "Editor", "Win32", options.Configuration.ToString()); break; + case TargetArchitecture.ARM64: + options.OutputFolder = Path.Combine(options.WorkingDirectory, "Binaries", "Editor", "ARM64", options.Configuration.ToString()); + break; default: throw new InvalidArchitectureException(options.Architecture, "Not supported Editor architecture."); } break; diff --git a/Source/Tools/Flax.Build/Build/Platform.cs b/Source/Tools/Flax.Build/Build/Platform.cs index 5fe01a93c..526dd2c54 100644 --- a/Source/Tools/Flax.Build/Build/Platform.cs +++ b/Source/Tools/Flax.Build/Build/Platform.cs @@ -286,11 +286,24 @@ namespace Flax.Build var subdir = "Binaries/Editor/"; switch (Platform.BuildTargetPlatform) { - case TargetPlatform.Windows: return subdir + "Win64"; + case TargetPlatform.Windows: + { + switch (Platform.BuildTargetArchitecture) + { + case 
TargetArchitecture.x64: + return subdir + "Win64"; + case TargetArchitecture.x86: + return subdir + "Win32"; + case TargetArchitecture.ARM64: + return subdir + "ARM64"; + default: + throw new NotImplementedException($"{Platform.BuildTargetPlatform}: {Platform.BuildTargetArchitecture}"); + } + } case TargetPlatform.Linux: return subdir + "Linux"; case TargetPlatform.Mac: return subdir + "Mac"; } - throw new NotImplementedException(); + throw new NotImplementedException(Platform.BuildTargetPlatform.ToString()); } /// @@ -306,7 +319,7 @@ namespace Flax.Build switch (targetPlatform) { - case TargetPlatform.Windows: return targetArchitecture == TargetArchitecture.x64 || targetArchitecture == TargetArchitecture.x86; + case TargetPlatform.Windows: return targetArchitecture == TargetArchitecture.x64 || targetArchitecture == TargetArchitecture.x86 || targetArchitecture == TargetArchitecture.ARM64; case TargetPlatform.XboxScarlett: return targetArchitecture == TargetArchitecture.x64; case TargetPlatform.XboxOne: return targetArchitecture == TargetArchitecture.x64; case TargetPlatform.UWP: return targetArchitecture == TargetArchitecture.x64; diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs index 9d9a6913b..8fd361700 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs @@ -82,7 +82,7 @@ namespace Flax.Build.Platforms var outputType = project.OutputType ?? configuration.Target.OutputType; if (outputType != TargetOutputType.Executable && configuration.Name.StartsWith("Editor.")) { - var editorFolder = configuration.Architecture == TargetArchitecture.x64 ? "Win64" : "Win32"; + var editorFolder = configuration.Architecture == TargetArchitecture.x64 ? "Win64" : (configuration.Architecture == TargetArchitecture.ARM64 ? 
"ARM64" : "Win32"); vcUserFileContent.AppendLine(string.Format(" ", configuration.Name)); vcUserFileContent.AppendLine(string.Format(" {0}\\FlaxEditor.exe", Path.Combine(Globals.EngineRoot, "Binaries", "Editor", editorFolder, configuration.ConfigurationName))); vcUserFileContent.AppendLine(" -project \"$(SolutionDir)\" -skipCompile"); @@ -108,6 +108,9 @@ namespace Flax.Build.Platforms case TargetArchitecture.x64: name = "Win64"; break; + case TargetArchitecture.ARM64: + name = "ARM64"; + break; } } } diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs index 121a7b367..3f74761c3 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs @@ -74,6 +74,12 @@ namespace Flax.Build.Platforms options.LinkEnv.InputLibraries.Add("ole32.lib"); options.LinkEnv.InputLibraries.Add("oleaut32.lib"); options.LinkEnv.InputLibraries.Add("delayimp.lib"); + + if (options.Architecture == TargetArchitecture.ARM64) + { + options.CompileEnv.PreprocessorDefinitions.Add("USE_SOFT_INTRINSICS"); + options.LinkEnv.InputLibraries.Add("softintrin.lib"); + } } /// diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs index 252d9134d..8d5afbeae 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs @@ -405,7 +405,7 @@ namespace Flax.Build.Platforms options.CompileEnv.PreprocessorDefinitions.Add("_CRT_SECURE_NO_DEPRECATE"); options.CompileEnv.PreprocessorDefinitions.Add("_CRT_SECURE_NO_WARNINGS"); options.CompileEnv.PreprocessorDefinitions.Add("_WINDOWS"); - if (Architecture == TargetArchitecture.x64) + if (Architecture == TargetArchitecture.x64 || Architecture == TargetArchitecture.ARM64) options.CompileEnv.PreprocessorDefinitions.Add("WIN64"); 
} @@ -790,9 +790,11 @@ namespace Flax.Build.Platforms args.Add("/MACHINE:x64"); break; case TargetArchitecture.ARM: - case TargetArchitecture.ARM64: args.Add("/MACHINE:ARM"); break; + case TargetArchitecture.ARM64: + args.Add("/MACHINE:ARM64"); + break; default: throw new InvalidArchitectureException(Architecture); } diff --git a/Source/Tools/Flax.Build/Projects/VisualStudio/VCProjectGenerator.cs b/Source/Tools/Flax.Build/Projects/VisualStudio/VCProjectGenerator.cs index 13c728b30..fabae2b86 100644 --- a/Source/Tools/Flax.Build/Projects/VisualStudio/VCProjectGenerator.cs +++ b/Source/Tools/Flax.Build/Projects/VisualStudio/VCProjectGenerator.cs @@ -199,6 +199,26 @@ namespace Flax.Build.Projects.VisualStudio if (includePaths.Count != 0) vcProjectFileContent.AppendLine(string.Format(" $(NMakeIncludeSearchPath);{0}", string.Join(";", includePaths))); + var additionalOptions = new List(); + additionalOptions.Add("$(AdditionalOptions)"); + switch (configuration.TargetBuildOptions.CompileEnv.CppVersion) + { + case CppVersion.Cpp14: + additionalOptions.Add("/std:c++14"); + break; + case CppVersion.Cpp17: + additionalOptions.Add("/std:c++17"); + break; + case CppVersion.Cpp20: + additionalOptions.Add("/std:c++20"); + break; + case CppVersion.Latest: + additionalOptions.Add("/std:c++latest"); + break; + } + + vcProjectFileContent.AppendLine(string.Format(" {0}", string.Join(" ", additionalOptions))); + vcProjectFileContent.AppendLine(" "); } @@ -326,34 +346,18 @@ namespace Flax.Build.Projects.VisualStudio vcProjectFileContent.AppendLine(" "); vcFiltersFileContent.AppendLine(" "); - // IntelliSense information - - var additionalOptions = new List(); - switch (project.Configurations[0].TargetBuildOptions.CompileEnv.CppVersion) { - case CppVersion.Cpp14: - additionalOptions.Add("/std:c++14"); - break; - case CppVersion.Cpp17: - additionalOptions.Add("/std:c++17"); - break; - case CppVersion.Cpp20: - additionalOptions.Add("/std:c++20"); - break; - case CppVersion.Latest: - 
additionalOptions.Add("/std:c++latest"); - break; + // IntelliSense information + vcProjectFileContent.AppendLine(" "); + vcProjectFileContent.AppendLine(string.Format(" $(NMakePreprocessorDefinitions){0}", (project.Defines.Count > 0 ? (";" + string.Join(";", project.Defines)) : ""))); + vcProjectFileContent.AppendLine(string.Format(" $(NMakeIncludeSearchPath){0}", (project.SearchPaths.Length > 0 ? (";" + string.Join(";", project.SearchPaths)) : ""))); + vcProjectFileContent.AppendLine(" $(NMakeForcedIncludes)"); + vcProjectFileContent.AppendLine(" $(NMakeAssemblySearchPath)"); + vcProjectFileContent.AppendLine(" $(NMakeForcedUsingAssemblies)"); + vcProjectFileContent.AppendLine(" $(AdditionalOptions)"); + vcProjectFileContent.AppendLine(" "); } - vcProjectFileContent.AppendLine(" "); - vcProjectFileContent.AppendLine(string.Format(" $(NMakePreprocessorDefinitions){0}", (project.Defines.Count > 0 ? (";" + string.Join(";", project.Defines)) : ""))); - vcProjectFileContent.AppendLine(string.Format(" $(NMakeIncludeSearchPath){0}", (project.SearchPaths.Length > 0 ? 
(";" + string.Join(";", project.SearchPaths)) : ""))); - vcProjectFileContent.AppendLine(" $(NMakeForcedIncludes)"); - vcProjectFileContent.AppendLine(" $(NMakeAssemblySearchPath)"); - vcProjectFileContent.AppendLine(" $(NMakeForcedUsingAssemblies)"); - vcProjectFileContent.AppendLine(string.Format(" {0}", string.Join(" ", additionalOptions))); - vcProjectFileContent.AppendLine(" "); - foreach (var platform in platforms) { if (platform is IVisualStudioProjectCustomizer customizer) From ac6154e94d4abf4a3e2f7afaf8a3894e8284857b Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:08:39 +0300 Subject: [PATCH 109/292] Support using native host MSVC binaries on ARM64 --- .../Flax.Build/Platforms/GDK/GDKToolchain.cs | 2 +- .../Platforms/Windows/WindowsPlatformBase.cs | 133 +++++++----------- .../Platforms/Windows/WindowsToolchainBase.cs | 34 ++--- 3 files changed, 68 insertions(+), 101 deletions(-) diff --git a/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs b/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs index 8e56950e7..c8e0dff11 100644 --- a/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs @@ -47,7 +47,7 @@ namespace Flax.Build.Platforms options.LinkEnv.InputLibraries.Add($"Microsoft.Xbox.Services.{(int)xboxServicesToolset}.GDK.C.lib"); var toolsetPath = WindowsPlatformBase.GetToolsets()[Toolset]; - var toolsPath = WindowsPlatformBase.GetVCToolPath64(Toolset); + var toolsPath = WindowsPlatformBase.GetVCToolPath(Toolset, TargetArchitecture.x64, Architecture); if (options.CompileEnv.UseDebugCRT) throw new Exception("Don't use debug CRT on GDK."); var name = Path.GetFileName(toolsetPath); diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs index f038b1e6e..d2b490704 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs +++ 
b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs @@ -409,101 +409,68 @@ namespace Flax.Build.Platforms } /// - /// Gets the path to the 32-bit tool binaries. + /// Gets the path to the VC++ tool binaries for specified host and target architectures. /// /// The version of the toolset to use. - /// The directory containing the 64-bit toolchain binaries. - public static string GetVCToolPath32(WindowsPlatformToolset toolset) + /// The host architecture for native binaries. + /// The target architecture to build for. + /// The directory containing the toolchain binaries. + public static string GetVCToolPath(WindowsPlatformToolset toolset, TargetArchitecture hostArchitecture, TargetArchitecture architecture) { + if (architecture == TargetArchitecture.AnyCPU) + architecture = hostArchitecture; var toolsets = GetToolsets(); var vcToolChainDir = toolsets[toolset]; switch (toolset) { - case WindowsPlatformToolset.v140: - { - string compilerPath = Path.Combine(vcToolChainDir, "bin", "cl.exe"); - if (File.Exists(compilerPath)) + case WindowsPlatformToolset.v140: { - return Path.GetDirectoryName(compilerPath); - } + if (hostArchitecture != TargetArchitecture.x86) + { + string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", "amd64", "cl.exe"); + if (File.Exists(nativeCompilerPath)) + { + return Path.GetDirectoryName(nativeCompilerPath); + } - throw new Exception(string.Format("No 32-bit compiler toolchain found in {0}", compilerPath)); - } - case WindowsPlatformToolset.v141: - case WindowsPlatformToolset.v142: - case WindowsPlatformToolset.v143: - case WindowsPlatformToolset.v144: - { - /* - string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", "HostX64", "x86", "cl.exe"); - if (File.Exists(crossCompilerPath)) + string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", "x86_amd64", "cl.exe"); + if (File.Exists(crossCompilerPath)) + { + return Path.GetDirectoryName(crossCompilerPath); + } + throw new Exception(string.Format("No {0} host 
compiler toolchain found in {1} or {2}", hostArchitecture.ToString(), nativeCompilerPath, crossCompilerPath)); + } + else + { + string compilerPath = Path.Combine(vcToolChainDir, "bin", "cl.exe"); + if (File.Exists(compilerPath)) + { + return Path.GetDirectoryName(compilerPath); + } + throw new Exception(string.Format("No {0} host compiler toolchain found in {1}", hostArchitecture.ToString())); + } + } + case WindowsPlatformToolset.v141: + case WindowsPlatformToolset.v142: + case WindowsPlatformToolset.v143: + case WindowsPlatformToolset.v144: { - return Path.GetDirectoryName(crossCompilerPath); + string hostFolder = hostArchitecture == TargetArchitecture.x86 ? "HostX86" : $"Host{hostArchitecture.ToString().ToLower()}"; + string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", hostFolder, architecture.ToString().ToLower(), "cl.exe"); + if (File.Exists(nativeCompilerPath)) + { + return Path.GetDirectoryName(nativeCompilerPath); + } + + string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", hostFolder, architecture.ToString().ToLower(), "cl.exe"); + if (File.Exists(crossCompilerPath)) + { + return Path.GetDirectoryName(crossCompilerPath); + } + throw new Exception(string.Format("No {0} host compiler toolchain found in {1} or {2}", hostArchitecture.ToString(), nativeCompilerPath, crossCompilerPath)); } - */ - - string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", "HostX86", "x86", "cl.exe"); - if (File.Exists(nativeCompilerPath)) - { - return Path.GetDirectoryName(nativeCompilerPath); - } - - //throw new Exception(string.Format("No 32-bit compiler toolchain found in {0} or {1}", crossCompilerPath, nativeCompilerPath)); - throw new Exception(string.Format("No 32-bit compiler toolchain found in {0}", nativeCompilerPath)); - } - default: throw new ArgumentOutOfRangeException(nameof(toolset), toolset, null); - } - } - - /// - /// Gets the path to the 64-bit tool binaries. - /// - /// The version of the toolset to use. 
- /// The directory containing the 64-bit toolchain binaries. - public static string GetVCToolPath64(WindowsPlatformToolset toolset) - { - var toolsets = GetToolsets(); - var vcToolChainDir = toolsets[toolset]; - - switch (toolset) - { - case WindowsPlatformToolset.v140: - { - string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", "amd64", "cl.exe"); - if (File.Exists(nativeCompilerPath)) - { - return Path.GetDirectoryName(nativeCompilerPath); - } - - string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", "x86_amd64", "cl.exe"); - if (File.Exists(crossCompilerPath)) - { - return Path.GetDirectoryName(crossCompilerPath); - } - - throw new Exception(string.Format("No 64-bit compiler toolchain found in {0} or {1}", nativeCompilerPath, crossCompilerPath)); - } - case WindowsPlatformToolset.v141: - case WindowsPlatformToolset.v142: - case WindowsPlatformToolset.v143: - case WindowsPlatformToolset.v144: - { - string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", "HostX64", "x64", "cl.exe"); - if (File.Exists(nativeCompilerPath)) - { - return Path.GetDirectoryName(nativeCompilerPath); - } - - string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", "HostX86", "x64", "cl.exe"); - if (File.Exists(crossCompilerPath)) - { - return Path.GetDirectoryName(crossCompilerPath); - } - - throw new Exception(string.Format("No 64-bit compiler toolchain found in {0} or {1}", nativeCompilerPath, crossCompilerPath)); - } - default: throw new ArgumentOutOfRangeException(nameof(toolset), toolset, null); + default: throw new ArgumentOutOfRangeException(nameof(toolset), toolset, null); } } diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs index 8d5afbeae..6df1c38eb 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs @@ -4,6 +4,7 @@ using System; using 
System.Collections.Generic; using System.IO; using System.Linq; +using System.Runtime.InteropServices; using System.Text; using System.Xml; using Flax.Build.Graph; @@ -133,16 +134,12 @@ namespace Flax.Build.Platforms throw new Exception(string.Format("Missing SDK {0} for platform Windows", SDK)); // Get the tools paths - string vcToolPath; - if (Architecture == TargetArchitecture.x64) - vcToolPath = WindowsPlatformBase.GetVCToolPath64(Toolset); - else - vcToolPath = WindowsPlatformBase.GetVCToolPath32(Toolset); - _vcToolPath = vcToolPath; - _compilerPath = Path.Combine(vcToolPath, "cl.exe"); - _linkerPath = Path.Combine(vcToolPath, "link.exe"); - _libToolPath = Path.Combine(vcToolPath, "lib.exe"); - _xdcmakePath = Path.Combine(vcToolPath, "xdcmake.exe"); + var hostArchitecture = Platform.BuildTargetArchitecture; + _vcToolPath = WindowsPlatformBase.GetVCToolPath(Toolset, hostArchitecture, Architecture); + _compilerPath = Path.Combine(_vcToolPath, "cl.exe"); + _linkerPath = Path.Combine(_vcToolPath, "link.exe"); + _libToolPath = Path.Combine(_vcToolPath, "lib.exe"); + _xdcmakePath = Path.Combine(_vcToolPath, "xdcmake.exe"); // Add Visual C++ toolset include and library paths var vcToolChainDir = toolsets[Toolset]; @@ -166,7 +163,7 @@ namespace Flax.Build.Platforms case TargetArchitecture.x64: SystemLibraryPaths.Add(Path.Combine(vcToolChainDir, "lib", "amd64")); break; - default: throw new InvalidArchitectureException(architecture); + default: throw new InvalidArchitectureException(Architecture); } // When using Visual Studio 2015 toolset and using pre-Windows 10 SDK, find a Windows 10 SDK and add the UCRT include paths @@ -198,7 +195,7 @@ namespace Flax.Build.Platforms case TargetArchitecture.x64: SystemLibraryPaths.Add(Path.Combine(libraryRootDir, "ucrt", "x64")); break; - default: throw new InvalidArchitectureException(architecture); + default: throw new InvalidArchitectureException(Architecture); } } break; @@ -223,7 +220,7 @@ namespace Flax.Build.Platforms case 
TargetArchitecture.x64: SystemLibraryPaths.Add(Path.Combine(vcToolChainDir, "lib", "x64")); break; - default: throw new InvalidArchitectureException(architecture); + default: throw new InvalidArchitectureException(Architecture); } break; } @@ -274,7 +271,7 @@ namespace Flax.Build.Platforms _makepriPath = Path.Combine(binRootDir, "makepri.exe"); break; } - default: throw new InvalidArchitectureException(architecture); + default: throw new InvalidArchitectureException(Architecture); } break; } @@ -312,13 +309,16 @@ namespace Flax.Build.Platforms { SystemLibraryPaths.Add(Path.Combine(libraryRootDir, "ucrt", "arm64")); SystemLibraryPaths.Add(Path.Combine(libraryRootDir, "um", "arm64")); + var binRootDir = Path.Combine(windowsSdkDir, "bin", sdkVersionName, hostArchitecture.ToString().ToLower()); + _resourceCompilerPath = Path.Combine(binRootDir, "rc.exe"); + _makepriPath = Path.Combine(binRootDir, "makepri.exe"); break; } case TargetArchitecture.x86: { SystemLibraryPaths.Add(Path.Combine(libraryRootDir, "ucrt", "x86")); SystemLibraryPaths.Add(Path.Combine(libraryRootDir, "um", "x86")); - var binRootDir = Path.Combine(windowsSdkDir, "bin", sdkVersionName, "x86"); + var binRootDir = Path.Combine(windowsSdkDir, "bin", sdkVersionName, hostArchitecture.ToString().ToLower()); _resourceCompilerPath = Path.Combine(binRootDir, "rc.exe"); _makepriPath = Path.Combine(binRootDir, "makepri.exe"); break; @@ -327,12 +327,12 @@ namespace Flax.Build.Platforms { SystemLibraryPaths.Add(Path.Combine(libraryRootDir, "ucrt", "x64")); SystemLibraryPaths.Add(Path.Combine(libraryRootDir, "um", "x64")); - var binRootDir = Path.Combine(windowsSdkDir, "bin", sdkVersionName, "x64"); + var binRootDir = Path.Combine(windowsSdkDir, "bin", sdkVersionName, hostArchitecture.ToString().ToLower()); _resourceCompilerPath = Path.Combine(binRootDir, "rc.exe"); _makepriPath = Path.Combine(binRootDir, "makepri.exe"); break; } - default: throw new InvalidArchitectureException(architecture); + default: throw new 
InvalidArchitectureException(Architecture); } break; } From 2f48521ce7527cdd3bb06b0dc65ce5ebf2a57cc6 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:09:27 +0300 Subject: [PATCH 110/292] Fallback to D3D11 devices without debug layers when unavailable --- .../DirectX/DX11/GPUDeviceDX11.cpp | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index 2d90b50ae..59f42f57e 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -71,7 +71,28 @@ static bool TryCreateDevice(IDXGIAdapter* adapter, D3D_FEATURE_LEVEL maxFeatureL context->Release(); return true; } - +#if GPU_ENABLE_DIAGNOSTICS + deviceFlags &= ~D3D11_CREATE_DEVICE_DEBUG; + if (SUCCEEDED(D3D11CreateDevice( + adapter, + D3D_DRIVER_TYPE_UNKNOWN, + NULL, + deviceFlags, + &featureLevels[levelIndex], + ARRAY_COUNT(featureLevels) - levelIndex, + D3D11_SDK_VERSION, + &device, + featureLevel, + &context + ))) + { + LOG(Warning, "Direct3D SDK debug layers were requested, but not available."); + device->Release(); + context->Release(); + return true; + } +#endif + return false; } From 766910c0ae2ff41a79c9be1c86e3773f97441093 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:14:22 +0300 Subject: [PATCH 111/292] Fix rebuilding dependencies using Git with existing local folders --- Source/Tools/Flax.Build/Deps/Dependency.cs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Source/Tools/Flax.Build/Deps/Dependency.cs b/Source/Tools/Flax.Build/Deps/Dependency.cs index c99dd0ee4..8d9d54c74 100644 --- a/Source/Tools/Flax.Build/Deps/Dependency.cs +++ b/Source/Tools/Flax.Build/Deps/Dependency.cs @@ -128,7 +128,7 @@ namespace Flax.Deps /// True if initialize submodules of the repository (recursive). 
public static void CloneGitRepo(string path, string url, string commit = null, string args = null, bool submodules = false) { - if (!Directory.Exists(Path.Combine(path, Path.GetFileNameWithoutExtension(url), ".git"))) + if (!Directory.Exists(Path.Combine(path, ".git"))) { string cmdLine = string.Format("clone \"{0}\" \"{1}\"", url, path); if (args != null) @@ -136,12 +136,12 @@ namespace Flax.Deps if (submodules) cmdLine += " --recurse-submodules"; - Utilities.Run("git", cmdLine, null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", cmdLine, null, path, Utilities.RunOptions.DefaultTool); if (submodules) - Utilities.Run("git", "submodule update --init --recursive", null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", "submodule update --init --recursive", null, path, Utilities.RunOptions.DefaultTool); } if (commit != null) - Utilities.Run("git", string.Format("reset --hard {0}", commit), null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", string.Format("reset --hard {0}", commit), null, path, Utilities.RunOptions.DefaultTool); } /// @@ -153,7 +153,7 @@ namespace Flax.Deps /// True if initialize submodules of the repository (recursive). 
public static void CloneGitRepoFast(string path, string url, string args = null, bool submodules = false) { - if (!Directory.Exists(Path.Combine(path, Path.GetFileNameWithoutExtension(url), ".git"))) + if (!Directory.Exists(Path.Combine(path, ".git"))) { string cmdLine = string.Format("clone \"{0}\" \"{1}\" --depth 1", url, path); if (args != null) @@ -161,9 +161,9 @@ namespace Flax.Deps if (submodules) cmdLine += " --recurse-submodules"; - Utilities.Run("git", cmdLine, null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", cmdLine, null, path, Utilities.RunOptions.DefaultTool); if (submodules) - Utilities.Run("git", "submodule update --init --recursive", null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", "submodule update --init --recursive", null, path, Utilities.RunOptions.DefaultTool); } } @@ -188,9 +188,9 @@ namespace Flax.Deps if (submodules) cmdLine += " --recurse-submodules"; - Utilities.Run("git", cmdLine, null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", cmdLine, null, path, Utilities.RunOptions.DefaultTool); if (submodules) - Utilities.Run("git", "submodule update --init --recursive", null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", "submodule update --init --recursive", null, path, Utilities.RunOptions.DefaultTool); } if (commit != null) @@ -217,7 +217,7 @@ namespace Flax.Deps Utilities.Run("git", cmdLine, null, path, Utilities.RunOptions.DefaultTool); if (submodules) - Utilities.Run("git", "submodule update --init --recursive", null, null, Utilities.RunOptions.DefaultTool); + Utilities.Run("git", "submodule update --init --recursive", null, path, Utilities.RunOptions.DefaultTool); if (commit != null) { From 0862362ebde67a5365ffa5a1970508b0e4a1c139 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:19:35 +0300 Subject: [PATCH 112/292] Update Freetype to 2.13.2 --- Source/ThirdParty/freetype/FTL.TXT | 169 ++ Source/ThirdParty/freetype/LICENSE.TXT | 57 +- 
Source/ThirdParty/freetype/config/ftconfig.h | 532 +----- Source/ThirdParty/freetype/config/ftheader.h | 76 +- Source/ThirdParty/freetype/config/ftmodule.h | 9 +- Source/ThirdParty/freetype/config/ftoption.h | 184 +- Source/ThirdParty/freetype/config/ftstdlib.h | 28 +- .../freetype/config/integer-types.h | 250 +++ .../ThirdParty/freetype/config/mac-support.h | 49 + .../freetype/config/public-macros.h | 138 ++ Source/ThirdParty/freetype/freetype.h | 1573 +++++++++++------ Source/ThirdParty/freetype/ft2build.h | 10 +- Source/ThirdParty/freetype/ftadvanc.h | 6 +- Source/ThirdParty/freetype/ftbbox.h | 5 +- Source/ThirdParty/freetype/ftbdf.h | 5 +- Source/ThirdParty/freetype/ftbitmap.h | 7 +- Source/ThirdParty/freetype/ftbzip2.h | 24 +- Source/ThirdParty/freetype/ftcache.h | 81 +- Source/ThirdParty/freetype/ftchapters.h | 27 +- Source/ThirdParty/freetype/ftcid.h | 5 +- Source/ThirdParty/freetype/ftcolor.h | 1374 +++++++++++++- Source/ThirdParty/freetype/ftdriver.h | 166 +- Source/ThirdParty/freetype/fterrdef.h | 6 +- Source/ThirdParty/freetype/fterrors.h | 31 +- Source/ThirdParty/freetype/ftfntfmt.h | 5 +- Source/ThirdParty/freetype/ftgasp.h | 5 +- Source/ThirdParty/freetype/ftglyph.h | 131 +- Source/ThirdParty/freetype/ftgxval.h | 5 +- Source/ThirdParty/freetype/ftgzip.h | 24 +- Source/ThirdParty/freetype/ftimage.h | 180 +- Source/ThirdParty/freetype/ftincrem.h | 22 +- Source/ThirdParty/freetype/ftlcdfil.h | 59 +- Source/ThirdParty/freetype/ftlist.h | 5 +- Source/ThirdParty/freetype/ftlogging.h | 184 ++ Source/ThirdParty/freetype/ftlzw.h | 24 +- Source/ThirdParty/freetype/ftmac.h | 3 +- Source/ThirdParty/freetype/ftmm.h | 74 +- Source/ThirdParty/freetype/ftmodapi.h | 42 +- Source/ThirdParty/freetype/ftmoderr.h | 5 +- Source/ThirdParty/freetype/ftotval.h | 5 +- Source/ThirdParty/freetype/ftoutln.h | 35 +- Source/ThirdParty/freetype/ftparams.h | 20 +- Source/ThirdParty/freetype/ftpfr.h | 7 +- Source/ThirdParty/freetype/ftrender.h | 9 +- 
Source/ThirdParty/freetype/ftsizes.h | 5 +- Source/ThirdParty/freetype/ftsnames.h | 7 +- Source/ThirdParty/freetype/ftstroke.h | 43 +- Source/ThirdParty/freetype/ftsynth.h | 28 +- Source/ThirdParty/freetype/ftsystem.h | 13 +- Source/ThirdParty/freetype/fttrigon.h | 4 +- Source/ThirdParty/freetype/fttypes.h | 28 +- Source/ThirdParty/freetype/ftwinfnt.h | 9 +- .../ThirdParty/freetype/internal/autohint.h | 8 +- .../ThirdParty/freetype/internal/cffotypes.h | 13 +- .../ThirdParty/freetype/internal/cfftypes.h | 17 +- .../freetype/internal/compiler-macros.h | 343 ++++ Source/ThirdParty/freetype/internal/ftcalc.h | 105 +- Source/ThirdParty/freetype/internal/ftdebug.h | 179 +- Source/ThirdParty/freetype/internal/ftdrv.h | 7 +- .../ThirdParty/freetype/internal/ftgloadr.h | 8 +- Source/ThirdParty/freetype/internal/fthash.h | 3 +- .../ThirdParty/freetype/internal/ftmemory.h | 46 +- .../ThirdParty/freetype/internal/ftmmtypes.h | 91 + Source/ThirdParty/freetype/internal/ftobjs.h | 44 +- .../ThirdParty/freetype/internal/ftpsprop.h | 5 +- Source/ThirdParty/freetype/internal/ftrfork.h | 5 +- Source/ThirdParty/freetype/internal/ftserv.h | 30 +- .../ThirdParty/freetype/internal/ftstream.h | 151 +- Source/ThirdParty/freetype/internal/fttrace.h | 30 +- Source/ThirdParty/freetype/internal/ftvalid.h | 5 +- .../ThirdParty/freetype/internal/internal.h | 66 - Source/ThirdParty/freetype/internal/psaux.h | 29 +- Source/ThirdParty/freetype/internal/pshints.h | 11 +- .../freetype/internal/services/svbdf.h | 6 +- .../freetype/internal/services/svcfftl.h | 6 +- .../freetype/internal/services/svcid.h | 4 +- .../freetype/internal/services/svfntfmt.h | 4 +- .../freetype/internal/services/svgldict.h | 8 +- .../freetype/internal/services/svgxval.h | 6 +- .../freetype/internal/services/svkern.h | 6 +- .../freetype/internal/services/svmetric.h | 14 +- .../freetype/internal/services/svmm.h | 160 +- .../freetype/internal/services/svotval.h | 6 +- .../freetype/internal/services/svpfr.h | 7 +- 
.../freetype/internal/services/svpostnm.h | 4 +- .../freetype/internal/services/svprop.h | 2 +- .../freetype/internal/services/svpscmap.h | 6 +- .../freetype/internal/services/svpsinfo.h | 6 +- .../freetype/internal/services/svsfnt.h | 6 +- .../freetype/internal/services/svttcmap.h | 6 +- .../freetype/internal/services/svtteng.h | 6 +- .../freetype/internal/services/svttglyf.h | 6 +- .../freetype/internal/services/svwinfnt.h | 6 +- Source/ThirdParty/freetype/internal/sfnt.h | 301 +++- .../freetype/internal/svginterface.h | 46 + Source/ThirdParty/freetype/internal/t1types.h | 47 +- Source/ThirdParty/freetype/internal/tttypes.h | 249 +-- .../ThirdParty/freetype/internal/wofftypes.h | 312 ++++ Source/ThirdParty/freetype/otsvg.h | 336 ++++ Source/ThirdParty/freetype/t1tables.h | 179 +- Source/ThirdParty/freetype/ttnameid.h | 5 +- Source/ThirdParty/freetype/tttables.h | 9 +- Source/ThirdParty/freetype/tttags.h | 7 +- .../Flax.Build/Deps/Dependencies/freetype.cs | 54 +- 104 files changed, 6357 insertions(+), 2452 deletions(-) create mode 100644 Source/ThirdParty/freetype/FTL.TXT create mode 100644 Source/ThirdParty/freetype/config/integer-types.h create mode 100644 Source/ThirdParty/freetype/config/mac-support.h create mode 100644 Source/ThirdParty/freetype/config/public-macros.h create mode 100644 Source/ThirdParty/freetype/ftlogging.h create mode 100644 Source/ThirdParty/freetype/internal/compiler-macros.h create mode 100644 Source/ThirdParty/freetype/internal/ftmmtypes.h delete mode 100644 Source/ThirdParty/freetype/internal/internal.h create mode 100644 Source/ThirdParty/freetype/internal/svginterface.h create mode 100644 Source/ThirdParty/freetype/internal/wofftypes.h create mode 100644 Source/ThirdParty/freetype/otsvg.h diff --git a/Source/ThirdParty/freetype/FTL.TXT b/Source/ThirdParty/freetype/FTL.TXT new file mode 100644 index 000000000..c406d150f --- /dev/null +++ b/Source/ThirdParty/freetype/FTL.TXT @@ -0,0 +1,169 @@ + The FreeType Project LICENSE + 
---------------------------- + + 2006-Jan-27 + + Copyright 1996-2002, 2006 by + David Turner, Robert Wilhelm, and Werner Lemberg + + + +Introduction +============ + + The FreeType Project is distributed in several archive packages; + some of them may contain, in addition to the FreeType font engine, + various tools and contributions which rely on, or relate to, the + FreeType Project. + + This license applies to all files found in such packages, and + which do not fall under their own explicit license. The license + affects thus the FreeType font engine, the test programs, + documentation and makefiles, at the very least. + + This license was inspired by the BSD, Artistic, and IJG + (Independent JPEG Group) licenses, which all encourage inclusion + and use of free software in commercial and freeware products + alike. As a consequence, its main points are that: + + o We don't promise that this software works. However, we will be + interested in any kind of bug reports. (`as is' distribution) + + o You can use this software for whatever you want, in parts or + full form, without having to pay us. (`royalty-free' usage) + + o You may not pretend that you wrote this software. If you use + it, or only parts of it, in a program, you must acknowledge + somewhere in your documentation that you have used the + FreeType code. (`credits') + + We specifically permit and encourage the inclusion of this + software, with or without modifications, in commercial products. + We disclaim all warranties covering The FreeType Project and + assume no liability related to The FreeType Project. + + + Finally, many people asked us for a preferred form for a + credit/disclaimer to use in compliance with this license. We thus + encourage you to use the following text: + + """ + Portions of this software are copyright © <year> The FreeType + Project (www.freetype.org). All rights reserved. + """ + + Please replace <year> with the value from the FreeType version you + actually use.
+ + +Legal Terms +=========== + +0. Definitions +-------------- + + Throughout this license, the terms `package', `FreeType Project', + and `FreeType archive' refer to the set of files originally + distributed by the authors (David Turner, Robert Wilhelm, and + Werner Lemberg) as the `FreeType Project', be they named as alpha, + beta or final release. + + `You' refers to the licensee, or person using the project, where + `using' is a generic term including compiling the project's source + code as well as linking it to form a `program' or `executable'. + This program is referred to as `a program using the FreeType + engine'. + + This license applies to all files distributed in the original + FreeType Project, including all source code, binaries and + documentation, unless otherwise stated in the file in its + original, unmodified form as distributed in the original archive. + If you are unsure whether or not a particular file is covered by + this license, you must contact us to verify this. + + The FreeType Project is copyright (C) 1996-2000 by David Turner, + Robert Wilhelm, and Werner Lemberg. All rights reserved except as + specified below. + +1. No Warranty +-------------- + + THE FREETYPE PROJECT IS PROVIDED `AS IS' WITHOUT WARRANTY OF ANY + KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE. IN NO EVENT WILL ANY OF THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY DAMAGES CAUSED BY THE USE OR THE INABILITY TO + USE, OF THE FREETYPE PROJECT. + +2. 
Redistribution +----------------- + + This license grants a worldwide, royalty-free, perpetual and + irrevocable right and license to use, execute, perform, compile, + display, copy, create derivative works of, distribute and + sublicense the FreeType Project (in both source and object code + forms) and derivative works thereof for any purpose; and to + authorize others to exercise some or all of the rights granted + herein, subject to the following conditions: + + o Redistribution of source code must retain this license file + (`FTL.TXT') unaltered; any additions, deletions or changes to + the original files must be clearly indicated in accompanying + documentation. The copyright notices of the unaltered, + original files must be preserved in all copies of source + files. + + o Redistribution in binary form must provide a disclaimer that + states that the software is based in part of the work of the + FreeType Team, in the distribution documentation. We also + encourage you to put an URL to the FreeType web page in your + documentation, though this isn't mandatory. + + These conditions apply to any software derived from or based on + the FreeType Project, not just the unmodified files. If you use + our work, you must acknowledge us. However, no fee need be paid + to us. + +3. Advertising +-------------- + + Neither the FreeType authors and contributors nor you shall use + the name of the other for commercial, advertising, or promotional + purposes without specific prior written permission. + + We suggest, but do not require, that you use one or more of the + following phrases to refer to this software in your documentation + or advertising materials: `FreeType Project', `FreeType Engine', + `FreeType library', or `FreeType Distribution'. + + As you have not signed this license, you are not required to + accept it. 
However, as the FreeType Project is copyrighted + material, only this license, or another one contracted with the + authors, grants you the right to use, distribute, and modify it. + Therefore, by using, distributing, or modifying the FreeType + Project, you indicate that you understand and accept all the terms + of this license. + +4. Contacts +----------- + + There are two mailing lists related to FreeType: + + o freetype@nongnu.org + + Discusses general use and applications of FreeType, as well as + future and wanted additions to the library and distribution. + If you are looking for support, start in this list if you + haven't found anything to help you in the documentation. + + o freetype-devel@nongnu.org + + Discusses bugs, as well as engine internals, design issues, + specific licenses, porting, etc. + + Our home page can be found at + + https://www.freetype.org + + +--- end of FTL.TXT --- diff --git a/Source/ThirdParty/freetype/LICENSE.TXT b/Source/ThirdParty/freetype/LICENSE.TXT index af5a1c50f..8b9ce9e2e 100644 --- a/Source/ThirdParty/freetype/LICENSE.TXT +++ b/Source/ThirdParty/freetype/LICENSE.TXT @@ -1,39 +1,46 @@ +FREETYPE LICENSES +----------------- -The FreeType 2 font engine is copyrighted work and cannot be used -legally without a software license. In order to make this project -usable to a vast majority of developers, we distribute it under two +The FreeType 2 font engine is copyrighted work and cannot be used +legally without a software license. In order to make this project +usable to a vast majority of developers, we distribute it under two mutually exclusive open-source licenses. -This means that *you* must choose *one* of the two licenses described -below, then obey all its terms and conditions when using FreeType 2 in +This means that *you* must choose *one* of the two licenses described +below, then obey all its terms and conditions when using FreeType 2 in any of your projects or products. 
- - The FreeType License, found in the file `FTL.TXT', which is similar - to the original BSD license *with* an advertising clause that forces - you to explicitly cite the FreeType project in your product's - documentation. All details are in the license file. This license - is suited to products which don't use the GNU General Public - License. + - The FreeType License, found in the file `docs/FTL.TXT`, which is + similar to the original BSD license *with* an advertising clause + that forces you to explicitly cite the FreeType project in your + product's documentation. All details are in the license file. + This license is suited to products which don't use the GNU General + Public License. - Note that this license is compatible to the GNU General Public + Note that this license is compatible to the GNU General Public License version 3, but not version 2. - - The GNU General Public License version 2, found in `GPLv2.TXT' (any - later version can be used also), for programs which already use the - GPL. Note that the FTL is incompatible with GPLv2 due to its - advertisement clause. + - The GNU General Public License version 2, found in + `docs/GPLv2.TXT` (any later version can be used also), for + programs which already use the GPL. Note that the FTL is + incompatible with GPLv2 due to its advertisement clause. -The contributed BDF and PCF drivers come with a license similar to that -of the X Window System. It is compatible to the above two licenses (see -file src/bdf/README and src/pcf/README). The same holds for the files -`fthash.c' and `fthash.h'; their code was part of the BDF driver in -earlier FreeType versions. +The contributed BDF and PCF drivers come with a license similar to +that of the X Window System. It is compatible to the above two +licenses (see files `src/bdf/README` and `src/pcf/README`). 
The same +holds for the source code files `src/base/fthash.c` and +`include/freetype/internal/fthash.h`; they were part of the BDF driver +in earlier FreeType versions. -The gzip module uses the zlib license (see src/gzip/zlib.h) which too is -compatible to the above two licenses. +The gzip module uses the zlib license (see `src/gzip/zlib.h`) which +too is compatible to the above two licenses. -The MD5 checksum support (only used for debugging in development builds) -is in the public domain. +The files `src/autofit/ft-hb.c` and `src/autofit/ft-hb.h` contain code +taken almost verbatim from the HarfBuzz file `hb-ft.cc`, which uses +the 'Old MIT' license, compatible to the above two licenses. + +The MD5 checksum support (only used for debugging in development +builds) is in the public domain. --- end of LICENSE.TXT --- diff --git a/Source/ThirdParty/freetype/config/ftconfig.h b/Source/ThirdParty/freetype/config/ftconfig.h index 946660337..a85151699 100644 --- a/Source/ThirdParty/freetype/config/ftconfig.h +++ b/Source/ThirdParty/freetype/config/ftconfig.h @@ -4,7 +4,7 @@ * * ANSI-specific configuration file (specification only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -41,533 +41,9 @@ #include FT_CONFIG_OPTIONS_H #include FT_CONFIG_STANDARD_LIBRARY_H - -FT_BEGIN_HEADER - - - /************************************************************************** - * - * PLATFORM-SPECIFIC CONFIGURATION MACROS - * - * These macros can be toggled to suit a specific system. The current ones - * are defaults used to compile FreeType in an ANSI C environment (16bit - * compilers are also supported). Copy this file to your own - * `builds/` directory, and edit it to port the engine. - * - */ - - - /* There are systems (like the Texas Instruments 'C54x) where a `char` */ - /* has 16~bits. ANSI~C says that `sizeof(char)` is always~1. 
Since an */ - /* `int` has 16~bits also for this system, `sizeof(int)` gives~1 which */ - /* is probably unexpected. */ - /* */ - /* `CHAR_BIT` (defined in `limits.h`) gives the number of bits in a */ - /* `char` type. */ - -#ifndef FT_CHAR_BIT -#define FT_CHAR_BIT CHAR_BIT -#endif - - - /* The size of an `int` type. */ -#if FT_UINT_MAX == 0xFFFFUL -#define FT_SIZEOF_INT ( 16 / FT_CHAR_BIT ) -#elif FT_UINT_MAX == 0xFFFFFFFFUL -#define FT_SIZEOF_INT ( 32 / FT_CHAR_BIT ) -#elif FT_UINT_MAX > 0xFFFFFFFFUL && FT_UINT_MAX == 0xFFFFFFFFFFFFFFFFUL -#define FT_SIZEOF_INT ( 64 / FT_CHAR_BIT ) -#else -#error "Unsupported size of `int' type!" -#endif - - /* The size of a `long` type. A five-byte `long` (as used e.g. on the */ - /* DM642) is recognized but avoided. */ -#if FT_ULONG_MAX == 0xFFFFFFFFUL -#define FT_SIZEOF_LONG ( 32 / FT_CHAR_BIT ) -#elif FT_ULONG_MAX > 0xFFFFFFFFUL && FT_ULONG_MAX == 0xFFFFFFFFFFUL -#define FT_SIZEOF_LONG ( 32 / FT_CHAR_BIT ) -#elif FT_ULONG_MAX > 0xFFFFFFFFUL && FT_ULONG_MAX == 0xFFFFFFFFFFFFFFFFUL -#define FT_SIZEOF_LONG ( 64 / FT_CHAR_BIT ) -#else -#error "Unsupported size of `long' type!" -#endif - - - /* `FT_UNUSED` indicates that a given parameter is not used -- */ - /* this is only used to get rid of unpleasant compiler warnings. */ -#ifndef FT_UNUSED -#define FT_UNUSED( arg ) ( (arg) = (arg) ) -#endif - - - /************************************************************************** - * - * AUTOMATIC CONFIGURATION MACROS - * - * These macros are computed from the ones defined above. Don't touch - * their definition, unless you know precisely what you are doing. No - * porter should need to mess with them. - * - */ - - - /************************************************************************** - * - * Mac support - * - * This is the only necessary change, so it is defined here instead - * providing a new configuration file. 
- */ -#if defined( __APPLE__ ) || ( defined( __MWERKS__ ) && defined( macintosh ) ) - /* No Carbon frameworks for 64bit 10.4.x. */ - /* `AvailabilityMacros.h` is available since Mac OS X 10.2, */ - /* so guess the system version by maximum errno before inclusion. */ -#include -#ifdef ECANCELED /* defined since 10.2 */ -#include "AvailabilityMacros.h" -#endif -#if defined( __LP64__ ) && \ - ( MAC_OS_X_VERSION_MIN_REQUIRED <= MAC_OS_X_VERSION_10_4 ) -#undef FT_MACINTOSH -#endif - -#elif defined( __SC__ ) || defined( __MRC__ ) - /* Classic MacOS compilers */ -#include "ConditionalMacros.h" -#if TARGET_OS_MAC -#define FT_MACINTOSH 1 -#endif - -#endif - - - /* Fix compiler warning with sgi compiler. */ -#if defined( __sgi ) && !defined( __GNUC__ ) -#if defined( _COMPILER_VERSION ) && ( _COMPILER_VERSION >= 730 ) -#pragma set woff 3505 -#endif -#endif - - - /************************************************************************** - * - * @section: - * basic_types - * - */ - - - /************************************************************************** - * - * @type: - * FT_Int16 - * - * @description: - * A typedef for a 16bit signed integer type. - */ - typedef signed short FT_Int16; - - - /************************************************************************** - * - * @type: - * FT_UInt16 - * - * @description: - * A typedef for a 16bit unsigned integer type. - */ - typedef unsigned short FT_UInt16; - - /* */ - - - /* this #if 0 ... #endif clause is for documentation purposes */ -#if 0 - - /************************************************************************** - * - * @type: - * FT_Int32 - * - * @description: - * A typedef for a 32bit signed integer type. The size depends on the - * configuration. - */ - typedef signed XXX FT_Int32; - - - /************************************************************************** - * - * @type: - * FT_UInt32 - * - * A typedef for a 32bit unsigned integer type. The size depends on the - * configuration. 
- */ - typedef unsigned XXX FT_UInt32; - - - /************************************************************************** - * - * @type: - * FT_Int64 - * - * A typedef for a 64bit signed integer type. The size depends on the - * configuration. Only defined if there is real 64bit support; - * otherwise, it gets emulated with a structure (if necessary). - */ - typedef signed XXX FT_Int64; - - - /************************************************************************** - * - * @type: - * FT_UInt64 - * - * A typedef for a 64bit unsigned integer type. The size depends on the - * configuration. Only defined if there is real 64bit support; - * otherwise, it gets emulated with a structure (if necessary). - */ - typedef unsigned XXX FT_UInt64; - - /* */ - -#endif - -#if FT_SIZEOF_INT == ( 32 / FT_CHAR_BIT ) - - typedef signed int FT_Int32; - typedef unsigned int FT_UInt32; - -#elif FT_SIZEOF_LONG == ( 32 / FT_CHAR_BIT ) - - typedef signed long FT_Int32; - typedef unsigned long FT_UInt32; - -#else -#error "no 32bit type found -- please check your configuration files" -#endif - - - /* look up an integer type that is at least 32~bits */ -#if FT_SIZEOF_INT >= ( 32 / FT_CHAR_BIT ) - - typedef int FT_Fast; - typedef unsigned int FT_UFast; - -#elif FT_SIZEOF_LONG >= ( 32 / FT_CHAR_BIT ) - - typedef long FT_Fast; - typedef unsigned long FT_UFast; - -#endif - - - /* determine whether we have a 64-bit `int` type for platforms without */ - /* Autoconf */ -#if FT_SIZEOF_LONG == ( 64 / FT_CHAR_BIT ) - - /* `FT_LONG64` must be defined if a 64-bit type is available */ -#define FT_LONG64 -#define FT_INT64 long -#define FT_UINT64 unsigned long - - /************************************************************************** - * - * A 64-bit data type may create compilation problems if you compile in - * strict ANSI mode. To avoid them, we disable other 64-bit data types if - * `__STDC__` is defined. 
You can however ignore this rule by defining the - * `FT_CONFIG_OPTION_FORCE_INT64` configuration macro. - */ -#elif !defined( __STDC__ ) || defined( FT_CONFIG_OPTION_FORCE_INT64 ) - -#if defined( __STDC_VERSION__ ) && __STDC_VERSION__ >= 199901L - -#define FT_LONG64 -#define FT_INT64 long long int -#define FT_UINT64 unsigned long long int - -#elif defined( _MSC_VER ) && _MSC_VER >= 900 /* Visual C++ (and Intel C++) */ - - /* this compiler provides the `__int64` type */ -#define FT_LONG64 -#define FT_INT64 __int64 -#define FT_UINT64 unsigned __int64 - -#elif defined( __BORLANDC__ ) /* Borland C++ */ - - /* XXXX: We should probably check the value of `__BORLANDC__` in order */ - /* to test the compiler version. */ - - /* this compiler provides the `__int64` type */ -#define FT_LONG64 -#define FT_INT64 __int64 -#define FT_UINT64 unsigned __int64 - -#elif defined( __WATCOMC__ ) /* Watcom C++ */ - - /* Watcom doesn't provide 64-bit data types */ - -#elif defined( __MWERKS__ ) /* Metrowerks CodeWarrior */ - -#define FT_LONG64 -#define FT_INT64 long long int -#define FT_UINT64 unsigned long long int - -#elif defined( __GNUC__ ) - - /* GCC provides the `long long` type */ -#define FT_LONG64 -#define FT_INT64 long long int -#define FT_UINT64 unsigned long long int - -#endif /* __STDC_VERSION__ >= 199901L */ - -#endif /* FT_SIZEOF_LONG == (64 / FT_CHAR_BIT) */ - -#ifdef FT_LONG64 - typedef FT_INT64 FT_Int64; - typedef FT_UINT64 FT_UInt64; -#endif - - -#ifdef _WIN64 - /* only 64bit Windows uses the LLP64 data model, i.e., */ - /* 32bit integers, 64bit pointers */ -#define FT_UINT_TO_POINTER( x ) (void*)(unsigned __int64)(x) -#else -#define FT_UINT_TO_POINTER( x ) (void*)(unsigned long)(x) -#endif - - - /************************************************************************** - * - * miscellaneous - * - */ - - -#define FT_BEGIN_STMNT do { -#define FT_END_STMNT } while ( 0 ) -#define FT_DUMMY_STMNT FT_BEGIN_STMNT FT_END_STMNT - - - /* `typeof` condition taken from gnulib's 
`intprops.h` header file */ -#if ( ( defined( __GNUC__ ) && __GNUC__ >= 2 ) || \ - ( defined( __IBMC__ ) && __IBMC__ >= 1210 && \ - defined( __IBM__TYPEOF__ ) ) || \ - ( defined( __SUNPRO_C ) && __SUNPRO_C >= 0x5110 && !__STDC__ ) ) -#define FT_TYPEOF( type ) ( __typeof__ ( type ) ) -#else -#define FT_TYPEOF( type ) /* empty */ -#endif - - - /* Use `FT_LOCAL` and `FT_LOCAL_DEF` to declare and define, */ - /* respectively, a function that gets used only within the scope of a */ - /* module. Normally, both the header and source code files for such a */ - /* function are within a single module directory. */ - /* */ - /* Intra-module arrays should be tagged with `FT_LOCAL_ARRAY` and */ - /* `FT_LOCAL_ARRAY_DEF`. */ - /* */ -#ifdef FT_MAKE_OPTION_SINGLE_OBJECT - -#define FT_LOCAL( x ) static x -#define FT_LOCAL_DEF( x ) static x - -#else - -#ifdef __cplusplus -#define FT_LOCAL( x ) extern "C" x -#define FT_LOCAL_DEF( x ) extern "C" x -#else -#define FT_LOCAL( x ) extern x -#define FT_LOCAL_DEF( x ) x -#endif - -#endif /* FT_MAKE_OPTION_SINGLE_OBJECT */ - -#define FT_LOCAL_ARRAY( x ) extern const x -#define FT_LOCAL_ARRAY_DEF( x ) const x - - - /* Use `FT_BASE` and `FT_BASE_DEF` to declare and define, respectively, */ - /* functions that are used in more than a single module. In the */ - /* current setup this implies that the declaration is in a header file */ - /* in the `include/freetype/internal` directory, and the function body */ - /* is in a file in `src/base`. */ - /* */ -#ifndef FT_BASE - -#ifdef __cplusplus -#define FT_BASE( x ) extern "C" x -#else -#define FT_BASE( x ) extern x -#endif - -#endif /* !FT_BASE */ - - -#ifndef FT_BASE_DEF - -#ifdef __cplusplus -#define FT_BASE_DEF( x ) x -#else -#define FT_BASE_DEF( x ) x -#endif - -#endif /* !FT_BASE_DEF */ - - - /* When compiling FreeType as a DLL or DSO with hidden visibility */ - /* some systems/compilers need a special attribute in front OR after */ - /* the return type of function declarations. 
*/ - /* */ - /* Two macros are used within the FreeType source code to define */ - /* exported library functions: `FT_EXPORT` and `FT_EXPORT_DEF`. */ - /* */ - /* - `FT_EXPORT( return_type )` */ - /* */ - /* is used in a function declaration, as in */ - /* */ - /* ``` */ - /* FT_EXPORT( FT_Error ) */ - /* FT_Init_FreeType( FT_Library* alibrary ); */ - /* ``` */ - /* */ - /* - `FT_EXPORT_DEF( return_type )` */ - /* */ - /* is used in a function definition, as in */ - /* */ - /* ``` */ - /* FT_EXPORT_DEF( FT_Error ) */ - /* FT_Init_FreeType( FT_Library* alibrary ) */ - /* { */ - /* ... some code ... */ - /* return FT_Err_Ok; */ - /* } */ - /* ``` */ - /* */ - /* You can provide your own implementation of `FT_EXPORT` and */ - /* `FT_EXPORT_DEF` here if you want. */ - /* */ - /* To export a variable, use `FT_EXPORT_VAR`. */ - /* */ -#ifndef FT_EXPORT - -#ifdef FT2_BUILD_LIBRARY - -#if defined( _WIN32 ) && defined( DLL_EXPORT ) -#define FT_EXPORT( x ) __declspec( dllexport ) x -#elif defined( __GNUC__ ) && __GNUC__ >= 4 -#define FT_EXPORT( x ) __attribute__(( visibility( "default" ) )) x -#elif defined( __SUNPRO_C ) && __SUNPRO_C >= 0x550 -#define FT_EXPORT( x ) __global x -#elif defined( __cplusplus ) -#define FT_EXPORT( x ) extern "C" x -#else -#define FT_EXPORT( x ) extern x -#endif - -#else - -#if defined( _WIN32 ) && defined( DLL_IMPORT ) -#define FT_EXPORT( x ) __declspec( dllimport ) x -#elif defined( __cplusplus ) -#define FT_EXPORT( x ) extern "C" x -#else -#define FT_EXPORT( x ) extern x -#endif - -#endif - -#endif /* !FT_EXPORT */ - - -#ifndef FT_EXPORT_DEF - -#ifdef __cplusplus -#define FT_EXPORT_DEF( x ) extern "C" x -#else -#define FT_EXPORT_DEF( x ) extern x -#endif - -#endif /* !FT_EXPORT_DEF */ - - -#ifndef FT_EXPORT_VAR - -#ifdef __cplusplus -#define FT_EXPORT_VAR( x ) extern "C" x -#else -#define FT_EXPORT_VAR( x ) extern x -#endif - -#endif /* !FT_EXPORT_VAR */ - - - /* The following macros are needed to compile the library with a */ - /* C++ 
compiler and with 16bit compilers. */ - /* */ - - /* This is special. Within C++, you must specify `extern "C"` for */ - /* functions which are used via function pointers, and you also */ - /* must do that for structures which contain function pointers to */ - /* assure C linkage -- it's not possible to have (local) anonymous */ - /* functions which are accessed by (global) function pointers. */ - /* */ - /* */ - /* FT_CALLBACK_DEF is used to _define_ a callback function, */ - /* located in the same source code file as the structure that uses */ - /* it. */ - /* */ - /* FT_BASE_CALLBACK and FT_BASE_CALLBACK_DEF are used to declare */ - /* and define a callback function, respectively, in a similar way */ - /* as FT_BASE and FT_BASE_DEF work. */ - /* */ - /* FT_CALLBACK_TABLE is used to _declare_ a constant variable that */ - /* contains pointers to callback functions. */ - /* */ - /* FT_CALLBACK_TABLE_DEF is used to _define_ a constant variable */ - /* that contains pointers to callback functions. */ - /* */ - /* */ - /* Some 16bit compilers have to redefine these macros to insert */ - /* the infamous `_cdecl` or `__fastcall` declarations. 
*/ - /* */ -#ifndef FT_CALLBACK_DEF -#ifdef __cplusplus -#define FT_CALLBACK_DEF( x ) extern "C" x -#else -#define FT_CALLBACK_DEF( x ) static x -#endif -#endif /* FT_CALLBACK_DEF */ - -#ifndef FT_BASE_CALLBACK -#ifdef __cplusplus -#define FT_BASE_CALLBACK( x ) extern "C" x -#define FT_BASE_CALLBACK_DEF( x ) extern "C" x -#else -#define FT_BASE_CALLBACK( x ) extern x -#define FT_BASE_CALLBACK_DEF( x ) x -#endif -#endif /* FT_BASE_CALLBACK */ - -#ifndef FT_CALLBACK_TABLE -#ifdef __cplusplus -#define FT_CALLBACK_TABLE extern "C" -#define FT_CALLBACK_TABLE_DEF extern "C" -#else -#define FT_CALLBACK_TABLE extern -#define FT_CALLBACK_TABLE_DEF /* nothing */ -#endif -#endif /* FT_CALLBACK_TABLE */ - - -FT_END_HEADER - +#include +#include +#include #endif /* FTCONFIG_H_ */ diff --git a/Source/ThirdParty/freetype/config/ftheader.h b/Source/ThirdParty/freetype/config/ftheader.h index 696d6ba90..e607bce15 100644 --- a/Source/ThirdParty/freetype/config/ftheader.h +++ b/Source/ThirdParty/freetype/config/ftheader.h @@ -4,7 +4,7 @@ * * Build macros of the FreeType 2 library. * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -30,10 +30,12 @@ /* encapsulated in an `extern "C" { .. }` block when included from a */ /* C++ compiler. */ /* */ -#ifdef __cplusplus -#define FT_BEGIN_HEADER extern "C" { -#else -#define FT_BEGIN_HEADER /* nothing */ +#ifndef FT_BEGIN_HEADER +# ifdef __cplusplus +# define FT_BEGIN_HEADER extern "C" { +# else +# define FT_BEGIN_HEADER /* nothing */ +# endif #endif @@ -48,10 +50,12 @@ /* encapsulated in an `extern "C" { .. }` block when included from a */ /* C++ compiler. 
*/ /* */ -#ifdef __cplusplus -#define FT_END_HEADER } -#else -#define FT_END_HEADER /* nothing */ +#ifndef FT_END_HEADER +# ifdef __cplusplus +# define FT_END_HEADER } +# else +# define FT_END_HEADER /* nothing */ +# endif #endif @@ -73,9 +77,16 @@ * Macro definitions used to `#include` specific header files. * * @description: - * The following macros are defined to the name of specific FreeType~2 - * header files. They can be used directly in `#include` statements as - * in: + * In addition to the normal scheme of including header files like + * + * ``` + * #include + * #include + * #include + * ``` + * + * it is possible to used named macros instead. They can be used + * directly in `#include` statements as in * * ``` * #include FT_FREETYPE_H @@ -83,13 +94,9 @@ * #include FT_GLYPH_H * ``` * - * There are several reasons why we are now using macros to name public - * header files. The first one is that such macros are not limited to - * the infamous 8.3~naming rule required by DOS (and - * `FT_MULTIPLE_MASTERS_H` is a lot more meaningful than `ftmm.h`). - * - * The second reason is that it allows for more flexibility in the way - * FreeType~2 is installed on a given system. + * These macros were introduced to overcome the infamous 8.3~naming rule + * required by DOS (and `FT_MULTIPLE_MASTERS_H` is a lot more meaningful + * than `ftmm.h`). * */ @@ -770,6 +777,18 @@ #define FT_COLOR_H + /************************************************************************** + * + * @macro: + * FT_OTSVG_H + * + * @description: + * A macro used in `#include` statements to name the file containing the + * FreeType~2 API which handles the OpenType 'SVG~' glyphs. + */ +#define FT_OTSVG_H + + /* */ /* These header files don't need to be included by the user. */ @@ -797,16 +816,19 @@ #define FT_CACHE_INTERNAL_IMAGE_H FT_CACHE_H #define FT_CACHE_INTERNAL_SBITS_H FT_CACHE_H - - /* - * Include internal headers definitions from `` only when - * building the library. 
- */ +/* TODO(david): Move this section below to a different header */ #ifdef FT2_BUILD_LIBRARY -#define FT_INTERNAL_INTERNAL_H -#include FT_INTERNAL_INTERNAL_H -#endif /* FT2_BUILD_LIBRARY */ +#if defined( _MSC_VER ) /* Visual C++ (and Intel C++) */ + /* We disable the warning `conditional expression is constant' here */ + /* in order to compile cleanly with the maximum level of warnings. */ + /* In particular, the warning complains about stuff like `while(0)' */ + /* which is very useful in macro definitions. There is no benefit */ + /* in having it enabled. */ +#pragma warning( disable : 4127 ) + +#endif /* _MSC_VER */ +#endif /* FT2_BUILD_LIBRARY */ #endif /* FTHEADER_H_ */ diff --git a/Source/ThirdParty/freetype/config/ftmodule.h b/Source/ThirdParty/freetype/config/ftmodule.h index 7c603e532..b315baba8 100644 --- a/Source/ThirdParty/freetype/config/ftmodule.h +++ b/Source/ThirdParty/freetype/config/ftmodule.h @@ -19,14 +19,15 @@ FT_USE_MODULE( FT_Driver_ClassRec, pfr_driver_class ) FT_USE_MODULE( FT_Driver_ClassRec, t42_driver_class ) FT_USE_MODULE( FT_Driver_ClassRec, winfnt_driver_class ) FT_USE_MODULE( FT_Driver_ClassRec, pcf_driver_class ) +FT_USE_MODULE( FT_Driver_ClassRec, bdf_driver_class ) FT_USE_MODULE( FT_Module_Class, psaux_module_class ) FT_USE_MODULE( FT_Module_Class, psnames_module_class ) FT_USE_MODULE( FT_Module_Class, pshinter_module_class ) -FT_USE_MODULE( FT_Renderer_Class, ft_raster1_renderer_class ) FT_USE_MODULE( FT_Module_Class, sfnt_module_class ) FT_USE_MODULE( FT_Renderer_Class, ft_smooth_renderer_class ) -FT_USE_MODULE( FT_Renderer_Class, ft_smooth_lcd_renderer_class ) -FT_USE_MODULE( FT_Renderer_Class, ft_smooth_lcdv_renderer_class ) -FT_USE_MODULE( FT_Driver_ClassRec, bdf_driver_class ) +FT_USE_MODULE( FT_Renderer_Class, ft_raster1_renderer_class ) +FT_USE_MODULE( FT_Renderer_Class, ft_sdf_renderer_class ) +FT_USE_MODULE( FT_Renderer_Class, ft_bitmap_sdf_renderer_class ) +FT_USE_MODULE( FT_Renderer_Class, ft_svg_renderer_class ) /* 
EOF */ diff --git a/Source/ThirdParty/freetype/config/ftoption.h b/Source/ThirdParty/freetype/config/ftoption.h index 12f47a82e..1976b33af 100644 --- a/Source/ThirdParty/freetype/config/ftoption.h +++ b/Source/ThirdParty/freetype/config/ftoption.h @@ -4,7 +4,7 @@ * * User-selectable configuration macros (specification only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -42,7 +42,7 @@ FT_BEGIN_HEADER * the name of a directory that is included _before_ the FreeType include * path during compilation. * - * The default FreeType Makefiles and Jamfiles use the build directory + * The default FreeType Makefiles use the build directory * `builds/` by default, but you can easily change that for your * own projects. * @@ -105,8 +105,7 @@ FT_BEGIN_HEADER * * ``` * FREETYPE_PROPERTIES=truetype:interpreter-version=35 \ - * cff:no-stem-darkening=1 \ - * autofitter:warping=1 + * cff:no-stem-darkening=1 * ``` * */ @@ -121,10 +120,8 @@ FT_BEGIN_HEADER * mitigate color fringes inherent to this technology, you also need to * explicitly set up LCD filtering. * - * Note that this feature is covered by several Microsoft patents and - * should not be activated in any default build of the library. When this - * macro is not defined, FreeType offers alternative LCD rendering - * technology that produces excellent output without LCD filtering. + * When this macro is not defined, FreeType offers alternative LCD + * rendering technology that produces excellent output. */ /* #define FT_CONFIG_OPTION_SUBPIXEL_RENDERING */ @@ -222,6 +219,10 @@ FT_BEGIN_HEADER * If you use a build system like cmake or the `configure` script, * options set by those programs have precedence, overwriting the value * here with the configured one. 
+ * + * If you use the GNU make build system directly (that is, without the + * `configure` script) and you define this macro, you also have to pass + * `SYSTEM_ZLIB=yes` as an argument to make. */ /* #define FT_CONFIG_OPTION_SYSTEM_ZLIB */ @@ -292,6 +293,22 @@ FT_BEGIN_HEADER /* #define FT_CONFIG_OPTION_USE_HARFBUZZ */ + /************************************************************************** + * + * Brotli support. + * + * FreeType uses the Brotli library to provide support for decompressing + * WOFF2 streams. + * + * Define this macro if you want to enable this 'feature'. + * + * If you use a build system like cmake or the `configure` script, + * options set by those programs have precedence, overwriting the value + * here with the configured one. + */ +/* #define FT_CONFIG_OPTION_USE_BROTLI */ + + /************************************************************************** * * Glyph Postscript Names handling @@ -417,6 +434,23 @@ FT_BEGIN_HEADER /* #define FT_DEBUG_LEVEL_TRACE */ + /************************************************************************** + * + * Logging + * + * Compiling FreeType in debug or trace mode makes FreeType write error + * and trace log messages to `stderr`. Enabling this macro + * automatically forces the `FT_DEBUG_LEVEL_ERROR` and + * `FT_DEBUG_LEVEL_TRACE` macros and allows FreeType to write error and + * trace log messages to a file instead of `stderr`. For writing logs + * to a file, FreeType uses an the external `dlg` library (the source + * code is in `src/dlg`). + * + * This option needs a C99 compiler. 
+ */ +/* #define FT_DEBUG_LOGGING */ + + /************************************************************************** * * Autofitter debugging @@ -427,9 +461,9 @@ FT_BEGIN_HEADER * while compiling in 'release' mode): * * ``` - * _af_debug_disable_horz_hints - * _af_debug_disable_vert_hints - * _af_debug_disable_blue_hints + * af_debug_disable_horz_hints_ + * af_debug_disable_vert_hints_ + * af_debug_disable_blue_hints_ * ``` * * Additionally, the following functions provide dumps of various @@ -446,7 +480,7 @@ FT_BEGIN_HEADER * As an argument, they use another global variable: * * ``` - * _af_debug_hints + * af_debug_hints_ * ``` * * Please have a look at the `ftgrid` demo program to see how those @@ -493,6 +527,20 @@ FT_BEGIN_HEADER #undef FT_CONFIG_OPTION_USE_MODULE_ERRORS + /************************************************************************** + * + * OpenType SVG Glyph Support + * + * Setting this macro enables support for OpenType SVG glyphs. By + * default, FreeType can only fetch SVG documents. However, it can also + * render them if external rendering hook functions are plugged in at + * runtime. + * + * More details on the hooks can be found in file `otsvg.h`. + */ +#define FT_CONFIG_OPTION_SVG + + /************************************************************************** * * Error Strings @@ -526,7 +574,7 @@ FT_BEGIN_HEADER /************************************************************************** * - * Define `TT_CONFIG_OPTION_COLOR_LAYERS` if you want to support coloured + * Define `TT_CONFIG_OPTION_COLOR_LAYERS` if you want to support colored * outlines (from the 'COLR'/'CPAL' tables) in all formats using the 'sfnt' * module (namely TrueType~& OpenType). 
*/ @@ -536,12 +584,12 @@ FT_BEGIN_HEADER /************************************************************************** * * Define `TT_CONFIG_OPTION_POSTSCRIPT_NAMES` if you want to be able to - * load and enumerate the glyph Postscript names in a TrueType or OpenType + * load and enumerate Postscript names of glyphs in a TrueType or OpenType * file. * - * Note that when you do not compile the 'psnames' module by undefining the - * above `FT_CONFIG_OPTION_POSTSCRIPT_NAMES`, the 'sfnt' module will - * contain additional code used to read the PS Names table from a font. + * Note that if you do not compile the 'psnames' module by undefining the + * above `FT_CONFIG_OPTION_POSTSCRIPT_NAMES` macro, the 'sfnt' module will + * contain additional code to read the PostScript name table from a font. * * (By default, the module uses 'psnames' to extract glyph names.) */ @@ -613,36 +661,12 @@ FT_BEGIN_HEADER * not) instructions in a certain way so that all TrueType fonts look like * they do in a Windows ClearType (DirectWrite) environment. See [1] for a * technical overview on what this means. See `ttinterp.h` for more - * details on the LEAN option. + * details on this option. * - * There are three possible values. - * - * Value 1: - * This value is associated with the 'Infinality' moniker, contributed by - * an individual nicknamed Infinality with the goal of making TrueType - * fonts render better than on Windows. A high amount of configurability - * and flexibility, down to rules for single glyphs in fonts, but also - * very slow. Its experimental and slow nature and the original - * developer losing interest meant that this option was never enabled in - * default builds. - * - * The corresponding interpreter version is v38. - * - * Value 2: - * The new default mode for the TrueType driver. The Infinality code - * base was stripped to the bare minimum and all configurability removed - * in the name of speed and simplicity. 
The configurability was mainly - * aimed at legacy fonts like 'Arial', 'Times New Roman', or 'Courier'. - * Legacy fonts are fonts that modify vertical stems to achieve clean - * black-and-white bitmaps. The new mode focuses on applying a minimal - * set of rules to all fonts indiscriminately so that modern and web - * fonts render well while legacy fonts render okay. - * - * The corresponding interpreter version is v40. - * - * Value 3: - * Compile both, making both v38 and v40 available (the latter is the - * default). + * The new default mode focuses on applying a minimal set of rules to all + * fonts indiscriminately so that modern and web fonts render well while + * legacy fonts render okay. The corresponding interpreter version is v40. + * The so-called Infinality mode (v38) is no longer available in FreeType. * * By undefining these, you get rendering behavior like on Windows without * ClearType, i.e., Windows XP without ClearType enabled and Win9x @@ -657,9 +681,7 @@ FT_BEGIN_HEADER * [1] * https://www.microsoft.com/typography/cleartype/truetypecleartype.aspx */ -/* #define TT_CONFIG_OPTION_SUBPIXEL_HINTING 1 */ -#define TT_CONFIG_OPTION_SUBPIXEL_HINTING 2 -/* #define TT_CONFIG_OPTION_SUBPIXEL_HINTING ( 1 | 2 ) */ +#define TT_CONFIG_OPTION_SUBPIXEL_HINTING /************************************************************************** @@ -691,6 +713,24 @@ FT_BEGIN_HEADER #define TT_CONFIG_OPTION_GX_VAR_SUPPORT + /************************************************************************** + * + * Define `TT_CONFIG_OPTION_NO_BORING_EXPANSION` if you want to exclude + * support for 'boring' OpenType specification expansions. + * + * https://github.com/harfbuzz/boring-expansion-spec + * + * Right now, the following features are covered: + * + * - 'avar' version 2.0 + * + * Most likely, this is a temporary configuration option to be removed in + * the near future, since it is assumed that eventually those features are + * added to the OpenType standard. 
+ */ +/* #define TT_CONFIG_OPTION_NO_BORING_EXPANSION */ + + /************************************************************************** * * Define `TT_CONFIG_OPTION_BDF` if you want to include support for an @@ -871,27 +911,11 @@ FT_BEGIN_HEADER * * Compile 'autofit' module with fallback Indic script support, covering * some scripts that the 'latin' submodule of the 'autofit' module doesn't - * (yet) handle. + * (yet) handle. Currently, this needs option `AF_CONFIG_OPTION_CJK`. */ +#ifdef AF_CONFIG_OPTION_CJK #define AF_CONFIG_OPTION_INDIC - - - /************************************************************************** - * - * Compile 'autofit' module with warp hinting. The idea of the warping - * code is to slightly scale and shift a glyph within a single dimension so - * that as much of its segments are aligned (more or less) on the grid. To - * find out the optimal scaling and shifting value, various parameter - * combinations are tried and scored. - * - * You can switch warping on and off with the `warping` property of the - * auto-hinter (see file `ftdriver.h` for more information; by default it - * is switched off). - * - * This experimental option is not active if the rendering mode is - * `FT_RENDER_MODE_LIGHT`. - */ -#define AF_CONFIG_OPTION_USE_WARPER +#endif /************************************************************************** @@ -927,21 +951,29 @@ FT_BEGIN_HEADER /* - * The next three macros are defined if native TrueType hinting is + * The next two macros are defined if native TrueType hinting is * requested by the definitions above. Don't change this. 
*/ #ifdef TT_CONFIG_OPTION_BYTECODE_INTERPRETER #define TT_USE_BYTECODE_INTERPRETER - #ifdef TT_CONFIG_OPTION_SUBPIXEL_HINTING -#if TT_CONFIG_OPTION_SUBPIXEL_HINTING & 1 -#define TT_SUPPORT_SUBPIXEL_HINTING_INFINALITY -#endif - -#if TT_CONFIG_OPTION_SUBPIXEL_HINTING & 2 #define TT_SUPPORT_SUBPIXEL_HINTING_MINIMAL #endif #endif + + + /* + * The TT_SUPPORT_COLRV1 macro is defined to indicate to clients that this + * version of FreeType has support for 'COLR' v1 API. This definition is + * useful to FreeType clients that want to build in support for 'COLR' v1 + * depending on a tip-of-tree checkout before it is officially released in + * FreeType, and while the feature cannot yet be tested against using + * version macros. Don't change this macro. This may be removed once the + * feature is in a FreeType release version and version macros can be used + * to test for availability. + */ +#ifdef TT_CONFIG_OPTION_COLOR_LAYERS +#define TT_SUPPORT_COLRV1 #endif @@ -973,8 +1005,8 @@ FT_BEGIN_HEADER #error "Invalid CFF darkening parameters!" #endif -FT_END_HEADER +FT_END_HEADER #endif /* FTOPTION_H_ */ diff --git a/Source/ThirdParty/freetype/config/ftstdlib.h b/Source/ThirdParty/freetype/config/ftstdlib.h index 438b6145d..f65148a90 100644 --- a/Source/ThirdParty/freetype/config/ftstdlib.h +++ b/Source/ThirdParty/freetype/config/ftstdlib.h @@ -5,7 +5,7 @@ * ANSI-specific library and header configuration file (specification * only). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -43,7 +43,8 @@ * * `UINT_MAX` and `ULONG_MAX` are used to automatically compute the size of * `int` and `long` in bytes at compile-time. So far, this works for all - * platforms the library has been tested on. + * platforms the library has been tested on. We also check `ULLONG_MAX` + * to see whether we can use 64-bit `long long` later on. 
* * Note that on the extremely rare platforms that do not provide integer * types that are _exactly_ 16 and 32~bits wide (e.g., some old Crays where @@ -66,6 +67,15 @@ #define FT_LONG_MIN LONG_MIN #define FT_LONG_MAX LONG_MAX #define FT_ULONG_MAX ULONG_MAX +#ifdef LLONG_MAX +#define FT_LLONG_MAX LLONG_MAX +#endif +#ifdef LLONG_MIN +#define FT_LLONG_MIN LLONG_MIN +#endif +#ifdef ULLONG_MAX +#define FT_ULLONG_MAX ULLONG_MAX +#endif /************************************************************************** @@ -101,13 +111,13 @@ #include -#define FT_FILE FILE -#define ft_fclose fclose -#define ft_fopen fopen -#define ft_fread fread -#define ft_fseek fseek -#define ft_ftell ftell -#define ft_sprintf sprintf +#define FT_FILE FILE +#define ft_fclose fclose +#define ft_fopen fopen +#define ft_fread fread +#define ft_fseek fseek +#define ft_ftell ftell +#define ft_snprintf snprintf /************************************************************************** diff --git a/Source/ThirdParty/freetype/config/integer-types.h b/Source/ThirdParty/freetype/config/integer-types.h new file mode 100644 index 000000000..7258b5085 --- /dev/null +++ b/Source/ThirdParty/freetype/config/integer-types.h @@ -0,0 +1,250 @@ +/**************************************************************************** + * + * config/integer-types.h + * + * FreeType integer types definitions. + * + * Copyright (C) 1996-2023 by + * David Turner, Robert Wilhelm, and Werner Lemberg. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. + * + */ +#ifndef FREETYPE_CONFIG_INTEGER_TYPES_H_ +#define FREETYPE_CONFIG_INTEGER_TYPES_H_ + + /* There are systems (like the Texas Instruments 'C54x) where a `char` */ + /* has 16~bits. 
ANSI~C says that `sizeof(char)` is always~1. Since an */ + /* `int` has 16~bits also for this system, `sizeof(int)` gives~1 which */ + /* is probably unexpected. */ + /* */ + /* `CHAR_BIT` (defined in `limits.h`) gives the number of bits in a */ + /* `char` type. */ + +#ifndef FT_CHAR_BIT +#define FT_CHAR_BIT CHAR_BIT +#endif + +#ifndef FT_SIZEOF_INT + + /* The size of an `int` type. */ +#if FT_UINT_MAX == 0xFFFFUL +#define FT_SIZEOF_INT ( 16 / FT_CHAR_BIT ) +#elif FT_UINT_MAX == 0xFFFFFFFFUL +#define FT_SIZEOF_INT ( 32 / FT_CHAR_BIT ) +#elif FT_UINT_MAX > 0xFFFFFFFFUL && FT_UINT_MAX == 0xFFFFFFFFFFFFFFFFUL +#define FT_SIZEOF_INT ( 64 / FT_CHAR_BIT ) +#else +#error "Unsupported size of `int' type!" +#endif + +#endif /* !defined(FT_SIZEOF_INT) */ + +#ifndef FT_SIZEOF_LONG + + /* The size of a `long` type. A five-byte `long` (as used e.g. on the */ + /* DM642) is recognized but avoided. */ +#if FT_ULONG_MAX == 0xFFFFFFFFUL +#define FT_SIZEOF_LONG ( 32 / FT_CHAR_BIT ) +#elif FT_ULONG_MAX > 0xFFFFFFFFUL && FT_ULONG_MAX == 0xFFFFFFFFFFUL +#define FT_SIZEOF_LONG ( 32 / FT_CHAR_BIT ) +#elif FT_ULONG_MAX > 0xFFFFFFFFUL && FT_ULONG_MAX == 0xFFFFFFFFFFFFFFFFUL +#define FT_SIZEOF_LONG ( 64 / FT_CHAR_BIT ) +#else +#error "Unsupported size of `long' type!" +#endif + +#endif /* !defined(FT_SIZEOF_LONG) */ + +#ifndef FT_SIZEOF_LONG_LONG + + /* The size of a `long long` type if available */ +#if defined( FT_ULLONG_MAX ) && FT_ULLONG_MAX >= 0xFFFFFFFFFFFFFFFFULL +#define FT_SIZEOF_LONG_LONG ( 64 / FT_CHAR_BIT ) +#else +#define FT_SIZEOF_LONG_LONG 0 +#endif + +#endif /* !defined(FT_SIZEOF_LONG_LONG) */ + + + /************************************************************************** + * + * @section: + * basic_types + * + */ + + + /************************************************************************** + * + * @type: + * FT_Int16 + * + * @description: + * A typedef for a 16bit signed integer type. 
+ */ + typedef signed short FT_Int16; + + + /************************************************************************** + * + * @type: + * FT_UInt16 + * + * @description: + * A typedef for a 16bit unsigned integer type. + */ + typedef unsigned short FT_UInt16; + + /* */ + + + /* this #if 0 ... #endif clause is for documentation purposes */ +#if 0 + + /************************************************************************** + * + * @type: + * FT_Int32 + * + * @description: + * A typedef for a 32bit signed integer type. The size depends on the + * configuration. + */ + typedef signed XXX FT_Int32; + + + /************************************************************************** + * + * @type: + * FT_UInt32 + * + * A typedef for a 32bit unsigned integer type. The size depends on the + * configuration. + */ + typedef unsigned XXX FT_UInt32; + + + /************************************************************************** + * + * @type: + * FT_Int64 + * + * A typedef for a 64bit signed integer type. The size depends on the + * configuration. Only defined if there is real 64bit support; + * otherwise, it gets emulated with a structure (if necessary). + */ + typedef signed XXX FT_Int64; + + + /************************************************************************** + * + * @type: + * FT_UInt64 + * + * A typedef for a 64bit unsigned integer type. The size depends on the + * configuration. Only defined if there is real 64bit support; + * otherwise, it gets emulated with a structure (if necessary). 
+ */ + typedef unsigned XXX FT_UInt64; + + /* */ + +#endif + +#if FT_SIZEOF_INT == ( 32 / FT_CHAR_BIT ) + + typedef signed int FT_Int32; + typedef unsigned int FT_UInt32; + +#elif FT_SIZEOF_LONG == ( 32 / FT_CHAR_BIT ) + + typedef signed long FT_Int32; + typedef unsigned long FT_UInt32; + +#else +#error "no 32bit type found -- please check your configuration files" +#endif + + + /* look up an integer type that is at least 32~bits */ +#if FT_SIZEOF_INT >= ( 32 / FT_CHAR_BIT ) + + typedef int FT_Fast; + typedef unsigned int FT_UFast; + +#elif FT_SIZEOF_LONG >= ( 32 / FT_CHAR_BIT ) + + typedef long FT_Fast; + typedef unsigned long FT_UFast; + +#endif + + + /* determine whether we have a 64-bit integer type */ +#if FT_SIZEOF_LONG == ( 64 / FT_CHAR_BIT ) + +#define FT_INT64 long +#define FT_UINT64 unsigned long + +#elif FT_SIZEOF_LONG_LONG >= ( 64 / FT_CHAR_BIT ) + +#define FT_INT64 long long int +#define FT_UINT64 unsigned long long int + + /************************************************************************** + * + * A 64-bit data type may create compilation problems if you compile in + * strict ANSI mode. To avoid them, we disable other 64-bit data types if + * `__STDC__` is defined. You can however ignore this rule by defining the + * `FT_CONFIG_OPTION_FORCE_INT64` configuration macro. + */ +#elif !defined( __STDC__ ) || defined( FT_CONFIG_OPTION_FORCE_INT64 ) + +#if defined( _MSC_VER ) && _MSC_VER >= 900 /* Visual C++ (and Intel C++) */ + + /* this compiler provides the `__int64` type */ +#define FT_INT64 __int64 +#define FT_UINT64 unsigned __int64 + +#elif defined( __BORLANDC__ ) /* Borland C++ */ + + /* XXXX: We should probably check the value of `__BORLANDC__` in order */ + /* to test the compiler version. 
*/ + + /* this compiler provides the `__int64` type */ +#define FT_INT64 __int64 +#define FT_UINT64 unsigned __int64 + +#elif defined( __WATCOMC__ ) && __WATCOMC__ >= 1100 /* Watcom C++ */ + +#define FT_INT64 long long int +#define FT_UINT64 unsigned long long int + +#elif defined( __MWERKS__ ) /* Metrowerks CodeWarrior */ + +#define FT_INT64 long long int +#define FT_UINT64 unsigned long long int + +#elif defined( __GNUC__ ) + + /* GCC provides the `long long` type */ +#define FT_INT64 long long int +#define FT_UINT64 unsigned long long int + +#endif /* !__STDC__ */ + +#endif /* FT_SIZEOF_LONG == (64 / FT_CHAR_BIT) */ + +#ifdef FT_INT64 + typedef FT_INT64 FT_Int64; + typedef FT_UINT64 FT_UInt64; +#endif + + +#endif /* FREETYPE_CONFIG_INTEGER_TYPES_H_ */ diff --git a/Source/ThirdParty/freetype/config/mac-support.h b/Source/ThirdParty/freetype/config/mac-support.h new file mode 100644 index 000000000..b77b96d5d --- /dev/null +++ b/Source/ThirdParty/freetype/config/mac-support.h @@ -0,0 +1,49 @@ +/**************************************************************************** + * + * config/mac-support.h + * + * Mac/OS X support configuration header. + * + * Copyright (C) 1996-2023 by + * David Turner, Robert Wilhelm, and Werner Lemberg. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. + * + */ +#ifndef FREETYPE_CONFIG_MAC_SUPPORT_H_ +#define FREETYPE_CONFIG_MAC_SUPPORT_H_ + + /************************************************************************** + * + * Mac support + * + * This is the only necessary change, so it is defined here instead + * providing a new configuration file. + */ +#if defined( __APPLE__ ) || ( defined( __MWERKS__ ) && defined( macintosh ) ) + /* No Carbon frameworks for 64bit 10.4.x. 
*/ + /* `AvailabilityMacros.h` is available since Mac OS X 10.2, */ + /* so guess the system version by maximum errno before inclusion. */ +#include <errno.h> +#ifdef ECANCELED /* defined since 10.2 */ +#include "AvailabilityMacros.h" +#endif +#if defined( __LP64__ ) && \ + ( MAC_OS_X_VERSION_MIN_REQUIRED <= MAC_OS_X_VERSION_10_4 ) +#undef FT_MACINTOSH +#endif + +#elif defined( __SC__ ) || defined( __MRC__ ) + /* Classic MacOS compilers */ +#include "ConditionalMacros.h" +#if TARGET_OS_MAC +#define FT_MACINTOSH 1 +#endif + +#endif /* Mac support */ + +#endif /* FREETYPE_CONFIG_MAC_SUPPORT_H_ */ diff --git a/Source/ThirdParty/freetype/config/public-macros.h b/Source/ThirdParty/freetype/config/public-macros.h new file mode 100644 index 000000000..23d0fa6a3 --- /dev/null +++ b/Source/ThirdParty/freetype/config/public-macros.h @@ -0,0 +1,138 @@ +/**************************************************************************** + * + * config/public-macros.h + * + * Define a set of compiler macros used in public FreeType headers. + * + * Copyright (C) 2020-2023 by + * David Turner, Robert Wilhelm, and Werner Lemberg. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. + * + */ + + /* + * The definitions in this file are used by the public FreeType headers + * and thus should be considered part of the public API. + * + * Other compiler-specific macro definitions that are not exposed by the + * FreeType API should go into + * `include/freetype/internal/compiler-macros.h` instead.
+ */ +#ifndef FREETYPE_CONFIG_PUBLIC_MACROS_H_ +#define FREETYPE_CONFIG_PUBLIC_MACROS_H_ + + /* + * `FT_BEGIN_HEADER` and `FT_END_HEADER` might have already been defined + * by `freetype/config/ftheader.h`, but we don't want to include this + * header here, so redefine the macros here only when needed. Their + * definition is very stable, so keeping them in sync with the ones in the + * header should not be a maintenance issue. + */ +#ifndef FT_BEGIN_HEADER +#ifdef __cplusplus +#define FT_BEGIN_HEADER extern "C" { +#else +#define FT_BEGIN_HEADER /* empty */ +#endif +#endif /* FT_BEGIN_HEADER */ + +#ifndef FT_END_HEADER +#ifdef __cplusplus +#define FT_END_HEADER } +#else +#define FT_END_HEADER /* empty */ +#endif +#endif /* FT_END_HEADER */ + + +FT_BEGIN_HEADER + + /* + * Mark a function declaration as public. This ensures it will be + * properly exported to client code. Place this before a function + * declaration. + * + * NOTE: This macro should be considered an internal implementation + * detail, and not part of the FreeType API. It is only defined here + * because it is needed by `FT_EXPORT`. + */ + + /* Visual C, mingw */ +#if defined( _WIN32 ) + +#if defined( FT2_BUILD_LIBRARY ) && defined( DLL_EXPORT ) +#define FT_PUBLIC_FUNCTION_ATTRIBUTE __declspec( dllexport ) +#elif defined( DLL_IMPORT ) +#define FT_PUBLIC_FUNCTION_ATTRIBUTE __declspec( dllimport ) +#endif + + /* gcc, clang */ +#elif ( defined( __GNUC__ ) && __GNUC__ >= 4 ) || defined( __clang__ ) +#define FT_PUBLIC_FUNCTION_ATTRIBUTE \ + __attribute__(( visibility( "default" ) )) + + /* Sun */ +#elif defined( __SUNPRO_C ) && __SUNPRO_C >= 0x550 +#define FT_PUBLIC_FUNCTION_ATTRIBUTE __global +#endif + + +#ifndef FT_PUBLIC_FUNCTION_ATTRIBUTE +#define FT_PUBLIC_FUNCTION_ATTRIBUTE /* empty */ +#endif + + + /* + * Define a public FreeType API function. This ensures it is properly + * exported or imported at build time. 
The macro parameter is the + * function's return type as in: + * + * FT_EXPORT( FT_Bool ) + * FT_Object_Method( FT_Object obj, + * ... ); + * + * NOTE: This requires that all `FT_EXPORT` uses are inside + * `FT_BEGIN_HEADER ... FT_END_HEADER` blocks. This guarantees that the + * functions are exported with C linkage, even when the header is included + * by a C++ source file. + */ +#define FT_EXPORT( x ) FT_PUBLIC_FUNCTION_ATTRIBUTE extern x + + + /* + * `FT_UNUSED` indicates that a given parameter is not used -- this is + * only used to get rid of unpleasant compiler warnings. + * + * Technically, this was not meant to be part of the public API, but some + * third-party code depends on it. + */ +#ifndef FT_UNUSED +#define FT_UNUSED( arg ) ( (arg) = (arg) ) +#endif + + + /* + * Support for casts in both C and C++. + */ +#ifdef __cplusplus +#define FT_STATIC_CAST( type, var ) static_cast<type>(var) +#define FT_REINTERPRET_CAST( type, var ) reinterpret_cast<type>(var) + +#define FT_STATIC_BYTE_CAST( type, var ) \ + static_cast<type>( static_cast<unsigned char>( var ) ) +#else +#define FT_STATIC_CAST( type, var ) (type)(var) +#define FT_REINTERPRET_CAST( type, var ) (type)(var) + +#define FT_STATIC_BYTE_CAST( type, var ) (type)(unsigned char)(var) +#endif + + +FT_END_HEADER + +#endif /* FREETYPE_CONFIG_PUBLIC_MACROS_H_ */ diff --git a/Source/ThirdParty/freetype/freetype.h b/Source/ThirdParty/freetype/freetype.h index 4f2eaca69..92acf3794 100644 --- a/Source/ThirdParty/freetype/freetype.h +++ b/Source/ThirdParty/freetype/freetype.h @@ -4,7 +4,7 @@ * * FreeType high-level API and common types (specification only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,25 +20,44 @@ #define FREETYPE_H_ -#ifndef FT_FREETYPE_H -#error "`ft2build.h' hasn't been included yet!" -#error "Please always use macros to include FreeType header files."
-#error "Example:" -#error " #include <ft2build.h>" -#error " #include FT_FREETYPE_H" -#endif - - #include <ft2build.h> #include FT_CONFIG_CONFIG_H -#include FT_TYPES_H -#include FT_ERRORS_H +#include <freetype/fttypes.h> +#include <freetype/fterrors.h> FT_BEGIN_HEADER + /************************************************************************** + * + * @section: + * preamble + * + * @title: + * Preamble + * + * @abstract: + * What FreeType is and isn't + * + * @description: + * FreeType is a library that provides access to glyphs in font files. It + * scales the glyph images and their metrics to a requested size, and it + * rasterizes the glyph images to produce pixel or subpixel alpha coverage + * bitmaps. + * + * Note that FreeType is _not_ a text layout engine. You have to use + * higher-level libraries like HarfBuzz, Pango, or ICU for that. + * + * Note also that FreeType does _not_ perform alpha blending or + * compositing the resulting bitmaps or pixmaps by itself. Use your + * favourite graphics library (for example, Cairo or Skia) to further + * process FreeType's output. + * + */ + + /************************************************************************** * * @section: @@ -51,22 +70,15 @@ FT_BEGIN_HEADER * How client applications should include FreeType header files. * * @description: - * To be as flexible as possible (and for historical reasons), FreeType - * uses a very special inclusion scheme to load header files, for example + * To be as flexible as possible (and for historical reasons), you must + * load file `ft2build.h` first before other header files, for example * * ``` * #include <ft2build.h> * - * #include FT_FREETYPE_H - * #include FT_OUTLINE_H + * #include <freetype/freetype.h> + * #include <freetype/ftoutln.h> * ``` - * - * A compiler and its preprocessor only needs an include path to find the - * file `ft2build.h`; the exact locations and names of the other FreeType - * header files are hidden by @header_file_macros, loaded by - * `ft2build.h`. The API documentation always gives the header macro - * name needed for a particular function.
- * */ @@ -90,6 +102,277 @@ FT_BEGIN_HEADER */ + /************************************************************************** + * + * @section: + * font_testing_macros + * + * @title: + * Font Testing Macros + * + * @abstract: + * Macros to test various properties of fonts. + * + * @description: + * Macros to test the most important font properties. + * + * It is recommended to use these high-level macros instead of directly + * testing the corresponding flags, which are scattered over various + * structures. + * + * @order: + * FT_HAS_HORIZONTAL + * FT_HAS_VERTICAL + * FT_HAS_KERNING + * FT_HAS_FIXED_SIZES + * FT_HAS_GLYPH_NAMES + * FT_HAS_COLOR + * FT_HAS_MULTIPLE_MASTERS + * FT_HAS_SVG + * FT_HAS_SBIX + * FT_HAS_SBIX_OVERLAY + * + * FT_IS_SFNT + * FT_IS_SCALABLE + * FT_IS_FIXED_WIDTH + * FT_IS_CID_KEYED + * FT_IS_TRICKY + * FT_IS_NAMED_INSTANCE + * FT_IS_VARIATION + * + */ + + + /************************************************************************** + * + * @section: + * library_setup + * + * @title: + * Library Setup + * + * @abstract: + * Functions to start and end the usage of the FreeType library. + * + * @description: + * Functions to start and end the usage of the FreeType library. + * + * Note that @FT_Library_Version and @FREETYPE_XXX are of limited use + * because even a new release of FreeType with only documentation + * changes increases the version number. + * + * @order: + * FT_Library + * FT_Init_FreeType + * FT_Done_FreeType + * + * FT_Library_Version + * FREETYPE_XXX + * + */ + + + /************************************************************************** + * + * @section: + * face_creation + * + * @title: + * Face Creation + * + * @abstract: + * Functions to manage fonts. + * + * @description: + * The functions and structures collected in this section operate on + * fonts globally. 
+ * + * @order: + * FT_Face + * FT_FaceRec + * FT_FACE_FLAG_XXX + * FT_STYLE_FLAG_XXX + * + * FT_New_Face + * FT_Done_Face + * FT_Reference_Face + * FT_New_Memory_Face + * FT_Face_Properties + * FT_Open_Face + * FT_Open_Args + * FT_OPEN_XXX + * FT_Parameter + * FT_Attach_File + * FT_Attach_Stream + * + */ + + + /************************************************************************** + * + * @section: + * sizing_and_scaling + * + * @title: + * Sizing and Scaling + * + * @abstract: + * Functions to manage font sizes. + * + * @description: + * The functions and structures collected in this section are related to + * selecting and manipulating the size of a font globally. + * + * @order: + * FT_Size + * FT_SizeRec + * FT_Size_Metrics + * + * FT_Bitmap_Size + * + * FT_Set_Char_Size + * FT_Set_Pixel_Sizes + * FT_Request_Size + * FT_Select_Size + * FT_Size_Request_Type + * FT_Size_RequestRec + * FT_Size_Request + * + * FT_Set_Transform + * FT_Get_Transform + * + */ + + + /************************************************************************** + * + * @section: + * glyph_retrieval + * + * @title: + * Glyph Retrieval + * + * @abstract: + * Functions to manage glyphs. + * + * @description: + * The functions and structures collected in this section operate on + * single glyphs, of which @FT_Load_Glyph is most important. + * + * @order: + * FT_GlyphSlot + * FT_GlyphSlotRec + * FT_Glyph_Metrics + * + * FT_Load_Glyph + * FT_LOAD_XXX + * FT_LOAD_TARGET_MODE + * FT_LOAD_TARGET_XXX + * + * FT_Render_Glyph + * FT_Render_Mode + * FT_Get_Kerning + * FT_Kerning_Mode + * FT_Get_Track_Kerning + * + */ + + + /************************************************************************** + * + * @section: + * character_mapping + * + * @title: + * Character Mapping + * + * @abstract: + * Functions to manage character-to-glyph maps. + * + * @description: + * This section holds functions and structures that are related to + * mapping character input codes to glyph indices. 
+ * + * Note that for many scripts the simplistic approach used by FreeType + * of mapping a single character to a single glyph is not valid or + * possible! In general, a higher-level library like HarfBuzz or ICU + * should be used for handling text strings. + * + * @order: + * FT_CharMap + * FT_CharMapRec + * FT_Encoding + * FT_ENC_TAG + * + * FT_Select_Charmap + * FT_Set_Charmap + * FT_Get_Charmap_Index + * + * FT_Get_Char_Index + * FT_Get_First_Char + * FT_Get_Next_Char + * FT_Load_Char + * + */ + + + /************************************************************************** + * + * @section: + * information_retrieval + * + * @title: + * Information Retrieval + * + * @abstract: + * Functions to retrieve font and glyph information. + * + * @description: + * Functions to retrieve font and glyph information. Only some very + * basic data is covered; see also the chapter on the format-specific + * API for more. + * + * + * @order: + * FT_Get_Name_Index + * FT_Get_Glyph_Name + * FT_Get_Postscript_Name + * FT_Get_FSType_Flags + * FT_FSTYPE_XXX + * FT_Get_SubGlyph_Info + * FT_SUBGLYPH_FLAG_XXX + * + */ + + + /************************************************************************** + * + * @section: + * other_api_data + * + * @title: + * Other API Data + * + * @abstract: + * Other structures, enumerations, and macros. + * + * @description: + * Other structures, enumerations, and macros. Deprecated functions are + * also listed here. 
+ * + * @order: + * FT_Face_Internal + * FT_Size_Internal + * FT_Slot_Internal + * + * FT_SubGlyph + * + * FT_HAS_FAST_GLYPHS + * FT_Face_CheckTrueTypePatents + * FT_Face_SetUnpatentedHinting + * + */ + /*************************************************************************/ /*************************************************************************/ @@ -103,165 +386,10 @@ FT_BEGIN_HEADER /************************************************************************** * * @section: - * base_interface - * - * @title: - * Base Interface - * - * @abstract: - * The FreeType~2 base font interface. - * - * @description: - * This section describes the most important public high-level API - * functions of FreeType~2. - * - * @order: - * FT_Library - * FT_Face - * FT_Size - * FT_GlyphSlot - * FT_CharMap - * FT_Encoding - * FT_ENC_TAG - * - * FT_FaceRec - * - * FT_FACE_FLAG_SCALABLE - * FT_FACE_FLAG_FIXED_SIZES - * FT_FACE_FLAG_FIXED_WIDTH - * FT_FACE_FLAG_HORIZONTAL - * FT_FACE_FLAG_VERTICAL - * FT_FACE_FLAG_COLOR - * FT_FACE_FLAG_SFNT - * FT_FACE_FLAG_CID_KEYED - * FT_FACE_FLAG_TRICKY - * FT_FACE_FLAG_KERNING - * FT_FACE_FLAG_MULTIPLE_MASTERS - * FT_FACE_FLAG_VARIATION - * FT_FACE_FLAG_GLYPH_NAMES - * FT_FACE_FLAG_EXTERNAL_STREAM - * FT_FACE_FLAG_HINTER - * - * FT_HAS_HORIZONTAL - * FT_HAS_VERTICAL - * FT_HAS_KERNING - * FT_HAS_FIXED_SIZES - * FT_HAS_GLYPH_NAMES - * FT_HAS_COLOR - * FT_HAS_MULTIPLE_MASTERS - * - * FT_IS_SFNT - * FT_IS_SCALABLE - * FT_IS_FIXED_WIDTH - * FT_IS_CID_KEYED - * FT_IS_TRICKY - * FT_IS_NAMED_INSTANCE - * FT_IS_VARIATION - * - * FT_STYLE_FLAG_BOLD - * FT_STYLE_FLAG_ITALIC - * - * FT_SizeRec - * FT_Size_Metrics - * - * FT_GlyphSlotRec - * FT_Glyph_Metrics - * FT_SubGlyph - * - * FT_Bitmap_Size - * - * FT_Init_FreeType - * FT_Done_FreeType - * - * FT_New_Face - * FT_Done_Face - * FT_Reference_Face - * FT_New_Memory_Face - * FT_Face_Properties - * FT_Open_Face - * FT_Open_Args - * FT_Parameter - * FT_Attach_File - * FT_Attach_Stream - * - * 
FT_Set_Char_Size - * FT_Set_Pixel_Sizes - * FT_Request_Size - * FT_Select_Size - * FT_Size_Request_Type - * FT_Size_RequestRec - * FT_Size_Request - * FT_Set_Transform - * FT_Load_Glyph - * FT_Get_Char_Index - * FT_Get_First_Char - * FT_Get_Next_Char - * FT_Get_Name_Index - * FT_Load_Char - * - * FT_OPEN_MEMORY - * FT_OPEN_STREAM - * FT_OPEN_PATHNAME - * FT_OPEN_DRIVER - * FT_OPEN_PARAMS - * - * FT_LOAD_DEFAULT - * FT_LOAD_RENDER - * FT_LOAD_MONOCHROME - * FT_LOAD_LINEAR_DESIGN - * FT_LOAD_NO_SCALE - * FT_LOAD_NO_HINTING - * FT_LOAD_NO_BITMAP - * FT_LOAD_NO_AUTOHINT - * FT_LOAD_COLOR - * - * FT_LOAD_VERTICAL_LAYOUT - * FT_LOAD_IGNORE_TRANSFORM - * FT_LOAD_FORCE_AUTOHINT - * FT_LOAD_NO_RECURSE - * FT_LOAD_PEDANTIC - * - * FT_LOAD_TARGET_NORMAL - * FT_LOAD_TARGET_LIGHT - * FT_LOAD_TARGET_MONO - * FT_LOAD_TARGET_LCD - * FT_LOAD_TARGET_LCD_V - * - * FT_LOAD_TARGET_MODE - * - * FT_Render_Glyph - * FT_Render_Mode - * FT_Get_Kerning - * FT_Kerning_Mode - * FT_Get_Track_Kerning - * FT_Get_Glyph_Name - * FT_Get_Postscript_Name - * - * FT_CharMapRec - * FT_Select_Charmap - * FT_Set_Charmap - * FT_Get_Charmap_Index - * - * FT_Get_FSType_Flags - * FT_Get_SubGlyph_Info - * - * FT_Face_Internal - * FT_Size_Internal - * FT_Slot_Internal - * - * FT_FACE_FLAG_XXX - * FT_STYLE_FLAG_XXX - * FT_OPEN_XXX - * FT_LOAD_XXX - * FT_LOAD_TARGET_XXX - * FT_SUBGLYPH_FLAG_XXX - * FT_FSTYPE_XXX - * - * FT_HAS_FAST_GLYPHS + * glyph_retrieval * */ - /************************************************************************** * * @struct: @@ -329,6 +457,13 @@ FT_BEGIN_HEADER } FT_Glyph_Metrics; + /************************************************************************** + * + * @section: + * sizing_and_scaling + * + */ + /************************************************************************** * * @struct: @@ -389,6 +524,13 @@ FT_BEGIN_HEADER /*************************************************************************/ /*************************************************************************/ + 
/************************************************************************** + * + * @section: + * library_setup + * + */ + /************************************************************************** * * @type: @@ -463,7 +605,7 @@ FT_BEGIN_HEADER /************************************************************************** * * @section: - * base_interface + * face_creation * */ @@ -499,6 +641,13 @@ FT_BEGIN_HEADER typedef struct FT_FaceRec_* FT_Face; + /************************************************************************** + * + * @section: + * sizing_and_scaling + * + */ + /************************************************************************** * * @type: @@ -509,13 +658,15 @@ FT_BEGIN_HEADER * size. * * @note: - * An @FT_Face has one _active_ @FT_Size object that is used by functions - * like @FT_Load_Glyph to determine the scaling transformation that in - * turn is used to load and hint glyphs and metrics. + * An @FT_Face has one _active_ `FT_Size` object that is used by + * functions like @FT_Load_Glyph to determine the scaling transformation + * that in turn is used to load and hint glyphs and metrics. * - * You can use @FT_Set_Char_Size, @FT_Set_Pixel_Sizes, @FT_Request_Size + * A newly created `FT_Size` object contains only meaningless zero values. + * You must use @FT_Set_Char_Size, @FT_Set_Pixel_Sizes, @FT_Request_Size * or even @FT_Select_Size to change the content (i.e., the scaling - * values) of the active @FT_Size. + * values) of the active `FT_Size`. Otherwise, the scaling and hinting + * will not be performed. 
* * You can use @FT_New_Size to create additional size objects for a given * @FT_Face, but they won't be used by other functions until you activate @@ -529,6 +680,13 @@ FT_BEGIN_HEADER typedef struct FT_SizeRec_* FT_Size; + /************************************************************************** + * + * @section: + * glyph_retrieval + * + */ + /************************************************************************** * * @type: @@ -548,6 +706,13 @@ FT_BEGIN_HEADER typedef struct FT_GlyphSlotRec_* FT_GlyphSlot; + /************************************************************************** + * + * @section: + * character_mapping + * + */ + /************************************************************************** * * @type: @@ -603,11 +768,12 @@ FT_BEGIN_HEADER */ #ifndef FT_ENC_TAG -#define FT_ENC_TAG( value, a, b, c, d ) \ - value = ( ( (FT_UInt32)(a) << 24 ) | \ - ( (FT_UInt32)(b) << 16 ) | \ - ( (FT_UInt32)(c) << 8 ) | \ - (FT_UInt32)(d) ) + +#define FT_ENC_TAG( value, a, b, c, d ) \ + value = ( ( FT_STATIC_BYTE_CAST( FT_UInt32, a ) << 24 ) | \ + ( FT_STATIC_BYTE_CAST( FT_UInt32, b ) << 16 ) | \ + ( FT_STATIC_BYTE_CAST( FT_UInt32, c ) << 8 ) | \ + FT_STATIC_BYTE_CAST( FT_UInt32, d ) ) #endif /* FT_ENC_TAG */ @@ -623,7 +789,7 @@ FT_BEGIN_HEADER * * @note: * Despite the name, this enumeration lists specific character - * repertories (i.e., charsets), and not text encoding methods (e.g., + * repertoires (i.e., charsets), and not text encoding methods (e.g., * UTF-8, UTF-16, etc.). * * Other encodings might be defined in the future. @@ -645,7 +811,7 @@ FT_BEGIN_HEADER * FT_ENCODING_MS_SYMBOL :: * Microsoft Symbol encoding, used to encode mathematical symbols and * wingdings. For more information, see - * 'https://www.microsoft.com/typography/otspec/recom.htm', + * 'https://www.microsoft.com/typography/otspec/recom.htm#non-standard-symbol-fonts', * 'http://www.kostis.net/charsets/symbol.htm', and * 'http://www.kostis.net/charsets/wingding.htm'. 
* @@ -717,11 +883,16 @@ FT_BEGIN_HEADER * Same as FT_ENCODING_JOHAB. Deprecated. * * @note: - * By default, FreeType enables a Unicode charmap and tags it with - * `FT_ENCODING_UNICODE` when it is either provided or can be generated - * from PostScript glyph name dictionaries in the font file. All other - * encodings are considered legacy and tagged only if explicitly defined - * in the font file. Otherwise, `FT_ENCODING_NONE` is used. + * When loading a font, FreeType makes a Unicode charmap active if + * possible (either if the font provides such a charmap, or if FreeType + * can synthesize one from PostScript glyph name dictionaries; in either + * case, the charmap is tagged with `FT_ENCODING_UNICODE`). If such a + * charmap is synthesized, it is placed at the first position of the + * charmap array. + * + * All other encodings are considered legacy and tagged only if + * explicitly defined in the font file. Otherwise, `FT_ENCODING_NONE` is + * used. * * `FT_ENCODING_NONE` is set by the BDF and PCF drivers if the charmap is * neither Unicode nor ISO-8859-1 (otherwise it is set to @@ -751,7 +922,7 @@ FT_BEGIN_HEADER * `encoding_id`. If, for example, `encoding_id` is `TT_MAC_ID_ROMAN` * and the language ID (minus~1) is `TT_MAC_LANGID_GREEK`, it is the * Greek encoding, not Roman. `TT_MAC_ID_ARABIC` with - * `TT_MAC_LANGID_FARSI` means the Farsi variant the Arabic encoding. + * `TT_MAC_LANGID_FARSI` means the Farsi variant of the Arabic encoding. 
*/ typedef enum FT_Encoding_ { @@ -849,6 +1020,13 @@ FT_BEGIN_HEADER /*************************************************************************/ + /************************************************************************** + * + * @section: + * other_api_data + * + */ + /************************************************************************** * * @type: @@ -864,6 +1042,13 @@ FT_BEGIN_HEADER typedef struct FT_Face_InternalRec_* FT_Face_Internal; + /************************************************************************** + * + * @section: + * face_creation + * + */ + /************************************************************************** * * @struct: @@ -890,7 +1075,7 @@ FT_BEGIN_HEADER * If we have the third named instance of face~4, say, `face_index` is * set to 0x00030004. * - * Bit 31 is always zero (this is, `face_index` is always a positive + * Bit 31 is always zero (that is, `face_index` is always a positive * value). * * [Since 2.9] Changing the design coordinates with @@ -908,7 +1093,7 @@ FT_BEGIN_HEADER * * [Since 2.6.1] Bits 16-30 hold the number of named instances * available for the current face if we have a GX or OpenType variation - * (sub)font. Bit 31 is always zero (this is, `style_flags` is always + * (sub)font. Bit 31 is always zero (that is, `style_flags` is always * a positive value). Note that a variation font has always at least * one named instance, namely the default instance. * @@ -974,6 +1159,9 @@ FT_BEGIN_HEADER * Note that the bounding box might be off by (at least) one pixel for * hinted fonts. See @FT_Size_Metrics for further discussion. * + * Note that the bounding box does not vary in OpenType variation fonts + * and should only be used in relation to the default instance. + * * units_per_EM :: * The number of font units per EM square for this face. This is * typically 2048 for TrueType fonts, and 1000 for Type~1 fonts. 
Only @@ -1059,9 +1247,9 @@ FT_BEGIN_HEADER FT_Generic generic; - /*# The following member variables (down to `underline_thickness`) */ - /*# are only relevant to scalable outlines; cf. @FT_Bitmap_Size */ - /*# for bitmap fonts. */ + /* The following member variables (down to `underline_thickness`) */ + /* are only relevant to scalable outlines; cf. @FT_Bitmap_Size */ + /* for bitmap fonts. */ FT_BBox bbox; FT_UShort units_per_EM; @@ -1079,7 +1267,7 @@ FT_BEGIN_HEADER FT_Size size; FT_CharMap charmap; - /*@private begin */ + /* private fields, internal to FreeType */ FT_Driver driver; FT_Memory memory; @@ -1092,8 +1280,6 @@ FT_BEGIN_HEADER FT_Face_Internal internal; - /*@private end */ - } FT_FaceRec; @@ -1136,9 +1322,9 @@ FT_BEGIN_HEADER * FT_FACE_FLAG_KERNING :: * The face contains kerning information. If set, the kerning distance * can be retrieved using the function @FT_Get_Kerning. Otherwise the - * function always return the vector (0,0). Note that FreeType doesn't - * handle kerning data from the SFNT 'GPOS' table (as present in many - * OpenType fonts). + * function always returns the vector (0,0). Note that FreeType + * doesn't handle kerning data from the SFNT 'GPOS' table (as present + * in many OpenType fonts). * * FT_FACE_FLAG_FAST_GLYPHS :: * THIS FLAG IS DEPRECATED. DO NOT USE OR TEST IT. @@ -1176,13 +1362,13 @@ FT_BEGIN_HEADER * successfully; in all other cases you get an * `FT_Err_Invalid_Argument` error. * - * Note that CID-keyed fonts that are in an SFNT wrapper (this is, all + * Note that CID-keyed fonts that are in an SFNT wrapper (that is, all * OpenType/CFF fonts) don't have this flag set since the glyphs are * accessed in the normal way (using contiguous indices); the * 'CID-ness' isn't visible to the application. * * FT_FACE_FLAG_TRICKY :: - * The face is 'tricky', this is, it always needs the font format's + * The face is 'tricky', that is, it always needs the font format's * native hinting engine to get a reasonable result. 
A typical example * is the old Chinese font `mingli.ttf` (but not `mingliu.ttc`) that * uses TrueType bytecode instructions to move and scale all of its @@ -1204,8 +1390,21 @@ FT_BEGIN_HEADER * FT_FACE_FLAG_VARIATION :: * [Since 2.9] Set if the current face (or named instance) has been * altered with @FT_Set_MM_Design_Coordinates, - * @FT_Set_Var_Design_Coordinates, or @FT_Set_Var_Blend_Coordinates. - * This flag is unset by a call to @FT_Set_Named_Instance. + * @FT_Set_Var_Design_Coordinates, @FT_Set_Var_Blend_Coordinates, or + * @FT_Set_MM_WeightVector to select a non-default instance. + * + * FT_FACE_FLAG_SVG :: + * [Since 2.12] The face has an 'SVG~' OpenType table. + * + * FT_FACE_FLAG_SBIX :: + * [Since 2.12] The face has an 'sbix' OpenType table *and* outlines. + * For such fonts, @FT_FACE_FLAG_SCALABLE is not set by default to + * retain backward compatibility. + * + * FT_FACE_FLAG_SBIX_OVERLAY :: + * [Since 2.12] The face has an 'sbix' OpenType table where outlines + * should be drawn on top of bitmap strikes. 
+ * */ #define FT_FACE_FLAG_SCALABLE ( 1L << 0 ) #define FT_FACE_FLAG_FIXED_SIZES ( 1L << 1 ) @@ -1223,8 +1422,18 @@ FT_BEGIN_HEADER #define FT_FACE_FLAG_TRICKY ( 1L << 13 ) #define FT_FACE_FLAG_COLOR ( 1L << 14 ) #define FT_FACE_FLAG_VARIATION ( 1L << 15 ) +#define FT_FACE_FLAG_SVG ( 1L << 16 ) +#define FT_FACE_FLAG_SBIX ( 1L << 17 ) +#define FT_FACE_FLAG_SBIX_OVERLAY ( 1L << 18 ) + /************************************************************************** + * + * @section: + * font_testing_macros + * + */ + /************************************************************************** * * @macro: @@ -1239,7 +1448,7 @@ FT_BEGIN_HEADER * */ #define FT_HAS_HORIZONTAL( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_HORIZONTAL ) + ( !!( (face)->face_flags & FT_FACE_FLAG_HORIZONTAL ) ) /************************************************************************** @@ -1253,7 +1462,7 @@ FT_BEGIN_HEADER * */ #define FT_HAS_VERTICAL( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_VERTICAL ) + ( !!( (face)->face_flags & FT_FACE_FLAG_VERTICAL ) ) /************************************************************************** @@ -1267,7 +1476,7 @@ FT_BEGIN_HEADER * */ #define FT_HAS_KERNING( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_KERNING ) + ( !!( (face)->face_flags & FT_FACE_FLAG_KERNING ) ) /************************************************************************** @@ -1282,7 +1491,7 @@ FT_BEGIN_HEADER * */ #define FT_IS_SCALABLE( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_SCALABLE ) + ( !!( (face)->face_flags & FT_FACE_FLAG_SCALABLE ) ) /************************************************************************** @@ -1301,7 +1510,7 @@ FT_BEGIN_HEADER * */ #define FT_IS_SFNT( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_SFNT ) + ( !!( (face)->face_flags & FT_FACE_FLAG_SFNT ) ) /************************************************************************** @@ -1316,7 +1525,7 @@ FT_BEGIN_HEADER * */ #define FT_IS_FIXED_WIDTH( face ) \ - ( (face)->face_flags & 
FT_FACE_FLAG_FIXED_WIDTH ) + ( !!( (face)->face_flags & FT_FACE_FLAG_FIXED_WIDTH ) ) /************************************************************************** @@ -1331,9 +1540,16 @@ FT_BEGIN_HEADER * */ #define FT_HAS_FIXED_SIZES( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_FIXED_SIZES ) + ( !!( (face)->face_flags & FT_FACE_FLAG_FIXED_SIZES ) ) + /************************************************************************** + * + * @section: + * other_api_data + * + */ + /************************************************************************** * * @macro: @@ -1346,6 +1562,13 @@ FT_BEGIN_HEADER #define FT_HAS_FAST_GLYPHS( face ) 0 + /************************************************************************** + * + * @section: + * font_testing_macros + * + */ + /************************************************************************** * * @macro: @@ -1357,7 +1580,7 @@ FT_BEGIN_HEADER * */ #define FT_HAS_GLYPH_NAMES( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_GLYPH_NAMES ) + ( !!( (face)->face_flags & FT_FACE_FLAG_GLYPH_NAMES ) ) /************************************************************************** @@ -1372,7 +1595,7 @@ FT_BEGIN_HEADER * */ #define FT_HAS_MULTIPLE_MASTERS( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_MULTIPLE_MASTERS ) + ( !!( (face)->face_flags & FT_FACE_FLAG_MULTIPLE_MASTERS ) ) /************************************************************************** @@ -1394,7 +1617,7 @@ FT_BEGIN_HEADER * */ #define FT_IS_NAMED_INSTANCE( face ) \ - ( (face)->face_index & 0x7FFF0000L ) + ( !!( (face)->face_index & 0x7FFF0000L ) ) /************************************************************************** @@ -1404,15 +1627,15 @@ FT_BEGIN_HEADER * * @description: * A macro that returns true whenever a face object has been altered by - * @FT_Set_MM_Design_Coordinates, @FT_Set_Var_Design_Coordinates, or - * @FT_Set_Var_Blend_Coordinates. 
+ * @FT_Set_MM_Design_Coordinates, @FT_Set_Var_Design_Coordinates, + * @FT_Set_Var_Blend_Coordinates, or @FT_Set_MM_WeightVector. * * @since: * 2.9 * */ #define FT_IS_VARIATION( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_VARIATION ) + ( !!( (face)->face_flags & FT_FACE_FLAG_VARIATION ) ) /************************************************************************** @@ -1429,7 +1652,7 @@ FT_BEGIN_HEADER * */ #define FT_IS_CID_KEYED( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_CID_KEYED ) + ( !!( (face)->face_flags & FT_FACE_FLAG_CID_KEYED ) ) /************************************************************************** @@ -1443,7 +1666,7 @@ FT_BEGIN_HEADER * */ #define FT_IS_TRICKY( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_TRICKY ) + ( !!( (face)->face_flags & FT_FACE_FLAG_TRICKY ) ) /************************************************************************** @@ -1460,9 +1683,134 @@ FT_BEGIN_HEADER * */ #define FT_HAS_COLOR( face ) \ - ( (face)->face_flags & FT_FACE_FLAG_COLOR ) + ( !!( (face)->face_flags & FT_FACE_FLAG_COLOR ) ) + /************************************************************************** + * + * @macro: + * FT_HAS_SVG + * + * @description: + * A macro that returns true whenever a face object contains an 'SVG~' + * OpenType table. + * + * @since: + * 2.12 + */ +#define FT_HAS_SVG( face ) \ + ( !!( (face)->face_flags & FT_FACE_FLAG_SVG ) ) + + + /************************************************************************** + * + * @macro: + * FT_HAS_SBIX + * + * @description: + * A macro that returns true whenever a face object contains an 'sbix' + * OpenType table *and* outline glyphs. + * + * Currently, FreeType only supports bitmap glyphs in PNG format for this + * table (i.e., JPEG and TIFF formats are unsupported, as are + * Apple-specific formats not part of the OpenType specification). + * + * @note: + * For backward compatibility, a font with an 'sbix' table is treated as + * a bitmap-only face. 
Using @FT_Open_Face with + * @FT_PARAM_TAG_IGNORE_SBIX, an application can switch off 'sbix' + * handling so that the face is treated as an ordinary outline font with + * scalable outlines. + * + * Here is some pseudo code that roughly illustrates how to implement + * 'sbix' handling according to the OpenType specification. + * + * ``` + * if ( FT_HAS_SBIX( face ) ) + * { + * // open font as a scalable one without sbix handling + * FT_Face face2; + * FT_Parameter param = { FT_PARAM_TAG_IGNORE_SBIX, NULL }; + * FT_Open_Args args = { FT_OPEN_PARAMS | ..., + * ..., + * 1, &param }; + * + * + * FT_Open_Face( library, &args, 0, &face2 ); + * + * <sort `face->available_size` as necessary into + * `preferred_sizes`[*]> + * + * for ( i = 0; i < face->num_fixed_sizes; i++ ) + * { + * size = preferred_sizes[i].size; + * + * error = FT_Set_Pixel_Sizes( face, size, size ); + * <error handling omitted> + * + * // check whether we have a glyph in a bitmap strike + * error = FT_Load_Glyph( face, + * glyph_index, + * FT_LOAD_SBITS_ONLY | + * FT_LOAD_BITMAP_METRICS_ONLY ); + * if ( error == FT_Err_Invalid_Argument ) + * continue; + * else if ( error ) + * <other error handling omitted> + * else + * break; + * } + * + * if ( i != face->num_fixed_sizes ) + * <load embedded bitmap with `FT_Load_Glyph`, + * scale it, display it, etc.> + * + * if ( i == face->num_fixed_sizes || + * FT_HAS_SBIX_OVERLAY( face ) ) + * <use `face2` to load outline glyph with `FT_Load_Glyph`, + * scale it, display it on top of the bitmap, etc.> + * } + * ``` + * + * [*] Assuming a target value of 400dpi and available strike sizes 100, + * 200, 300, and 400dpi, a possible order might be [400, 200, 300, 100]: + * scaling 200dpi to 400dpi usually gives better results than scaling + * 300dpi to 400dpi; it is also much faster. However, scaling 100dpi to + * 400dpi can yield a too pixelated result, thus the preference might be + * 300dpi over 100dpi.
+ * + * @since: + * 2.12 + */ +#define FT_HAS_SBIX( face ) \ + ( !!( (face)->face_flags & FT_FACE_FLAG_SBIX ) ) + + + /************************************************************************** + * + * @macro: + * FT_HAS_SBIX_OVERLAY + * + * @description: + * A macro that returns true whenever a face object contains an 'sbix' + * OpenType table with bit~1 in its `flags` field set, instructing the + * application to overlay the bitmap strike with the corresponding + * outline glyph. See @FT_HAS_SBIX for pseudo code how to use it. + * + * @since: + * 2.12 + */ +#define FT_HAS_SBIX_OVERLAY( face ) \ + ( !!( (face)->face_flags & FT_FACE_FLAG_SBIX_OVERLAY ) ) + + + /************************************************************************** + * + * @section: + * face_creation + * + */ + /************************************************************************** * * @enum: @@ -1489,6 +1837,13 @@ FT_BEGIN_HEADER #define FT_STYLE_FLAG_BOLD ( 1 << 1 ) + /************************************************************************** + * + * @section: + * other_api_data + * + */ + /************************************************************************** * * @type: @@ -1501,6 +1856,13 @@ FT_BEGIN_HEADER typedef struct FT_Size_InternalRec_* FT_Size_Internal; + /************************************************************************** + * + * @section: + * sizing_and_scaling + * + */ + /************************************************************************** * * @struct: @@ -1652,6 +2014,13 @@ FT_BEGIN_HEADER } FT_SizeRec; + /************************************************************************** + * + * @section: + * other_api_data + * + */ + /************************************************************************** * * @struct: @@ -1683,6 +2052,13 @@ FT_BEGIN_HEADER typedef struct FT_Slot_InternalRec_* FT_Slot_Internal; + /************************************************************************** + * + * @section: + * glyph_retrieval + * + */ + 
/************************************************************************** * * @struct: @@ -1727,13 +2103,13 @@ FT_BEGIN_HEADER * The advance width of the unhinted glyph. Its value is expressed in * 16.16 fractional pixels, unless @FT_LOAD_LINEAR_DESIGN is set when * loading the glyph. This field can be important to perform correct - * WYSIWYG layout. Only relevant for outline glyphs. + * WYSIWYG layout. Only relevant for scalable glyphs. * * linearVertAdvance :: * The advance height of the unhinted glyph. Its value is expressed in * 16.16 fractional pixels, unless @FT_LOAD_LINEAR_DESIGN is set when * loading the glyph. This field can be important to perform correct - * WYSIWYG layout. Only relevant for outline glyphs. + * WYSIWYG layout. Only relevant for scalable glyphs. * * advance :: * This shorthand is, depending on @FT_LOAD_IGNORE_TRANSFORM, the @@ -1766,6 +2142,13 @@ FT_BEGIN_HEADER * transformed, distorted, emboldened, etc. However, it must not be * freed. * + * [Since 2.10.1] If @FT_LOAD_NO_SCALE is set, outline coordinates of + * OpenType variation fonts for a selected instance are internally + * handled as 26.6 fractional font units but returned as (rounded) + * integers, as expected. To get unrounded font units, don't use + * @FT_LOAD_NO_SCALE but load the glyph with @FT_LOAD_NO_HINTING and + * scale it, using the font's `units_per_EM` value as the ppem. + * * num_subglyphs :: * The number of subglyphs in a composite glyph. 
This field is only * valid for the composite glyph format that should normally only be @@ -1920,6 +2303,13 @@ FT_BEGIN_HEADER /*************************************************************************/ + /************************************************************************** + * + * @section: + * library_setup + * + */ + /************************************************************************** * * @function: @@ -1977,6 +2367,13 @@ FT_BEGIN_HEADER FT_Done_FreeType( FT_Library library ); + /************************************************************************** + * + * @section: + * face_creation + * + */ + /************************************************************************** * * @enum: @@ -2071,7 +2468,9 @@ FT_BEGIN_HEADER * The size in bytes of the file in memory. * * pathname :: - * A pointer to an 8-bit file pathname. + * A pointer to an 8-bit file pathname, which must be a C~string (i.e., + * no null bytes except at the very end). The pointer is not owned by + * FreeType. * * stream :: * A handle to a source stream object. @@ -2089,8 +2488,7 @@ FT_BEGIN_HEADER * Extra parameters passed to the font driver when opening a new face. * * @note: - * The stream type is determined by the contents of `flags` that are - * tested in the following order by @FT_Open_Face: + * The stream type is determined by the contents of `flags`: * * If the @FT_OPEN_MEMORY bit is set, assume that this is a memory file * of `memory_size` bytes, located at `memory_address`. The data are not @@ -2103,6 +2501,9 @@ FT_BEGIN_HEADER * Otherwise, if the @FT_OPEN_PATHNAME bit is set, assume that this is a * normal file and use `pathname` to open it. * + * If none of the above bits are set or if multiple are set at the same + * time, the flags are invalid and @FT_Open_Face fails. + * * If the @FT_OPEN_DRIVER bit is set, @FT_Open_Face only tries to open * the file with the driver whose handler is in `driver`. * @@ -2155,6 +2556,13 @@ FT_BEGIN_HEADER * FreeType error code. 
0~means success. * * @note: + * The `pathname` string should be recognizable as such by a standard + * `fopen` call on your system; in particular, this means that `pathname` + * must not contain null bytes. If that is not sufficient to address all + * file name possibilities (for example, to handle wide character file + * names on Windows in UTF-16 encoding) you might use @FT_Open_Face to + * pass a memory array or a stream object instead. + * * Use @FT_Done_Face to destroy the created @FT_Face object (along with * its slot and sizes). */ @@ -2268,13 +2676,17 @@ FT_BEGIN_HEADER * Each new face object created with this function also owns a default * @FT_Size object, accessible as `face->size`. * - * One @FT_Library instance can have multiple face objects, this is, + * One @FT_Library instance can have multiple face objects, that is, * @FT_Open_Face and its siblings can be called multiple times using the * same `library` argument. * * See the discussion of reference counters in the description of * @FT_Reference_Face. * + * If `FT_OPEN_STREAM` is set in `args->flags`, the stream in + * `args->stream` is automatically closed before this function returns + * any error (including `FT_Err_Invalid_Argument`). + * * @example: * To loop over all faces, use code similar to the following snippet * (omitting the error handling). @@ -2406,8 +2818,8 @@ FT_BEGIN_HEADER * stream attachments. 
*/ FT_EXPORT( FT_Error ) - FT_Attach_Stream( FT_Face face, - FT_Open_Args* parameters ); + FT_Attach_Stream( FT_Face face, + const FT_Open_Args* parameters ); /************************************************************************** @@ -2433,6 +2845,7 @@ FT_BEGIN_HEADER * * @since: * 2.4.2 + * */ FT_EXPORT( FT_Error ) FT_Reference_Face( FT_Face face ); @@ -2462,6 +2875,13 @@ FT_BEGIN_HEADER FT_Done_Face( FT_Face face ); + /************************************************************************** + * + * @section: + * sizing_and_scaling + * + */ + /************************************************************************** * * @function: @@ -2491,7 +2911,7 @@ FT_BEGIN_HEADER * silently uses outlines if there is no bitmap for a given glyph index. * * For GX and OpenType variation fonts, a bitmap strike makes sense only - * if the default instance is active (this is, no glyph variation takes + * if the default instance is active (that is, no glyph variation takes * place); otherwise, FreeType simply ignores bitmap strikes. The same * is true for all named instances that are different from the default * instance. @@ -2657,8 +3077,8 @@ FT_BEGIN_HEADER * 'https://www.freetype.org/freetype2/docs/glyphs/glyphs-2.html'. * * Contrary to @FT_Set_Char_Size, this function doesn't have special code - * to normalize zero-valued widths, heights, or resolutions (which lead - * to errors in most cases). + * to normalize zero-valued widths, heights, or resolutions, which are + * treated as @FT_LOAD_NO_SCALE. * * Don't use this function if you are using the FreeType cache API. */ @@ -2754,6 +3174,13 @@ FT_BEGIN_HEADER FT_UInt pixel_height ); + /************************************************************************** + * + * @section: + * glyph_retrieval + * + */ + /************************************************************************** * * @function: @@ -2774,7 +3201,7 @@ FT_BEGIN_HEADER * * load_flags :: * A flag indicating what to load for this glyph. 
The @FT_LOAD_XXX - * constants can be used to control the glyph loading process (e.g., + * flags can be used to control the glyph loading process (e.g., * whether the outline should be scaled, whether to load bitmaps or * not, whether to hint the outline, etc). * @@ -2782,11 +3209,13 @@ FT_BEGIN_HEADER * FreeType error code. 0~means success. * * @note: - * The loaded glyph may be transformed. See @FT_Set_Transform for the - * details. + * For proper scaling and hinting, the active @FT_Size object owned by + * the face has to be meaningfully initialized by calling + * @FT_Set_Char_Size before this function, for example. The loaded + * glyph may be transformed. See @FT_Set_Transform for the details. * * For subsetted CID-keyed fonts, `FT_Err_Invalid_Argument` is returned - * for invalid CID values (this is, for CID values that don't have a + * for invalid CID values (that is, for CID values that don't have a * corresponding glyph in the font). See the discussion of the * @FT_FACE_FLAG_CID_KEYED flag for more details. * @@ -2800,6 +3229,13 @@ FT_BEGIN_HEADER FT_Int32 load_flags ); + /************************************************************************** + * + * @section: + * character_mapping + * + */ + /************************************************************************** * * @function: @@ -2843,6 +3279,13 @@ FT_BEGIN_HEADER FT_Int32 load_flags ); + /************************************************************************** + * + * @section: + * glyph_retrieval + * + */ + /************************************************************************** * * @enum: @@ -2873,19 +3316,21 @@ FT_BEGIN_HEADER * * FT_LOAD_NO_SCALE :: * Don't scale the loaded outline glyph but keep it in font units. + * This flag is also assumed if @FT_Size owned by the face was not + * properly initialized. * * This flag implies @FT_LOAD_NO_HINTING and @FT_LOAD_NO_BITMAP, and * unsets @FT_LOAD_RENDER. 
* * If the font is 'tricky' (see @FT_FACE_FLAG_TRICKY for more), using * `FT_LOAD_NO_SCALE` usually yields meaningless outlines because the - * subglyphs must be scaled and positioned with hinting instructions. + * subglyphs must be scaled and positioned with hinting instructions. * This can be solved by loading the font without `FT_LOAD_NO_SCALE` * and setting the character size to `font->units_per_EM`. * * FT_LOAD_NO_HINTING :: * Disable hinting. This generally generates 'blurrier' bitmap glyphs - * when the glyph are rendered in any of the anti-aliased modes. See + * when the glyphs are rendered in any of the anti-aliased modes. See * also the note below. * * This flag is implied by @FT_LOAD_NO_SCALE. @@ -2903,6 +3348,15 @@ FT_BEGIN_HEADER * * @FT_LOAD_NO_SCALE always sets this flag. * + * FT_LOAD_SBITS_ONLY :: + * [Since 2.12] This is the opposite of @FT_LOAD_NO_BITMAP, more or + * less: @FT_Load_Glyph returns `FT_Err_Invalid_Argument` if the face + * contains a bitmap strike for the given size (or the strike selected + * by @FT_Select_Size) but there is no glyph in the strike. + * + * Note that this load flag was part of FreeType since version 2.0.6 + * but previously tagged as internal. + * * FT_LOAD_VERTICAL_LAYOUT :: * Load the glyph for vertical text layout. In particular, the * `advance` value in the @FT_GlyphSlotRec structure is set to the @@ -2959,25 +3413,39 @@ FT_BEGIN_HEADER * Disable the auto-hinter. See also the note below. * * FT_LOAD_COLOR :: - * Load colored glyphs. There are slight differences depending on the - * font format. + * Load colored glyphs. FreeType searches in the following order; + * there are slight differences depending on the font format. * - * [Since 2.5] Load embedded color bitmap images. The resulting color - * bitmaps, if available, will have the @FT_PIXEL_MODE_BGRA format, - * with pre-multiplied color channels. 
If the flag is not set and - * color bitmaps are found, they are converted to 256-level gray - * bitmaps, using the @FT_PIXEL_MODE_GRAY format. + * [Since 2.5] Load embedded color bitmap images (provided + * @FT_LOAD_NO_BITMAP is not set). The resulting color bitmaps, if + * available, have the @FT_PIXEL_MODE_BGRA format, with pre-multiplied + * color channels. If the flag is not set and color bitmaps are found, + * they are converted to 256-level gray bitmaps, using the + * @FT_PIXEL_MODE_GRAY format. * - * [Since 2.10, experimental] If the glyph index contains an entry in + * [Since 2.12] If the glyph index maps to an entry in the face's + * 'SVG~' table, load the associated SVG document from this table and + * set the `format` field of @FT_GlyphSlotRec to @FT_GLYPH_FORMAT_SVG + * ([since 2.13.1] provided @FT_LOAD_NO_SVG is not set). Note that + * FreeType itself can't render SVG documents; however, the library + * provides hooks to seamlessly integrate an external renderer. See + * sections @ot_svg_driver and @svg_fonts for more. + * + * [Since 2.10, experimental] If the glyph index maps to an entry in * the face's 'COLR' table with a 'CPAL' palette table (as defined in * the OpenType specification), make @FT_Render_Glyph provide a default * blending of the color glyph layers associated with the glyph index, * using the same bitmap format as embedded color bitmap images. This - * is mainly for convenience; for full control of color layers use + * is mainly for convenience and works only for glyphs in 'COLR' v0 + * tables (or glyphs in 'COLR' v1 tables that exclusively use v0 + * features). For full control of color layers use * @FT_Get_Color_Glyph_Layer and FreeType's color functions like * @FT_Palette_Select instead of setting @FT_LOAD_COLOR for rendering * so that the client application can handle blending by itself. * + * FT_LOAD_NO_SVG :: + * [Since 2.13.1] Ignore SVG glyph data when loading. 
+ * * FT_LOAD_COMPUTE_METRICS :: * [Since 2.6.1] Compute glyph metrics from the glyph data, without the * use of bundled metrics tables (for example, the 'hdmx' table in @@ -3024,30 +3492,32 @@ FT_BEGIN_HEADER * */ #define FT_LOAD_DEFAULT 0x0 -#define FT_LOAD_NO_SCALE ( 1L << 0 ) -#define FT_LOAD_NO_HINTING ( 1L << 1 ) -#define FT_LOAD_RENDER ( 1L << 2 ) -#define FT_LOAD_NO_BITMAP ( 1L << 3 ) -#define FT_LOAD_VERTICAL_LAYOUT ( 1L << 4 ) -#define FT_LOAD_FORCE_AUTOHINT ( 1L << 5 ) -#define FT_LOAD_CROP_BITMAP ( 1L << 6 ) -#define FT_LOAD_PEDANTIC ( 1L << 7 ) -#define FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH ( 1L << 9 ) +#define FT_LOAD_NO_SCALE ( 1L << 0 ) +#define FT_LOAD_NO_HINTING ( 1L << 1 ) +#define FT_LOAD_RENDER ( 1L << 2 ) +#define FT_LOAD_NO_BITMAP ( 1L << 3 ) +#define FT_LOAD_VERTICAL_LAYOUT ( 1L << 4 ) +#define FT_LOAD_FORCE_AUTOHINT ( 1L << 5 ) +#define FT_LOAD_CROP_BITMAP ( 1L << 6 ) +#define FT_LOAD_PEDANTIC ( 1L << 7 ) +#define FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH ( 1L << 9 ) #define FT_LOAD_NO_RECURSE ( 1L << 10 ) #define FT_LOAD_IGNORE_TRANSFORM ( 1L << 11 ) #define FT_LOAD_MONOCHROME ( 1L << 12 ) #define FT_LOAD_LINEAR_DESIGN ( 1L << 13 ) +#define FT_LOAD_SBITS_ONLY ( 1L << 14 ) #define FT_LOAD_NO_AUTOHINT ( 1L << 15 ) /* Bits 16-19 are used by `FT_LOAD_TARGET_` */ #define FT_LOAD_COLOR ( 1L << 20 ) #define FT_LOAD_COMPUTE_METRICS ( 1L << 21 ) #define FT_LOAD_BITMAP_METRICS_ONLY ( 1L << 22 ) +#define FT_LOAD_NO_SVG ( 1L << 24 ) /* */ /* used internally only by certain font drivers */ -#define FT_LOAD_ADVANCE_ONLY ( 1L << 8 ) -#define FT_LOAD_SBITS_ONLY ( 1L << 14 ) +#define FT_LOAD_ADVANCE_ONLY ( 1L << 8 ) +#define FT_LOAD_SVG_ONLY ( 1L << 23 ) /************************************************************************** @@ -3137,7 +3607,7 @@ FT_BEGIN_HEADER * necessary to empty the cache after a mode switch to avoid false hits. 
* */ -#define FT_LOAD_TARGET_( x ) ( (FT_Int32)( (x) & 15 ) << 16 ) +#define FT_LOAD_TARGET_( x ) ( FT_STATIC_CAST( FT_Int32, (x) & 15 ) << 16 ) #define FT_LOAD_TARGET_NORMAL FT_LOAD_TARGET_( FT_RENDER_MODE_NORMAL ) #define FT_LOAD_TARGET_LIGHT FT_LOAD_TARGET_( FT_RENDER_MODE_LIGHT ) @@ -3156,9 +3626,17 @@ FT_BEGIN_HEADER * @FT_LOAD_TARGET_XXX value. * */ -#define FT_LOAD_TARGET_MODE( x ) ( (FT_Render_Mode)( ( (x) >> 16 ) & 15 ) ) +#define FT_LOAD_TARGET_MODE( x ) \ + FT_STATIC_CAST( FT_Render_Mode, ( (x) >> 16 ) & 15 ) + /************************************************************************** + * + * @section: + * sizing_and_scaling + * + */ + /************************************************************************** * * @function: @@ -3177,9 +3655,16 @@ FT_BEGIN_HEADER * A pointer to the transformation's 2x2 matrix. Use `NULL` for the * identity matrix. * delta :: - * A pointer to the translation vector. Use `NULL` for the null vector. + * A pointer to the translation vector. Use `NULL` for the null + * vector. * * @note: + * This function is provided as a convenience, but keep in mind that + * @FT_Matrix coefficients are only 16.16 fixed-point values, which can + * limit the accuracy of the results. Using floating-point computations + * to perform the transform directly in client code instead will always + * yield better numbers. + * * The transformation is only applied to scalable image formats after the * glyph has been loaded. It means that hinting is unaltered by the * transformation and is performed on the character size given in the @@ -3194,6 +3679,46 @@ FT_BEGIN_HEADER FT_Vector* delta ); + /************************************************************************** + * + * @function: + * FT_Get_Transform + * + * @description: + * Return the transformation that is applied to glyph images when they + * are loaded into a glyph slot through @FT_Load_Glyph. See + * @FT_Set_Transform for more details. 
+ * + * @input: + * face :: + * A handle to the source face object. + * + * @output: + * matrix :: + * A pointer to a transformation's 2x2 matrix. Set this to NULL if you + * are not interested in the value. + * + * delta :: + * A pointer to a translation vector. Set this to NULL if you are not + * interested in the value. + * + * @since: + * 2.11 + * + */ + FT_EXPORT( void ) + FT_Get_Transform( FT_Face face, + FT_Matrix* matrix, + FT_Vector* delta ); + + + /************************************************************************** + * + * @section: + * glyph_retrieval + * + */ + /************************************************************************** * * @enum: @@ -3212,6 +3737,10 @@ FT_BEGIN_HEADER * correction to correctly render non-monochrome glyph bitmaps onto a * surface; see @FT_Render_Glyph. * + * The @FT_RENDER_MODE_SDF is a special render mode that uses up to 256 + * distance values, indicating the signed distance from the grid position + * to the nearest outline. + * * @values: * FT_RENDER_MODE_NORMAL :: * Default render mode; it corresponds to 8-bit anti-aliased bitmaps. @@ -3237,19 +3766,88 @@ FT_BEGIN_HEADER * bitmaps that are 3~times the height of the original glyph outline in * pixels and use the @FT_PIXEL_MODE_LCD_V mode. * - * @note: - * Should you define `FT_CONFIG_OPTION_SUBPIXEL_RENDERING` in your - * `ftoption.h`, which enables patented ClearType-style rendering, the - * LCD-optimized glyph bitmaps should be filtered to reduce color fringes - * inherent to this technology. You can either set up LCD filtering with - * @FT_Library_SetLcdFilter or @FT_Face_Properties, or do the filtering - * yourself. The default FreeType LCD rendering technology does not - * require filtering. + * FT_RENDER_MODE_SDF :: + * This mode corresponds to 8-bit, single-channel signed distance field + * (SDF) bitmaps. Each pixel in the SDF grid is the value from the + * pixel's position to the nearest glyph's outline. 
The distances are + * calculated from the center of the pixel and are positive if they are + * filled by the outline (i.e., inside the outline) and negative + * otherwise. Check the note below on how to convert the output values + * to usable data. * + * @note: * The selected render mode only affects vector glyphs of a font. * Embedded bitmaps often have a different pixel mode like * @FT_PIXEL_MODE_MONO. You can use @FT_Bitmap_Convert to transform them * into 8-bit pixmaps. + * + * For @FT_RENDER_MODE_SDF the output bitmap buffer contains normalized + * distances that are packed into unsigned 8-bit values. To get pixel + * values in floating point representation use the following pseudo-C + * code for the conversion. + * + * ``` + * // Load glyph and render using FT_RENDER_MODE_SDF, + * // then use the output buffer as follows. + * + * ... + * FT_Byte buffer = glyph->bitmap->buffer; + * + * + * for pixel in buffer + * { + * // `sd` is the signed distance and `spread` is the current spread; + * // the default spread is 2 and can be changed. + * + * float sd = (float)pixel - 128.0f; + * + * + * // Convert to pixel values. + * sd = ( sd / 128.0f ) * spread; + * + * // Store `sd` in a buffer or use as required. + * } + * + * ``` + * + * FreeType has two rasterizers for generating SDF, namely: + * + * 1. `sdf` for generating SDF directly from glyph's outline, and + * + * 2. `bsdf` for generating SDF from rasterized bitmaps. + * + * Depending on the glyph type (i.e., outline or bitmap), one of the two + * rasterizers is chosen at runtime and used for generating SDFs. To + * force the use of `bsdf` you should render the glyph with any of the + * FreeType's other rendering modes (e.g., `FT_RENDER_MODE_NORMAL`) and + * then re-render with `FT_RENDER_MODE_SDF`. + * + * There are some issues with stability and possible failures of the SDF + * renderers (specifically `sdf`). + * + * 1. 
The `sdf` rasterizer is sensitive to really small features (e.g., + * sharp turns that are less than 1~pixel) and imperfections in the + * glyph's outline, causing artifacts in the final output. + * + * 2. The `sdf` rasterizer has limited support for handling intersecting + * contours and *cannot* handle self-intersecting contours whatsoever. + * Self-intersection happens when a single connected contour + * intersects itself at some point; having these in your font + * definitely poses a problem to the rasterizer and cause artifacts, + * too. + * + * 3. Generating SDF for really small glyphs may result in undesirable + * output; the pixel grid (which stores distance information) becomes + * too coarse. + * + * 4. Since the output buffer is normalized, precision at smaller spreads + * is greater than precision at larger spread values because the + * output range of [0..255] gets mapped to a smaller SDF range. A + * spread of~2 should be sufficient in most cases. + * + * Points (1) and (2) can be avoided by using the `bsdf` rasterizer, + * which is more stable than the `sdf` rasterizer in general. + * */ typedef enum FT_Render_Mode_ { @@ -3258,6 +3856,7 @@ FT_BEGIN_HEADER FT_RENDER_MODE_MONO, FT_RENDER_MODE_LCD, FT_RENDER_MODE_LCD_V, + FT_RENDER_MODE_SDF, FT_RENDER_MODE_MAX @@ -3289,7 +3888,7 @@ FT_BEGIN_HEADER * @FT_Render_Mode for a list of possible values. * * If @FT_RENDER_MODE_NORMAL is used, a previous call of @FT_Load_Glyph - * with flag @FT_LOAD_COLOR makes FT_Render_Glyph provide a default + * with flag @FT_LOAD_COLOR makes `FT_Render_Glyph` provide a default * blending of colored glyph layers associated with the current glyph * slot (provided the font contains such layers) instead of rendering * the glyph slot's outline. This is an experimental feature; see @@ -3299,9 +3898,6 @@ FT_BEGIN_HEADER * FreeType error code. 0~means success. 
* * @note: - * To get meaningful results, font scaling values must be set with - * functions like @FT_Set_Char_Size before calling `FT_Render_Glyph`. - * * When FreeType outputs a bitmap of a glyph, it really outputs an alpha * coverage map. If a pixel is completely covered by a filled-in * outline, the bitmap contains 0xFF at that pixel, meaning that @@ -3345,7 +3941,8 @@ FT_BEGIN_HEADER * * which is known as the OVER operator. * - * To correctly composite an antialiased pixel of a glyph onto a surface, + * To correctly composite an anti-aliased pixel of a glyph onto a + * surface, * * 1. take the foreground and background colors (e.g., in sRGB space) * and apply gamma to get them in a linear space, @@ -3518,86 +4115,10 @@ FT_BEGIN_HEADER /************************************************************************** * - * @function: - * FT_Get_Glyph_Name + * @section: + * character_mapping * - * @description: - * Retrieve the ASCII name of a given glyph in a face. This only works - * for those faces where @FT_HAS_GLYPH_NAMES(face) returns~1. - * - * @input: - * face :: - * A handle to a source face object. - * - * glyph_index :: - * The glyph index. - * - * buffer_max :: - * The maximum number of bytes available in the buffer. - * - * @output: - * buffer :: - * A pointer to a target buffer where the name is copied to. - * - * @return: - * FreeType error code. 0~means success. - * - * @note: - * An error is returned if the face doesn't provide glyph names or if the - * glyph index is invalid. In all cases of failure, the first byte of - * `buffer` is set to~0 to indicate an empty name. - * - * The glyph name is truncated to fit within the buffer if it is too - * long. The returned string is always zero-terminated. - * - * Be aware that FreeType reorders glyph indices internally so that glyph - * index~0 always corresponds to the 'missing glyph' (called '.notdef'). 
- * - * This function always returns an error if the config macro - * `FT_CONFIG_OPTION_NO_GLYPH_NAMES` is not defined in `ftoption.h`. */ - FT_EXPORT( FT_Error ) - FT_Get_Glyph_Name( FT_Face face, - FT_UInt glyph_index, - FT_Pointer buffer, - FT_UInt buffer_max ); - - - /************************************************************************** - * - * @function: - * FT_Get_Postscript_Name - * - * @description: - * Retrieve the ASCII PostScript name of a given face, if available. - * This only works with PostScript, TrueType, and OpenType fonts. - * - * @input: - * face :: - * A handle to the source face object. - * - * @return: - * A pointer to the face's PostScript name. `NULL` if unavailable. - * - * @note: - * The returned pointer is owned by the face and is destroyed with it. - * - * For variation fonts, this string changes if you select a different - * instance, and you have to call `FT_Get_PostScript_Name` again to - * retrieve it. FreeType follows Adobe TechNote #5902, 'Generating - * PostScript Names for Fonts Using OpenType Font Variations'. - * - * https://download.macromedia.com/pub/developer/opentype/tech-notes/5902.AdobePSNameGeneration.html - * - * [Since 2.9] Special PostScript names for named instances are only - * returned if the named instance is set with @FT_Set_Named_Instance (and - * the font has corresponding entries in its 'fvar' table). If - * @FT_IS_VARIATION returns true, the algorithmically derived PostScript - * name is provided, not looking up special entries for named instances. 
- */ - FT_EXPORT( const char* ) - FT_Get_Postscript_Name( FT_Face face ); - /************************************************************************** * @@ -3815,6 +4336,13 @@ FT_BEGIN_HEADER FT_UInt *agindex ); + /************************************************************************** + * + * @section: + * face_creation + * + */ + /************************************************************************** * * @function: @@ -3913,13 +4441,21 @@ FT_BEGIN_HEADER FT_Parameter* properties ); + /************************************************************************** + * + * @section: + * information_retrieval + * + */ + /************************************************************************** * * @function: * FT_Get_Name_Index * * @description: - * Return the glyph index of a given glyph name. + * Return the glyph index of a given glyph name. This only works + * for those faces where @FT_HAS_GLYPH_NAMES returns true. * * @input: * face :: @@ -3930,10 +4466,106 @@ FT_BEGIN_HEADER * * @return: * The glyph index. 0~means 'undefined character code'. + * + * @note: + * Acceptable glyph names might come from the [Adobe Glyph + * List](https://github.com/adobe-type-tools/agl-aglfn). See + * @FT_Get_Glyph_Name for the inverse functionality. + * + * This function has limited capabilities if the config macro + * `FT_CONFIG_OPTION_POSTSCRIPT_NAMES` is not defined in `ftoption.h`: + * It then works only for fonts that actually embed glyph names (which + * many recent OpenType fonts do not). */ FT_EXPORT( FT_UInt ) - FT_Get_Name_Index( FT_Face face, - FT_String* glyph_name ); + FT_Get_Name_Index( FT_Face face, + const FT_String* glyph_name ); + + + /************************************************************************** + * + * @function: + * FT_Get_Glyph_Name + * + * @description: + * Retrieve the ASCII name of a given glyph in a face. This only works + * for those faces where @FT_HAS_GLYPH_NAMES returns true. 
+ * + * @input: + * face :: + * A handle to a source face object. + * + * glyph_index :: + * The glyph index. + * + * buffer_max :: + * The maximum number of bytes available in the buffer. + * + * @output: + * buffer :: + * A pointer to a target buffer where the name is copied to. + * + * @return: + * FreeType error code. 0~means success. + * + * @note: + * An error is returned if the face doesn't provide glyph names or if the + * glyph index is invalid. In all cases of failure, the first byte of + * `buffer` is set to~0 to indicate an empty name. + * + * The glyph name is truncated to fit within the buffer if it is too + * long. The returned string is always zero-terminated. + * + * Be aware that FreeType reorders glyph indices internally so that glyph + * index~0 always corresponds to the 'missing glyph' (called '.notdef'). + * + * This function has limited capabilities if the config macro + * `FT_CONFIG_OPTION_POSTSCRIPT_NAMES` is not defined in `ftoption.h`: + * It then works only for fonts that actually embed glyph names (which + * many recent OpenType fonts do not). + */ + FT_EXPORT( FT_Error ) + FT_Get_Glyph_Name( FT_Face face, + FT_UInt glyph_index, + FT_Pointer buffer, + FT_UInt buffer_max ); + + + /************************************************************************** + * + * @function: + * FT_Get_Postscript_Name + * + * @description: + * Retrieve the ASCII PostScript name of a given face, if available. + * This only works with PostScript, TrueType, and OpenType fonts. + * + * @input: + * face :: + * A handle to the source face object. + * + * @return: + * A pointer to the face's PostScript name. `NULL` if unavailable. + * + * @note: + * The returned pointer is owned by the face and is destroyed with it. + * + * For variation fonts, this string changes if you select a different + * instance, and you have to call `FT_Get_PostScript_Name` again to + * retrieve it. 
FreeType follows Adobe TechNote #5902, 'Generating + * PostScript Names for Fonts Using OpenType Font Variations'. + * + * https://download.macromedia.com/pub/developer/opentype/tech-notes/5902.AdobePSNameGeneration.html + * + * [Since 2.9] Special PostScript names for named instances are only + * returned if the named instance is set with @FT_Set_Named_Instance (and + * the font has corresponding entries in its 'fvar' table or is the + * default named instance). If @FT_IS_VARIATION returns true, the + * algorithmically derived PostScript name is provided, not looking up + * special entries for named instances. + */ + FT_EXPORT( const char* ) + FT_Get_Postscript_Name( FT_Face face ); /************************************************************************** @@ -4022,175 +4654,6 @@ FT_BEGIN_HEADER FT_Matrix *p_transform ); - /************************************************************************** - * - * @section: - * layer_management - * - * @title: - * Glyph Layer Management - * - * @abstract: - * Retrieving and manipulating OpenType's 'COLR' table data. - * - * @description: - * The functions described here allow access of colored glyph layer data - * in OpenType's 'COLR' tables. - */ - - - /************************************************************************** - * - * @struct: - * FT_LayerIterator - * - * @description: - * This iterator object is needed for @FT_Get_Color_Glyph_Layer. - * - * @fields: - * num_layers :: - * The number of glyph layers for the requested glyph index. Will be - * set by @FT_Get_Color_Glyph_Layer. - * - * layer :: - * The current layer. Will be set by @FT_Get_Color_Glyph_Layer. - * - * p :: - * An opaque pointer into 'COLR' table data. The caller must set this - * to `NULL` before the first call of @FT_Get_Color_Glyph_Layer. 
- */ - typedef struct FT_LayerIterator_ - { - FT_UInt num_layers; - FT_UInt layer; - FT_Byte* p; - - } FT_LayerIterator; - - - /************************************************************************** - * - * @function: - * FT_Get_Color_Glyph_Layer - * - * @description: - * This is an interface to the 'COLR' table in OpenType fonts to - * iteratively retrieve the colored glyph layers associated with the - * current glyph slot. - * - * https://docs.microsoft.com/en-us/typography/opentype/spec/colr - * - * The glyph layer data for a given glyph index, if present, provides an - * alternative, multi-colour glyph representation: Instead of rendering - * the outline or bitmap with the given glyph index, glyphs with the - * indices and colors returned by this function are rendered layer by - * layer. - * - * The returned elements are ordered in the z~direction from bottom to - * top; the 'n'th element should be rendered with the associated palette - * color and blended on top of the already rendered layers (elements 0, - * 1, ..., n-1). - * - * @input: - * face :: - * A handle to the parent face object. - * - * base_glyph :: - * The glyph index the colored glyph layers are associated with. - * - * @inout: - * iterator :: - * An @FT_LayerIterator object. For the first call you should set - * `iterator->p` to `NULL`. For all following calls, simply use the - * same object again. - * - * @output: - * aglyph_index :: - * The glyph index of the current layer. - * - * acolor_index :: - * The color index into the font face's color palette of the current - * layer. The value 0xFFFF is special; it doesn't reference a palette - * entry but indicates that the text foreground color should be used - * instead (to be set up by the application outside of FreeType). - * - * The color palette can be retrieved with @FT_Palette_Select. - * - * @return: - * Value~1 if everything is OK. If there are no more layers (or if there - * are no layers at all), value~0 gets returned. 
In case of an error, - * value~0 is returned also. - * - * @note: - * This function is necessary if you want to handle glyph layers by - * yourself. In particular, functions that operate with @FT_GlyphRec - * objects (like @FT_Get_Glyph or @FT_Glyph_To_Bitmap) don't have access - * to this information. - * - * Note that @FT_Render_Glyph is able to handle colored glyph layers - * automatically if the @FT_LOAD_COLOR flag is passed to a previous call - * to @FT_Load_Glyph. [This is an experimental feature.] - * - * @example: - * ``` - * FT_Color* palette; - * FT_LayerIterator iterator; - * - * FT_Bool have_layers; - * FT_UInt layer_glyph_index; - * FT_UInt layer_color_index; - * - * - * error = FT_Palette_Select( face, palette_index, &palette ); - * if ( error ) - * palette = NULL; - * - * iterator.p = NULL; - * have_layers = FT_Get_Color_Glyph_Layer( face, - * glyph_index, - * &layer_glyph_index, - * &layer_color_index, - * &iterator ); - * - * if ( palette && have_layers ) - * { - * do - * { - * FT_Color layer_color; - * - * - * if ( layer_color_index == 0xFFFF ) - * layer_color = text_foreground_color; - * else - * layer_color = palette[layer_color_index]; - * - * // Load and render glyph `layer_glyph_index', then - * // blend resulting pixmap (using color `layer_color') - * // with previously created pixmaps. 
- * - * } while ( FT_Get_Color_Glyph_Layer( face, - * glyph_index, - * &layer_glyph_index, - * &layer_color_index, - * &iterator ) ); - * } - * ``` - */ - FT_EXPORT( FT_Bool ) - FT_Get_Color_Glyph_Layer( FT_Face face, - FT_UInt base_glyph, - FT_UInt *aglyph_index, - FT_UInt *acolor_index, - FT_LayerIterator* iterator ); - - - /************************************************************************** - * - * @section: - * base_interface - * - */ - /************************************************************************** * * @enum: @@ -4274,6 +4737,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.8 + * */ FT_EXPORT( FT_UShort ) FT_Get_FSType_Flags( FT_Face face ); @@ -4367,6 +4831,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.6 + * */ FT_EXPORT( FT_UInt ) FT_Face_GetCharVariantIndex( FT_Face face, @@ -4403,6 +4868,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.6 + * */ FT_EXPORT( FT_Int ) FT_Face_GetCharVariantIsDefault( FT_Face face, @@ -4434,6 +4900,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.6 + * */ FT_EXPORT( FT_UInt32* ) FT_Face_GetVariantSelectors( FT_Face face ); @@ -4467,6 +4934,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.6 + * */ FT_EXPORT( FT_UInt32* ) FT_Face_GetVariantsOfChar( FT_Face face, @@ -4501,6 +4969,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.6 + * */ FT_EXPORT( FT_UInt32* ) FT_Face_GetCharsOfVariant( FT_Face face, @@ -4520,7 +4989,8 @@ FT_BEGIN_HEADER * * @description: * This section contains various functions used to perform computations - * on 16.16 fixed-float numbers or 2d vectors. + * on 16.16 fixed-point numbers or 2D vectors. FreeType does not use + * floating-point data types. * * **Attention**: Most arithmetic functions take `FT_Long` as arguments. * For historical reasons, FreeType was designed under the assumption @@ -4724,32 +5194,10 @@ FT_BEGIN_HEADER /************************************************************************** * * @section: - * version - * - * @title: - * FreeType Version - * - * @abstract: - * Functions and macros related to FreeType versions. 
- * - * @description: - * Note that those functions and macros are of limited use because even a - * new release of FreeType with only documentation changes increases the - * version number. - * - * @order: - * FT_Library_Version - * - * FREETYPE_MAJOR - * FREETYPE_MINOR - * FREETYPE_PATCH - * - * FT_Face_CheckTrueTypePatents - * FT_Face_SetUnpatentedHinting + * library_setup * */ - /************************************************************************** * * @enum: @@ -4773,8 +5221,8 @@ FT_BEGIN_HEADER * */ #define FREETYPE_MAJOR 2 -#define FREETYPE_MINOR 10 -#define FREETYPE_PATCH 0 +#define FREETYPE_MINOR 13 +#define FREETYPE_PATCH 2 /************************************************************************** @@ -4816,6 +5264,13 @@ FT_BEGIN_HEADER FT_Int *apatch ); + /************************************************************************** + * + * @section: + * other_api_data + * + */ + /************************************************************************** * * @function: @@ -4836,6 +5291,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.5 + * */ FT_EXPORT( FT_Bool ) FT_Face_CheckTrueTypePatents( FT_Face face ); @@ -4864,6 +5320,7 @@ FT_BEGIN_HEADER * * @since: * 2.3.5 + * */ FT_EXPORT( FT_Bool ) FT_Face_SetUnpatentedHinting( FT_Face face, diff --git a/Source/ThirdParty/freetype/ft2build.h b/Source/ThirdParty/freetype/ft2build.h index e3f488794..58491ceea 100644 --- a/Source/ThirdParty/freetype/ft2build.h +++ b/Source/ThirdParty/freetype/ft2build.h @@ -4,7 +4,7 @@ * * FreeType 2 build and setup macros. * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -18,16 +18,14 @@ /************************************************************************** * - * This is the 'entry point' for FreeType header file inclusions. 
It is - * the only header file which should be included directly; all other - * FreeType header files should be accessed with macro names (after - * including `ft2build.h`). + * This is the 'entry point' for FreeType header file inclusions, to be + * loaded before all other header files. * * A typical example is * * ``` * #include - * #include FT_FREETYPE_H + * #include * ``` * */ diff --git a/Source/ThirdParty/freetype/ftadvanc.h b/Source/ThirdParty/freetype/ftadvanc.h index 95c38f92b..4560ded6d 100644 --- a/Source/ThirdParty/freetype/ftadvanc.h +++ b/Source/ThirdParty/freetype/ftadvanc.h @@ -4,7 +4,7 @@ * * Quick computation of advance widths (specification only). * - * Copyright (C) 2008-2019 by + * Copyright (C) 2008-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,8 +20,7 @@ #define FTADVANC_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -96,6 +95,7 @@ FT_BEGIN_HEADER * load_flags :: * A set of bit flags similar to those used when calling * @FT_Load_Glyph, used to determine what kind of advances you need. + * * @output: * padvance :: * The advance value. If scaling is performed (based on the value of diff --git a/Source/ThirdParty/freetype/ftbbox.h b/Source/ThirdParty/freetype/ftbbox.h index 22da70c0d..fc21740fc 100644 --- a/Source/ThirdParty/freetype/ftbbox.h +++ b/Source/ThirdParty/freetype/ftbbox.h @@ -4,7 +4,7 @@ * * FreeType exact bbox computation (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -31,8 +31,7 @@ #define FTBBOX_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" 
diff --git a/Source/ThirdParty/freetype/ftbdf.h b/Source/ThirdParty/freetype/ftbdf.h index 1c46da598..e8ce64312 100644 --- a/Source/ThirdParty/freetype/ftbdf.h +++ b/Source/ThirdParty/freetype/ftbdf.h @@ -4,7 +4,7 @@ * * FreeType API for accessing BDF-specific strings (specification). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTBDF_H_ #define FTBDF_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftbitmap.h b/Source/ThirdParty/freetype/ftbitmap.h index a6acdb969..eb6b4b1ee 100644 --- a/Source/ThirdParty/freetype/ftbitmap.h +++ b/Source/ThirdParty/freetype/ftbitmap.h @@ -4,7 +4,7 @@ * * FreeType utility functions for bitmaps (specification). * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,9 +20,8 @@ #define FTBITMAP_H_ -#include -#include FT_FREETYPE_H -#include FT_COLOR_H +#include +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftbzip2.h b/Source/ThirdParty/freetype/ftbzip2.h index ae88cfdbd..7d29f4682 100644 --- a/Source/ThirdParty/freetype/ftbzip2.h +++ b/Source/ThirdParty/freetype/ftbzip2.h @@ -4,7 +4,7 @@ * * Bzip2-compressed stream support. * - * Copyright (C) 2010-2019 by + * Copyright (C) 2010-2023 by * Joel Klinghed. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTBZIP2_H_ #define FTBZIP2_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -43,6 +42,16 @@ FT_BEGIN_HEADER * Using bzip2-compressed font files. 
* * @description: + * In certain builds of the library, bzip2 compression recognition is + * automatically handled when calling @FT_New_Face or @FT_Open_Face. + * This means that if no font driver is capable of handling the raw + * compressed file, the library will try to open a bzip2 compressed + * stream from it and re-open the face with it. + * + * The stream implementation is very basic and resets the decompression + * process each time seeking backwards is needed within the stream, + * which significantly undermines the performance. + * * This section contains the declaration of Bzip2-specific functions. * */ @@ -75,15 +84,6 @@ FT_BEGIN_HEADER * **not** call `FT_Stream_Close` on the source stream. None of the * stream objects will be released to the heap. * - * The stream implementation is very basic and resets the decompression - * process each time seeking backwards is needed within the stream. - * - * In certain builds of the library, bzip2 compression recognition is - * automatically handled when calling @FT_New_Face or @FT_Open_Face. - * This means that if no font driver is capable of handling the raw - * compressed file, the library will try to open a bzip2 compressed - * stream from it and re-open the face with it. - * * This function may return `FT_Err_Unimplemented_Feature` if your build * of FreeType was not compiled with bzip2 support. */ diff --git a/Source/ThirdParty/freetype/ftcache.h b/Source/ThirdParty/freetype/ftcache.h index 0d589d0b3..a2072e26b 100644 --- a/Source/ThirdParty/freetype/ftcache.h +++ b/Source/ThirdParty/freetype/ftcache.h @@ -4,7 +4,7 @@ * * FreeType Cache subsystem (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -20,8 +20,7 @@ #define FTCACHE_H_ -#include -#include FT_GLYPH_H +#include FT_BEGIN_HEADER @@ -44,61 +43,61 @@ FT_BEGIN_HEADER * objects, as well as caching information like character maps and glyph * images while limiting their maximum memory usage. * - * Note that all types and functions begin with the `FTC_` prefix. + * Note that all types and functions begin with the `FTC_` prefix rather + * than the usual `FT_` prefix in the rest of FreeType. * - * The cache is highly portable and thus doesn't know anything about the - * fonts installed on your system, or how to access them. This implies - * the following scheme: + * The cache is highly portable and, thus, doesn't know anything about + * the fonts installed on your system, or how to access them. Therefore, + * it requires the following. * - * First, available or installed font faces are uniquely identified by - * @FTC_FaceID values, provided to the cache by the client. Note that - * the cache only stores and compares these values, and doesn't try to - * interpret them in any way. + * * @FTC_FaceID, an arbitrary non-zero value that uniquely identifies + * available or installed font faces, has to be provided to the + * cache by the client. Note that the cache only stores and compares + * these values and doesn't try to interpret them in any way, but they + * have to be persistent on the client side. * - * Second, the cache calls, only when needed, a client-provided function - * to convert an @FTC_FaceID into a new @FT_Face object. The latter is - * then completely managed by the cache, including its termination - * through @FT_Done_Face. To monitor termination of face objects, the - * finalizer callback in the `generic` field of the @FT_Face object can - * be used, which might also be used to store the @FTC_FaceID of the - * face. 
+ * * @FTC_Face_Requester, a method to convert an @FTC_FaceID into a new + * @FT_Face object when necessary, has to be provided to the cache by + * the client. The @FT_Face object is completely managed by the cache, + * including its termination through @FT_Done_Face. To monitor + * termination of face objects, the finalizer callback in the `generic` + * field of the @FT_Face object can be used, which might also be used + * to store the @FTC_FaceID of the face. * - * Clients are free to map face IDs to anything else. The most simple - * usage is to associate them to a (pathname,face_index) pair that is - * used to call @FT_New_Face. However, more complex schemes are also - * possible. + * Clients are free to map face IDs to anything useful. The most simple + * usage is, for example, to associate them to a `{pathname,face_index}` + * pair that is then used by @FTC_Face_Requester to call @FT_New_Face. + * However, more complex schemes are also possible. * * Note that for the cache to work correctly, the face ID values must be * **persistent**, which means that the contents they point to should not * change at runtime, or that their value should not become invalid. - * * If this is unavoidable (e.g., when a font is uninstalled at runtime), - * you should call @FTC_Manager_RemoveFaceID as soon as possible, to let + * you should call @FTC_Manager_RemoveFaceID as soon as possible to let * the cache get rid of any references to the old @FTC_FaceID it may keep * internally. Failure to do so will lead to incorrect behaviour or even - * crashes. + * crashes in @FTC_Face_Requester. * * To use the cache, start with calling @FTC_Manager_New to create a new * @FTC_Manager object, which models a single cache instance. You can * then look up @FT_Face and @FT_Size objects with - * @FTC_Manager_LookupFace and @FTC_Manager_LookupSize, respectively. + * @FTC_Manager_LookupFace and @FTC_Manager_LookupSize, respectively, and + * use them in any FreeType work stream. 
You can also cache other + * FreeType objects as follows. * - * If you want to use the charmap caching, call @FTC_CMapCache_New, then - * later use @FTC_CMapCache_Lookup to perform the equivalent of - * @FT_Get_Char_Index, only much faster. + * * If you want to use the charmap caching, call @FTC_CMapCache_New, + * then later use @FTC_CMapCache_Lookup to perform the equivalent of + * @FT_Get_Char_Index, only much faster. * - * If you want to use the @FT_Glyph caching, call @FTC_ImageCache, then - * later use @FTC_ImageCache_Lookup to retrieve the corresponding - * @FT_Glyph objects from the cache. - * - * If you need lots of small bitmaps, it is much more memory efficient to - * call @FTC_SBitCache_New followed by @FTC_SBitCache_Lookup. This - * returns @FTC_SBitRec structures, which are used to store small bitmaps - * directly. (A small bitmap is one whose metrics and dimensions all fit - * into 8-bit integers). - * - * We hope to also provide a kerning cache in the near future. + * * If you want to use the @FT_Glyph caching, call @FTC_ImageCache_New, + * then later use @FTC_ImageCache_Lookup to retrieve the corresponding + * @FT_Glyph objects from the cache. * + * * If you need lots of small bitmaps, it is much more memory-efficient + * to call @FTC_SBitCache_New followed by @FTC_SBitCache_Lookup. This + * returns @FTC_SBitRec structures, which are used to store small + * bitmaps directly. (A small bitmap is one whose metrics and + * dimensions all fit into 8-bit integers). * * @order: * FTC_Manager @@ -425,7 +424,7 @@ FT_BEGIN_HEADER * pixel :: * A Boolean. If 1, the `width` and `height` fields are interpreted as * integer pixel character sizes. Otherwise, they are expressed as - * 1/64th of points. + * 1/64 of points. 
* * x_res :: * Only used when `pixel` is value~0 to indicate the horizontal diff --git a/Source/ThirdParty/freetype/ftchapters.h b/Source/ThirdParty/freetype/ftchapters.h index 2ee26973e..7566fbd10 100644 --- a/Source/ThirdParty/freetype/ftchapters.h +++ b/Source/ThirdParty/freetype/ftchapters.h @@ -15,6 +15,7 @@ * General Remarks * * @sections: + * preamble * header_inclusion * user_allocation * @@ -30,9 +31,28 @@ * Core API * * @sections: - * version * basic_types - * base_interface + * library_setup + * face_creation + * font_testing_macros + * sizing_and_scaling + * glyph_retrieval + * character_mapping + * information_retrieval + * other_api_data + * + */ + + + /************************************************************************** + * + * @chapter: + * extended_api + * + * @title: + * Extended API + * + * @sections: * glyph_variants * color_management * layer_management @@ -61,6 +81,7 @@ * cid_fonts * pfr_fonts * winfnt_fonts + * svg_fonts * font_formats * gasp_table * @@ -81,6 +102,7 @@ * t1_cid_driver * tt_driver * pcf_driver + * ot_svg_driver * properties * parameter_tags * lcd_rendering @@ -123,6 +145,7 @@ * gzip * lzw * bzip2 + * debugging_apis * */ diff --git a/Source/ThirdParty/freetype/ftcid.h b/Source/ThirdParty/freetype/ftcid.h index 8eafc1c78..ef2293902 100644 --- a/Source/ThirdParty/freetype/ftcid.h +++ b/Source/ThirdParty/freetype/ftcid.h @@ -4,7 +4,7 @@ * * FreeType API for accessing CID font information (specification). * - * Copyright (C) 2007-2019 by + * Copyright (C) 2007-2023 by * Dereg Clegg and Michael Toftdal. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTCID_H_ #define FTCID_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" 
diff --git a/Source/ThirdParty/freetype/ftcolor.h b/Source/ThirdParty/freetype/ftcolor.h index cf1802195..eae200fdf 100644 --- a/Source/ThirdParty/freetype/ftcolor.h +++ b/Source/ThirdParty/freetype/ftcolor.h @@ -4,7 +4,7 @@ * * FreeType's glyph color management (specification). * - * Copyright (C) 2018-2019 by + * Copyright (C) 2018-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTCOLOR_H_ #define FTCOLOR_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -125,9 +124,9 @@ FT_BEGIN_HEADER * The number of palettes. * * palette_name_ids :: - * A read-only array of palette name IDs with `num_palettes` elements, - * corresponding to entries like 'dark' or 'light' in the font's 'name' - * table. + * An optional read-only array of palette name IDs with `num_palettes` + * elements, corresponding to entries like 'dark' or 'light' in the + * font's 'name' table. * * An empty name ID in the 'CPAL' table gets represented as value * 0xFFFF. @@ -135,8 +134,8 @@ FT_BEGIN_HEADER * `NULL` if the font's 'CPAL' table doesn't contain appropriate data. * * palette_flags :: - * A read-only array of palette flags with `num_palettes` elements. - * Possible values are an ORed combination of + * An optional read-only array of palette flags with `num_palettes` + * elements. Possible values are an ORed combination of * @FT_PALETTE_FOR_LIGHT_BACKGROUND and * @FT_PALETTE_FOR_DARK_BACKGROUND. * @@ -147,7 +146,7 @@ FT_BEGIN_HEADER * same size. * * palette_entry_name_ids :: - * A read-only array of palette entry name IDs with + * An optional read-only array of palette entry name IDs with * `num_palette_entries`. In each palette, entries with the same index * have the same function. 
For example, index~0 might correspond to * string 'outline' in the font's 'name' table to indicate that this @@ -163,6 +162,9 @@ FT_BEGIN_HEADER * Use function @FT_Get_Sfnt_Name to map name IDs and entry name IDs to * name strings. * + * Use function @FT_Palette_Select to get the colors associated with a + * palette entry. + * * @since: * 2.10 */ @@ -300,6 +302,1360 @@ FT_BEGIN_HEADER FT_Palette_Set_Foreground_Color( FT_Face face, FT_Color foreground_color ); + + /************************************************************************** + * + * @section: + * layer_management + * + * @title: + * Glyph Layer Management + * + * @abstract: + * Retrieving and manipulating OpenType's 'COLR' table data. + * + * @description: + * The functions described here allow access of colored glyph layer data + * in OpenType's 'COLR' tables. + */ + + + /************************************************************************** + * + * @struct: + * FT_LayerIterator + * + * @description: + * This iterator object is needed for @FT_Get_Color_Glyph_Layer. + * + * @fields: + * num_layers :: + * The number of glyph layers for the requested glyph index. Will be + * set by @FT_Get_Color_Glyph_Layer. + * + * layer :: + * The current layer. Will be set by @FT_Get_Color_Glyph_Layer. + * + * p :: + * An opaque pointer into 'COLR' table data. The caller must set this + * to `NULL` before the first call of @FT_Get_Color_Glyph_Layer. + */ + typedef struct FT_LayerIterator_ + { + FT_UInt num_layers; + FT_UInt layer; + FT_Byte* p; + + } FT_LayerIterator; + + + /************************************************************************** + * + * @function: + * FT_Get_Color_Glyph_Layer + * + * @description: + * This is an interface to the 'COLR' table in OpenType fonts to + * iteratively retrieve the colored glyph layers associated with the + * current glyph slot. 
+ * + * https://docs.microsoft.com/en-us/typography/opentype/spec/colr + * + * The glyph layer data for a given glyph index, if present, provides an + * alternative, multi-color glyph representation: Instead of rendering + * the outline or bitmap with the given glyph index, glyphs with the + * indices and colors returned by this function are rendered layer by + * layer. + * + * The returned elements are ordered in the z~direction from bottom to + * top; the 'n'th element should be rendered with the associated palette + * color and blended on top of the already rendered layers (elements 0, + * 1, ..., n-1). + * + * @input: + * face :: + * A handle to the parent face object. + * + * base_glyph :: + * The glyph index the colored glyph layers are associated with. + * + * @inout: + * iterator :: + * An @FT_LayerIterator object. For the first call you should set + * `iterator->p` to `NULL`. For all following calls, simply use the + * same object again. + * + * @output: + * aglyph_index :: + * The glyph index of the current layer. + * + * acolor_index :: + * The color index into the font face's color palette of the current + * layer. The value 0xFFFF is special; it doesn't reference a palette + * entry but indicates that the text foreground color should be used + * instead (to be set up by the application outside of FreeType). + * + * The color palette can be retrieved with @FT_Palette_Select. + * + * @return: + * Value~1 if everything is OK. If there are no more layers (or if there + * are no layers at all), value~0 gets returned. In case of an error, + * value~0 is returned also. + * + * @note: + * This function is necessary if you want to handle glyph layers by + * yourself. In particular, functions that operate with @FT_GlyphRec + * objects (like @FT_Get_Glyph or @FT_Glyph_To_Bitmap) don't have access + * to this information. 
+ * + * Note that @FT_Render_Glyph is able to handle colored glyph layers + * automatically if the @FT_LOAD_COLOR flag is passed to a previous call + * to @FT_Load_Glyph. [This is an experimental feature.] + * + * @example: + * ``` + * FT_Color* palette; + * FT_LayerIterator iterator; + * + * FT_Bool have_layers; + * FT_UInt layer_glyph_index; + * FT_UInt layer_color_index; + * + * + * error = FT_Palette_Select( face, palette_index, &palette ); + * if ( error ) + * palette = NULL; + * + * iterator.p = NULL; + * have_layers = FT_Get_Color_Glyph_Layer( face, + * glyph_index, + * &layer_glyph_index, + * &layer_color_index, + * &iterator ); + * + * if ( palette && have_layers ) + * { + * do + * { + * FT_Color layer_color; + * + * + * if ( layer_color_index == 0xFFFF ) + * layer_color = text_foreground_color; + * else + * layer_color = palette[layer_color_index]; + * + * // Load and render glyph `layer_glyph_index', then + * // blend resulting pixmap (using color `layer_color') + * // with previously created pixmaps. + * + * } while ( FT_Get_Color_Glyph_Layer( face, + * glyph_index, + * &layer_glyph_index, + * &layer_color_index, + * &iterator ) ); + * } + * ``` + * + * @since: + * 2.10 + */ + FT_EXPORT( FT_Bool ) + FT_Get_Color_Glyph_Layer( FT_Face face, + FT_UInt base_glyph, + FT_UInt *aglyph_index, + FT_UInt *acolor_index, + FT_LayerIterator* iterator ); + + + /************************************************************************** + * + * @enum: + * FT_PaintFormat + * + * @description: + * Enumeration describing the different paint format types of the v1 + * extensions to the 'COLR' table, see + * 'https://github.com/googlefonts/colr-gradients-spec'. 
+ * + * The enumeration values loosely correspond with the format numbers of + * the specification: FreeType always returns a fully specified 'Paint' + * structure for the 'Transform', 'Translate', 'Scale', 'Rotate', and + * 'Skew' table types even though the specification has different formats + * depending on whether or not a center is specified, whether the scale + * is uniform in x and y~direction or not, etc. Also, only non-variable + * format identifiers are listed in this enumeration; as soon as support + * for variable 'COLR' v1 fonts is implemented, interpolation is + * performed dependent on axis coordinates, which are configured on the + * @FT_Face through @FT_Set_Var_Design_Coordinates. This implies that + * always static, readily interpolated values are returned in the 'Paint' + * structures. + * + * @since: + * 2.13 + */ + typedef enum FT_PaintFormat_ + { + FT_COLR_PAINTFORMAT_COLR_LAYERS = 1, + FT_COLR_PAINTFORMAT_SOLID = 2, + FT_COLR_PAINTFORMAT_LINEAR_GRADIENT = 4, + FT_COLR_PAINTFORMAT_RADIAL_GRADIENT = 6, + FT_COLR_PAINTFORMAT_SWEEP_GRADIENT = 8, + FT_COLR_PAINTFORMAT_GLYPH = 10, + FT_COLR_PAINTFORMAT_COLR_GLYPH = 11, + FT_COLR_PAINTFORMAT_TRANSFORM = 12, + FT_COLR_PAINTFORMAT_TRANSLATE = 14, + FT_COLR_PAINTFORMAT_SCALE = 16, + FT_COLR_PAINTFORMAT_ROTATE = 24, + FT_COLR_PAINTFORMAT_SKEW = 28, + FT_COLR_PAINTFORMAT_COMPOSITE = 32, + FT_COLR_PAINT_FORMAT_MAX = 33, + FT_COLR_PAINTFORMAT_UNSUPPORTED = 255 + + } FT_PaintFormat; + + + /************************************************************************** + * + * @struct: + * FT_ColorStopIterator + * + * @description: + * This iterator object is needed for @FT_Get_Colorline_Stops. It keeps + * state while iterating over the stops of an @FT_ColorLine, representing + * the `ColorLine` struct of the v1 extensions to 'COLR', see + * 'https://github.com/googlefonts/colr-gradients-spec'. Do not manually + * modify fields of this iterator. 
+ * + * @fields: + * num_color_stops :: + * The number of color stops for the requested glyph index. Set by + * @FT_Get_Paint. + * + * current_color_stop :: + * The current color stop. Set by @FT_Get_Colorline_Stops. + * + * p :: + * An opaque pointer into 'COLR' table data. Set by @FT_Get_Paint. + * Updated by @FT_Get_Colorline_Stops. + * + * read_variable :: + * A boolean keeping track of whether variable color lines are to be + * read. Set by @FT_Get_Paint. + * + * @since: + * 2.13 + */ + typedef struct FT_ColorStopIterator_ + { + FT_UInt num_color_stops; + FT_UInt current_color_stop; + + FT_Byte* p; + + FT_Bool read_variable; + + } FT_ColorStopIterator; + + + /************************************************************************** + * + * @struct: + * FT_ColorIndex + * + * @description: + * A structure representing a `ColorIndex` value of the 'COLR' v1 + * extensions, see 'https://github.com/googlefonts/colr-gradients-spec'. + * + * @fields: + * palette_index :: + * The palette index into a 'CPAL' palette. + * + * alpha :: + * Alpha transparency value multiplied with the value from 'CPAL'. + * + * @since: + * 2.13 + */ + typedef struct FT_ColorIndex_ + { + FT_UInt16 palette_index; + FT_F2Dot14 alpha; + + } FT_ColorIndex; + + + /************************************************************************** + * + * @struct: + * FT_ColorStop + * + * @description: + * A structure representing a `ColorStop` value of the 'COLR' v1 + * extensions, see 'https://github.com/googlefonts/colr-gradients-spec'. + * + * @fields: + * stop_offset :: + * The stop offset along the gradient, expressed as a 16.16 fixed-point + * coordinate. + * + * color :: + * The color information for this stop, see @FT_ColorIndex. 
+ * + * @since: + * 2.13 + */ + typedef struct FT_ColorStop_ + { + FT_Fixed stop_offset; + FT_ColorIndex color; + + } FT_ColorStop; + + + /************************************************************************** + * + * @enum: + * FT_PaintExtend + * + * @description: + * An enumeration representing the 'Extend' mode of the 'COLR' v1 + * extensions, see 'https://github.com/googlefonts/colr-gradients-spec'. + * It describes how the gradient fill continues at the other boundaries. + * + * @since: + * 2.13 + */ + typedef enum FT_PaintExtend_ + { + FT_COLR_PAINT_EXTEND_PAD = 0, + FT_COLR_PAINT_EXTEND_REPEAT = 1, + FT_COLR_PAINT_EXTEND_REFLECT = 2 + + } FT_PaintExtend; + + + /************************************************************************** + * + * @struct: + * FT_ColorLine + * + * @description: + * A structure representing a `ColorLine` value of the 'COLR' v1 + * extensions, see 'https://github.com/googlefonts/colr-gradients-spec'. + * It describes a list of color stops along the defined gradient. + * + * @fields: + * extend :: + * The extend mode at the outer boundaries, see @FT_PaintExtend. + * + * color_stop_iterator :: + * The @FT_ColorStopIterator used to enumerate and retrieve the + * actual @FT_ColorStop's. + * + * @since: + * 2.13 + */ + typedef struct FT_ColorLine_ + { + FT_PaintExtend extend; + FT_ColorStopIterator color_stop_iterator; + + } FT_ColorLine; + + + /************************************************************************** + * + * @struct: + * FT_Affine23 + * + * @description: + * A structure used to store a 2x3 matrix. Coefficients are in + * 16.16 fixed-point format. The computation performed is + * + * ``` + * x' = x*xx + y*xy + dx + * y' = x*yx + y*yy + dy + * ``` + * + * @fields: + * xx :: + * Matrix coefficient. + * + * xy :: + * Matrix coefficient. + * + * dx :: + * x translation. + * + * yx :: + * Matrix coefficient. + * + * yy :: + * Matrix coefficient. + * + * dy :: + * y translation. 
+ * + * @since: + * 2.13 + */ + typedef struct FT_Affine_23_ + { + FT_Fixed xx, xy, dx; + FT_Fixed yx, yy, dy; + + } FT_Affine23; + + + /************************************************************************** + * + * @enum: + * FT_Composite_Mode + * + * @description: + * An enumeration listing the 'COLR' v1 composite modes used in + * @FT_PaintComposite. For more details on each paint mode, see + * 'https://www.w3.org/TR/compositing-1/#porterduffcompositingoperators'. + * + * @since: + * 2.13 + */ + typedef enum FT_Composite_Mode_ + { + FT_COLR_COMPOSITE_CLEAR = 0, + FT_COLR_COMPOSITE_SRC = 1, + FT_COLR_COMPOSITE_DEST = 2, + FT_COLR_COMPOSITE_SRC_OVER = 3, + FT_COLR_COMPOSITE_DEST_OVER = 4, + FT_COLR_COMPOSITE_SRC_IN = 5, + FT_COLR_COMPOSITE_DEST_IN = 6, + FT_COLR_COMPOSITE_SRC_OUT = 7, + FT_COLR_COMPOSITE_DEST_OUT = 8, + FT_COLR_COMPOSITE_SRC_ATOP = 9, + FT_COLR_COMPOSITE_DEST_ATOP = 10, + FT_COLR_COMPOSITE_XOR = 11, + FT_COLR_COMPOSITE_PLUS = 12, + FT_COLR_COMPOSITE_SCREEN = 13, + FT_COLR_COMPOSITE_OVERLAY = 14, + FT_COLR_COMPOSITE_DARKEN = 15, + FT_COLR_COMPOSITE_LIGHTEN = 16, + FT_COLR_COMPOSITE_COLOR_DODGE = 17, + FT_COLR_COMPOSITE_COLOR_BURN = 18, + FT_COLR_COMPOSITE_HARD_LIGHT = 19, + FT_COLR_COMPOSITE_SOFT_LIGHT = 20, + FT_COLR_COMPOSITE_DIFFERENCE = 21, + FT_COLR_COMPOSITE_EXCLUSION = 22, + FT_COLR_COMPOSITE_MULTIPLY = 23, + FT_COLR_COMPOSITE_HSL_HUE = 24, + FT_COLR_COMPOSITE_HSL_SATURATION = 25, + FT_COLR_COMPOSITE_HSL_COLOR = 26, + FT_COLR_COMPOSITE_HSL_LUMINOSITY = 27, + FT_COLR_COMPOSITE_MAX = 28 + + } FT_Composite_Mode; + + + /************************************************************************** + * + * @struct: + * FT_OpaquePaint + * + * @description: + * A structure representing an offset to a `Paint` value stored in any + * of the paint tables of a 'COLR' v1 font. Compare Offset<24> there. 
+ * When 'COLR' v1 paint tables represented by FreeType objects such as + * @FT_PaintColrLayers, @FT_PaintComposite, or @FT_PaintTransform + * reference downstream nested paint tables, we do not immediately + * retrieve them but encapsulate their location in this type. Use + * @FT_Get_Paint to retrieve the actual @FT_COLR_Paint object that + * describes the details of the respective paint table. + * + * @fields: + * p :: + * An internal offset to a Paint table, needs to be set to NULL before + * passing this struct as an argument to @FT_Get_Paint. + * + * insert_root_transform :: + * An internal boolean to track whether an initial root transform is + * to be provided. Do not set this value. + * + * @since: + * 2.13 + */ + typedef struct FT_Opaque_Paint_ + { + FT_Byte* p; + FT_Bool insert_root_transform; + } FT_OpaquePaint; + + + /************************************************************************** + * + * @struct: + * FT_PaintColrLayers + * + * @description: + * A structure representing a `PaintColrLayers` table of a 'COLR' v1 + * font. This table describes a set of layers that are to be composited + * with composite mode `FT_COLR_COMPOSITE_SRC_OVER`. The return value + * of this function is an @FT_LayerIterator initialized so that it can + * be used with @FT_Get_Paint_Layers to retrieve the @FT_OpaquePaint + * objects as references to each layer. + * + * @fields: + * layer_iterator :: + * The layer iterator that describes the layers of this paint. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintColrLayers_ + { + FT_LayerIterator layer_iterator; + + } FT_PaintColrLayers; + + + /************************************************************************** + * + * @struct: + * FT_PaintSolid + * + * @description: + * A structure representing a `PaintSolid` value of the 'COLR' v1 + * extensions, see 'https://github.com/googlefonts/colr-gradients-spec'. 
+ * Using a `PaintSolid` value means that the glyph layer filled with + * this paint is solid-colored and does not contain a gradient. + * + * @fields: + * color :: + * The color information for this solid paint, see @FT_ColorIndex. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintSolid_ + { + FT_ColorIndex color; + + } FT_PaintSolid; + + + /************************************************************************** + * + * @struct: + * FT_PaintLinearGradient + * + * @description: + * A structure representing a `PaintLinearGradient` value of the 'COLR' + * v1 extensions, see + * 'https://github.com/googlefonts/colr-gradients-spec'. The glyph + * layer filled with this paint is drawn filled with a linear gradient. + * + * @fields: + * colorline :: + * The @FT_ColorLine information for this paint, i.e., the list of + * color stops along the gradient. + * + * p0 :: + * The starting point of the gradient definition in font units + * represented as a 16.16 fixed-point `FT_Vector`. + * + * p1 :: + * The end point of the gradient definition in font units + * represented as a 16.16 fixed-point `FT_Vector`. + * + * p2 :: + * Optional point~p2 to rotate the gradient in font units + * represented as a 16.16 fixed-point `FT_Vector`. + * Otherwise equal to~p0. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintLinearGradient_ + { + FT_ColorLine colorline; + + /* TODO: Potentially expose those as x0, y0 etc. */ + FT_Vector p0; + FT_Vector p1; + FT_Vector p2; + + } FT_PaintLinearGradient; + + + /************************************************************************** + * + * @struct: + * FT_PaintRadialGradient + * + * @description: + * A structure representing a `PaintRadialGradient` value of the 'COLR' + * v1 extensions, see + * 'https://github.com/googlefonts/colr-gradients-spec'. The glyph + * layer filled with this paint is drawn filled with a radial gradient. 
+ * + * @fields: + * colorline :: + * The @FT_ColorLine information for this paint, i.e., the list of + * color stops along the gradient. + * + * c0 :: + * The center of the starting point of the radial gradient in font + * units represented as a 16.16 fixed-point `FT_Vector`. + * + * r0 :: + * The radius of the starting circle of the radial gradient in font + * units represented as a 16.16 fixed-point value. + * + * c1 :: + * The center of the end point of the radial gradient in font units + * represented as a 16.16 fixed-point `FT_Vector`. + * + * r1 :: + * The radius of the end circle of the radial gradient in font + * units represented as a 16.16 fixed-point value. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintRadialGradient_ + { + FT_ColorLine colorline; + + FT_Vector c0; + FT_Pos r0; + FT_Vector c1; + FT_Pos r1; + + } FT_PaintRadialGradient; + + + /************************************************************************** + * + * @struct: + * FT_PaintSweepGradient + * + * @description: + * A structure representing a `PaintSweepGradient` value of the 'COLR' + * v1 extensions, see + * 'https://github.com/googlefonts/colr-gradients-spec'. The glyph + * layer filled with this paint is drawn filled with a sweep gradient + * from `start_angle` to `end_angle`. + * + * @fields: + * colorline :: + * The @FT_ColorLine information for this paint, i.e., the list of + * color stops along the gradient. + * + * center :: + * The center of the sweep gradient in font units represented as a + * vector of 16.16 fixed-point values. + * + * start_angle :: + * The start angle of the sweep gradient in 16.16 fixed-point + * format specifying degrees divided by 180.0 (as in the + * spec). Multiply by 180.0f to receive degrees value. Values are + * given counter-clockwise, starting from the (positive) y~axis. + * + * end_angle :: + * The end angle of the sweep gradient in 16.16 fixed-point + * format specifying degrees divided by 180.0 (as in the + * spec). 
Multiply by 180.0f to receive degrees value. Values are + * given counter-clockwise, starting from the (positive) y~axis. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintSweepGradient_ + { + FT_ColorLine colorline; + + FT_Vector center; + FT_Fixed start_angle; + FT_Fixed end_angle; + + } FT_PaintSweepGradient; + + + /************************************************************************** + * + * @struct: + * FT_PaintGlyph + * + * @description: + * A structure representing a 'COLR' v1 `PaintGlyph` paint table. + * + * @fields: + * paint :: + * An opaque paint object pointing to a `Paint` table that serves as + * the fill for the glyph ID. + * + * glyphID :: + * The glyph ID from the 'glyf' table, which serves as the contour + * information that is filled with paint. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintGlyph_ + { + FT_OpaquePaint paint; + FT_UInt glyphID; + + } FT_PaintGlyph; + + + /************************************************************************** + * + * @struct: + * FT_PaintColrGlyph + * + * @description: + * A structure representing a 'COLR' v1 `PaintColorGlyph` paint table. + * + * @fields: + * glyphID :: + * The glyph ID from the `BaseGlyphV1List` table that is drawn for + * this paint. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintColrGlyph_ + { + FT_UInt glyphID; + + } FT_PaintColrGlyph; + + + /************************************************************************** + * + * @struct: + * FT_PaintTransform + * + * @description: + * A structure representing a 'COLR' v1 `PaintTransform` paint table. + * + * @fields: + * paint :: + * An opaque paint that is subject to being transformed. + * + * affine :: + * A 2x3 transformation matrix in @FT_Affine23 format containing + * 16.16 fixed-point values. 
+ * + * @since: + * 2.13 + */ + typedef struct FT_PaintTransform_ + { + FT_OpaquePaint paint; + FT_Affine23 affine; + + } FT_PaintTransform; + + + /************************************************************************** + * + * @struct: + * FT_PaintTranslate + * + * @description: + * A structure representing a 'COLR' v1 `PaintTranslate` paint table. + * Used for translating downstream paints by a given x and y~delta. + * + * @fields: + * paint :: + * An @FT_OpaquePaint object referencing the paint that is to be + * translated. + * + * dx :: + * Translation in x~direction in font units represented as a + * 16.16 fixed-point value. + * + * dy :: + * Translation in y~direction in font units represented as a + * 16.16 fixed-point value. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintTranslate_ + { + FT_OpaquePaint paint; + + FT_Fixed dx; + FT_Fixed dy; + + } FT_PaintTranslate; + + + /************************************************************************** + * + * @struct: + * FT_PaintScale + * + * @description: + * A structure representing all of the 'COLR' v1 'PaintScale*' paint + * tables. Used for scaling downstream paints by a given x and y~scale, + * with a given center. This structure is used for all 'PaintScale*' + * types that are part of specification; fields of this structure are + * filled accordingly. If there is a center, the center values are set, + * otherwise they are set to the zero coordinate. If the source font + * file has 'PaintScaleUniform*' set, the scale values are set + * accordingly to the same value. + * + * @fields: + * paint :: + * An @FT_OpaquePaint object referencing the paint that is to be + * scaled. + * + * scale_x :: + * Scale factor in x~direction represented as a + * 16.16 fixed-point value. + * + * scale_y :: + * Scale factor in y~direction represented as a + * 16.16 fixed-point value. + * + * center_x :: + * x~coordinate of center point to scale from represented as a + * 16.16 fixed-point value.
+ * + * center_y :: + * y~coordinate of center point to scale from represented as a + * 16.16 fixed-point value. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintScale_ + { + FT_OpaquePaint paint; + + FT_Fixed scale_x; + FT_Fixed scale_y; + + FT_Fixed center_x; + FT_Fixed center_y; + + } FT_PaintScale; + + + /************************************************************************** + * + * @struct: + * FT_PaintRotate + * + * @description: + * A structure representing a 'COLR' v1 `PaintRotate` paint table. Used + * for rotating downstream paints with a given center and angle. + * + * @fields: + * paint :: + * An @FT_OpaquePaint object referencing the paint that is to be + * rotated. + * + * angle :: + * The rotation angle that is to be applied in degrees divided by + * 180.0 (as in the spec) represented as a 16.16 fixed-point + * value. Multiply by 180.0f to receive degrees value. + * + * center_x :: + * The x~coordinate of the pivot point of the rotation in font + * units represented as a 16.16 fixed-point value. + * + * center_y :: + * The y~coordinate of the pivot point of the rotation in font + * units represented as a 16.16 fixed-point value. + * + * @since: + * 2.13 + */ + + typedef struct FT_PaintRotate_ + { + FT_OpaquePaint paint; + + FT_Fixed angle; + + FT_Fixed center_x; + FT_Fixed center_y; + + } FT_PaintRotate; + + + /************************************************************************** + * + * @struct: + * FT_PaintSkew + * + * @description: + * A structure representing a 'COLR' v1 `PaintSkew` paint table. Used + * for skewing or shearing downstream paints by a given center and + * angle. + * + * @fields: + * paint :: + * An @FT_OpaquePaint object referencing the paint that is to be + * skewed. + * + * x_skew_angle :: + * The skewing angle in x~direction in degrees divided by 180.0 + * (as in the spec) represented as a 16.16 fixed-point + * value. Multiply by 180.0f to receive degrees. 
+ * + * y_skew_angle :: + * The skewing angle in y~direction in degrees divided by 180.0 + * (as in the spec) represented as a 16.16 fixed-point + * value. Multiply by 180.0f to receive degrees. + * + * center_x :: + * The x~coordinate of the pivot point of the skew in font units + * represented as a 16.16 fixed-point value. + * + * center_y :: + * The y~coordinate of the pivot point of the skew in font units + * represented as a 16.16 fixed-point value. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintSkew_ + { + FT_OpaquePaint paint; + + FT_Fixed x_skew_angle; + FT_Fixed y_skew_angle; + + FT_Fixed center_x; + FT_Fixed center_y; + + } FT_PaintSkew; + + + /************************************************************************** + * + * @struct: + * FT_PaintComposite + * + * @description: + * A structure representing a 'COLR' v1 `PaintComposite` paint table. + * Used for compositing two paints in a 'COLR' v1 directed acyclic graph. + * + * @fields: + * source_paint :: + * An @FT_OpaquePaint object referencing the source that is to be + * composited. + * + * composite_mode :: + * An @FT_Composite_Mode enum value determining the composition + * operation. + * + * backdrop_paint :: + * An @FT_OpaquePaint object referencing the backdrop paint that + * `source_paint` is composited onto. + * + * @since: + * 2.13 + */ + typedef struct FT_PaintComposite_ + { + FT_OpaquePaint source_paint; + FT_Composite_Mode composite_mode; + FT_OpaquePaint backdrop_paint; + + } FT_PaintComposite; + + + /************************************************************************** + * + * @union: + * FT_COLR_Paint + * + * @description: + * A union object representing format and details of a paint table of a + * 'COLR' v1 font, see + * 'https://github.com/googlefonts/colr-gradients-spec'. Use + * @FT_Get_Paint to retrieve a @FT_COLR_Paint for an @FT_OpaquePaint + * object. + * + * @fields: + * format :: + * The gradient format for this Paint structure. 
+ * + * u :: + * Union of all paint table types: + * + * * @FT_PaintColrLayers + * * @FT_PaintGlyph + * * @FT_PaintSolid + * * @FT_PaintLinearGradient + * * @FT_PaintRadialGradient + * * @FT_PaintSweepGradient + * * @FT_PaintTransform + * * @FT_PaintTranslate + * * @FT_PaintRotate + * * @FT_PaintSkew + * * @FT_PaintComposite + * * @FT_PaintColrGlyph + * + * @since: + * 2.13 + */ + typedef struct FT_COLR_Paint_ + { + FT_PaintFormat format; + + union + { + FT_PaintColrLayers colr_layers; + FT_PaintGlyph glyph; + FT_PaintSolid solid; + FT_PaintLinearGradient linear_gradient; + FT_PaintRadialGradient radial_gradient; + FT_PaintSweepGradient sweep_gradient; + FT_PaintTransform transform; + FT_PaintTranslate translate; + FT_PaintScale scale; + FT_PaintRotate rotate; + FT_PaintSkew skew; + FT_PaintComposite composite; + FT_PaintColrGlyph colr_glyph; + + } u; + + } FT_COLR_Paint; + + + /************************************************************************** + * + * @enum: + * FT_Color_Root_Transform + * + * @description: + * An enumeration to specify whether @FT_Get_Color_Glyph_Paint is to + * return a root transform to configure the client's graphics context + * matrix. + * + * @values: + * FT_COLOR_INCLUDE_ROOT_TRANSFORM :: + * Do include the root transform as the initial @FT_COLR_Paint object. + * + * FT_COLOR_NO_ROOT_TRANSFORM :: + * Do not output an initial root transform. + * + * @since: + * 2.13 + */ + typedef enum FT_Color_Root_Transform_ + { + FT_COLOR_INCLUDE_ROOT_TRANSFORM, + FT_COLOR_NO_ROOT_TRANSFORM, + + FT_COLOR_ROOT_TRANSFORM_MAX + + } FT_Color_Root_Transform; + + + /************************************************************************** + * + * @struct: + * FT_ClipBox + * + * @description: + * A structure representing a 'COLR' v1 'ClipBox' table. 'COLR' v1 + * glyphs may optionally define a clip box for aiding allocation or + * defining a maximum drawable region. Use @FT_Get_Color_Glyph_ClipBox + * to retrieve it. 
+ * + * @fields: + * bottom_left :: + * The bottom left corner of the clip box as an @FT_Vector with + * fixed-point coordinates in 26.6 format. + * + * top_left :: + * The top left corner of the clip box as an @FT_Vector with + * fixed-point coordinates in 26.6 format. + * + * top_right :: + * The top right corner of the clip box as an @FT_Vector with + * fixed-point coordinates in 26.6 format. + * + * bottom_right :: + * The bottom right corner of the clip box as an @FT_Vector with + * fixed-point coordinates in 26.6 format. + * + * @since: + * 2.13 + */ + typedef struct FT_ClipBox_ + { + FT_Vector bottom_left; + FT_Vector top_left; + FT_Vector top_right; + FT_Vector bottom_right; + + } FT_ClipBox; + + + /************************************************************************** + * + * @function: + * FT_Get_Color_Glyph_Paint + * + * @description: + * This is the starting point and interface to color gradient + * information in a 'COLR' v1 table in OpenType fonts to recursively + * retrieve the paint tables for the directed acyclic graph of a colored + * glyph, given a glyph ID. + * + * https://github.com/googlefonts/colr-gradients-spec + * + * In a 'COLR' v1 font, each color glyph defines a directed acyclic + * graph of nested paint tables, such as `PaintGlyph`, `PaintSolid`, + * `PaintLinearGradient`, `PaintRadialGradient`, and so on. Using this + * function and specifying a glyph ID, one retrieves the root paint + * table for this glyph ID. + * + * This function allows control whether an initial root transform is + * returned to configure scaling, transform, and translation correctly + * on the client's graphics context. The initial root transform is + * computed and returned according to the values configured for @FT_Size + * and @FT_Set_Transform on the @FT_Face object, see below for details + * of the `root_transform` parameter. 
This has implications for a + * client 'COLR' v1 implementation: When this function returns an + * initially computed root transform, at the time of executing the + * @FT_PaintGlyph operation, the contours should be retrieved using + * @FT_Load_Glyph at unscaled, untransformed size. This is because the + * root transform applied to the graphics context will take care of + * correct scaling. + * + * Alternatively, to allow hinting of contours, at the time of executing + * @FT_Load_Glyph, the current graphics context transformation matrix + * can be decomposed into a scaling matrix and a remainder, and + * @FT_Load_Glyph can be used to retrieve the contours at scaled size. + * Care must then be taken to blit or clip to the graphics context with + * taking this remainder transformation into account. + * + * @input: + * face :: + * A handle to the parent face object. + * + * base_glyph :: + * The glyph index for which to retrieve the root paint table. + * + * root_transform :: + * Specifies whether an initially computed root is returned by the + * @FT_PaintTransform operation to account for the activated size + * (see @FT_Activate_Size) and the configured transform and translate + * (see @FT_Set_Transform). + * + * This root transform is returned before nodes of the glyph graph of + * the font are returned. Subsequent @FT_COLR_Paint structures + * contain unscaled and untransformed values. The inserted root + * transform enables the client application to apply an initial + * transform to its graphics context. When executing subsequent + * FT_COLR_Paint operations, values from @FT_COLR_Paint operations + * will ultimately be correctly scaled because of the root transform + * applied to the graphics context. Use + * @FT_COLOR_INCLUDE_ROOT_TRANSFORM to include the root transform, use + * @FT_COLOR_NO_ROOT_TRANSFORM to not include it. The latter may be + * useful when traversing the 'COLR' v1 glyph graph and reaching a + * @FT_PaintColrGlyph. 
When recursing into @FT_PaintColrGlyph and + * painting that inline, no additional root transform is needed as it + * has already been applied to the graphics context at the beginning + * of drawing this glyph. + * + * @output: + * paint :: + * The @FT_OpaquePaint object that references the actual paint table. + * + * The respective actual @FT_COLR_Paint object is retrieved via + * @FT_Get_Paint. + * + * @return: + * Value~1 if everything is OK. If no color glyph is found, or the root + * paint could not be retrieved, value~0 gets returned. In case of an + * error, value~0 is returned also. + * + * @since: + * 2.13 + */ + FT_EXPORT( FT_Bool ) + FT_Get_Color_Glyph_Paint( FT_Face face, + FT_UInt base_glyph, + FT_Color_Root_Transform root_transform, + FT_OpaquePaint* paint ); + + + /************************************************************************** + * + * @function: + * FT_Get_Color_Glyph_ClipBox + * + * @description: + * Search for a 'COLR' v1 clip box for the specified `base_glyph` and + * fill the `clip_box` parameter with the 'COLR' v1 'ClipBox' information + * if one is found. + * + * @input: + * face :: + * A handle to the parent face object. + * + * base_glyph :: + * The glyph index for which to retrieve the clip box. + * + * @output: + * clip_box :: + * The clip box for the requested `base_glyph` if one is found. The + * clip box is computed taking scale and transformations configured on + * the @FT_Face into account. @FT_ClipBox contains @FT_Vector values + * in 26.6 format. + * + * @return: + * Value~1 if a clip box is found. If no clip box is found or an error + * occurred, value~0 is returned. + * + * @note: + * To retrieve the clip box in font units, reset scale to units-per-em + * and remove transforms configured using @FT_Set_Transform.
+ * + * @since: + * 2.13 + */ + FT_EXPORT( FT_Bool ) + FT_Get_Color_Glyph_ClipBox( FT_Face face, + FT_UInt base_glyph, + FT_ClipBox* clip_box ); + + + /************************************************************************** + * + * @function: + * FT_Get_Paint_Layers + * + * @description: + * Access the layers of a `PaintColrLayers` table. + * + * If the root paint of a color glyph, or a nested paint of a 'COLR' + * glyph is a `PaintColrLayers` table, this function retrieves the + * layers of the `PaintColrLayers` table. + * + * The @FT_PaintColrLayers object contains an @FT_LayerIterator, which + * is used here to iterate over the layers. Each layer is returned as + * an @FT_OpaquePaint object, which then can be used with @FT_Get_Paint + * to retrieve the actual paint object. + * + * @input: + * face :: + * A handle to the parent face object. + * + * @inout: + * iterator :: + * The @FT_LayerIterator from an @FT_PaintColrLayers object, for which + * the layers are to be retrieved. The internal state of the iterator + * is incremented after one call to this function for retrieving one + * layer. + * + * @output: + * paint :: + * The @FT_OpaquePaint object that references the actual paint table. + * The respective actual @FT_COLR_Paint object is retrieved via + * @FT_Get_Paint. + * + * @return: + * Value~1 if everything is OK. Value~0 gets returned when the paint + * object can not be retrieved or any other error occurs. + * + * @since: + * 2.13 + */ + FT_EXPORT( FT_Bool ) + FT_Get_Paint_Layers( FT_Face face, + FT_LayerIterator* iterator, + FT_OpaquePaint* paint ); + + + /************************************************************************** + * + * @function: + * FT_Get_Colorline_Stops + * + * @description: + * This is an interface to color gradient information in a 'COLR' v1 + * table in OpenType fonts to iteratively retrieve the gradient and + * solid fill information for colored glyph layers for a specified glyph + * ID. 
+ * + * https://github.com/googlefonts/colr-gradients-spec + * + * @input: + * face :: + * A handle to the parent face object. + * + * @inout: + * iterator :: + * The retrieved @FT_ColorStopIterator, configured on an @FT_ColorLine, + * which in turn got retrieved via paint information in + * @FT_PaintLinearGradient or @FT_PaintRadialGradient. + * + * @output: + * color_stop :: + * Color index and alpha value for the retrieved color stop. + * + * @return: + * Value~1 if everything is OK. If there are no more color stops, + * value~0 gets returned. In case of an error, value~0 is returned + * also. + * + * @since: + * 2.13 + */ + FT_EXPORT( FT_Bool ) + FT_Get_Colorline_Stops( FT_Face face, + FT_ColorStop* color_stop, + FT_ColorStopIterator* iterator ); + + + /************************************************************************** + * + * @function: + * FT_Get_Paint + * + * @description: + * Access the details of a paint using an @FT_OpaquePaint opaque paint + * object, which internally stores the offset to the respective `Paint` + * object in the 'COLR' table. + * + * @input: + * face :: + * A handle to the parent face object. + * + * opaque_paint :: + * The opaque paint object for which the underlying @FT_COLR_Paint + * data is to be retrieved. + * + * @output: + * paint :: + * The specific @FT_COLR_Paint object containing information coming + * from one of the font's `Paint*` tables. + * + * @return: + * Value~1 if everything is OK. Value~0 if no details can be found for + * this paint or any other error occurred. + * + * @since: + * 2.13 + */ + FT_EXPORT( FT_Bool ) + FT_Get_Paint( FT_Face face, + FT_OpaquePaint opaque_paint, + FT_COLR_Paint* paint ); + /* */ diff --git a/Source/ThirdParty/freetype/ftdriver.h b/Source/ThirdParty/freetype/ftdriver.h index 497bde9f6..7af7465bc 100644 --- a/Source/ThirdParty/freetype/ftdriver.h +++ b/Source/ThirdParty/freetype/ftdriver.h @@ -4,7 +4,7 @@ * * FreeType API for controlling driver modules (specification only).
* - * Copyright (C) 2017-2019 by + * Copyright (C) 2017-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,9 +19,8 @@ #ifndef FTDRIVER_H_ #define FTDRIVER_H_ -#include -#include FT_FREETYPE_H -#include FT_PARAMETER_TAGS_H +#include +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -54,10 +53,10 @@ FT_BEGIN_HEADER * reasons. * * Available properties are @increase-x-height, @no-stem-darkening - * (experimental), @darkening-parameters (experimental), @warping - * (experimental), @glyph-to-script-map (experimental), @fallback-script - * (experimental), and @default-script (experimental), as documented in - * the @properties section. + * (experimental), @darkening-parameters (experimental), + * @glyph-to-script-map (experimental), @fallback-script (experimental), + * and @default-script (experimental), as documented in the @properties + * section. * */ @@ -85,15 +84,15 @@ FT_BEGIN_HEADER * @properties section. * * - * **Hinting and antialiasing principles of the new engine** + * **Hinting and anti-aliasing principles of the new engine** * * The rasterizer is positioning horizontal features (e.g., ascender * height & x-height, or crossbars) on the pixel grid and minimizing the - * amount of antialiasing applied to them, while placing vertical + * amount of anti-aliasing applied to them, while placing vertical * features (vertical stems) on the pixel grid without hinting, thus * representing the stem position and weight accurately. Sometimes the * vertical stems may be only partially black. In this context, - * 'antialiasing' means that stems are not positioned exactly on pixel + * 'anti-aliasing' means that stems are not positioned exactly on pixel * borders, causing a fuzzy appearance. * * There are two principles behind this approach. 
@@ -109,7 +108,7 @@ FT_BEGIN_HEADER * sizes are comparable to kerning values and thus would be noticeable * (and distracting) while reading if hinting were applied. * - * One of the reasons to not hint horizontally is antialiasing for LCD + * One of the reasons to not hint horizontally is anti-aliasing for LCD * screens: The pixel geometry of modern displays supplies three vertical * subpixels as the eye moves horizontally across each visible pixel. On * devices where we can be certain this characteristic is present a @@ -117,7 +116,7 @@ FT_BEGIN_HEADER * weight. In Western writing systems this turns out to be the more * critical direction anyway; the weights and spacing of vertical stems * (see above) are central to Armenian, Cyrillic, Greek, and Latin type - * designs. Even when the rasterizer uses greyscale antialiasing instead + * designs. Even when the rasterizer uses greyscale anti-aliasing instead * of color (a necessary compromise when one doesn't know the screen * characteristics), the unhinted vertical features preserve the design's * weight and spacing much better than aliased type would. @@ -135,7 +134,7 @@ FT_BEGIN_HEADER * each being rounded to the nearest pixel edge, taking care of overshoot * suppression at small sizes, stem darkening, and scaling. * - * Hstems (this is, hint values defined in the font to help align + * Hstems (that is, hint values defined in the font to help align * horizontal features) that fall within a blue zone are said to be * 'captured' and are aligned to that zone. Uncaptured stems are moved * in one of four ways, top edge up or down, bottom edge up or down. @@ -213,16 +212,14 @@ FT_BEGIN_HEADER * @description: * While FreeType's TrueType driver doesn't expose API functions by * itself, it is possible to control its behaviour with @FT_Property_Set - * and @FT_Property_Get. The following lists the available properties - * together with the necessary macros and structures. + * and @FT_Property_Get. 
* - * The TrueType driver's module name is 'truetype'. + * The TrueType driver's module name is 'truetype'; a single property + * @interpreter-version is available, as documented in the @properties + * section. * - * A single property @interpreter-version is available, as documented in - * the @properties section. - * - * We start with a list of definitions, kindly provided by Greg - * Hitchcock. + * To help understand the differences between interpreter versions, we + * introduce a list of definitions, kindly provided by Greg Hitchcock. * * _Bi-Level Rendering_ * @@ -301,6 +298,31 @@ FT_BEGIN_HEADER */ + /************************************************************************** + * + * @section: + * ot_svg_driver + * + * @title: + * The SVG driver + * + * @abstract: + * Controlling the external rendering of OT-SVG glyphs. + * + * @description: + * By default, FreeType can only load the 'SVG~' table of OpenType fonts + * if configuration macro `FT_CONFIG_OPTION_SVG` is defined. To make it + * render SVG glyphs, an external SVG rendering library is needed. All + * details on the interface between FreeType and the external library + * via function hooks can be found in section @svg_fonts. + * + * The OT-SVG driver's module name is 'ot-svg'; it supports a single + * property called @svg-hooks, documented below in the @properties + * section. + * + */ + + /************************************************************************** * * @section: @@ -363,12 +385,8 @@ FT_BEGIN_HEADER * The same holds for the Type~1 and CID modules if compiled with * `T1_CONFIG_OPTION_OLD_ENGINE`. * - * For the 'cff' module, the default engine is 'freetype' if - * `CFF_CONFIG_OPTION_OLD_ENGINE` is defined, and 'adobe' otherwise. - * - * For both the 'type1' and 't1cid' modules, the default engine is - * 'freetype' if `T1_CONFIG_OPTION_OLD_ENGINE` is defined, and 'adobe' - * otherwise. + * For the 'cff' module, the default engine is 'adobe'. 
For both the + * 'type1' and 't1cid' modules, the default engine is 'adobe', too. * * @note: * This property can be used with @FT_Property_Get also. @@ -427,12 +445,8 @@ FT_BEGIN_HEADER * counteracts the 'thinning out' of glyphs, making text remain readable * at smaller sizes. * - * By default, the Adobe engines for CFF, Type~1, and CID fonts darken - * stems at smaller sizes, regardless of hinting, to enhance contrast. - * Setting this property, stem darkening gets switched off. - * * For the auto-hinter, stem-darkening is experimental currently and thus - * switched off by default (this is, `no-stem-darkening` is set to TRUE + * switched off by default (that is, `no-stem-darkening` is set to TRUE * by default). Total consistency with the CFF driver is not achieved * right now because the emboldening method differs and glyphs must be * scaled down on the Y-axis to keep outline points inside their @@ -637,11 +651,8 @@ FT_BEGIN_HEADER * Windows~98; only grayscale and B/W rasterizing is supported. * * TT_INTERPRETER_VERSION_38 :: - * Version~38 corresponds to MS rasterizer v.1.9; it is roughly - * equivalent to the hinting provided by DirectWrite ClearType (as can - * be found, for example, in the Internet Explorer~9 running on - * Windows~7). It is used in FreeType to select the 'Infinality' - * subpixel hinting code. The code may be removed in a future version. + * Version~38 is the same as Version~40. The original 'Infinality' code is + * no longer available. * * TT_INTERPRETER_VERSION_40 :: * Version~40 corresponds to MS rasterizer v.2.1; it is roughly @@ -806,6 +817,39 @@ FT_BEGIN_HEADER * 2.5 */ + /************************************************************************** + * + * @property: + * svg-hooks + * + * @description: + * Set up the interface between FreeType and an external SVG rendering + * library like 'librsvg'. All details on the function hooks can be + * found in section @svg_fonts. 
+ * + * @example: + * The following example code expects that the four hook functions + * `svg_*` are defined elsewhere. Error handling is omitted, too. + * + * ``` + * FT_Library library; + * SVG_RendererHooks hooks = { + * (SVG_Lib_Init_Func)svg_init, + * (SVG_Lib_Free_Func)svg_free, + * (SVG_Lib_Render_Func)svg_render, + * (SVG_Lib_Preset_Slot_Func)svg_preset_slot }; + * + * + * FT_Init_FreeType( &library ); + * + * FT_Property_Set( library, "ot-svg", + * "svg-hooks", &hooks ); + * ``` + * + * @since: + * 2.12 + */ + /************************************************************************** * @@ -1171,48 +1215,18 @@ FT_BEGIN_HEADER * warping * * @description: - * **Experimental only** + * **Obsolete** * - * If FreeType gets compiled with option `AF_CONFIG_OPTION_USE_WARPER` to - * activate the warp hinting code in the auto-hinter, this property - * switches warping on and off. + * This property was always experimental and probably never worked + * correctly. It was entirely removed from the FreeType~2 sources. This + * entry is only here for historical reference. * - * Warping only works in 'normal' auto-hinting mode replacing it. The - * idea of the code is to slightly scale and shift a glyph along the + * Warping only worked in 'normal' auto-hinting mode replacing it. The + * idea of the code was to slightly scale and shift a glyph along the * non-hinted dimension (which is usually the horizontal axis) so that as - * much of its segments are aligned (more or less) to the grid. To find + * much of its segments were aligned (more or less) to the grid. To find * out a glyph's optimal scaling and shifting value, various parameter - * combinations are tried and scored. - * - * By default, warping is off. - * - * @note: - * This property can be used with @FT_Property_Get also. - * - * This property can be set via the `FREETYPE_PROPERTIES` environment - * variable (using values 1 and 0 for 'on' and 'off', respectively). 
- * - * The warping code can also change advance widths. Have a look at the - * `lsb_delta` and `rsb_delta` fields in the @FT_GlyphSlotRec structure - * for details on improving inter-glyph distances while rendering. - * - * Since warping is a global property of the auto-hinter it is best to - * change its value before rendering any face. Otherwise, you should - * reload all faces that get auto-hinted in 'normal' hinting mode. - * - * @example: - * This example shows how to switch on warping (omitting the error - * handling). - * - * ``` - * FT_Library library; - * FT_Bool warping = 1; - * - * - * FT_Init_FreeType( &library ); - * - * FT_Property_Set( library, "autofitter", "warping", &warping ); - * ``` + * combinations were tried and scored. * * @since: * 2.6 diff --git a/Source/ThirdParty/freetype/fterrdef.h b/Source/ThirdParty/freetype/fterrdef.h index 9bc7dc65e..d59b3cc2d 100644 --- a/Source/ThirdParty/freetype/fterrdef.h +++ b/Source/ThirdParty/freetype/fterrdef.h @@ -4,7 +4,7 @@ * * FreeType error codes (specification). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -101,6 +101,8 @@ "too many hints" ) FT_ERRORDEF_( Invalid_Pixel_Size, 0x17, "invalid pixel size" ) + FT_ERRORDEF_( Invalid_SVG_Document, 0x18, + "invalid SVG document" ) /* handle errors */ @@ -234,6 +236,8 @@ "found FDEF or IDEF opcode in glyf bytecode" ) FT_ERRORDEF_( Missing_Bitmap, 0x9D, "missing bitmap in strike" ) + FT_ERRORDEF_( Missing_SVG_Hooks, 0x9E, + "SVG hooks have not been set" ) /* CFF, CID, and Type 1 errors */ diff --git a/Source/ThirdParty/freetype/fterrors.h b/Source/ThirdParty/freetype/fterrors.h index 58f5a3ead..15ef3f76b 100644 --- a/Source/ThirdParty/freetype/fterrors.h +++ b/Source/ThirdParty/freetype/fterrors.h @@ -4,7 +4,7 @@ * * FreeType error code handling (specification). 
* - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -29,7 +29,7 @@ * * @description: * The header file `fterrors.h` (which is automatically included by - * `freetype.h` defines the handling of FreeType's enumeration + * `freetype.h`) defines the handling of FreeType's enumeration * constants. It can also be used to generate error message strings * with a small macro trick explained below. * @@ -89,7 +89,7 @@ * const char* err_msg; * } ft_errors[] = * - * #include FT_ERRORS_H + * #include * ``` * * An alternative to using an array is a switch statement. @@ -124,7 +124,7 @@ /* include module base error codes */ -#include FT_MODULE_ERRORS_H +#include /*******************************************************************/ @@ -197,7 +197,7 @@ /* now include the error codes */ -#include FT_ERROR_DEFINITIONS_H +#include #ifdef FT_ERROR_END_LIST @@ -232,11 +232,16 @@ #undef FT_ERR_PREFIX #endif - /* FT_INCLUDE_ERR_PROTOS: Control if function prototypes should be */ - /* included with `#include FT_ERRORS_H'. This is */ - /* only true where `FT_ERRORDEF` is undefined. */ - /* FT_ERR_PROTOS_DEFINED: Actual multiple-inclusion protection of */ - /* `fterrors.h`. */ + /* FT_INCLUDE_ERR_PROTOS: Control whether function prototypes should be */ + /* included with */ + /* */ + /* #include */ + /* */ + /* This is only true where `FT_ERRORDEF` is */ + /* undefined. */ + /* */ + /* FT_ERR_PROTOS_DEFINED: Actual multiple-inclusion protection of */ + /* `fterrors.h`. 
*/ #ifdef FT_INCLUDE_ERR_PROTOS #undef FT_INCLUDE_ERR_PROTOS @@ -244,6 +249,8 @@ #define FT_ERR_PROTOS_DEFINED +FT_BEGIN_HEADER + /************************************************************************** * * @function: @@ -274,6 +281,10 @@ FT_EXPORT( const char* ) FT_Error_String( FT_Error error_code ); + /* */ + +FT_END_HEADER + #endif /* FT_ERR_PROTOS_DEFINED */ diff --git a/Source/ThirdParty/freetype/ftfntfmt.h b/Source/ThirdParty/freetype/ftfntfmt.h index aae0b1326..c0018fc83 100644 --- a/Source/ThirdParty/freetype/ftfntfmt.h +++ b/Source/ThirdParty/freetype/ftfntfmt.h @@ -4,7 +4,7 @@ * * Support functions for font formats. * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTFNTFMT_H_ #define FTFNTFMT_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftgasp.h b/Source/ThirdParty/freetype/ftgasp.h index 24673d8ce..d5f19add8 100644 --- a/Source/ThirdParty/freetype/ftgasp.h +++ b/Source/ThirdParty/freetype/ftgasp.h @@ -4,7 +4,7 @@ * * Access of TrueType's 'gasp' table (specification). * - * Copyright (C) 2007-2019 by + * Copyright (C) 2007-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTGASP_H_ #define FTGASP_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftglyph.h b/Source/ThirdParty/freetype/ftglyph.h index 4067c2e62..4658895f7 100644 --- a/Source/ThirdParty/freetype/ftglyph.h +++ b/Source/ThirdParty/freetype/ftglyph.h @@ -4,7 +4,7 @@ * * FreeType convenience functions to handle glyphs (specification). 
* - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -33,8 +33,7 @@ #define FTGLYPH_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -127,7 +126,7 @@ FT_BEGIN_HEADER * * @description: * A handle to an object used to model a bitmap glyph image. This is a - * sub-class of @FT_Glyph, and a pointer to @FT_BitmapGlyphRec. + * 'sub-class' of @FT_Glyph, and a pointer to @FT_BitmapGlyphRec. */ typedef struct FT_BitmapGlyphRec_* FT_BitmapGlyph; @@ -143,7 +142,7 @@ FT_BEGIN_HEADER * * @fields: * root :: - * The root @FT_Glyph fields. + * The root fields of @FT_Glyph. * * left :: * The left-side bearing, i.e., the horizontal distance from the @@ -182,7 +181,7 @@ FT_BEGIN_HEADER * * @description: * A handle to an object used to model an outline glyph image. This is a - * sub-class of @FT_Glyph, and a pointer to @FT_OutlineGlyphRec. + * 'sub-class' of @FT_Glyph, and a pointer to @FT_OutlineGlyphRec. */ typedef struct FT_OutlineGlyphRec_* FT_OutlineGlyph; @@ -210,7 +209,7 @@ FT_BEGIN_HEADER * * As the outline is extracted from a glyph slot, its coordinates are * expressed normally in 26.6 pixels, unless the flag @FT_LOAD_NO_SCALE - * was used in @FT_Load_Glyph() or @FT_Load_Char(). + * was used in @FT_Load_Glyph or @FT_Load_Char. * * The outline's tables are always owned by the object and are destroyed * with it. @@ -223,6 +222,92 @@ FT_BEGIN_HEADER } FT_OutlineGlyphRec; + /************************************************************************** + * + * @type: + * FT_SvgGlyph + * + * @description: + * A handle to an object used to model an SVG glyph. This is a + * 'sub-class' of @FT_Glyph, and a pointer to @FT_SvgGlyphRec. 
+ * + * @since: + * 2.12 + */ + typedef struct FT_SvgGlyphRec_* FT_SvgGlyph; + + + /************************************************************************** + * + * @struct: + * FT_SvgGlyphRec + * + * @description: + * A structure used for OT-SVG glyphs. This is a 'sub-class' of + * @FT_GlyphRec. + * + * @fields: + * root :: + * The root @FT_GlyphRec fields. + * + * svg_document :: + * A pointer to the SVG document. + * + * svg_document_length :: + * The length of `svg_document`. + * + * glyph_index :: + * The index of the glyph to be rendered. + * + * metrics :: + * A metrics object storing the size information. + * + * units_per_EM :: + * The size of the EM square. + * + * start_glyph_id :: + * The first glyph ID in the glyph range covered by this document. + * + * end_glyph_id :: + * The last glyph ID in the glyph range covered by this document. + * + * transform :: + * A 2x2 transformation matrix to apply to the glyph while rendering + * it. + * + * delta :: + * Translation to apply to the glyph while rendering. + * + * @note: + * The Glyph Management API requires @FT_Glyph or its 'sub-class' to have + * all the information needed to completely define the glyph's rendering. + * Outline-based glyphs can directly apply transformations to the outline + * but this is not possible for an SVG document that hasn't been parsed. + * Therefore, the transformation is stored along with the document. In + * the absence of a 'ViewBox' or 'Width'/'Height' attribute, the size of + * the ViewPort should be assumed to be 'units_per_EM'. 
+ */ + typedef struct FT_SvgGlyphRec_ + { + FT_GlyphRec root; + + FT_Byte* svg_document; + FT_ULong svg_document_length; + + FT_UInt glyph_index; + + FT_Size_Metrics metrics; + FT_UShort units_per_EM; + + FT_UShort start_glyph_id; + FT_UShort end_glyph_id; + + FT_Matrix transform; + FT_Vector delta; + + } FT_SvgGlyphRec; + + /************************************************************************** * * @function: @@ -270,7 +355,7 @@ FT_BEGIN_HEADER * * @output: * aglyph :: - * A handle to the glyph object. + * A handle to the glyph object. `NULL` in case of error. * * @return: * FreeType error code. 0~means success. @@ -300,7 +385,7 @@ FT_BEGIN_HEADER * * @output: * target :: - * A handle to the target glyph object. 0~in case of error. + * A handle to the target glyph object. `NULL` in case of error. * * @return: * FreeType error code. 0~means success. @@ -328,7 +413,7 @@ FT_BEGIN_HEADER * * delta :: * A pointer to a 2d vector to apply. Coordinates are expressed in - * 1/64th of a pixel. + * 1/64 of a pixel. * * @return: * FreeType error code (if not 0, the glyph format is not scalable). @@ -338,9 +423,9 @@ FT_BEGIN_HEADER * vector. */ FT_EXPORT( FT_Error ) - FT_Glyph_Transform( FT_Glyph glyph, - FT_Matrix* matrix, - FT_Vector* delta ); + FT_Glyph_Transform( FT_Glyph glyph, + const FT_Matrix* matrix, + const FT_Vector* delta ); /************************************************************************** @@ -415,7 +500,7 @@ FT_BEGIN_HEADER * @output: * acbox :: * The glyph coordinate bounding box. Coordinates are expressed in - * 1/64th of pixels if it is grid-fitted. + * 1/64 of pixels if it is grid-fitted. * * @note: * Coordinates are relative to the glyph origin, using the y~upwards @@ -499,9 +584,9 @@ FT_BEGIN_HEADER * The glyph image is translated with the `origin` vector before * rendering. 
* - * The first parameter is a pointer to an @FT_Glyph handle, that will be + * The first parameter is a pointer to an @FT_Glyph handle that will be * _replaced_ by this function (with newly allocated data). Typically, - * you would use (omitting error handling): + * you would do something like the following (omitting error handling). * * ``` * FT_Glyph glyph; @@ -518,7 +603,7 @@ FT_BEGIN_HEADER * if ( glyph->format != FT_GLYPH_FORMAT_BITMAP ) * { * error = FT_Glyph_To_Bitmap( &glyph, FT_RENDER_MODE_NORMAL, - * 0, 1 ); + * 0, 1 ); * if ( error ) // `glyph' unchanged * ... * } @@ -533,7 +618,7 @@ FT_BEGIN_HEADER * FT_Done_Glyph( glyph ); * ``` * - * Here is another example, again without error handling: + * Here is another example, again without error handling. * * ``` * FT_Glyph glyphs[MAX_GLYPHS] @@ -570,10 +655,10 @@ FT_BEGIN_HEADER * ``` */ FT_EXPORT( FT_Error ) - FT_Glyph_To_Bitmap( FT_Glyph* the_glyph, - FT_Render_Mode render_mode, - FT_Vector* origin, - FT_Bool destroy ); + FT_Glyph_To_Bitmap( FT_Glyph* the_glyph, + FT_Render_Mode render_mode, + const FT_Vector* origin, + FT_Bool destroy ); /************************************************************************** @@ -586,7 +671,7 @@ FT_BEGIN_HEADER * * @input: * glyph :: - * A handle to the target glyph object. + * A handle to the target glyph object. Can be `NULL`. */ FT_EXPORT( void ) FT_Done_Glyph( FT_Glyph glyph ); diff --git a/Source/ThirdParty/freetype/ftgxval.h b/Source/ThirdParty/freetype/ftgxval.h index b14f637c5..e8de9a6ed 100644 --- a/Source/ThirdParty/freetype/ftgxval.h +++ b/Source/ThirdParty/freetype/ftgxval.h @@ -4,7 +4,7 @@ * * FreeType API for validating TrueTypeGX/AAT tables (specification). * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * Masatake YAMATO, Redhat K.K, * David Turner, Robert Wilhelm, and Werner Lemberg. 
* @@ -28,8 +28,7 @@ #ifndef FTGXVAL_H_ #define FTGXVAL_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftgzip.h b/Source/ThirdParty/freetype/ftgzip.h index 418c61228..443ec29db 100644 --- a/Source/ThirdParty/freetype/ftgzip.h +++ b/Source/ThirdParty/freetype/ftgzip.h @@ -4,7 +4,7 @@ * * Gzip-compressed stream support. * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTGZIP_H_ #define FTGZIP_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -43,6 +42,16 @@ FT_BEGIN_HEADER * Using gzip-compressed font files. * * @description: + * In certain builds of the library, gzip compression recognition is + * automatically handled when calling @FT_New_Face or @FT_Open_Face. + * This means that if no font driver is capable of handling the raw + * compressed file, the library will try to open a gzipped stream from it + * and re-open the face with it. + * + * The stream implementation is very basic and resets the decompression + * process each time seeking backwards is needed within the stream, + * which significantly undermines the performance. + * * This section contains the declaration of Gzip-specific functions. * */ @@ -75,15 +84,6 @@ FT_BEGIN_HEADER * **not** call `FT_Stream_Close` on the source stream. None of the * stream objects will be released to the heap. * - * The stream implementation is very basic and resets the decompression - * process each time seeking backwards is needed within the stream. - * - * In certain builds of the library, gzip compression recognition is - * automatically handled when calling @FT_New_Face or @FT_Open_Face. 
- * This means that if no font driver is capable of handling the raw - * compressed file, the library will try to open a gzipped stream from it - * and re-open the face with it. - * * This function may return `FT_Err_Unimplemented_Feature` if your build * of FreeType was not compiled with zlib support. */ diff --git a/Source/ThirdParty/freetype/ftimage.h b/Source/ThirdParty/freetype/ftimage.h index d640b0b0a..6baa81256 100644 --- a/Source/ThirdParty/freetype/ftimage.h +++ b/Source/ThirdParty/freetype/ftimage.h @@ -5,7 +5,7 @@ * FreeType glyph image formats and default raster interface * (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ /************************************************************************** * * Note: A 'raster' is simply a scan-line converter, used to render - * FT_Outlines into FT_Bitmaps. + * `FT_Outline`s into `FT_Bitmap`s. * */ @@ -28,12 +28,6 @@ #define FTIMAGE_H_ - /* STANDALONE_ is from ftgrays.c */ -#ifndef STANDALONE_ -#include -#endif - - FT_BEGIN_HEADER @@ -202,6 +196,11 @@ FT_BEGIN_HEADER #define ft_pixel_mode_pal2 FT_PIXEL_MODE_GRAY2 #define ft_pixel_mode_pal4 FT_PIXEL_MODE_GRAY4 + /* */ + + /* For debugging, the @FT_Pixel_Mode enumeration must stay in sync */ + /* with the `pixel_modes` array in file `ftobjs.c`. */ + /************************************************************************** * @@ -257,6 +256,12 @@ FT_BEGIN_HEADER * palette :: * A typeless pointer to the bitmap palette; this field is intended for * paletted pixel modes. Not used currently. + * + * @note: + * `width` and `rows` refer to the *physical* size of the bitmap, not the + * *logical* one. For example, if @FT_Pixel_Mode is set to + * `FT_PIXEL_MODE_LCD`, the logical width is just a third of the + * physical one. 
*/ typedef struct FT_Bitmap_ { @@ -401,6 +406,13 @@ FT_BEGIN_HEADER * if @FT_OUTLINE_IGNORE_DROPOUTS is set. See below for more * information. * + * FT_OUTLINE_OVERLAP :: + * [Since 2.10.3] This flag indicates that this outline contains + * overlapping contours and the anti-aliased renderer should perform + * oversampling to mitigate possible artifacts. This flag should _not_ + * be set for well designed glyphs without overlaps because it quadruples + * the rendering time. + * * FT_OUTLINE_HIGH_PRECISION :: * This flag indicates that the scan-line converter should try to * convert this outline to bitmaps with the highest possible quality. @@ -432,6 +444,7 @@ FT_BEGIN_HEADER #define FT_OUTLINE_IGNORE_DROPOUTS 0x8 #define FT_OUTLINE_SMART_DROPOUTS 0x10 #define FT_OUTLINE_INCLUDE_STUBS 0x20 +#define FT_OUTLINE_OVERLAP 0x40 #define FT_OUTLINE_HIGH_PRECISION 0x100 #define FT_OUTLINE_SINGLE_PASS 0x200 @@ -688,11 +701,13 @@ FT_BEGIN_HEADER * to get a simple enumeration without assigning special numbers. */ #ifndef FT_IMAGE_TAG -#define FT_IMAGE_TAG( value, _x1, _x2, _x3, _x4 ) \ - value = ( ( (unsigned long)_x1 << 24 ) | \ - ( (unsigned long)_x2 << 16 ) | \ - ( (unsigned long)_x3 << 8 ) | \ - (unsigned long)_x4 ) + +#define FT_IMAGE_TAG( value, _x1, _x2, _x3, _x4 ) \ + value = ( ( FT_STATIC_BYTE_CAST( unsigned long, _x1 ) << 24 ) | \ + ( FT_STATIC_BYTE_CAST( unsigned long, _x2 ) << 16 ) | \ + ( FT_STATIC_BYTE_CAST( unsigned long, _x3 ) << 8 ) | \ + FT_STATIC_BYTE_CAST( unsigned long, _x4 ) ) + #endif /* FT_IMAGE_TAG */ @@ -732,6 +747,10 @@ FT_BEGIN_HEADER * contours. Some Type~1 fonts, like those in the Hershey family, * contain glyphs in this format. These are described as @FT_Outline, * but FreeType isn't currently capable of rendering them correctly. + * + * FT_GLYPH_FORMAT_SVG :: + * [Since 2.12] The glyph is represented by an SVG document in the + * 'SVG~' table. 
*/ typedef enum FT_Glyph_Format_ { @@ -740,7 +759,8 @@ FT_BEGIN_HEADER FT_IMAGE_TAG( FT_GLYPH_FORMAT_COMPOSITE, 'c', 'o', 'm', 'p' ), FT_IMAGE_TAG( FT_GLYPH_FORMAT_BITMAP, 'b', 'i', 't', 's' ), FT_IMAGE_TAG( FT_GLYPH_FORMAT_OUTLINE, 'o', 'u', 't', 'l' ), - FT_IMAGE_TAG( FT_GLYPH_FORMAT_PLOTTER, 'p', 'l', 'o', 't' ) + FT_IMAGE_TAG( FT_GLYPH_FORMAT_PLOTTER, 'p', 'l', 'o', 't' ), + FT_IMAGE_TAG( FT_GLYPH_FORMAT_SVG, 'S', 'V', 'G', ' ' ) } FT_Glyph_Format; @@ -765,17 +785,6 @@ FT_BEGIN_HEADER /*************************************************************************/ - /************************************************************************** - * - * A raster is a scan converter, in charge of rendering an outline into a - * bitmap. This section contains the public API for rasters. - * - * Note that in FreeType 2, all rasters are now encapsulated within - * specific modules called 'renderers'. See `ftrender.h` for more details - * on renderers. - * - */ - /************************************************************************** * @@ -789,16 +798,35 @@ FT_BEGIN_HEADER * How vectorial outlines are converted into bitmaps and pixmaps. * * @description: - * This section contains technical definitions. + * A raster or a rasterizer is a scan converter in charge of producing a + * pixel coverage bitmap that can be used as an alpha channel when + * compositing a glyph with a background. FreeType comes with two + * rasterizers: bilevel `raster1` and anti-aliased `smooth` are two + * separate modules. They are usually called from the high-level + * @FT_Load_Glyph or @FT_Render_Glyph functions and produce the entire + * coverage bitmap at once, while staying largely invisible to users. + * + * Instead of working with complete coverage bitmaps, it is also possible + * to intercept consecutive pixel runs on the same scanline with the same + * coverage, called _spans_, and process them individually. 
Only the + * `smooth` rasterizer permits this when calling @FT_Outline_Render with + * @FT_Raster_Params as described below. + * + * Working with either complete bitmaps or spans it is important to think + * of them as colorless coverage objects suitable as alpha channels to + * blend arbitrary colors with a background. For best results, it is + * recommended to use gamma correction, too. + * + * This section also describes the public API needed to set up alternative + * @FT_Renderer modules. * * @order: - * FT_Raster * FT_Span * FT_SpanFunc - * * FT_Raster_Params * FT_RASTER_FLAG_XXX * + * FT_Raster * FT_Raster_NewFunc * FT_Raster_DoneFunc * FT_Raster_ResetFunc @@ -809,26 +837,14 @@ FT_BEGIN_HEADER */ - /************************************************************************** - * - * @type: - * FT_Raster - * - * @description: - * An opaque handle (pointer) to a raster object. Each object can be - * used independently to convert an outline into a bitmap or pixmap. - */ - typedef struct FT_RasterRec_* FT_Raster; - - /************************************************************************** * * @struct: * FT_Span * * @description: - * A structure used to model a single span of gray pixels when rendering - * an anti-aliased bitmap. + * A structure to model a single span of consecutive pixels when + * rendering an anti-aliased bitmap. * * @fields: * x :: @@ -845,8 +861,8 @@ FT_BEGIN_HEADER * This structure is used by the span drawing callback type named * @FT_SpanFunc that takes the y~coordinate of the span as a parameter. * - * The coverage value is always between 0 and 255. If you want less gray - * values, the callback function has to reduce them. + * The anti-aliased rasterizer produces coverage values from 0 to 255, + * that is, from completely transparent to completely opaque. 
*/ typedef struct FT_Span_ { @@ -864,12 +880,12 @@ FT_BEGIN_HEADER * * @description: * A function used as a call-back by the anti-aliased renderer in order - * to let client applications draw themselves the gray pixel spans on - * each scan line. + * to let client applications draw themselves the pixel spans on each + * scan line. * * @input: * y :: - * The scanline's y~coordinate. + * The scanline's upward y~coordinate. * * count :: * The number of spans to draw on this scanline. @@ -881,11 +897,12 @@ FT_BEGIN_HEADER * User-supplied data that is passed to the callback. * * @note: - * This callback allows client applications to directly render the gray - * spans of the anti-aliased bitmap to any kind of surfaces. + * This callback allows client applications to directly render the spans + * of the anti-aliased bitmap to any kind of surfaces. * * This can be used to write anti-aliased outlines directly to a given - * background bitmap, and even perform translucency. + * background bitmap using alpha compositing. It can also be used for + * oversampling and averaging. */ typedef void (*FT_SpanFunc)( int y, @@ -945,24 +962,27 @@ FT_BEGIN_HEADER * This flag is set to indicate direct rendering. In this mode, client * applications must provide their own span callback. This lets them * directly draw or compose over an existing bitmap. If this bit is - * not set, the target pixmap's buffer _must_ be zeroed before - * rendering. + * _not_ set, the target pixmap's buffer _must_ be zeroed before + * rendering and the output will be clipped to its size. * * Direct rendering is only possible with anti-aliased glyphs. * * FT_RASTER_FLAG_CLIP :: * This flag is only used in direct rendering mode. If set, the output * will be clipped to a box specified in the `clip_box` field of the - * @FT_Raster_Params structure. + * @FT_Raster_Params structure. Otherwise, the `clip_box` is + * effectively set to the bounding box and all spans are generated. 
* - * Note that by default, the glyph bitmap is clipped to the target - * pixmap, except in direct rendering mode where all spans are - * generated if no clipping box is set. + * FT_RASTER_FLAG_SDF :: + * This flag is set to indicate that a signed distance field glyph + * image should be generated. This is only used while rendering with + * the @FT_RENDER_MODE_SDF render mode. */ #define FT_RASTER_FLAG_DEFAULT 0x0 #define FT_RASTER_FLAG_AA 0x1 #define FT_RASTER_FLAG_DIRECT 0x2 #define FT_RASTER_FLAG_CLIP 0x4 +#define FT_RASTER_FLAG_SDF 0x8 /* these constants are deprecated; use the corresponding */ /* `FT_RASTER_FLAG_XXX` values instead */ @@ -978,7 +998,8 @@ FT_BEGIN_HEADER * FT_Raster_Params * * @description: - * A structure to hold the arguments used by a raster's render function. + * A structure to hold the parameters used by a raster's render function, + * passed as an argument to @FT_Outline_Render. * * @fields: * target :: @@ -1006,20 +1027,26 @@ FT_BEGIN_HEADER * User-supplied data that is passed to each drawing callback. * * clip_box :: - * An optional clipping box. It is only used in direct rendering mode. - * Note that coordinates here should be expressed in _integer_ pixels - * (and not in 26.6 fixed-point units). + * An optional span clipping box expressed in _integer_ pixels + * (not in 26.6 fixed-point units). * * @note: - * An anti-aliased glyph bitmap is drawn if the @FT_RASTER_FLAG_AA bit - * flag is set in the `flags` field, otherwise a monochrome bitmap is - * generated. + * The @FT_RASTER_FLAG_AA bit flag must be set in the `flags` to + * generate an anti-aliased glyph bitmap, otherwise a monochrome bitmap + * is generated. The `target` should have appropriate pixel mode and its + * dimensions define the clipping region. * - * If the @FT_RASTER_FLAG_DIRECT bit flag is set in `flags`, the raster - * will call the `gray_spans` callback to draw gray pixel spans. 
This - * allows direct composition over a pre-existing bitmap through - * user-provided callbacks to perform the span drawing and composition. - * Not supported by the monochrome rasterizer. + * If both @FT_RASTER_FLAG_AA and @FT_RASTER_FLAG_DIRECT bit flags + * are set in `flags`, the raster calls an @FT_SpanFunc callback + * `gray_spans` with `user` data as an argument ignoring `target`. This + * allows direct composition over a pre-existing user surface to perform + * the span drawing and composition. To optionally clip the spans, set + * the @FT_RASTER_FLAG_CLIP flag and `clip_box`. The monochrome raster + * does not support the direct mode. + * + * The gray-level rasterizer always uses 256 gray levels. If you want + * fewer gray levels, you have to use @FT_RASTER_FLAG_DIRECT and reduce + * the levels in the callback function. */ typedef struct FT_Raster_Params_ { @@ -1036,6 +1063,23 @@ FT_BEGIN_HEADER } FT_Raster_Params; + /************************************************************************** + * + * @type: + * FT_Raster + * + * @description: + * An opaque handle (pointer) to a raster object. Each object can be + * used independently to convert an outline into a bitmap or pixmap. + * + * @note: + * In FreeType 2, all rasters are now encapsulated within specific + * @FT_Renderer modules and only used in their context. + * + */ + typedef struct FT_RasterRec_* FT_Raster; + + /************************************************************************** * * @functype: diff --git a/Source/ThirdParty/freetype/ftincrem.h b/Source/ThirdParty/freetype/ftincrem.h index a4db02b58..2d4f5def2 100644 --- a/Source/ThirdParty/freetype/ftincrem.h +++ b/Source/ThirdParty/freetype/ftincrem.h @@ -4,7 +4,7 @@ * * FreeType incremental loading (specification). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,9 +19,8 @@ #ifndef FTINCREM_H_ #define FTINCREM_H_ -#include -#include FT_FREETYPE_H -#include FT_PARAMETER_TAGS_H +#include +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -214,9 +213,14 @@ FT_BEGIN_HEADER * * @description: * A function used to retrieve the basic metrics of a given glyph index - * before accessing its data. This is necessary because, in certain - * formats like TrueType, the metrics are stored in a different place - * from the glyph images proper. + * before accessing its data. This allows for handling font types such + * as PCL~XL Format~1, Class~2 downloaded TrueType fonts, where the glyph + * metrics (`hmtx` and `vmtx` tables) are permitted to be omitted from + * the font, and the relevant metrics included in the header of the glyph + * outline data. Importantly, this is not intended to allow custom glyph + * metrics (for example, Postscript Metrics dictionaries), because that + * conflicts with the requirements of outline hinting. Such custom + * metrics must be handled separately, by the calling application. * * @input: * incremental :: @@ -236,7 +240,7 @@ FT_BEGIN_HEADER * * @output: * ametrics :: - * The replacement glyph metrics in font units. + * The glyph metrics in font units. * */ typedef FT_Error @@ -265,7 +269,7 @@ FT_BEGIN_HEADER * * get_glyph_metrics :: * The function to get glyph metrics. May be null if the font does not - * provide overriding glyph metrics. + * require it. * */ typedef struct FT_Incremental_FuncsRec_ diff --git a/Source/ThirdParty/freetype/ftlcdfil.h b/Source/ThirdParty/freetype/ftlcdfil.h index 3a19d043b..d3723e16f 100644 --- a/Source/ThirdParty/freetype/ftlcdfil.h +++ b/Source/ThirdParty/freetype/ftlcdfil.h @@ -5,7 +5,7 @@ * FreeType API for color filtering of subpixel bitmap glyphs * (specification). 
* - * Copyright (C) 2006-2019 by + * Copyright (C) 2006-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,9 +20,8 @@ #ifndef FTLCDFIL_H_ #define FTLCDFIL_H_ -#include -#include FT_FREETYPE_H -#include FT_PARAMETER_TAGS_H +#include +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -45,9 +44,9 @@ FT_BEGIN_HEADER * API to control subpixel rendering. * * @description: - * FreeType provides two alternative subpixel rendering technologies. + * FreeType provides two alternative subpixel rendering technologies. * Should you define `FT_CONFIG_OPTION_SUBPIXEL_RENDERING` in your - * `ftoption.h` file, this enables patented ClearType-style rendering. + * `ftoption.h` file, this enables ClearType-style rendering. * Otherwise, Harmony LCD rendering is enabled. These technologies are * controlled differently and API described below, although always * available, performs its function when appropriate method is enabled @@ -56,13 +55,12 @@ FT_BEGIN_HEADER * ClearType-style LCD rendering exploits the color-striped structure of * LCD pixels, increasing the available resolution in the direction of * the stripe (usually horizontal RGB) by a factor of~3. Using the - * subpixels coverages unfiltered can create severe color fringes + * subpixel coverages unfiltered can create severe color fringes * especially when rendering thin features. Indeed, to produce * black-on-white text, the nearby color subpixels must be dimmed - * equally. - * - * A good 5-tap FIR filter should be applied to subpixel coverages - * regardless of pixel boundaries and should have these properties: + * evenly. Therefore, an equalizing 5-tap FIR filter should be applied + * to subpixel coverages regardless of pixel boundaries and should have + * these properties: * * 1. It should be symmetrical, like {~a, b, c, b, a~}, to avoid * any shifts in appearance. 
@@ -85,7 +83,7 @@ FT_BEGIN_HEADER * Harmony LCD rendering is suitable to panels with any regular subpixel * structure, not just monitors with 3 color striped subpixels, as long * as the color subpixels have fixed positions relative to the pixel - * center. In this case, each color channel is then rendered separately + * center. In this case, each color channel can be rendered separately * after shifting the outline opposite to the subpixel shift so that the * coverage maps are aligned. This method is immune to color fringes * because the shifts do not change integral coverage. @@ -102,9 +100,9 @@ FT_BEGIN_HEADER * clockwise. Harmony with default LCD geometry is equivalent to * ClearType with light filter. * - * As a result of ClearType filtering or Harmony rendering, the - * dimensions of LCD bitmaps can be either wider or taller than the - * dimensions of the corresponding outline with regard to the pixel grid. + * As a result of ClearType filtering or Harmony shifts, the resulting + * dimensions of LCD bitmaps can be slightly wider or taller than the + * dimensions of the original outline with regard to the pixel grid. * For example, for @FT_RENDER_MODE_LCD, the filter adds 2~subpixels to * the left, and 2~subpixels to the right. The bitmap offset values are * adjusted accordingly, so clients shouldn't need to modify their layout @@ -139,11 +137,11 @@ FT_BEGIN_HEADER * * FT_LCD_FILTER_DEFAULT :: * This is a beveled, normalized, and color-balanced five-tap filter - * with weights of [0x08 0x4D 0x56 0x4D 0x08] in 1/256th units. + * with weights of [0x08 0x4D 0x56 0x4D 0x08] in 1/256 units. * * FT_LCD_FILTER_LIGHT :: * this is a boxy, normalized, and color-balanced three-tap filter with - * weights of [0x00 0x55 0x56 0x55 0x00] in 1/256th units. + * weights of [0x00 0x55 0x56 0x55 0x00] in 1/256 units.
* * FT_LCD_FILTER_LEGACY :: * FT_LCD_FILTER_LEGACY1 :: @@ -177,7 +175,7 @@ FT_BEGIN_HEADER * FT_Library_SetLcdFilter * * @description: - * This function is used to apply color filtering to LCD decimated + * This function is used to change filter applied to LCD decimated * bitmaps, like the ones used when calling @FT_Render_Glyph with * @FT_RENDER_MODE_LCD or @FT_RENDER_MODE_LCD_V. * @@ -196,15 +194,14 @@ FT_BEGIN_HEADER * FreeType error code. 0~means success. * * @note: - * This feature is always disabled by default. Clients must make an - * explicit call to this function with a `filter` value other than - * @FT_LCD_FILTER_NONE in order to enable it. + * Since 2.10.3 the LCD filtering is enabled with @FT_LCD_FILTER_DEFAULT. + * It is no longer necessary to call this function explicitly except + * to choose a different filter or disable filtering altogether with + * @FT_LCD_FILTER_NONE. * - * Due to **PATENTS** covering subpixel rendering, this function doesn't - * do anything except returning `FT_Err_Unimplemented_Feature` if the - * configuration macro `FT_CONFIG_OPTION_SUBPIXEL_RENDERING` is not - * defined in your build of the library, which should correspond to all - * default builds of FreeType. + * This function does nothing but returns `FT_Err_Unimplemented_Feature` + * if the configuration macro `FT_CONFIG_OPTION_SUBPIXEL_RENDERING` is + * not defined in your build of the library. * * @since: * 2.3.0 @@ -229,17 +226,15 @@ FT_BEGIN_HEADER * * weights :: * A pointer to an array; the function copies the first five bytes and - * uses them to specify the filter weights in 1/256th units. + * uses them to specify the filter weights in 1/256 units. * * @return: * FreeType error code. 0~means success. 
* * @note: - * Due to **PATENTS** covering subpixel rendering, this function doesn't - * do anything except returning `FT_Err_Unimplemented_Feature` if the - * configuration macro `FT_CONFIG_OPTION_SUBPIXEL_RENDERING` is not - * defined in your build of the library, which should correspond to all - * default builds of FreeType. + * This function does nothing but returns `FT_Err_Unimplemented_Feature` + * if the configuration macro `FT_CONFIG_OPTION_SUBPIXEL_RENDERING` is + * not defined in your build of the library. * * LCD filter weights can also be set per face using @FT_Face_Properties * with @FT_PARAM_TAG_LCD_FILTER_WEIGHTS. diff --git a/Source/ThirdParty/freetype/ftlist.h b/Source/ThirdParty/freetype/ftlist.h index 4782892d1..b55313133 100644 --- a/Source/ThirdParty/freetype/ftlist.h +++ b/Source/ThirdParty/freetype/ftlist.h @@ -4,7 +4,7 @@ * * Generic list support for FreeType (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -28,8 +28,7 @@ #define FTLIST_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftlogging.h b/Source/ThirdParty/freetype/ftlogging.h new file mode 100644 index 000000000..53b8b8964 --- /dev/null +++ b/Source/ThirdParty/freetype/ftlogging.h @@ -0,0 +1,184 @@ +/**************************************************************************** + * + * ftlogging.h + * + * Additional debugging APIs. + * + * Copyright (C) 2020-2023 by + * David Turner, Robert Wilhelm, and Werner Lemberg. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. 
+ * + */ + + +#ifndef FTLOGGING_H_ +#define FTLOGGING_H_ + + +#include <ft2build.h> +#include FT_CONFIG_CONFIG_H + + +FT_BEGIN_HEADER + + + /************************************************************************** + * + * @section: + * debugging_apis + * + * @title: + * External Debugging APIs + * + * @abstract: + * Public APIs to control the `FT_DEBUG_LOGGING` macro. + * + * @description: + * This section contains the declarations of public functions that + * enable fine control of what the `FT_DEBUG_LOGGING` macro outputs. + * + */ + + + /************************************************************************** + * + * @function: + * FT_Trace_Set_Level + * + * @description: + * Change the levels of tracing components of FreeType at run time. + * + * @input: + * tracing_level :: + * New tracing value. + * + * @example: + * The following call makes FreeType trace everything but the 'memory' + * component. + * + * ``` + * FT_Trace_Set_Level( "any:7 memory:0" ); + * ``` + * + * @note: + * This function does nothing if compilation option `FT_DEBUG_LOGGING` + * isn't set. + * + * @since: + * 2.11 + * + */ + FT_EXPORT( void ) + FT_Trace_Set_Level( const char* tracing_level ); + + + /************************************************************************** + * + * @function: + * FT_Trace_Set_Default_Level + * + * @description: + * Reset tracing value of FreeType's components to the default value + * (i.e., to the value of the `FT2_DEBUG` environment variable or to NULL + * if `FT2_DEBUG` is not set). + * + * @note: + * This function does nothing if compilation option `FT_DEBUG_LOGGING` + * isn't set. + * + * @since: + * 2.11 + * + */ + FT_EXPORT( void ) + FT_Trace_Set_Default_Level( void ); + + + /************************************************************************** + * + * @functype: + * FT_Custom_Log_Handler + * + * @description: + * A function typedef that is used to handle the logging of tracing and + * debug messages on a file system.
+ * + * @input: + * ft_component :: + * The name of `FT_COMPONENT` from which the current debug or error + * message is produced. + * + * fmt :: + * Actual debug or tracing message. + * + * args:: + * Arguments of debug or tracing messages. + * + * @since: + * 2.11 + * + */ + typedef void + (*FT_Custom_Log_Handler)( const char* ft_component, + const char* fmt, + va_list args ); + + + /************************************************************************** + * + * @function: + * FT_Set_Log_Handler + * + * @description: + * A function to set a custom log handler. + * + * @input: + * handler :: + * New logging function. + * + * @note: + * This function does nothing if compilation option `FT_DEBUG_LOGGING` + * isn't set. + * + * @since: + * 2.11 + * + */ + FT_EXPORT( void ) + FT_Set_Log_Handler( FT_Custom_Log_Handler handler ); + + + /************************************************************************** + * + * @function: + * FT_Set_Default_Log_Handler + * + * @description: + * A function to undo the effect of @FT_Set_Log_Handler, resetting the + * log handler to FreeType's built-in version. + * + * @note: + * This function does nothing if compilation option `FT_DEBUG_LOGGING` + * isn't set. + * + * @since: + * 2.11 + * + */ + FT_EXPORT( void ) + FT_Set_Default_Log_Handler( void ); + + /* */ + + +FT_END_HEADER + +#endif /* FTLOGGING_H_ */ + + +/* END */ diff --git a/Source/ThirdParty/freetype/ftlzw.h b/Source/ThirdParty/freetype/ftlzw.h index fd22968f5..adfd17247 100644 --- a/Source/ThirdParty/freetype/ftlzw.h +++ b/Source/ThirdParty/freetype/ftlzw.h @@ -4,7 +4,7 @@ * * LZW-compressed stream support. * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTLZW_H_ #define FTLZW_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" 
@@ -43,6 +42,16 @@ FT_BEGIN_HEADER * Using LZW-compressed font files. * * @description: + * In certain builds of the library, LZW compression recognition is + * automatically handled when calling @FT_New_Face or @FT_Open_Face. + * This means that if no font driver is capable of handling the raw + * compressed file, the library will try to open a LZW stream from it and + * re-open the face with it. + * + * The stream implementation is very basic and resets the decompression + * process each time seeking backwards is needed within the stream, + * which significantly undermines the performance. + * * This section contains the declaration of LZW-specific functions. * */ @@ -73,15 +82,6 @@ FT_BEGIN_HEADER * **not** call `FT_Stream_Close` on the source stream. None of the * stream objects will be released to the heap. * - * The stream implementation is very basic and resets the decompression - * process each time seeking backwards is needed within the stream - * - * In certain builds of the library, LZW compression recognition is - * automatically handled when calling @FT_New_Face or @FT_Open_Face. - * This means that if no font driver is capable of handling the raw - * compressed file, the library will try to open a LZW stream from it and - * re-open the face with it. - * * This function may return `FT_Err_Unimplemented_Feature` if your build * of FreeType was not compiled with LZW support. */ diff --git a/Source/ThirdParty/freetype/ftmac.h b/Source/ThirdParty/freetype/ftmac.h index 92b9f3dc0..a91e38f9e 100644 --- a/Source/ThirdParty/freetype/ftmac.h +++ b/Source/ThirdParty/freetype/ftmac.h @@ -4,7 +4,7 @@ * * Additional Mac-specific API. * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * Just van Rossum, David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -29,7 +29,6 @@ #define FTMAC_H_ -#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/ftmm.h b/Source/ThirdParty/freetype/ftmm.h index f2e16b640..d145128a9 100644 --- a/Source/ThirdParty/freetype/ftmm.h +++ b/Source/ThirdParty/freetype/ftmm.h @@ -4,7 +4,7 @@ * * FreeType Multiple Master font interface (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,8 +20,7 @@ #define FTMM_H_ -#include -#include FT_TYPE1_TABLES_H +#include FT_BEGIN_HEADER @@ -48,6 +47,9 @@ FT_BEGIN_HEADER * MM fonts, others will work with all three types. They are similar * enough that a consistent interface makes sense. * + * For Adobe MM fonts, macro @FT_IS_SFNT returns false. For GX and + * OpenType variation fonts, it returns true. + * */ @@ -151,7 +153,7 @@ FT_BEGIN_HEADER * @note: * The fields `minimum`, `def`, and `maximum` are 16.16 fractional values * for TrueType GX and OpenType variation fonts. For Adobe MM fonts, the - * values are integers. + * values are whole numbers (i.e., the fractional part is zero). */ typedef struct FT_Var_Axis_ { @@ -396,6 +398,10 @@ FT_BEGIN_HEADER * FreeType error code. 0~means success. * * @note: + * The design coordinates are 16.16 fractional values for TrueType GX and + * OpenType variation fonts. For Adobe MM fonts, the values are supposed + * to be whole numbers (i.e., the fractional part is zero). + * * [Since 2.8.1] To reset all axes to the default values, call the * function with `num_coords` set to zero and `coords` set to `NULL`. * [Since 2.9] 'Default values' means the currently selected named @@ -438,6 +444,11 @@ FT_BEGIN_HEADER * @return: * FreeType error code. 0~means success. * + * @note: + * The design coordinates are 16.16 fractional values for TrueType GX and + * OpenType variation fonts. 
For Adobe MM fonts, the values are whole + * numbers (i.e., the fractional part is zero). + * * @since: * 2.7.1 */ @@ -469,9 +480,9 @@ FT_BEGIN_HEADER * the number of axes, use default values for the remaining axes. * * coords :: - * The design coordinates array (each element must be between 0 and 1.0 - * for Adobe MM fonts, and between -1.0 and 1.0 for TrueType GX and - * OpenType variation fonts). + * The design coordinates array. Each element is a 16.16 fractional + * value and must be between 0 and 1.0 for Adobe MM fonts, and between + * -1.0 and 1.0 for TrueType GX and OpenType variation fonts. * * @return: * FreeType error code. 0~means success. @@ -516,7 +527,7 @@ FT_BEGIN_HEADER * * @output: * coords :: - * The normalized blend coordinates array. + * The normalized blend coordinates array (as 16.16 fractional values). * * @return: * FreeType error code. 0~means success. @@ -591,10 +602,12 @@ FT_BEGIN_HEADER * * @note: * Adobe Multiple Master fonts limit the number of designs, and thus the - * length of the weight vector to~16. + * length of the weight vector to 16~elements. * - * If `len` is zero and `weightvector` is `NULL`, the weight vector array - * is reset to the default values. + * If `len` is larger than zero, this function sets the + * @FT_FACE_FLAG_VARIATION bit in @FT_Face's `face_flags` field (i.e., + * @FT_IS_VARIATION will return true). If `len` is zero, this bit flag + * is unset and the weight vector array is reset to the default values. * * The Adobe documentation also states that the values in the * WeightVector array must total 1.0 +/-~0.001. In practice this does @@ -742,6 +755,45 @@ FT_BEGIN_HEADER FT_Set_Named_Instance( FT_Face face, FT_UInt instance_index ); + + /************************************************************************** + * + * @function: + * FT_Get_Default_Named_Instance + * + * @description: + * Retrieve the index of the default named instance, to be used with + * @FT_Set_Named_Instance. 
+ * + * The default instance of a variation font is that instance for which + * the nth axis coordinate is equal to `axis[n].def` (as specified in the + * @FT_MM_Var structure), with~n covering all axes. + * + * FreeType synthesizes a named instance for the default instance if the + * font does not contain such an entry. + * + * @input: + * face :: + * A handle to the source face. + * + * @output: + * instance_index :: + * The index of the default named instance. + * + * @return: + * FreeType error code. 0~means success. + * + * @note: + * For Adobe MM fonts (which don't have named instances) this function + * always returns zero for `instance_index`. + * + * @since: + * 2.13.1 + */ + FT_EXPORT( FT_Error ) + FT_Get_Default_Named_Instance( FT_Face face, + FT_UInt *instance_index ); + /* */ diff --git a/Source/ThirdParty/freetype/ftmodapi.h b/Source/ThirdParty/freetype/ftmodapi.h index 88488bfe8..c8f0c2c2a 100644 --- a/Source/ThirdParty/freetype/ftmodapi.h +++ b/Source/ThirdParty/freetype/ftmodapi.h @@ -4,7 +4,7 @@ * * FreeType modules public interface (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,8 +20,7 @@ #define FTMODAPI_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -46,10 +45,12 @@ FT_BEGIN_HEADER * * @description: * The definitions below are used to manage modules within FreeType. - * Modules can be added, upgraded, and removed at runtime. Additionally, - * some module properties can be controlled also. + * Internal and external modules can be added, upgraded, and removed at + * runtime. For example, an alternative renderer or proprietary font + * driver can be registered and prioritized. Additionally, some module + * properties can also be controlled. 
* - * Here is a list of possible values of the `module_name` field in the + * Here is a list of existing values of the `module_name` field in the * @FT_Module_Class structure. * * ``` @@ -65,7 +66,7 @@ FT_BEGIN_HEADER * psnames * raster1 * sfnt - * smooth, smooth-lcd, smooth-lcdv + * smooth * truetype * type1 * type42 @@ -87,6 +88,7 @@ FT_BEGIN_HEADER * FT_Remove_Module * FT_Add_Default_Modules * + * FT_FACE_DRIVER_NAME * FT_Property_Set * FT_Property_Get * FT_Set_Default_Properties @@ -329,6 +331,27 @@ FT_BEGIN_HEADER FT_Module module ); + /************************************************************************** + * + * @macro: + * FT_FACE_DRIVER_NAME + * + * @description: + * A macro that retrieves the name of a font driver from a face object. + * + * @note: + * The font driver name is a valid `module_name` for @FT_Property_Set + * and @FT_Property_Get. This is not the same as @FT_Get_Font_Format. + * + * @since: + * 2.11 + * + */ +#define FT_FACE_DRIVER_NAME( face ) \ + ( ( *FT_REINTERPRET_CAST( FT_Module_Class**, \ + ( face )->driver ) )->module_name ) + + /************************************************************************** * * @function: @@ -486,8 +509,7 @@ FT_BEGIN_HEADER * * ``` * FREETYPE_PROPERTIES=truetype:interpreter-version=35 \ - * cff:no-stem-darkening=1 \ - * autofitter:warping=1 + * cff:no-stem-darkening=0 * ``` * * @inout: @@ -623,7 +645,7 @@ FT_BEGIN_HEADER * it is bytecode interpreter's execution context, `TT_ExecContext`, * which is declared in FreeType's internal header file `tttypes.h`. */ - typedef void + typedef FT_Error (*FT_DebugHook_Func)( void* arg ); diff --git a/Source/ThirdParty/freetype/ftmoderr.h b/Source/ThirdParty/freetype/ftmoderr.h index e16993572..c8c892dcc 100644 --- a/Source/ThirdParty/freetype/ftmoderr.h +++ b/Source/ThirdParty/freetype/ftmoderr.h @@ -4,7 +4,7 @@ * * FreeType module error offsets (specification). 
* - * Copyright (C) 2001-2019 by + * Copyright (C) 2001-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -94,7 +94,7 @@ * const char* mod_err_msg * } ft_mod_errors[] = * - * #include FT_MODULE_ERRORS_H + * #include * ``` * */ @@ -171,6 +171,7 @@ FT_MODERRDEF( Type42, 0x1400, "Type 42 module" ) FT_MODERRDEF( Winfonts, 0x1500, "Windows FON/FNT module" ) FT_MODERRDEF( GXvalid, 0x1600, "GX validation module" ) + FT_MODERRDEF( Sdf, 0x1700, "Signed distance field raster module" ) #ifdef FT_MODERR_END_LIST diff --git a/Source/ThirdParty/freetype/ftotval.h b/Source/ThirdParty/freetype/ftotval.h index c034f4895..011bdfc83 100644 --- a/Source/ThirdParty/freetype/ftotval.h +++ b/Source/ThirdParty/freetype/ftotval.h @@ -4,7 +4,7 @@ * * FreeType API for validating OpenType tables (specification). * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -30,8 +30,7 @@ #ifndef FTOTVAL_H_ #define FTOTVAL_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftoutln.h b/Source/ThirdParty/freetype/ftoutln.h index 75c3d0159..f9329ca40 100644 --- a/Source/ThirdParty/freetype/ftoutln.h +++ b/Source/ThirdParty/freetype/ftoutln.h @@ -5,7 +5,7 @@ * Support for the FT_Outline type used to store glyph shapes of * most scalable font formats (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -21,8 +21,7 @@ #define FTOUTLN_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -110,14 +109,16 @@ FT_BEGIN_HEADER * FreeType error code. 0~means success. 
* * @note: - * A contour that contains a single point only is represented by a 'move - * to' operation followed by 'line to' to the same point. In most cases, - * it is best to filter this out before using the outline for stroking - * purposes (otherwise it would result in a visible dot when round caps - * are used). + * Degenerate contours, segments, and Bezier arcs may be reported. In + * most cases, it is best to filter these out before using the outline + * for stroking or other path modification purposes (which may cause + * degenerate segments to become non-degenerate and visible, like when + * stroke caps are used or the path is otherwise outset). Some glyph + * outlines may contain deliberate degenerate single points for mark + * attachment. * * Similarly, the function returns success for an empty outline also - * (doing nothing, this is, not calling any emitter); if necessary, you + * (doing nothing, that is, not calling any emitter); if necessary, you * should filter this out, too. */ FT_EXPORT( FT_Error ) @@ -466,8 +467,6 @@ FT_BEGIN_HEADER * * @description: * Render an outline within a bitmap using the current scan-convert. - * This function uses an @FT_Raster_Params structure as an argument, - * allowing advanced features like direct composition, translucency, etc. * * @input: * library :: @@ -485,17 +484,13 @@ FT_BEGIN_HEADER * FreeType error code. 0~means success. * * @note: - * You should know what you are doing and how @FT_Raster_Params works to - * use this function. - * + * This advanced function uses @FT_Raster_Params as an argument. * The field `params.source` will be set to `outline` before the scan * converter is called, which means that the value you give to it is - * actually ignored. - * - * The gray-level rasterizer always uses 256 gray levels. If you want - * less gray levels, you have to provide your own span callback. See the - * @FT_RASTER_FLAG_DIRECT value of the `flags` field in the - * @FT_Raster_Params structure for more details.
+ * actually ignored. Either `params.target` must point to preallocated + * bitmap, or @FT_RASTER_FLAG_DIRECT must be set in `params.flags` + * allowing FreeType rasterizer to be used for direct composition, + * translucency, etc. See @FT_Raster_Params for more details. */ FT_EXPORT( FT_Error ) FT_Outline_Render( FT_Library library, diff --git a/Source/ThirdParty/freetype/ftparams.h b/Source/ThirdParty/freetype/ftparams.h index c374ee2f2..6a9f243bc 100644 --- a/Source/ThirdParty/freetype/ftparams.h +++ b/Source/ThirdParty/freetype/ftparams.h @@ -4,7 +4,7 @@ * * FreeType API for possible FT_Parameter tags (specification only). * - * Copyright (C) 2017-2019 by + * Copyright (C) 2017-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTPARAMS_H_ #define FTPARAMS_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -113,6 +112,21 @@ FT_BEGIN_HEADER FT_MAKE_TAG( 'i', 'n', 'c', 'r' ) + /************************************************************************** + * + * @enum: + * FT_PARAM_TAG_IGNORE_SBIX + * + * @description: + * A tag for @FT_Parameter to make @FT_Open_Face ignore an 'sbix' table + * while loading a font. Use this if @FT_FACE_FLAG_SBIX is set and you + * want to access the outline glyphs in the font. + * + */ +#define FT_PARAM_TAG_IGNORE_SBIX \ + FT_MAKE_TAG( 'i', 's', 'b', 'x' ) + + /************************************************************************** * * @enum: diff --git a/Source/ThirdParty/freetype/ftpfr.h b/Source/ThirdParty/freetype/ftpfr.h index b4eca76eb..7111d40a0 100644 --- a/Source/ThirdParty/freetype/ftpfr.h +++ b/Source/ThirdParty/freetype/ftpfr.h @@ -4,7 +4,7 @@ * * FreeType API for accessing PFR-specific data (specification only). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTPFR_H_ #define FTPFR_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -162,7 +161,7 @@ FT_BEGIN_HEADER * * @note: * You can use the `x_scale` or `y_scale` results of @FT_Get_PFR_Metrics - * to convert the advance to device subpixels (i.e., 1/64th of pixels). + * to convert the advance to device subpixels (i.e., 1/64 of pixels). */ FT_EXPORT( FT_Error ) FT_Get_PFR_Advance( FT_Face face, diff --git a/Source/ThirdParty/freetype/ftrender.h b/Source/ThirdParty/freetype/ftrender.h index a01c77427..0b6fad32e 100644 --- a/Source/ThirdParty/freetype/ftrender.h +++ b/Source/ThirdParty/freetype/ftrender.h @@ -4,7 +4,7 @@ * * FreeType renderer modules public interface (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,9 +20,8 @@ #define FTRENDER_H_ -#include -#include FT_MODULE_H -#include FT_GLYPH_H +#include +#include FT_BEGIN_HEADER @@ -159,7 +158,7 @@ FT_BEGIN_HEADER FT_Renderer_GetCBoxFunc get_glyph_cbox; FT_Renderer_SetModeFunc set_mode; - FT_Raster_Funcs* raster_class; + const FT_Raster_Funcs* raster_class; } FT_Renderer_Class; diff --git a/Source/ThirdParty/freetype/ftsizes.h b/Source/ThirdParty/freetype/ftsizes.h index 6c63cef2b..7bfb1aed4 100644 --- a/Source/ThirdParty/freetype/ftsizes.h +++ b/Source/ThirdParty/freetype/ftsizes.h @@ -4,7 +4,7 @@ * * FreeType size objects management (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -29,8 +29,7 @@ #define FTSIZES_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" 
diff --git a/Source/ThirdParty/freetype/ftsnames.h b/Source/ThirdParty/freetype/ftsnames.h index 4d43602a4..9d5d22bb2 100644 --- a/Source/ThirdParty/freetype/ftsnames.h +++ b/Source/ThirdParty/freetype/ftsnames.h @@ -7,7 +7,7 @@ * * This is _not_ used to retrieve glyph names! * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -23,9 +23,8 @@ #define FTSNAMES_H_ -#include -#include FT_FREETYPE_H -#include FT_PARAMETER_TAGS_H +#include +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/ftstroke.h b/Source/ThirdParty/freetype/ftstroke.h index 01a9c1811..b3d90802a 100644 --- a/Source/ThirdParty/freetype/ftstroke.h +++ b/Source/ThirdParty/freetype/ftstroke.h @@ -4,7 +4,7 @@ * * FreeType path stroker (specification). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,9 +19,8 @@ #ifndef FTSTROKE_H_ #define FTSTROKE_H_ -#include -#include FT_OUTLINE_H -#include FT_GLYPH_H +#include +#include FT_BEGIN_HEADER @@ -44,7 +43,7 @@ FT_BEGIN_HEADER * borders of the stroke. * * This can be useful to generate 'bordered' glyph, i.e., glyphs - * displayed with a coloured (and anti-aliased) border around their + * displayed with a colored (and anti-aliased) border around their * shape. * * @order: @@ -114,22 +113,19 @@ FT_BEGIN_HEADER * FT_STROKER_LINEJOIN_MITER_FIXED :: * Used to render mitered line joins, with fixed bevels if the miter * limit is exceeded. The outer edges of the strokes for the two - * segments are extended until they meet at an angle. 
If the segments - * meet at too sharp an angle (such that the miter would extend from - * the intersection of the segments a distance greater than the product - * of the miter limit value and the border radius), then a bevel join - * (see above) is used instead. This prevents long spikes being - * created. `FT_STROKER_LINEJOIN_MITER_FIXED` generates a miter line - * join as used in PostScript and PDF. + * segments are extended until they meet at an angle. A bevel join + * (see above) is used if the segments meet at too sharp an angle and + * the outer edges meet beyond a distance corresponding to the miter + * limit. This prevents long spikes being created. + * `FT_STROKER_LINEJOIN_MITER_FIXED` generates a miter line join as + * used in PostScript and PDF. * * FT_STROKER_LINEJOIN_MITER_VARIABLE :: * FT_STROKER_LINEJOIN_MITER :: * Used to render mitered line joins, with variable bevels if the miter - * limit is exceeded. The intersection of the strokes is clipped at a - * line perpendicular to the bisector of the angle between the strokes, - * at the distance from the intersection of the segments equal to the - * product of the miter limit value and the border radius. This - * prevents long spikes being created. + * limit is exceeded. The intersection of the strokes is clipped + * perpendicularly to the bisector, at a distance corresponding to + * the miter limit. This prevents long spikes being created. * `FT_STROKER_LINEJOIN_MITER_VARIABLE` generates a mitered line join * as used in XPS. `FT_STROKER_LINEJOIN_MITER` is an alias for * `FT_STROKER_LINEJOIN_MITER_VARIABLE`, retained for backward @@ -296,12 +292,17 @@ FT_BEGIN_HEADER * The line join style. * * miter_limit :: - * The miter limit for the `FT_STROKER_LINEJOIN_MITER_FIXED` and - * `FT_STROKER_LINEJOIN_MITER_VARIABLE` line join styles, expressed as - * 16.16 fixed-point value. + * The maximum reciprocal sine of half-angle at the miter join, + * expressed as 16.16 fixed-point value.
* * @note: - * The radius is expressed in the same units as the outline coordinates. + * The `radius` is expressed in the same units as the outline + * coordinates. + * + * The `miter_limit` multiplied by the `radius` gives the maximum size + * of a miter spike, at which it is clipped for + * @FT_STROKER_LINEJOIN_MITER_VARIABLE or replaced with a bevel join for + * @FT_STROKER_LINEJOIN_MITER_FIXED. * * This function calls @FT_Stroker_Rewind automatically. */ diff --git a/Source/ThirdParty/freetype/ftsynth.h b/Source/ThirdParty/freetype/ftsynth.h index 8754f97ce..af90967dd 100644 --- a/Source/ThirdParty/freetype/ftsynth.h +++ b/Source/ThirdParty/freetype/ftsynth.h @@ -5,7 +5,7 @@ * FreeType synthesizing code for emboldening and slanting * (specification). * - * Copyright (C) 2000-2019 by + * Copyright (C) 2000-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -45,8 +45,7 @@ #define FTSYNTH_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -69,10 +68,31 @@ FT_BEGIN_HEADER FT_EXPORT( void ) FT_GlyphSlot_Embolden( FT_GlyphSlot slot ); - /* Slant an outline glyph to the right by about 12 degrees. */ + /* Precisely adjust the glyph weight either horizontally or vertically. */ + /* The `xdelta` and `ydelta` values are fractions of the face Em size */ + /* (in fixed-point format). Considering that a regular face would have */ + /* stem widths on the order of 0.1 Em, a delta of 0.05 (0x0CCC) should */ + /* be very noticeable. To increase or decrease the weight, use positive */ + /* or negative values, respectively. */ + FT_EXPORT( void ) + FT_GlyphSlot_AdjustWeight( FT_GlyphSlot slot, + FT_Fixed xdelta, + FT_Fixed ydelta ); + + + /* Slant an outline glyph to the right by about 12 degrees. */ FT_EXPORT( void ) FT_GlyphSlot_Oblique( FT_GlyphSlot slot ); + /* Slant an outline glyph by a given sine of an angle. 
You can apply */ + /* slant along either x- or y-axis by choosing a corresponding non-zero */ + /* argument. If both slants are non-zero, some affine transformation */ + /* will result. */ + FT_EXPORT( void ) + FT_GlyphSlot_Slant( FT_GlyphSlot slot, + FT_Fixed xslant, + FT_Fixed yslant ); + /* */ diff --git a/Source/ThirdParty/freetype/ftsystem.h b/Source/ThirdParty/freetype/ftsystem.h index 889a6ba17..3a08f4912 100644 --- a/Source/ThirdParty/freetype/ftsystem.h +++ b/Source/ThirdParty/freetype/ftsystem.h @@ -4,7 +4,7 @@ * * FreeType low-level system interface definition (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,7 +20,6 @@ #define FTSYSTEM_H_ -#include FT_BEGIN_HEADER @@ -230,7 +229,7 @@ FT_BEGIN_HEADER * A handle to the source stream. * * offset :: - * The offset of read in stream (always from start). + * The offset from the start of the stream to seek to. * * buffer :: * The address of the read buffer. @@ -239,11 +238,9 @@ FT_BEGIN_HEADER * The number of bytes to read from the stream. * * @return: - * The number of bytes effectively read by the stream. - * - * @note: - * This function might be called to perform a seek or skip operation with - * a `count` of~0. A non-zero return value then indicates an error. + * If count >~0, return the number of bytes effectively read by the + * stream (after seeking to `offset`). If count ==~0, return the status + * of the seek operation (non-zero indicates an error). * */ typedef unsigned long diff --git a/Source/ThirdParty/freetype/fttrigon.h b/Source/ThirdParty/freetype/fttrigon.h index 37e1412fd..294981a6f 100644 --- a/Source/ThirdParty/freetype/fttrigon.h +++ b/Source/ThirdParty/freetype/fttrigon.h @@ -4,7 +4,7 @@ * * FreeType trigonometric functions (specification). 
* - * Copyright (C) 2001-2019 by + * Copyright (C) 2001-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ #ifndef FTTRIGON_H_ #define FTTRIGON_H_ -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" diff --git a/Source/ThirdParty/freetype/fttypes.h b/Source/ThirdParty/freetype/fttypes.h index 10571505a..5b109f0c7 100644 --- a/Source/ThirdParty/freetype/fttypes.h +++ b/Source/ThirdParty/freetype/fttypes.h @@ -4,7 +4,7 @@ * * FreeType simple types definitions (specification only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -22,8 +22,8 @@ #include #include FT_CONFIG_CONFIG_H -#include FT_SYSTEM_H -#include FT_IMAGE_H +#include +#include #include @@ -45,7 +45,10 @@ FT_BEGIN_HEADER * @description: * This section contains the basic data types defined by FreeType~2, * ranging from simple scalar types to bitmap descriptors. More - * font-specific structures are defined in a different section. + * font-specific structures are defined in a different section. Note + * that FreeType does not use floating-point data types. Fractional + * values are represented by fixed-point integers, with lower bits + * storing the fractional part. * * @order: * FT_Byte @@ -413,7 +416,7 @@ FT_BEGIN_HEADER typedef struct FT_Data_ { const FT_Byte* pointer; - FT_Int length; + FT_UInt length; } FT_Data; @@ -479,18 +482,17 @@ FT_BEGIN_HEADER * * @description: * This macro converts four-letter tags that are used to label TrueType - * tables into an unsigned long, to be used within FreeType. + * tables into an `FT_Tag` type, to be used within FreeType. * * @note: * The produced values **must** be 32-bit integers. Don't redefine this * macro. 
*/ -#define FT_MAKE_TAG( _x1, _x2, _x3, _x4 ) \ - (FT_Tag) \ - ( ( (FT_ULong)_x1 << 24 ) | \ - ( (FT_ULong)_x2 << 16 ) | \ - ( (FT_ULong)_x3 << 8 ) | \ - (FT_ULong)_x4 ) +#define FT_MAKE_TAG( _x1, _x2, _x3, _x4 ) \ + ( ( FT_STATIC_BYTE_CAST( FT_Tag, _x1 ) << 24 ) | \ + ( FT_STATIC_BYTE_CAST( FT_Tag, _x2 ) << 16 ) | \ + ( FT_STATIC_BYTE_CAST( FT_Tag, _x3 ) << 8 ) | \ + FT_STATIC_BYTE_CAST( FT_Tag, _x4 ) ) /*************************************************************************/ @@ -588,7 +590,7 @@ FT_BEGIN_HEADER #define FT_IS_EMPTY( list ) ( (list).head == 0 ) -#define FT_BOOL( x ) ( (FT_Bool)( (x) != 0 ) ) +#define FT_BOOL( x ) FT_STATIC_CAST( FT_Bool, (x) != 0 ) /* concatenate C tokens */ #define FT_ERR_XCAT( x, y ) x ## y diff --git a/Source/ThirdParty/freetype/ftwinfnt.h b/Source/ThirdParty/freetype/ftwinfnt.h index 3437913d5..7b701ea59 100644 --- a/Source/ThirdParty/freetype/ftwinfnt.h +++ b/Source/ThirdParty/freetype/ftwinfnt.h @@ -4,7 +4,7 @@ * * FreeType API for accessing Windows fnt-specific data. * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,7 @@ #ifndef FTWINFNT_H_ #define FTWINFNT_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -56,9 +55,9 @@ FT_BEGIN_HEADER * FT_WinFNT_ID_XXX * * @description: - * A list of valid values for the `charset` byte in @FT_WinFNT_HeaderRec. + * A list of valid values for the `charset` byte in @FT_WinFNT_HeaderRec. * Exact mapping tables for the various 'cpXXXX' encodings (except for - * 'cp1361') can be found at 'ftp://ftp.unicode.org/Public' in the + * 'cp1361') can be found at 'ftp://ftp.unicode.org/Public/' in the * `MAPPINGS/VENDORS/MICSFT/WINDOWS` subdirectory. 'cp1361' is roughly a * superset of `MAPPINGS/OBSOLETE/EASTASIA/KSC/JOHAB.TXT`. 
* diff --git a/Source/ThirdParty/freetype/internal/autohint.h b/Source/ThirdParty/freetype/internal/autohint.h index f64c28bb2..bf9c8b7cf 100644 --- a/Source/ThirdParty/freetype/internal/autohint.h +++ b/Source/ThirdParty/freetype/internal/autohint.h @@ -4,7 +4,7 @@ * * High-level 'autohint' module-specific interface (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -70,8 +70,7 @@ */ -#include -#include FT_FREETYPE_H +#include FT_BEGIN_HEADER @@ -208,6 +207,9 @@ FT_BEGIN_HEADER } FT_AutoHinter_InterfaceRec, *FT_AutoHinter_Interface; +#define FT_DECLARE_AUTOHINTER_INTERFACE( class_ ) \ + FT_CALLBACK_TABLE const FT_AutoHinter_InterfaceRec class_; + #define FT_DEFINE_AUTOHINTER_INTERFACE( \ class_, \ reset_face_, \ diff --git a/Source/ThirdParty/freetype/internal/cffotypes.h b/Source/ThirdParty/freetype/internal/cffotypes.h index b26893eab..50d535384 100644 --- a/Source/ThirdParty/freetype/internal/cffotypes.h +++ b/Source/ThirdParty/freetype/internal/cffotypes.h @@ -4,7 +4,7 @@ * * Basic OpenType/CFF object type definitions (specification). * - * Copyright (C) 2017-2019 by + * Copyright (C) 2017-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,12 +19,11 @@ #ifndef CFFOTYPES_H_ #define CFFOTYPES_H_ -#include -#include FT_INTERNAL_OBJECTS_H -#include FT_INTERNAL_CFF_TYPES_H -#include FT_INTERNAL_TRUETYPE_TYPES_H -#include FT_SERVICE_POSTSCRIPT_CMAPS_H -#include FT_INTERNAL_POSTSCRIPT_HINTS_H +#include +#include +#include +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/cfftypes.h b/Source/ThirdParty/freetype/internal/cfftypes.h index 2fc905ec7..c2521764c 100644 --- a/Source/ThirdParty/freetype/internal/cfftypes.h +++ b/Source/ThirdParty/freetype/internal/cfftypes.h @@ -5,7 +5,7 @@ * Basic OpenType/CFF type definitions and interface (specification * only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -21,13 +21,12 @@ #define CFFTYPES_H_ -#include -#include FT_FREETYPE_H -#include FT_TYPE1_TABLES_H -#include FT_INTERNAL_SERVICE_H -#include FT_SERVICE_POSTSCRIPT_CMAPS_H -#include FT_INTERNAL_POSTSCRIPT_HINTS_H -#include FT_INTERNAL_TYPE1_TYPES_H +#include +#include +#include +#include +#include +#include FT_BEGIN_HEADER @@ -316,7 +315,7 @@ FT_BEGIN_HEADER /* The normal stack then points to these values instead of the DICT */ /* because all other operators in Private DICT clear the stack. */ /* `blend_stack' could be cleared at each operator other than blend. */ - /* Blended values are stored as 5-byte fixed point values. */ + /* Blended values are stored as 5-byte fixed-point values. 
*/ FT_Byte* blend_stack; /* base of stack allocation */ FT_Byte* blend_top; /* first empty slot */ diff --git a/Source/ThirdParty/freetype/internal/compiler-macros.h b/Source/ThirdParty/freetype/internal/compiler-macros.h new file mode 100644 index 000000000..6f6765097 --- /dev/null +++ b/Source/ThirdParty/freetype/internal/compiler-macros.h @@ -0,0 +1,343 @@ +/**************************************************************************** + * + * internal/compiler-macros.h + * + * Compiler-specific macro definitions used internally by FreeType. + * + * Copyright (C) 2020-2023 by + * David Turner, Robert Wilhelm, and Werner Lemberg. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. + * + */ + +#ifndef INTERNAL_COMPILER_MACROS_H_ +#define INTERNAL_COMPILER_MACROS_H_ + +#include + +FT_BEGIN_HEADER + + /* Fix compiler warning with sgi compiler. */ +#if defined( __sgi ) && !defined( __GNUC__ ) +# if defined( _COMPILER_VERSION ) && ( _COMPILER_VERSION >= 730 ) +# pragma set woff 3505 +# endif +#endif + + /* Fix compiler warning with sgi compiler. */ +#if defined( __sgi ) && !defined( __GNUC__ ) +# if defined( _COMPILER_VERSION ) && ( _COMPILER_VERSION >= 730 ) +# pragma set woff 3505 +# endif +#endif + + /* Newer compilers warn for fall-through case statements. */ +#ifndef FALL_THROUGH +# if ( defined( __STDC_VERSION__ ) && __STDC_VERSION__ > 201710L ) || \ + ( defined( __cplusplus ) && __cplusplus > 201402L ) +# define FALL_THROUGH [[__fallthrough__]] +# elif ( defined( __GNUC__ ) && __GNUC__ >= 7 ) || \ + ( defined( __clang__ ) && \ + ( defined( __apple_build_version__ ) \ + ? 
__apple_build_version__ >= 12000000 \ + : __clang_major__ >= 10 ) ) +# define FALL_THROUGH __attribute__(( __fallthrough__ )) +# else +# define FALL_THROUGH ( (void)0 ) +# endif +#endif + + /* + * When defining a macro that expands to a non-trivial C statement, use + * FT_BEGIN_STMNT and FT_END_STMNT to enclose the macro's body. This + * ensures there are no surprises when the macro is invoked in conditional + * branches. + * + * Example: + * + * #define LOG( ... ) \ + * FT_BEGIN_STMNT \ + * if ( logging_enabled ) \ + * log( __VA_ARGS__ ); \ + * FT_END_STMNT + */ +#define FT_BEGIN_STMNT do { +#define FT_END_STMNT } while ( 0 ) + + /* + * FT_DUMMY_STMNT expands to an empty C statement. Useful for + * conditionally defined statement macros. + * + * Example: + * + * #ifdef BUILD_CONFIG_LOGGING + * #define LOG( ... ) \ + * FT_BEGIN_STMNT \ + * if ( logging_enabled ) \ + * log( __VA_ARGS__ ); \ + * FT_END_STMNT + * #else + * # define LOG( ... ) FT_DUMMY_STMNT + * #endif + */ +#define FT_DUMMY_STMNT FT_BEGIN_STMNT FT_END_STMNT + +#ifdef __UINTPTR_TYPE__ + /* + * GCC and Clang both provide a `__UINTPTR_TYPE__` that can be used to + * avoid a dependency on `stdint.h`. + */ +# define FT_UINT_TO_POINTER( x ) (void *)(__UINTPTR_TYPE__)(x) +#elif defined( _WIN64 ) + /* only 64bit Windows uses the LLP64 data model, i.e., */ + /* 32-bit integers, 64-bit pointers. */ +# define FT_UINT_TO_POINTER( x ) (void *)(unsigned __int64)(x) +#else +# define FT_UINT_TO_POINTER( x ) (void *)(unsigned long)(x) +#endif + + /* + * Use `FT_TYPEOF( type )` to cast a value to `type`. This is useful to + * suppress signedness compilation warnings in macros. + * + * Example: + * + * #define PAD_( x, n ) ( (x) & ~FT_TYPEOF( x )( (n) - 1 ) ) + * + * (The `typeof` condition is taken from gnulib's `intprops.h` header + * file.) 
+ */ +#if ( ( defined( __GNUC__ ) && __GNUC__ >= 2 ) || \ + ( defined( __IBMC__ ) && __IBMC__ >= 1210 && \ + defined( __IBM__TYPEOF__ ) ) || \ + ( defined( __SUNPRO_C ) && __SUNPRO_C >= 0x5110 && !__STDC__ ) ) +#define FT_TYPEOF( type ) ( __typeof__ ( type ) ) +#else +#define FT_TYPEOF( type ) /* empty */ +#endif + + /* + * Mark a function declaration as internal to the library. This ensures + * that it will not be exposed by default to client code, and helps + * generate smaller and faster code on ELF-based platforms. Place this + * before a function declaration. + */ + + /* Visual C, mingw */ +#if defined( _WIN32 ) +#define FT_INTERNAL_FUNCTION_ATTRIBUTE /* empty */ + + /* gcc, clang */ +#elif ( defined( __GNUC__ ) && __GNUC__ >= 4 ) || defined( __clang__ ) +#define FT_INTERNAL_FUNCTION_ATTRIBUTE \ + __attribute__(( visibility( "hidden" ) )) + + /* Sun */ +#elif defined( __SUNPRO_C ) && __SUNPRO_C >= 0x550 +#define FT_INTERNAL_FUNCTION_ATTRIBUTE __hidden + +#else +#define FT_INTERNAL_FUNCTION_ATTRIBUTE /* empty */ +#endif + + /* + * FreeType supports compilation of its C sources with a C++ compiler (in + * C++ mode); this introduces a number of subtle issues. + * + * The main one is that a C++ function declaration and its definition must + * have the same 'linkage'. Because all FreeType headers declare their + * functions with C linkage (i.e., within an `extern "C" { ... }` block + * due to the magic of FT_BEGIN_HEADER and FT_END_HEADER), their + * definition in FreeType sources should also be prefixed with `extern + * "C"` when compiled in C++ mode. + * + * The `FT_FUNCTION_DECLARATION` and `FT_FUNCTION_DEFINITION` macros are + * provided to deal with this case, as well as `FT_CALLBACK_DEF` and its + * siblings below. + */ + + /* + * `FT_FUNCTION_DECLARATION( type )` can be used to write a C function + * declaration to ensure it will have C linkage when the library is built + * with a C++ compiler. 
The parameter is the function's return type, so a + * declaration would look like + * + * FT_FUNCTION_DECLARATION( int ) + * foo( int x ); + * + * NOTE: This requires that all uses are inside of `FT_BEGIN_HEADER ... + * FT_END_HEADER` blocks, which guarantees that the declarations have C + * linkage when the headers are included by C++ sources. + * + * NOTE: Do not use directly. Use `FT_LOCAL`, `FT_BASE`, and `FT_EXPORT` + * instead. + */ +#define FT_FUNCTION_DECLARATION( x ) extern x + + /* + * Same as `FT_FUNCTION_DECLARATION`, but for function definitions instead. + * + * NOTE: Do not use directly. Use `FT_LOCAL_DEF`, `FT_BASE_DEF`, and + * `FT_EXPORT_DEF` instead. + */ +#ifdef __cplusplus +#define FT_FUNCTION_DEFINITION( x ) extern "C" x +#else +#define FT_FUNCTION_DEFINITION( x ) x +#endif + + /* + * Use `FT_LOCAL` and `FT_LOCAL_DEF` to declare and define, respectively, + * an internal FreeType function that is only used by the sources of a + * single `src/module/` directory. This ensures that the functions are + * turned into static ones at build time, resulting in smaller and faster + * code. + */ +#ifdef FT_MAKE_OPTION_SINGLE_OBJECT + +#define FT_LOCAL( x ) static x +#define FT_LOCAL_DEF( x ) static x + +#else + +#define FT_LOCAL( x ) FT_INTERNAL_FUNCTION_ATTRIBUTE \ + FT_FUNCTION_DECLARATION( x ) +#define FT_LOCAL_DEF( x ) FT_FUNCTION_DEFINITION( x ) + +#endif /* FT_MAKE_OPTION_SINGLE_OBJECT */ + + /* + * Use `FT_LOCAL_ARRAY` and `FT_LOCAL_ARRAY_DEF` to declare and define, + * respectively, a constant array that must be accessed from several + * sources in the same `src/module/` sub-directory, and which are internal + * to the library. + */ +#define FT_LOCAL_ARRAY( x ) FT_INTERNAL_FUNCTION_ATTRIBUTE \ + extern const x +#define FT_LOCAL_ARRAY_DEF( x ) FT_FUNCTION_DEFINITION( const x ) + + /* + * Use `FT_BASE` and `FT_BASE_DEF` to declare and define, respectively, an + * internal library function that is used by more than a single module.
+ */ +#define FT_BASE( x ) FT_INTERNAL_FUNCTION_ATTRIBUTE \ + FT_FUNCTION_DECLARATION( x ) +#define FT_BASE_DEF( x ) FT_FUNCTION_DEFINITION( x ) + + + /* + * NOTE: Conditionally define `FT_EXPORT_VAR` due to its definition in + * `src/smooth/ftgrays.h` to make the header more portable. + */ +#ifndef FT_EXPORT_VAR +#define FT_EXPORT_VAR( x ) FT_FUNCTION_DECLARATION( x ) +#endif + + /* + * When compiling FreeType as a DLL or DSO with hidden visibility, + * some systems/compilers need a special attribute in front OR after + * the return type of function declarations. + * + * Two macros are used within the FreeType source code to define + * exported library functions: `FT_EXPORT` and `FT_EXPORT_DEF`. + * + * - `FT_EXPORT( return_type )` + * + * is used in a function declaration, as in + * + * ``` + * FT_EXPORT( FT_Error ) + * FT_Init_FreeType( FT_Library* alibrary ); + * ``` + * + * - `FT_EXPORT_DEF( return_type )` + * + * is used in a function definition, as in + * + * ``` + * FT_EXPORT_DEF( FT_Error ) + * FT_Init_FreeType( FT_Library* alibrary ) + * { + * ... some code ... + * return FT_Err_Ok; + * } + * ``` + * + * You can provide your own implementation of `FT_EXPORT` and + * `FT_EXPORT_DEF` here if you want. + * + * To export a variable, use `FT_EXPORT_VAR`. + */ + + /* See `freetype/config/public-macros.h` for the `FT_EXPORT` definition */ +#define FT_EXPORT_DEF( x ) FT_FUNCTION_DEFINITION( x ) + + /* + * The following macros are needed to compile the library with a + * C++ compiler and with 16bit compilers. + */ + + /* + * This is special. Within C++, you must specify `extern "C"` for + * functions which are used via function pointers, and you also + * must do that for structures which contain function pointers to + * assure C linkage -- it's not possible to have (local) anonymous + * functions which are accessed by (global) function pointers. 
+ * + * + * FT_CALLBACK_DEF is used to _define_ a callback function, + * located in the same source code file as the structure that uses + * it. FT_COMPARE_DEF, in addition, ensures the `cdecl` calling + * convention on x86, required by the C library function `qsort`. + * + * FT_BASE_CALLBACK and FT_BASE_CALLBACK_DEF are used to declare + * and define a callback function, respectively, in a similar way + * as FT_BASE and FT_BASE_DEF work. + * + * FT_CALLBACK_TABLE is used to _declare_ a constant variable that + * contains pointers to callback functions. + * + * FT_CALLBACK_TABLE_DEF is used to _define_ a constant variable + * that contains pointers to callback functions. + * + * + * Some 16bit compilers have to redefine these macros to insert + * the infamous `_cdecl` or `__fastcall` declarations. + */ +#ifdef __cplusplus +#define FT_CALLBACK_DEF( x ) extern "C" x +#else +#define FT_CALLBACK_DEF( x ) static x +#endif + +#if defined( __GNUC__ ) && defined( __i386__ ) +#define FT_COMPARE_DEF( x ) FT_CALLBACK_DEF( x ) __attribute__(( cdecl )) +#elif defined( _MSC_VER ) && defined( _M_IX86 ) +#define FT_COMPARE_DEF( x ) FT_CALLBACK_DEF( x ) __cdecl +#elif defined( __WATCOMC__ ) && __WATCOMC__ >= 1240 +#define FT_COMPARE_DEF( x ) FT_CALLBACK_DEF( x ) __watcall +#else +#define FT_COMPARE_DEF( x ) FT_CALLBACK_DEF( x ) +#endif + +#define FT_BASE_CALLBACK( x ) FT_FUNCTION_DECLARATION( x ) +#define FT_BASE_CALLBACK_DEF( x ) FT_FUNCTION_DEFINITION( x ) + +#ifndef FT_CALLBACK_TABLE +#ifdef __cplusplus +#define FT_CALLBACK_TABLE extern "C" +#define FT_CALLBACK_TABLE_DEF extern "C" +#else +#define FT_CALLBACK_TABLE extern +#define FT_CALLBACK_TABLE_DEF /* nothing */ +#endif +#endif /* FT_CALLBACK_TABLE */ + +FT_END_HEADER + +#endif /* INTERNAL_COMPILER_MACROS_H_ */ diff --git a/Source/ThirdParty/freetype/internal/ftcalc.h b/Source/ThirdParty/freetype/internal/ftcalc.h index 2986ec359..d9aea2360 100644 --- a/Source/ThirdParty/freetype/internal/ftcalc.h +++ 
b/Source/ThirdParty/freetype/internal/ftcalc.h @@ -4,7 +4,7 @@ * * Arithmetic computations (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,9 +20,9 @@ #define FTCALC_H_ -#include -#include FT_FREETYPE_H +#include +#include "compiler-macros.h" FT_BEGIN_HEADER @@ -278,6 +278,40 @@ FT_BEGIN_HEADER FT_Long c ); + /************************************************************************** + * + * @function: + * FT_MulAddFix + * + * @description: + * Compute `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`, where `s[n]` is + * usually a 16.16 scalar. + * + * @input: + * s :: + * The array of scalars. + * f :: + * The array of factors. + * count :: + * The number of entries in the array. + * + * @return: + * The result of `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`. + * + * @note: + * This function is currently used for the scaled delta computation of + * variation stores. It internally uses 64-bit data types when + * available, otherwise it emulates 64-bit math by using 32-bit + * operations, which produce a correct result but most likely at a slower + * performance in comparison to the implementation based on `int64_t`. + * + */ + FT_BASE( FT_Int32 ) + FT_MulAddFix( FT_Fixed* s, + FT_Int32* f, + FT_UInt count ); + + /* * A variant of FT_Matrix_Multiply which scales its result afterwards. The * idea is that both `a' and `b' are scaled by factors of 10 so that the @@ -298,9 +332,9 @@ FT_BEGIN_HEADER * Based on geometric considerations we use the following inequality to * identify a degenerate matrix. * - * 50 * abs(xx*yy - xy*yx) < xx^2 + xy^2 + yx^2 + yy^2 + * 32 * abs(xx*yy - xy*yx) < xx^2 + xy^2 + yx^2 + yy^2 * - * Value 50 is heuristic. + * Value 32 is heuristic.
*/ FT_BASE( FT_Bool ) FT_Matrix_Check( const FT_Matrix* matrix ); @@ -359,8 +393,8 @@ FT_BEGIN_HEADER #ifndef FT_CONFIG_OPTION_NO_ASSEMBLER -#if defined( __GNUC__ ) && \ - ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) +#if defined( __clang__ ) || ( defined( __GNUC__ ) && \ + ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) ) #if FT_SIZEOF_INT == 4 @@ -370,14 +404,28 @@ FT_BEGIN_HEADER #define FT_MSB( x ) ( 31 - __builtin_clzl( x ) ) -#endif /* __GNUC__ */ +#endif +#elif defined( _MSC_VER ) && _MSC_VER >= 1400 -#elif defined( _MSC_VER ) && ( _MSC_VER >= 1400 ) +#if defined( _WIN32_WCE ) -#if FT_SIZEOF_INT == 4 +#include +#pragma intrinsic( _CountLeadingZeros ) + +#define FT_MSB( x ) ( 31 - _CountLeadingZeros( x ) ) + +#elif defined( _M_ARM64 ) || defined( _M_ARM ) #include +#pragma intrinsic( _CountLeadingZeros ) + +#define FT_MSB( x ) ( 31 - _CountLeadingZeros( x ) ) + +#elif defined( _M_IX86 ) || defined( _M_AMD64 ) || defined( _M_IA64 ) + +#include +#pragma intrinsic( _BitScanReverse ) static __inline FT_Int32 FT_MSB_i386( FT_UInt32 x ) @@ -385,21 +433,45 @@ FT_BEGIN_HEADER unsigned long where; - /* not available in older VC versions */ _BitScanReverse( &where, x ); return (FT_Int32)where; } -#define FT_MSB( x ) ( FT_MSB_i386( x ) ) +#define FT_MSB( x ) FT_MSB_i386( x ) #endif -#endif /* _MSC_VER */ +#elif defined( __WATCOMC__ ) && defined( __386__ ) + extern __inline FT_Int32 + FT_MSB_i386( FT_UInt32 x ); + +#pragma aux FT_MSB_i386 = \ + "bsr eax, eax" \ + __parm [__eax] __nomemory \ + __value [__eax] \ + __modify __exact [__eax] __nomemory; + +#define FT_MSB( x ) FT_MSB_i386( x ) + +#elif defined( __DECC ) || defined( __DECCXX ) + +#include + +#define FT_MSB( x ) (FT_Int)( 63 - _leadz( x ) ) + +#elif defined( _CRAYC ) + +#include + +#define FT_MSB( x ) (FT_Int)( 31 - _leadz32( x ) ) + +#endif /* FT_MSB macro definitions */ #endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */ + #ifndef FT_MSB FT_BASE( FT_Int ) @@ -449,8 +521,7 @@ 
FT_BEGIN_HEADER #define F2DOT14_TO_FIXED( x ) ( (FT_Long)(x) * 4 ) /* << 2 */ #define FIXED_TO_INT( x ) ( FT_RoundFix( x ) >> 16 ) -#define ROUND_F26DOT6( x ) ( x >= 0 ? ( ( (x) + 32 ) & -64 ) \ - : ( -( ( 32 - (x) ) & -64 ) ) ) +#define ROUND_F26DOT6( x ) ( ( (x) + 32 - ( x < 0 ) ) & -64 ) /* * The following macros have two purposes. @@ -488,7 +559,7 @@ FT_BEGIN_HEADER #define NEG_INT32( a ) \ (FT_Int32)( (FT_UInt32)0 - (FT_UInt32)(a) ) -#ifdef FT_LONG64 +#ifdef FT_INT64 #define ADD_INT64( a, b ) \ (FT_Int64)( (FT_UInt64)(a) + (FT_UInt64)(b) ) @@ -499,7 +570,7 @@ FT_BEGIN_HEADER #define NEG_INT64( a ) \ (FT_Int64)( (FT_UInt64)0 - (FT_UInt64)(a) ) -#endif /* FT_LONG64 */ +#endif /* FT_INT64 */ FT_END_HEADER diff --git a/Source/ThirdParty/freetype/internal/ftdebug.h b/Source/ThirdParty/freetype/internal/ftdebug.h index 54a9673af..4e013ba1e 100644 --- a/Source/ThirdParty/freetype/internal/ftdebug.h +++ b/Source/ThirdParty/freetype/internal/ftdebug.h @@ -4,7 +4,7 @@ * * Debugging and logging component (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -27,11 +27,28 @@ #include #include FT_CONFIG_CONFIG_H -#include FT_FREETYPE_H +#include + +#include "compiler-macros.h" + +#ifdef FT_DEBUG_LOGGING +#define DLG_STATIC +#include +#include + +#include +#endif /* FT_DEBUG_LOGGING */ FT_BEGIN_HEADER + /* force the definition of FT_DEBUG_LEVEL_TRACE if FT_DEBUG_LOGGING is */ + /* already defined. 
*/ + /* */ +#ifdef FT_DEBUG_LOGGING +#undef FT_DEBUG_LEVEL_TRACE +#define FT_DEBUG_LEVEL_TRACE +#endif /* force the definition of FT_DEBUG_LEVEL_ERROR if FT_DEBUG_LEVEL_TRACE */ /* is already defined; this simplifies the following #ifdefs */ @@ -56,7 +73,7 @@ FT_BEGIN_HEADER /* defining the enumeration */ typedef enum FT_Trace_ { -#include FT_INTERNAL_TRACE_H +#include trace_count } FT_Trace; @@ -80,20 +97,66 @@ FT_BEGIN_HEADER * Each component must define the macro FT_COMPONENT to a valid FT_Trace * value before using any TRACE macro. * + * To get consistent logging output, there should be no newline character + * (i.e., '\n') or a single trailing one in the message string of + * `FT_TRACEx` and `FT_ERROR`. */ + + /************************************************************************* + * + * If FT_DEBUG_LOGGING is enabled, tracing messages are sent to dlg's API. + * If FT_DEBUG_LOGGING is disabled, tracing messages are sent to + * `FT_Message` (defined in ftdebug.c). + */ +#ifdef FT_DEBUG_LOGGING + + /* we need two macros to convert the names of `FT_COMPONENT` to a string */ +#define FT_LOGGING_TAG( x ) FT_LOGGING_TAG_( x ) +#define FT_LOGGING_TAG_( x ) #x + + /* we need two macros to convert the component and the trace level */ + /* to a string that combines them */ +#define FT_LOGGING_TAGX( x, y ) FT_LOGGING_TAGX_( x, y ) +#define FT_LOGGING_TAGX_( x, y ) #x ":" #y + + +#define FT_LOG( level, varformat ) \ + do \ + { \ + const char* dlg_tag = FT_LOGGING_TAGX( FT_COMPONENT, level ); \ + \ + \ + ft_add_tag( dlg_tag ); \ + if ( ft_trace_levels[FT_TRACE_COMP( FT_COMPONENT )] >= level ) \ + { \ + if ( custom_output_handler != NULL ) \ + FT_Logging_Callback varformat; \ + else \ + dlg_trace varformat; \ + } \ + ft_remove_tag( dlg_tag ); \ + } while( 0 ) + +#else /* !FT_DEBUG_LOGGING */ + +#define FT_LOG( level, varformat ) \ + do \ + { \ + if ( ft_trace_levels[FT_TRACE_COMP( FT_COMPONENT )] >= level ) \ + FT_Message varformat; \ + } while ( 0 ) + +#endif /* 
!FT_DEBUG_LOGGING */ + + #ifdef FT_DEBUG_LEVEL_TRACE /* we need two macros here to make cpp expand `FT_COMPONENT' */ #define FT_TRACE_COMP( x ) FT_TRACE_COMP_( x ) #define FT_TRACE_COMP_( x ) trace_ ## x -#define FT_TRACE( level, varformat ) \ - do \ - { \ - if ( ft_trace_levels[FT_TRACE_COMP( FT_COMPONENT )] >= level ) \ - FT_Message varformat; \ - } while ( 0 ) +#define FT_TRACE( level, varformat ) FT_LOG( level, varformat ) #else /* !FT_DEBUG_LEVEL_TRACE */ @@ -202,7 +265,32 @@ FT_BEGIN_HEADER #ifdef FT_DEBUG_LEVEL_ERROR -#define FT_ERROR( varformat ) FT_Message varformat + /************************************************************************** + * + * If FT_DEBUG_LOGGING is enabled, error messages are sent to dlg's API. + * If FT_DEBUG_LOGGING is disabled, error messages are sent to `FT_Message` + * (defined in ftdebug.c). + * + */ +#ifdef FT_DEBUG_LOGGING + +#define FT_ERROR( varformat ) \ + do \ + { \ + const char* dlg_tag = FT_LOGGING_TAG( FT_COMPONENT ); \ + \ + \ + ft_add_tag( dlg_tag ); \ + dlg_trace varformat; \ + ft_remove_tag( dlg_tag ); \ + } while ( 0 ) + +#else /* !FT_DEBUG_LOGGING */ + +#define FT_ERROR( varformat ) FT_Message varformat + +#endif /* !FT_DEBUG_LOGGING */ + #else /* !FT_DEBUG_LEVEL_ERROR */ @@ -275,6 +363,77 @@ FT_BEGIN_HEADER FT_BASE( void ) ft_debug_init( void ); + +#ifdef FT_DEBUG_LOGGING + + /************************************************************************** + * + * 'dlg' uses output handlers to control how and where log messages are + * printed. Therefore we need to define a default output handler for + * FreeType. + */ + FT_BASE( void ) + ft_log_handler( const struct dlg_origin* origin, + const char* string, + void* data ); + + + /************************************************************************** + * + * 1. `ft_default_log_handler` stores the function pointer that is used + * internally by FreeType to print logs to a file. + * + * 2. 
`custom_output_handler` stores the function pointer to the callback + * function provided by the user. + * + * It is defined in `ftdebug.c`. + */ + extern dlg_handler ft_default_log_handler; + extern FT_Custom_Log_Handler custom_output_handler; + + + /************************************************************************** + * + * If FT_DEBUG_LOGGING macro is enabled, FreeType needs to initialize and + * un-initialize `FILE*`. + * + * These functions are defined in `ftdebug.c`. + */ + FT_BASE( void ) + ft_logging_init( void ); + + FT_BASE( void ) + ft_logging_deinit( void ); + + + /************************************************************************** + * + * For printing the name of `FT_COMPONENT` along with the actual log we + * need to add a tag with the name of `FT_COMPONENT`. + * + * These functions are defined in `ftdebug.c`. + */ + FT_BASE( void ) + ft_add_tag( const char* tag ); + + FT_BASE( void ) + ft_remove_tag( const char* tag ); + + + /************************************************************************** + * + * A function to print log data using a custom callback logging function + * (which is set using `FT_Set_Log_Handler`). + * + * This function is defined in `ftdebug.c`. + */ + FT_BASE( void ) + FT_Logging_Callback( const char* fmt, + ... ); + +#endif /* FT_DEBUG_LOGGING */ + + FT_END_HEADER #endif /* FTDEBUG_H_ */ diff --git a/Source/ThirdParty/freetype/internal/ftdrv.h b/Source/ThirdParty/freetype/internal/ftdrv.h index 09e846e1c..9001c07ad 100644 --- a/Source/ThirdParty/freetype/internal/ftdrv.h +++ b/Source/ThirdParty/freetype/internal/ftdrv.h @@ -4,7 +4,7 @@ * * FreeType internal font driver interface (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -20,9 +20,9 @@ #define FTDRV_H_ -#include -#include FT_MODULE_H +#include +#include "compiler-macros.h" FT_BEGIN_HEADER @@ -157,6 +157,7 @@ FT_BEGIN_HEADER * A handle to a function used to select a new fixed size. It is used * only if @FT_FACE_FLAG_FIXED_SIZES is set. Can be set to 0 if the * scaling done in the base layer suffices. + * * @note: * Most function pointers, with the exception of `load_glyph`, can be set * to 0 to indicate a default behaviour. diff --git a/Source/ThirdParty/freetype/internal/ftgloadr.h b/Source/ThirdParty/freetype/internal/ftgloadr.h index 770871d81..36e5509f9 100644 --- a/Source/ThirdParty/freetype/internal/ftgloadr.h +++ b/Source/ThirdParty/freetype/internal/ftgloadr.h @@ -4,7 +4,7 @@ * * The FreeType glyph loader (specification). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg * * This file is part of the FreeType project, and may only be used, @@ -20,9 +20,9 @@ #define FTGLOADR_H_ -#include -#include FT_FREETYPE_H +#include +#include "compiler-macros.h" FT_BEGIN_HEADER @@ -138,8 +138,6 @@ FT_BEGIN_HEADER FT_BASE( void ) FT_GlyphLoader_Add( FT_GlyphLoader loader ); - /* */ - FT_END_HEADER diff --git a/Source/ThirdParty/freetype/internal/fthash.h b/Source/ThirdParty/freetype/internal/fthash.h index 249188040..622ec76bb 100644 --- a/Source/ThirdParty/freetype/internal/fthash.h +++ b/Source/ThirdParty/freetype/internal/fthash.h @@ -43,8 +43,7 @@ #define FTHASH_H_ -#include -#include FT_FREETYPE_H +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/ftmemory.h b/Source/ThirdParty/freetype/internal/ftmemory.h index 78bd3bc22..5eb1d21ff 100644 --- a/Source/ThirdParty/freetype/internal/ftmemory.h +++ b/Source/ThirdParty/freetype/internal/ftmemory.h @@ -4,7 +4,7 @@ * * The FreeType memory management macros (specification). 
* - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg * * This file is part of the FreeType project, and may only be used, @@ -22,8 +22,9 @@ #include #include FT_CONFIG_CONFIG_H -#include FT_TYPES_H +#include +#include "compiler-macros.h" FT_BEGIN_HEADER @@ -57,6 +58,14 @@ FT_BEGIN_HEADER /*************************************************************************/ + /* The calculation `NULL + n' is undefined in C. Even if the resulting */ + /* pointer doesn't get dereferenced, this causes warnings with */ + /* sanitizers. */ + /* */ + /* We thus provide a macro that should be used if `base' can be NULL. */ +#define FT_OFFSET( base, count ) ( (base) ? (base) + (count) : NULL ) + + /* * C++ refuses to handle statements like p = (void*)anything, with `p' a * typed pointer. Since we don't have a `typeof' operator in standard C++, @@ -87,15 +96,15 @@ extern "C++" #ifdef FT_DEBUG_MEMORY - FT_BASE( const char* ) _ft_debug_file; - FT_BASE( long ) _ft_debug_lineno; + FT_BASE( const char* ) ft_debug_file_; + FT_BASE( long ) ft_debug_lineno_; -#define FT_DEBUG_INNER( exp ) ( _ft_debug_file = __FILE__, \ - _ft_debug_lineno = __LINE__, \ +#define FT_DEBUG_INNER( exp ) ( ft_debug_file_ = __FILE__, \ + ft_debug_lineno_ = __LINE__, \ (exp) ) -#define FT_ASSIGNP_INNER( p, exp ) ( _ft_debug_file = __FILE__, \ - _ft_debug_lineno = __LINE__, \ +#define FT_ASSIGNP_INNER( p, exp ) ( ft_debug_file_ = __FILE__, \ + ft_debug_lineno_ = __LINE__, \ FT_ASSIGNP( p, exp ) ) #else /* !FT_DEBUG_MEMORY */ @@ -153,10 +162,10 @@ extern "C++" (FT_Long)(size), \ &error ) ) -#define FT_MEM_FREE( ptr ) \ - FT_BEGIN_STMNT \ - ft_mem_free( memory, (ptr) ); \ - (ptr) = NULL; \ +#define FT_MEM_FREE( ptr ) \ + FT_BEGIN_STMNT \ + FT_DEBUG_INNER( ft_mem_free( memory, (ptr) ) ); \ + (ptr) = NULL; \ FT_END_STMNT #define FT_MEM_NEW( ptr ) \ @@ -335,14 +344,13 @@ extern "C++" #define FT_RENEW_ARRAY( ptr, curcnt, newcnt ) \ FT_MEM_SET_ERROR( 
FT_MEM_RENEW_ARRAY( ptr, curcnt, newcnt ) ) -#define FT_QNEW( ptr ) \ - FT_MEM_SET_ERROR( FT_MEM_QNEW( ptr ) ) +#define FT_QNEW( ptr ) FT_MEM_SET_ERROR( FT_MEM_QNEW( ptr ) ) -#define FT_QNEW_ARRAY( ptr, count ) \ - FT_MEM_SET_ERROR( FT_MEM_NEW_ARRAY( ptr, count ) ) +#define FT_QNEW_ARRAY( ptr, count ) \ + FT_MEM_SET_ERROR( FT_MEM_QNEW_ARRAY( ptr, count ) ) -#define FT_QRENEW_ARRAY( ptr, curcnt, newcnt ) \ - FT_MEM_SET_ERROR( FT_MEM_RENEW_ARRAY( ptr, curcnt, newcnt ) ) +#define FT_QRENEW_ARRAY( ptr, curcnt, newcnt ) \ + FT_MEM_SET_ERROR( FT_MEM_QRENEW_ARRAY( ptr, curcnt, newcnt ) ) FT_BASE( FT_Pointer ) @@ -381,8 +389,6 @@ extern "C++" #define FT_STRCPYN( dst, src, size ) \ ft_mem_strcpyn( (char*)dst, (const char*)(src), (FT_ULong)(size) ) - /* */ - FT_END_HEADER diff --git a/Source/ThirdParty/freetype/internal/ftmmtypes.h b/Source/ThirdParty/freetype/internal/ftmmtypes.h new file mode 100644 index 000000000..c4b21d614 --- /dev/null +++ b/Source/ThirdParty/freetype/internal/ftmmtypes.h @@ -0,0 +1,91 @@ +/**************************************************************************** + * + * ftmmtypes.h + * + * OpenType Variations type definitions for internal use + * with the multi-masters service (specification). + * + * Copyright (C) 2022-2023 by + * David Turner, Robert Wilhelm, Werner Lemberg, George Williams, and + * Dominik Röttsches. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. + * + */ + + +#ifndef FTMMTYPES_H_ +#define FTMMTYPES_H_ + +FT_BEGIN_HEADER + + + typedef FT_Int32 FT_ItemVarDelta; + + typedef struct GX_ItemVarDataRec_ + { + FT_UInt itemCount; /* Number of delta sets per item. */ + FT_UInt regionIdxCount; /* Number of region indices. 
*/ + FT_UInt* regionIndices; /* Array of `regionCount` indices; */ + /* these index `varRegionList`. */ + FT_Byte* deltaSet; /* Array of `itemCount` deltas; */ + /* use `innerIndex` for this array. */ + FT_UShort wordDeltaCount; /* Number of the first 32-bit ints */ + /* or 16-bit ints of `deltaSet` */ + /* depending on `longWords`. */ + FT_Bool longWords; /* If true, `deltaSet` is a 32-bit */ + /* array followed by a 16-bit */ + /* array, otherwise a 16-bit array */ + /* followed by an 8-bit array. */ + } GX_ItemVarDataRec, *GX_ItemVarData; + + + /* contribution of one axis to a region */ + typedef struct GX_AxisCoordsRec_ + { + FT_Fixed startCoord; + FT_Fixed peakCoord; /* zero means no effect (factor = 1) */ + FT_Fixed endCoord; + + } GX_AxisCoordsRec, *GX_AxisCoords; + + + typedef struct GX_VarRegionRec_ + { + GX_AxisCoords axisList; /* array of axisCount records */ + + } GX_VarRegionRec, *GX_VarRegion; + + + /* item variation store */ + typedef struct GX_ItemVarStoreRec_ + { + FT_UInt dataCount; + GX_ItemVarData varData; /* array of dataCount records; */ + /* use `outerIndex' for this array */ + FT_UShort axisCount; + FT_UInt regionCount; /* total number of regions defined */ + GX_VarRegion varRegionList; + + } GX_ItemVarStoreRec, *GX_ItemVarStore; + + + typedef struct GX_DeltaSetIdxMapRec_ + { + FT_ULong mapCount; + FT_UInt* outerIndex; /* indices to item var data */ + FT_UInt* innerIndex; /* indices to delta set */ + + } GX_DeltaSetIdxMapRec, *GX_DeltaSetIdxMap; + + +FT_END_HEADER + +#endif /* FTMMTYPES_H_ */ + + +/* END */ diff --git a/Source/ThirdParty/freetype/internal/ftobjs.h b/Source/ThirdParty/freetype/internal/ftobjs.h index f3a41b35a..28bc9b65f 100644 --- a/Source/ThirdParty/freetype/internal/ftobjs.h +++ b/Source/ThirdParty/freetype/internal/ftobjs.h @@ -4,7 +4,7 @@ * * The FreeType private base classes (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -26,21 +26,21 @@ #ifndef FTOBJS_H_ #define FTOBJS_H_ -#include -#include FT_RENDER_H -#include FT_SIZES_H -#include FT_LCD_FILTER_H -#include FT_INTERNAL_MEMORY_H -#include FT_INTERNAL_GLYPH_LOADER_H -#include FT_INTERNAL_DRIVER_H -#include FT_INTERNAL_AUTOHINT_H -#include FT_INTERNAL_SERVICE_H -#include FT_INTERNAL_CALC_H +#include +#include +#include +#include +#include +#include +#include +#include +#include #ifdef FT_CONFIG_OPTION_INCREMENTAL -#include FT_INCREMENTAL_H +#include #endif +#include "compiler-macros.h" FT_BEGIN_HEADER @@ -226,8 +226,8 @@ FT_BEGIN_HEADER } FT_CMap_ClassRec; -#define FT_DECLARE_CMAP_CLASS( class_ ) \ - FT_CALLBACK_TABLE const FT_CMap_ClassRec class_; +#define FT_DECLARE_CMAP_CLASS( class_ ) \ + FT_CALLBACK_TABLE const FT_CMap_ClassRec class_; #define FT_DEFINE_CMAP_CLASS( \ class_, \ @@ -278,14 +278,12 @@ FT_BEGIN_HEADER #ifdef FT_CONFIG_OPTION_SUBPIXEL_RENDERING typedef void (*FT_Bitmap_LcdFilterFunc)( FT_Bitmap* bitmap, - FT_Render_Mode render_mode, FT_Byte* weights ); /* This is the default LCD filter, an in-place, 5-tap FIR filter. */ FT_BASE( void ) ft_lcd_filter_fir( FT_Bitmap* bitmap, - FT_Render_Mode mode, FT_LcdFiveTapFilter weights ); #endif /* FT_CONFIG_OPTION_SUBPIXEL_RENDERING */ @@ -420,7 +418,8 @@ FT_BEGIN_HEADER * initializing the glyph slot. */ -#define FT_GLYPH_OWN_BITMAP 0x1U +#define FT_GLYPH_OWN_BITMAP 0x1U +#define FT_GLYPH_OWN_GZIP_SVG 0x2U typedef struct FT_Slot_InternalRec_ { @@ -655,7 +654,7 @@ FT_BEGIN_HEADER FT_BASE( void ) FT_Done_GlyphSlot( FT_GlyphSlot slot ); - /* */ + /* */ #define FT_REQUEST_WIDTH( req ) \ ( (req)->horiResolution \ @@ -675,7 +674,7 @@ FT_BEGIN_HEADER /* Set the metrics according to a size request. 
*/ - FT_BASE( void ) + FT_BASE( FT_Error ) FT_Request_Metrics( FT_Face face, FT_Size_Request req ); @@ -941,8 +940,8 @@ FT_BEGIN_HEADER FT_UInt buffer_max ); typedef FT_UInt - (*FT_Face_GetGlyphNameIndexFunc)( FT_Face face, - FT_String* glyph_name ); + (*FT_Face_GetGlyphNameIndexFunc)( FT_Face face, + const FT_String* glyph_name ); #ifndef FT_CONFIG_OPTION_NO_DEFAULT_SYSTEM @@ -1059,6 +1058,9 @@ FT_BEGIN_HEADER * The struct will be allocated in the global scope (or the scope where * the macro is used). */ +#define FT_DECLARE_GLYPH( class_ ) \ + FT_CALLBACK_TABLE const FT_Glyph_Class class_; + #define FT_DEFINE_GLYPH( \ class_, \ size_, \ diff --git a/Source/ThirdParty/freetype/internal/ftpsprop.h b/Source/ThirdParty/freetype/internal/ftpsprop.h index 574837f6d..1d5b287ad 100644 --- a/Source/ThirdParty/freetype/internal/ftpsprop.h +++ b/Source/ThirdParty/freetype/internal/ftpsprop.h @@ -4,7 +4,7 @@ * * Get and set properties of PostScript drivers (specification). * - * Copyright (C) 2017-2019 by + * Copyright (C) 2017-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,8 +20,7 @@ #define FTPSPROP_H_ -#include -#include FT_FREETYPE_H +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/ftrfork.h b/Source/ThirdParty/freetype/internal/ftrfork.h index 75b3e531b..e96459921 100644 --- a/Source/ThirdParty/freetype/internal/ftrfork.h +++ b/Source/ThirdParty/freetype/internal/ftrfork.h @@ -4,7 +4,7 @@ * * Embedded resource forks accessor (specification). * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * Masatake YAMATO and Redhat K.K. 
* * This file is part of the FreeType project, and may only be used, @@ -25,8 +25,7 @@ #define FTRFORK_H_ -#include -#include FT_INTERNAL_OBJECTS_H +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/ftserv.h b/Source/ThirdParty/freetype/internal/ftserv.h index 8836cf3f1..1e85d6d38 100644 --- a/Source/ThirdParty/freetype/internal/ftserv.h +++ b/Source/ThirdParty/freetype/internal/ftserv.h @@ -4,7 +4,7 @@ * * The FreeType services (specification only). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -31,6 +31,7 @@ #ifndef FTSERV_H_ #define FTSERV_H_ +#include "compiler-macros.h" FT_BEGIN_HEADER @@ -486,33 +487,6 @@ FT_BEGIN_HEADER /* */ - /* - * The header files containing the services. - */ - -#define FT_SERVICE_BDF_H -#define FT_SERVICE_CFF_TABLE_LOAD_H -#define FT_SERVICE_CID_H -#define FT_SERVICE_FONT_FORMAT_H -#define FT_SERVICE_GLYPH_DICT_H -#define FT_SERVICE_GX_VALIDATE_H -#define FT_SERVICE_KERNING_H -#define FT_SERVICE_METRICS_VARIATIONS_H -#define FT_SERVICE_MULTIPLE_MASTERS_H -#define FT_SERVICE_OPENTYPE_VALIDATE_H -#define FT_SERVICE_PFR_H -#define FT_SERVICE_POSTSCRIPT_CMAPS_H -#define FT_SERVICE_POSTSCRIPT_INFO_H -#define FT_SERVICE_POSTSCRIPT_NAME_H -#define FT_SERVICE_PROPERTIES_H -#define FT_SERVICE_SFNT_H -#define FT_SERVICE_TRUETYPE_ENGINE_H -#define FT_SERVICE_TRUETYPE_GLYF_H -#define FT_SERVICE_TT_CMAP_H -#define FT_SERVICE_WINFNT_H - - /* */ - FT_END_HEADER #endif /* FTSERV_H_ */ diff --git a/Source/ThirdParty/freetype/internal/ftstream.h b/Source/ThirdParty/freetype/internal/ftstream.h index e4dca0b0a..88e19287c 100644 --- a/Source/ThirdParty/freetype/internal/ftstream.h +++ b/Source/ThirdParty/freetype/internal/ftstream.h @@ -4,7 +4,7 @@ * * Stream handling (specification). 
* - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -21,8 +21,8 @@ #include -#include FT_SYSTEM_H -#include FT_INTERNAL_OBJECTS_H +#include +#include FT_BEGIN_HEADER @@ -165,6 +165,17 @@ FT_BEGIN_HEADER #define FT_BYTE_U32( p, i, s ) ( FT_UINT32( FT_BYTE_( p, i ) ) << (s) ) + /* + * function acts on increases does range for emits + * pointer checking frames error + * ------------------------------------------------------------------- + * FT_PEEK_XXX buffer pointer no no no no + * FT_NEXT_XXX buffer pointer yes no no no + * FT_GET_XXX stream->cursor yes yes yes no + * FT_READ_XXX stream->pos yes yes no yes + */ + + /* * `FT_PEEK_XXX' are generic macros to get data from a buffer position. No * safety checks are performed. @@ -185,9 +196,9 @@ FT_BEGIN_HEADER FT_BYTE_U32( p, 2, 8 ) | \ FT_BYTE_U32( p, 3, 0 ) ) -#define FT_PEEK_OFF3( p ) FT_INT32( FT_BYTE_U32( p, 0, 16 ) | \ - FT_BYTE_U32( p, 1, 8 ) | \ - FT_BYTE_U32( p, 2, 0 ) ) +#define FT_PEEK_OFF3( p ) ( FT_INT32( FT_BYTE_U32( p, 0, 24 ) | \ + FT_BYTE_U32( p, 1, 16 ) | \ + FT_BYTE_U32( p, 2, 8 ) ) >> 8 ) #define FT_PEEK_UOFF3( p ) FT_UINT32( FT_BYTE_U32( p, 0, 16 ) | \ FT_BYTE_U32( p, 1, 8 ) | \ @@ -209,9 +220,9 @@ FT_BEGIN_HEADER FT_BYTE_U32( p, 1, 8 ) | \ FT_BYTE_U32( p, 0, 0 ) ) -#define FT_PEEK_OFF3_LE( p ) FT_INT32( FT_BYTE_U32( p, 2, 16 ) | \ - FT_BYTE_U32( p, 1, 8 ) | \ - FT_BYTE_U32( p, 0, 0 ) ) +#define FT_PEEK_OFF3_LE( p ) ( FT_INT32( FT_BYTE_U32( p, 2, 24 ) | \ + FT_BYTE_U32( p, 1, 16 ) | \ + FT_BYTE_U32( p, 0, 8 ) ) >> 8 ) #define FT_PEEK_UOFF3_LE( p ) FT_UINT32( FT_BYTE_U32( p, 2, 16 ) | \ FT_BYTE_U32( p, 1, 8 ) | \ @@ -227,42 +238,42 @@ FT_BEGIN_HEADER #define FT_NEXT_BYTE( buffer ) \ ( (unsigned char)*buffer++ ) -#define FT_NEXT_SHORT( buffer ) \ - ( (short)( buffer += 2, FT_PEEK_SHORT( buffer - 2 ) ) ) +#define FT_NEXT_SHORT( buffer ) \ + ( buffer += 2, 
FT_PEEK_SHORT( buffer - 2 ) ) -#define FT_NEXT_USHORT( buffer ) \ - ( (unsigned short)( buffer += 2, FT_PEEK_USHORT( buffer - 2 ) ) ) +#define FT_NEXT_USHORT( buffer ) \ + ( buffer += 2, FT_PEEK_USHORT( buffer - 2 ) ) -#define FT_NEXT_OFF3( buffer ) \ - ( (long)( buffer += 3, FT_PEEK_OFF3( buffer - 3 ) ) ) +#define FT_NEXT_OFF3( buffer ) \ + ( buffer += 3, FT_PEEK_OFF3( buffer - 3 ) ) -#define FT_NEXT_UOFF3( buffer ) \ - ( (unsigned long)( buffer += 3, FT_PEEK_UOFF3( buffer - 3 ) ) ) +#define FT_NEXT_UOFF3( buffer ) \ + ( buffer += 3, FT_PEEK_UOFF3( buffer - 3 ) ) -#define FT_NEXT_LONG( buffer ) \ - ( (long)( buffer += 4, FT_PEEK_LONG( buffer - 4 ) ) ) +#define FT_NEXT_LONG( buffer ) \ + ( buffer += 4, FT_PEEK_LONG( buffer - 4 ) ) -#define FT_NEXT_ULONG( buffer ) \ - ( (unsigned long)( buffer += 4, FT_PEEK_ULONG( buffer - 4 ) ) ) +#define FT_NEXT_ULONG( buffer ) \ + ( buffer += 4, FT_PEEK_ULONG( buffer - 4 ) ) -#define FT_NEXT_SHORT_LE( buffer ) \ - ( (short)( buffer += 2, FT_PEEK_SHORT_LE( buffer - 2 ) ) ) +#define FT_NEXT_SHORT_LE( buffer ) \ + ( buffer += 2, FT_PEEK_SHORT_LE( buffer - 2 ) ) -#define FT_NEXT_USHORT_LE( buffer ) \ - ( (unsigned short)( buffer += 2, FT_PEEK_USHORT_LE( buffer - 2 ) ) ) +#define FT_NEXT_USHORT_LE( buffer ) \ + ( buffer += 2, FT_PEEK_USHORT_LE( buffer - 2 ) ) -#define FT_NEXT_OFF3_LE( buffer ) \ - ( (long)( buffer += 3, FT_PEEK_OFF3_LE( buffer - 3 ) ) ) +#define FT_NEXT_OFF3_LE( buffer ) \ + ( buffer += 3, FT_PEEK_OFF3_LE( buffer - 3 ) ) -#define FT_NEXT_UOFF3_LE( buffer ) \ - ( (unsigned long)( buffer += 3, FT_PEEK_UOFF3_LE( buffer - 3 ) ) ) +#define FT_NEXT_UOFF3_LE( buffer ) \ + ( buffer += 3, FT_PEEK_UOFF3_LE( buffer - 3 ) ) -#define FT_NEXT_LONG_LE( buffer ) \ - ( (long)( buffer += 4, FT_PEEK_LONG_LE( buffer - 4 ) ) ) +#define FT_NEXT_LONG_LE( buffer ) \ + ( buffer += 4, FT_PEEK_LONG_LE( buffer - 4 ) ) -#define FT_NEXT_ULONG_LE( buffer ) \ - ( (unsigned long)( buffer += 4, FT_PEEK_ULONG_LE( buffer - 4 ) ) ) +#define 
FT_NEXT_ULONG_LE( buffer ) \ + ( buffer += 4, FT_PEEK_ULONG_LE( buffer - 4 ) ) /************************************************************************** @@ -294,20 +305,19 @@ FT_BEGIN_HEADER #else #define FT_GET_MACRO( func, type ) ( (type)func( stream ) ) -#define FT_GET_CHAR() FT_GET_MACRO( FT_Stream_GetChar, FT_Char ) -#define FT_GET_BYTE() FT_GET_MACRO( FT_Stream_GetChar, FT_Byte ) -#define FT_GET_SHORT() FT_GET_MACRO( FT_Stream_GetUShort, FT_Short ) -#define FT_GET_USHORT() FT_GET_MACRO( FT_Stream_GetUShort, FT_UShort ) -#define FT_GET_OFF3() FT_GET_MACRO( FT_Stream_GetUOffset, FT_Long ) -#define FT_GET_UOFF3() FT_GET_MACRO( FT_Stream_GetUOffset, FT_ULong ) -#define FT_GET_LONG() FT_GET_MACRO( FT_Stream_GetULong, FT_Long ) -#define FT_GET_ULONG() FT_GET_MACRO( FT_Stream_GetULong, FT_ULong ) -#define FT_GET_TAG4() FT_GET_MACRO( FT_Stream_GetULong, FT_ULong ) +#define FT_GET_CHAR() FT_GET_MACRO( FT_Stream_GetByte, FT_Char ) +#define FT_GET_BYTE() FT_GET_MACRO( FT_Stream_GetByte, FT_Byte ) +#define FT_GET_SHORT() FT_GET_MACRO( FT_Stream_GetUShort, FT_Int16 ) +#define FT_GET_USHORT() FT_GET_MACRO( FT_Stream_GetUShort, FT_UInt16 ) +#define FT_GET_UOFF3() FT_GET_MACRO( FT_Stream_GetUOffset, FT_UInt32 ) +#define FT_GET_LONG() FT_GET_MACRO( FT_Stream_GetULong, FT_Int32 ) +#define FT_GET_ULONG() FT_GET_MACRO( FT_Stream_GetULong, FT_UInt32 ) +#define FT_GET_TAG4() FT_GET_MACRO( FT_Stream_GetULong, FT_UInt32 ) -#define FT_GET_SHORT_LE() FT_GET_MACRO( FT_Stream_GetUShortLE, FT_Short ) -#define FT_GET_USHORT_LE() FT_GET_MACRO( FT_Stream_GetUShortLE, FT_UShort ) -#define FT_GET_LONG_LE() FT_GET_MACRO( FT_Stream_GetULongLE, FT_Long ) -#define FT_GET_ULONG_LE() FT_GET_MACRO( FT_Stream_GetULongLE, FT_ULong ) +#define FT_GET_SHORT_LE() FT_GET_MACRO( FT_Stream_GetUShortLE, FT_Int16 ) +#define FT_GET_USHORT_LE() FT_GET_MACRO( FT_Stream_GetUShortLE, FT_UInt16 ) +#define FT_GET_LONG_LE() FT_GET_MACRO( FT_Stream_GetULongLE, FT_Int32 ) +#define FT_GET_ULONG_LE() FT_GET_MACRO( 
FT_Stream_GetULongLE, FT_UInt32 ) #endif @@ -322,19 +332,18 @@ FT_BEGIN_HEADER * `FT_STREAM_POS'. They use the full machinery to check whether a read is * valid. */ -#define FT_READ_BYTE( var ) FT_READ_MACRO( FT_Stream_ReadChar, FT_Byte, var ) -#define FT_READ_CHAR( var ) FT_READ_MACRO( FT_Stream_ReadChar, FT_Char, var ) -#define FT_READ_SHORT( var ) FT_READ_MACRO( FT_Stream_ReadUShort, FT_Short, var ) -#define FT_READ_USHORT( var ) FT_READ_MACRO( FT_Stream_ReadUShort, FT_UShort, var ) -#define FT_READ_OFF3( var ) FT_READ_MACRO( FT_Stream_ReadUOffset, FT_Long, var ) -#define FT_READ_UOFF3( var ) FT_READ_MACRO( FT_Stream_ReadUOffset, FT_ULong, var ) -#define FT_READ_LONG( var ) FT_READ_MACRO( FT_Stream_ReadULong, FT_Long, var ) -#define FT_READ_ULONG( var ) FT_READ_MACRO( FT_Stream_ReadULong, FT_ULong, var ) +#define FT_READ_BYTE( var ) FT_READ_MACRO( FT_Stream_ReadByte, FT_Byte, var ) +#define FT_READ_CHAR( var ) FT_READ_MACRO( FT_Stream_ReadByte, FT_Char, var ) +#define FT_READ_SHORT( var ) FT_READ_MACRO( FT_Stream_ReadUShort, FT_Int16, var ) +#define FT_READ_USHORT( var ) FT_READ_MACRO( FT_Stream_ReadUShort, FT_UInt16, var ) +#define FT_READ_UOFF3( var ) FT_READ_MACRO( FT_Stream_ReadUOffset, FT_UInt32, var ) +#define FT_READ_LONG( var ) FT_READ_MACRO( FT_Stream_ReadULong, FT_Int32, var ) +#define FT_READ_ULONG( var ) FT_READ_MACRO( FT_Stream_ReadULong, FT_UInt32, var ) -#define FT_READ_SHORT_LE( var ) FT_READ_MACRO( FT_Stream_ReadUShortLE, FT_Short, var ) -#define FT_READ_USHORT_LE( var ) FT_READ_MACRO( FT_Stream_ReadUShortLE, FT_UShort, var ) -#define FT_READ_LONG_LE( var ) FT_READ_MACRO( FT_Stream_ReadULongLE, FT_Long, var ) -#define FT_READ_ULONG_LE( var ) FT_READ_MACRO( FT_Stream_ReadULongLE, FT_ULong, var ) +#define FT_READ_SHORT_LE( var ) FT_READ_MACRO( FT_Stream_ReadUShortLE, FT_Int16, var ) +#define FT_READ_USHORT_LE( var ) FT_READ_MACRO( FT_Stream_ReadUShortLE, FT_UInt16, var ) +#define FT_READ_LONG_LE( var ) FT_READ_MACRO( FT_Stream_ReadULongLE, 
FT_Int32, var ) +#define FT_READ_ULONG_LE( var ) FT_READ_MACRO( FT_Stream_ReadULongLE, FT_UInt32, var ) #ifndef FT_CONFIG_OPTION_NO_DEFAULT_SYSTEM @@ -446,37 +455,37 @@ FT_BEGIN_HEADER /* read a byte from an entered frame */ - FT_BASE( FT_Char ) - FT_Stream_GetChar( FT_Stream stream ); + FT_BASE( FT_Byte ) + FT_Stream_GetByte( FT_Stream stream ); /* read a 16-bit big-endian unsigned integer from an entered frame */ - FT_BASE( FT_UShort ) + FT_BASE( FT_UInt16 ) FT_Stream_GetUShort( FT_Stream stream ); /* read a 24-bit big-endian unsigned integer from an entered frame */ - FT_BASE( FT_ULong ) + FT_BASE( FT_UInt32 ) FT_Stream_GetUOffset( FT_Stream stream ); /* read a 32-bit big-endian unsigned integer from an entered frame */ - FT_BASE( FT_ULong ) + FT_BASE( FT_UInt32 ) FT_Stream_GetULong( FT_Stream stream ); /* read a 16-bit little-endian unsigned integer from an entered frame */ - FT_BASE( FT_UShort ) + FT_BASE( FT_UInt16 ) FT_Stream_GetUShortLE( FT_Stream stream ); /* read a 32-bit little-endian unsigned integer from an entered frame */ - FT_BASE( FT_ULong ) + FT_BASE( FT_UInt32 ) FT_Stream_GetULongLE( FT_Stream stream ); /* read a byte from a stream */ - FT_BASE( FT_Char ) - FT_Stream_ReadChar( FT_Stream stream, + FT_BASE( FT_Byte ) + FT_Stream_ReadByte( FT_Stream stream, FT_Error* error ); /* read a 16-bit big-endian unsigned integer from a stream */ - FT_BASE( FT_UShort ) + FT_BASE( FT_UInt16 ) FT_Stream_ReadUShort( FT_Stream stream, FT_Error* error ); @@ -486,17 +495,17 @@ FT_BEGIN_HEADER FT_Error* error ); /* read a 32-bit big-endian integer from a stream */ - FT_BASE( FT_ULong ) + FT_BASE( FT_UInt32 ) FT_Stream_ReadULong( FT_Stream stream, FT_Error* error ); /* read a 16-bit little-endian unsigned integer from a stream */ - FT_BASE( FT_UShort ) + FT_BASE( FT_UInt16 ) FT_Stream_ReadUShortLE( FT_Stream stream, FT_Error* error ); /* read a 32-bit little-endian unsigned integer from a stream */ - FT_BASE( FT_ULong ) + FT_BASE( FT_UInt32 ) FT_Stream_ReadULongLE( 
FT_Stream stream, FT_Error* error ); diff --git a/Source/ThirdParty/freetype/internal/fttrace.h b/Source/ThirdParty/freetype/internal/fttrace.h index 8089babfb..319fe56fd 100644 --- a/Source/ThirdParty/freetype/internal/fttrace.h +++ b/Source/ThirdParty/freetype/internal/fttrace.h @@ -4,7 +4,7 @@ * * Tracing handling (specification only). * - * Copyright (C) 2002-2019 by + * Copyright (C) 2002-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -18,6 +18,11 @@ /* definitions of trace levels for FreeType 2 */ + /* the maximum string length (if the argument to `FT_TRACE_DEF` */ + /* gets used as a string) plus one charachter for ':' plus */ + /* another one for the trace level */ +#define FT_MAX_TRACE_LEVEL_LENGTH (9 + 1 + 1) + /* the first level must always be `trace_any' */ FT_TRACE_DEF( any ) @@ -38,20 +43,28 @@ FT_TRACE_DEF( checksum ) /* bitmap checksum (ftobjs.c) */ FT_TRACE_DEF( mm ) /* MM interface (ftmm.c) */ FT_TRACE_DEF( psprops ) /* PS driver properties (ftpsprop.c) */ FT_TRACE_DEF( raccess ) /* resource fork accessor (ftrfork.c) */ -FT_TRACE_DEF( raster ) /* monochrome rasterizer (ftraster.c) */ -FT_TRACE_DEF( smooth ) /* anti-aliasing raster (ftgrays.c) */ FT_TRACE_DEF( synth ) /* bold/slant synthesizer (ftsynth.c) */ - /* Cache sub-system */ -FT_TRACE_DEF( cache ) /* cache sub-system (ftcache.c, etc.) */ + /* rasterizers */ +FT_TRACE_DEF( raster ) /* monochrome rasterizer (ftraster.c) */ +FT_TRACE_DEF( smooth ) /* anti-aliasing raster (ftgrays.c) */ + + /* ot-svg module */ +FT_TRACE_DEF( otsvg ) /* OT-SVG renderer (ftsvg.c) */ + + /* cache sub-system */ +FT_TRACE_DEF( cache ) /* cache sub-system (ftcache.c, etc.) 
*/ /* SFNT driver components */ FT_TRACE_DEF( sfdriver ) /* SFNT font driver (sfdriver.c) */ FT_TRACE_DEF( sfobjs ) /* SFNT object handler (sfobjs.c) */ +FT_TRACE_DEF( sfwoff ) /* WOFF format handler (sfwoff.c) */ +FT_TRACE_DEF( sfwoff2 ) /* WOFF2 format handler (sfwoff2.c) */ FT_TRACE_DEF( ttbdf ) /* TrueType embedded BDF (ttbdf.c) */ FT_TRACE_DEF( ttcmap ) /* charmap handler (ttcmap.c) */ FT_TRACE_DEF( ttcolr ) /* glyph layer table (ttcolr.c) */ FT_TRACE_DEF( ttcpal ) /* color palette table (ttcpal.c) */ +FT_TRACE_DEF( ttsvg ) /* OpenType SVG table (ttsvg.c) */ FT_TRACE_DEF( ttkern ) /* kerning handler (ttkern.c) */ FT_TRACE_DEF( ttload ) /* basic TrueType tables (ttload.c) */ FT_TRACE_DEF( ttmtx ) /* metrics-related tables (ttmtx.c) */ @@ -75,6 +88,7 @@ FT_TRACE_DEF( t1objs ) FT_TRACE_DEF( t1parse ) /* PostScript helper module `psaux' */ +FT_TRACE_DEF( afmparse ) FT_TRACE_DEF( cffdecode ) FT_TRACE_DEF( psconv ) FT_TRACE_DEF( psobjs ) @@ -149,8 +163,10 @@ FT_TRACE_DEF( afglobal ) FT_TRACE_DEF( afhints ) FT_TRACE_DEF( afmodule ) FT_TRACE_DEF( aflatin ) -FT_TRACE_DEF( aflatin2 ) FT_TRACE_DEF( afshaper ) -FT_TRACE_DEF( afwarp ) + + /* SDF components */ +FT_TRACE_DEF( sdf ) /* signed distance raster for outlines (ftsdf.c) */ +FT_TRACE_DEF( bsdf ) /* signed distance raster for bitmaps (ftbsdf.c) */ /* END */ diff --git a/Source/ThirdParty/freetype/internal/ftvalid.h b/Source/ThirdParty/freetype/internal/ftvalid.h index 38aa06cc4..e98ee4e47 100644 --- a/Source/ThirdParty/freetype/internal/ftvalid.h +++ b/Source/ThirdParty/freetype/internal/ftvalid.h @@ -4,7 +4,7 @@ * * FreeType validation support (specification). * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -20,8 +20,9 @@ #define FTVALID_H_ #include -#include FT_CONFIG_STANDARD_LIBRARY_H /* for ft_setjmp and ft_longjmp */ +#include FT_CONFIG_STANDARD_LIBRARY_H /* for ft_jmpbuf */ +#include "compiler-macros.h" FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/internal.h b/Source/ThirdParty/freetype/internal/internal.h deleted file mode 100644 index 173d8ad90..000000000 --- a/Source/ThirdParty/freetype/internal/internal.h +++ /dev/null @@ -1,66 +0,0 @@ -/**************************************************************************** - * - * internal.h - * - * Internal header files (specification only). - * - * Copyright (C) 1996-2019 by - * David Turner, Robert Wilhelm, and Werner Lemberg. - * - * This file is part of the FreeType project, and may only be used, - * modified, and distributed under the terms of the FreeType project - * license, LICENSE.TXT. By continuing to use, modify, or distribute - * this file you indicate that you have read the license and - * understand and accept it fully. - * - */ - - - /************************************************************************** - * - * This file is automatically included by `ft2build.h`. Do not include it - * manually! 
- * - */ - - -#define FT_INTERNAL_OBJECTS_H -#define FT_INTERNAL_STREAM_H -#define FT_INTERNAL_MEMORY_H -#define FT_INTERNAL_DEBUG_H -#define FT_INTERNAL_CALC_H -#define FT_INTERNAL_HASH_H -#define FT_INTERNAL_DRIVER_H -#define FT_INTERNAL_TRACE_H -#define FT_INTERNAL_GLYPH_LOADER_H -#define FT_INTERNAL_SFNT_H -#define FT_INTERNAL_SERVICE_H -#define FT_INTERNAL_RFORK_H -#define FT_INTERNAL_VALIDATE_H - -#define FT_INTERNAL_TRUETYPE_TYPES_H -#define FT_INTERNAL_TYPE1_TYPES_H - -#define FT_INTERNAL_POSTSCRIPT_AUX_H -#define FT_INTERNAL_POSTSCRIPT_HINTS_H -#define FT_INTERNAL_POSTSCRIPT_PROPS_H - -#define FT_INTERNAL_AUTOHINT_H - -#define FT_INTERNAL_CFF_TYPES_H -#define FT_INTERNAL_CFF_OBJECTS_TYPES_H - - -#if defined( _MSC_VER ) /* Visual C++ (and Intel C++) */ - - /* We disable the warning `conditional expression is constant' here */ - /* in order to compile cleanly with the maximum level of warnings. */ - /* In particular, the warning complains about stuff like `while(0)' */ - /* which is very useful in macro definitions. There is no benefit */ - /* in having it enabled. */ -#pragma warning( disable : 4127 ) - -#endif /* _MSC_VER */ - - -/* END */ diff --git a/Source/ThirdParty/freetype/internal/psaux.h b/Source/ThirdParty/freetype/internal/psaux.h index 3ab01c3e6..dfb1987f8 100644 --- a/Source/ThirdParty/freetype/internal/psaux.h +++ b/Source/ThirdParty/freetype/internal/psaux.h @@ -5,7 +5,7 @@ * Auxiliary functions and data structures related to PostScript fonts * (specification). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -21,14 +21,13 @@ #define PSAUX_H_ -#include -#include FT_INTERNAL_OBJECTS_H -#include FT_INTERNAL_TYPE1_TYPES_H -#include FT_INTERNAL_HASH_H -#include FT_INTERNAL_TRUETYPE_TYPES_H -#include FT_SERVICE_POSTSCRIPT_CMAPS_H -#include FT_INTERNAL_CFF_TYPES_H -#include FT_INTERNAL_CFF_OBJECTS_TYPES_H +#include +#include +#include +#include +#include +#include +#include @@ -96,10 +95,10 @@ FT_BEGIN_HEADER (*done)( PS_Table table ); FT_Error - (*add)( PS_Table table, - FT_Int idx, - void* object, - FT_UInt length ); + (*add)( PS_Table table, + FT_Int idx, + const void* object, + FT_UInt length ); void (*release)( PS_Table table ); @@ -133,9 +132,6 @@ FT_BEGIN_HEADER * max_elems :: * The maximum number of elements in table. * - * num_elems :: - * The current number of elements in table. - * * elements :: * A table of element addresses within the block. * @@ -156,7 +152,6 @@ FT_BEGIN_HEADER FT_ULong init; FT_Int max_elems; - FT_Int num_elems; FT_Byte** elements; /* addresses of table elements */ FT_UInt* lengths; /* lengths of table elements */ diff --git a/Source/ThirdParty/freetype/internal/pshints.h b/Source/ThirdParty/freetype/internal/pshints.h index 699acea6f..ededc4c72 100644 --- a/Source/ThirdParty/freetype/internal/pshints.h +++ b/Source/ThirdParty/freetype/internal/pshints.h @@ -6,7 +6,7 @@ * recorders (specification only). These are used to support native * T1/T2 hints in the 'type1', 'cid', and 'cff' font drivers. * - * Copyright (C) 2001-2019 by + * Copyright (C) 2001-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -22,9 +22,8 @@ #define PSHINTS_H_ -#include -#include FT_FREETYPE_H -#include FT_TYPE1_TABLES_H +#include +#include FT_BEGIN_HEADER @@ -295,7 +294,7 @@ FT_BEGIN_HEADER * * @note: * On input, all points within the outline are in font coordinates. On - * output, they are in 1/64th of pixels. 
+ * output, they are in 1/64 of pixels. * * The scaling transformation is taken from the 'globals' object which * must correspond to the same font as the glyph. @@ -608,7 +607,7 @@ FT_BEGIN_HEADER * * @note: * On input, all points within the outline are in font coordinates. On - * output, they are in 1/64th of pixels. + * output, they are in 1/64 of pixels. * * The scaling transformation is taken from the 'globals' object which * must correspond to the same font than the glyph. diff --git a/Source/ThirdParty/freetype/internal/services/svbdf.h b/Source/ThirdParty/freetype/internal/services/svbdf.h index e4786ed03..bf0c1dcc7 100644 --- a/Source/ThirdParty/freetype/internal/services/svbdf.h +++ b/Source/ThirdParty/freetype/internal/services/svbdf.h @@ -4,7 +4,7 @@ * * The FreeType BDF services (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVBDF_H_ #define SVBDF_H_ -#include FT_BDF_H -#include FT_INTERNAL_SERVICE_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svcfftl.h b/Source/ThirdParty/freetype/internal/services/svcfftl.h index 6c621732d..4a20498ee 100644 --- a/Source/ThirdParty/freetype/internal/services/svcfftl.h +++ b/Source/ThirdParty/freetype/internal/services/svcfftl.h @@ -4,7 +4,7 @@ * * The FreeType CFF tables loader service (specification). * - * Copyright (C) 2017-2019 by + * Copyright (C) 2017-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVCFFTL_H_ #define SVCFFTL_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_INTERNAL_CFF_TYPES_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svcid.h b/Source/ThirdParty/freetype/internal/services/svcid.h index 555a5af5b..06d0cb8fd 100644 --- a/Source/ThirdParty/freetype/internal/services/svcid.h +++ b/Source/ThirdParty/freetype/internal/services/svcid.h @@ -4,7 +4,7 @@ * * The FreeType CID font services (specification). * - * Copyright (C) 2007-2019 by + * Copyright (C) 2007-2023 by * Derek Clegg and Michael Toftdal. * * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ #ifndef SVCID_H_ #define SVCID_H_ -#include FT_INTERNAL_SERVICE_H +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svfntfmt.h b/Source/ThirdParty/freetype/internal/services/svfntfmt.h index 6f4285ea8..bc45e8056 100644 --- a/Source/ThirdParty/freetype/internal/services/svfntfmt.h +++ b/Source/ThirdParty/freetype/internal/services/svfntfmt.h @@ -4,7 +4,7 @@ * * The FreeType font format service (specification only). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ #ifndef SVFNTFMT_H_ #define SVFNTFMT_H_ -#include FT_INTERNAL_SERVICE_H +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svgldict.h b/Source/ThirdParty/freetype/internal/services/svgldict.h index ca8edf0eb..6437abfbf 100644 --- a/Source/ThirdParty/freetype/internal/services/svgldict.h +++ b/Source/ThirdParty/freetype/internal/services/svgldict.h @@ -4,7 +4,7 @@ * * The FreeType glyph dictionary services (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ #ifndef SVGLDICT_H_ #define SVGLDICT_H_ -#include FT_INTERNAL_SERVICE_H +#include FT_BEGIN_HEADER @@ -41,8 +41,8 @@ FT_BEGIN_HEADER FT_UInt buffer_max ); typedef FT_UInt - (*FT_GlyphDict_NameIndexFunc)( FT_Face face, - FT_String* glyph_name ); + (*FT_GlyphDict_NameIndexFunc)( FT_Face face, + const FT_String* glyph_name ); FT_DEFINE_SERVICE( GlyphDict ) diff --git a/Source/ThirdParty/freetype/internal/services/svgxval.h b/Source/ThirdParty/freetype/internal/services/svgxval.h index 0bb76f314..31016afe0 100644 --- a/Source/ThirdParty/freetype/internal/services/svgxval.h +++ b/Source/ThirdParty/freetype/internal/services/svgxval.h @@ -4,7 +4,7 @@ * * FreeType API for validating TrueTypeGX/AAT tables (specification). * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * Masatake YAMATO, Red Hat K.K., * David Turner, Robert Wilhelm, and Werner Lemberg. * @@ -28,8 +28,8 @@ #ifndef SVGXVAL_H_ #define SVGXVAL_H_ -#include FT_GX_VALIDATE_H -#include FT_INTERNAL_VALIDATE_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svkern.h b/Source/ThirdParty/freetype/internal/services/svkern.h index f992a327c..bcabbc3e6 100644 --- a/Source/ThirdParty/freetype/internal/services/svkern.h +++ b/Source/ThirdParty/freetype/internal/services/svkern.h @@ -4,7 +4,7 @@ * * The FreeType Kerning service (specification). * - * Copyright (C) 2006-2019 by + * Copyright (C) 2006-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVKERN_H_ #define SVKERN_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_TRUETYPE_TABLES_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svmetric.h b/Source/ThirdParty/freetype/internal/services/svmetric.h index d688bc7c6..167617ebb 100644 --- a/Source/ThirdParty/freetype/internal/services/svmetric.h +++ b/Source/ThirdParty/freetype/internal/services/svmetric.h @@ -4,7 +4,7 @@ * * The FreeType services for metrics variations (specification). * - * Copyright (C) 2016-2019 by + * Copyright (C) 2016-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ #ifndef SVMETRIC_H_ #define SVMETRIC_H_ -#include FT_INTERNAL_SERVICE_H +#include FT_BEGIN_HEADER @@ -77,6 +77,9 @@ FT_BEGIN_HEADER typedef void (*FT_Metrics_Adjust_Func)( FT_Face face ); + typedef FT_Error + (*FT_Size_Reset_Func)( FT_Size size ); + FT_DEFINE_SERVICE( MetricsVariations ) { @@ -90,6 +93,7 @@ FT_BEGIN_HEADER FT_VOrg_Adjust_Func vorg_adjust; FT_Metrics_Adjust_Func metrics_adjust; + FT_Size_Reset_Func size_reset; }; @@ -101,7 +105,8 @@ FT_BEGIN_HEADER tsb_adjust_, \ bsb_adjust_, \ vorg_adjust_, \ - metrics_adjust_ ) \ + metrics_adjust_, \ + size_reset_ ) \ static const FT_Service_MetricsVariationsRec class_ = \ { \ hadvance_adjust_, \ @@ -111,7 +116,8 @@ FT_BEGIN_HEADER tsb_adjust_, \ bsb_adjust_, \ vorg_adjust_, \ - metrics_adjust_ \ + metrics_adjust_, \ + size_reset_ \ }; /* */ diff --git a/Source/ThirdParty/freetype/internal/services/svmm.h b/Source/ThirdParty/freetype/internal/services/svmm.h index 3652f2050..7e76ab832 100644 --- a/Source/ThirdParty/freetype/internal/services/svmm.h +++ b/Source/ThirdParty/freetype/internal/services/svmm.h @@ -4,8 +4,8 @@ * * The FreeType Multiple Masters and GX var services (specification). 
* - * Copyright (C) 2003-2019 by - * David Turner, Robert Wilhelm, and Werner Lemberg. + * Copyright (C) 2003-2023 by + * David Turner, Robert Wilhelm, Werner Lemberg, and Dominik Röttsches. * * This file is part of the FreeType project, and may only be used, * modified, and distributed under the terms of the FreeType project @@ -19,7 +19,9 @@ #ifndef SVMM_H_ #define SVMM_H_ -#include FT_INTERNAL_SERVICE_H +#include +#include +#include FT_BEGIN_HEADER @@ -58,9 +60,9 @@ FT_BEGIN_HEADER /* use return value -1 to indicate that the new coordinates */ /* are equal to the current ones; no changes are thus needed */ typedef FT_Error - (*FT_Set_MM_Blend_Func)( FT_Face face, - FT_UInt num_coords, - FT_Long* coords ); + (*FT_Set_MM_Blend_Func)( FT_Face face, + FT_UInt num_coords, + FT_Fixed* coords ); typedef FT_Error (*FT_Get_Var_Design_Func)( FT_Face face, @@ -68,13 +70,17 @@ FT_BEGIN_HEADER FT_Fixed* coords ); typedef FT_Error - (*FT_Set_Instance_Func)( FT_Face face, - FT_UInt instance_index ); + (*FT_Set_Named_Instance_Func)( FT_Face face, + FT_UInt instance_index ); typedef FT_Error - (*FT_Get_MM_Blend_Func)( FT_Face face, - FT_UInt num_coords, - FT_Long* coords ); + (*FT_Get_Default_Named_Instance_Func)( FT_Face face, + FT_UInt *instance_index ); + + typedef FT_Error + (*FT_Get_MM_Blend_Func)( FT_Face face, + FT_UInt num_coords, + FT_Fixed* coords ); typedef FT_Error (*FT_Get_Var_Blend_Func)( FT_Face face, @@ -84,7 +90,7 @@ FT_BEGIN_HEADER FT_MM_Var* *mm_var ); typedef void - (*FT_Done_Blend_Func)( FT_Face ); + (*FT_Done_Blend_Func)( FT_Face face ); typedef FT_Error (*FT_Set_MM_WeightVector_Func)( FT_Face face, @@ -96,53 +102,105 @@ FT_BEGIN_HEADER FT_UInt* len, FT_Fixed* weight_vector ); + typedef void + (*FT_Construct_PS_Name_Func)( FT_Face face ); + + typedef FT_Error + (*FT_Var_Load_Delta_Set_Idx_Map_Func)( FT_Face face, + FT_ULong offset, + GX_DeltaSetIdxMap map, + GX_ItemVarStore itemStore, + FT_ULong table_len ); + + typedef FT_Error + 
(*FT_Var_Load_Item_Var_Store_Func)( FT_Face face, + FT_ULong offset, + GX_ItemVarStore itemStore ); + + typedef FT_ItemVarDelta + (*FT_Var_Get_Item_Delta_Func)( FT_Face face, + GX_ItemVarStore itemStore, + FT_UInt outerIndex, + FT_UInt innerIndex ); + + typedef void + (*FT_Var_Done_Item_Var_Store_Func)( FT_Face face, + GX_ItemVarStore itemStore ); + + typedef void + (*FT_Var_Done_Delta_Set_Idx_Map_Func)( FT_Face face, + GX_DeltaSetIdxMap deltaSetIdxMap ); + FT_DEFINE_SERVICE( MultiMasters ) { - FT_Get_MM_Func get_mm; - FT_Set_MM_Design_Func set_mm_design; - FT_Set_MM_Blend_Func set_mm_blend; - FT_Get_MM_Blend_Func get_mm_blend; - FT_Get_MM_Var_Func get_mm_var; - FT_Set_Var_Design_Func set_var_design; - FT_Get_Var_Design_Func get_var_design; - FT_Set_Instance_Func set_instance; - FT_Set_MM_WeightVector_Func set_mm_weightvector; - FT_Get_MM_WeightVector_Func get_mm_weightvector; + FT_Get_MM_Func get_mm; + FT_Set_MM_Design_Func set_mm_design; + FT_Set_MM_Blend_Func set_mm_blend; + FT_Get_MM_Blend_Func get_mm_blend; + FT_Get_MM_Var_Func get_mm_var; + FT_Set_Var_Design_Func set_var_design; + FT_Get_Var_Design_Func get_var_design; + FT_Set_Named_Instance_Func set_named_instance; + FT_Get_Default_Named_Instance_Func get_default_named_instance; + FT_Set_MM_WeightVector_Func set_mm_weightvector; + FT_Get_MM_WeightVector_Func get_mm_weightvector; /* for internal use; only needed for code sharing between modules */ - FT_Get_Var_Blend_Func get_var_blend; - FT_Done_Blend_Func done_blend; + FT_Construct_PS_Name_Func construct_ps_name; + FT_Var_Load_Delta_Set_Idx_Map_Func load_delta_set_idx_map; + FT_Var_Load_Item_Var_Store_Func load_item_var_store; + FT_Var_Get_Item_Delta_Func get_item_delta; + FT_Var_Done_Item_Var_Store_Func done_item_var_store; + FT_Var_Done_Delta_Set_Idx_Map_Func done_delta_set_idx_map; + FT_Get_Var_Blend_Func get_var_blend; + FT_Done_Blend_Func done_blend; }; -#define FT_DEFINE_SERVICE_MULTIMASTERSREC( class_, \ - get_mm_, \ - set_mm_design_, \ - 
set_mm_blend_, \ - get_mm_blend_, \ - get_mm_var_, \ - set_var_design_, \ - get_var_design_, \ - set_instance_, \ - set_weightvector_, \ - get_weightvector_, \ - get_var_blend_, \ - done_blend_ ) \ - static const FT_Service_MultiMastersRec class_ = \ - { \ - get_mm_, \ - set_mm_design_, \ - set_mm_blend_, \ - get_mm_blend_, \ - get_mm_var_, \ - set_var_design_, \ - get_var_design_, \ - set_instance_, \ - set_weightvector_, \ - get_weightvector_, \ - get_var_blend_, \ - done_blend_ \ +#define FT_DEFINE_SERVICE_MULTIMASTERSREC( class_, \ + get_mm_, \ + set_mm_design_, \ + set_mm_blend_, \ + get_mm_blend_, \ + get_mm_var_, \ + set_var_design_, \ + get_var_design_, \ + set_named_instance_, \ + get_default_named_instance_, \ + set_mm_weightvector_, \ + get_mm_weightvector_, \ + \ + construct_ps_name_, \ + load_delta_set_idx_map_, \ + load_item_var_store_, \ + get_item_delta_, \ + done_item_var_store_, \ + done_delta_set_idx_map_, \ + get_var_blend_, \ + done_blend_ ) \ + static const FT_Service_MultiMastersRec class_ = \ + { \ + get_mm_, \ + set_mm_design_, \ + set_mm_blend_, \ + get_mm_blend_, \ + get_mm_var_, \ + set_var_design_, \ + get_var_design_, \ + set_named_instance_, \ + get_default_named_instance_, \ + set_mm_weightvector_, \ + get_mm_weightvector_, \ + \ + construct_ps_name_, \ + load_delta_set_idx_map_, \ + load_item_var_store_, \ + get_item_delta_, \ + done_item_var_store_, \ + done_delta_set_idx_map_, \ + get_var_blend_, \ + done_blend_ \ }; /* */ diff --git a/Source/ThirdParty/freetype/internal/services/svotval.h b/Source/ThirdParty/freetype/internal/services/svotval.h index cab4c6efb..a4683cd5f 100644 --- a/Source/ThirdParty/freetype/internal/services/svotval.h +++ b/Source/ThirdParty/freetype/internal/services/svotval.h @@ -4,7 +4,7 @@ * * The FreeType OpenType validation service (specification). * - * Copyright (C) 2004-2019 by + * Copyright (C) 2004-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVOTVAL_H_ #define SVOTVAL_H_ -#include FT_OPENTYPE_VALIDATE_H -#include FT_INTERNAL_VALIDATE_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svpfr.h b/Source/ThirdParty/freetype/internal/services/svpfr.h index fd01d614d..fd189c7de 100644 --- a/Source/ThirdParty/freetype/internal/services/svpfr.h +++ b/Source/ThirdParty/freetype/internal/services/svpfr.h @@ -4,7 +4,7 @@ * * Internal PFR service functions (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVPFR_H_ #define SVPFR_H_ -#include FT_PFR_H -#include FT_INTERNAL_SERVICE_H +#include +#include FT_BEGIN_HEADER @@ -56,7 +56,6 @@ FT_BEGIN_HEADER }; - /* */ FT_END_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svpostnm.h b/Source/ThirdParty/freetype/internal/services/svpostnm.h index 18e3843cb..2b8f6dfec 100644 --- a/Source/ThirdParty/freetype/internal/services/svpostnm.h +++ b/Source/ThirdParty/freetype/internal/services/svpostnm.h @@ -4,7 +4,7 @@ * * The FreeType PostScript name services (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ #ifndef SVPOSTNM_H_ #define SVPOSTNM_H_ -#include FT_INTERNAL_SERVICE_H +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svprop.h b/Source/ThirdParty/freetype/internal/services/svprop.h index e48d0151e..932ce32e0 100644 --- a/Source/ThirdParty/freetype/internal/services/svprop.h +++ b/Source/ThirdParty/freetype/internal/services/svprop.h @@ -4,7 +4,7 @@ * * The FreeType property service (specification). 
* - * Copyright (C) 2012-2019 by + * Copyright (C) 2012-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, diff --git a/Source/ThirdParty/freetype/internal/services/svpscmap.h b/Source/ThirdParty/freetype/internal/services/svpscmap.h index dfac3bafa..6e599f3aa 100644 --- a/Source/ThirdParty/freetype/internal/services/svpscmap.h +++ b/Source/ThirdParty/freetype/internal/services/svpscmap.h @@ -4,7 +4,7 @@ * * The FreeType PostScript charmap service (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,7 +19,7 @@ #ifndef SVPSCMAP_H_ #define SVPSCMAP_H_ -#include FT_INTERNAL_OBJECTS_H +#include FT_BEGIN_HEADER @@ -97,7 +97,7 @@ FT_BEGIN_HEADER (*PS_Unicodes_CharIndexFunc)( PS_Unicodes unicodes, FT_UInt32 unicode ); - typedef FT_UInt32 + typedef FT_UInt (*PS_Unicodes_CharNextFunc)( PS_Unicodes unicodes, FT_UInt32 *unicode ); diff --git a/Source/ThirdParty/freetype/internal/services/svpsinfo.h b/Source/ThirdParty/freetype/internal/services/svpsinfo.h index fb4e0e3fa..09c4cdccc 100644 --- a/Source/ThirdParty/freetype/internal/services/svpsinfo.h +++ b/Source/ThirdParty/freetype/internal/services/svpsinfo.h @@ -4,7 +4,7 @@ * * The FreeType PostScript info service (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVPSINFO_H_ #define SVPSINFO_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_INTERNAL_TYPE1_TYPES_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svsfnt.h b/Source/ThirdParty/freetype/internal/services/svsfnt.h index 464aa209f..f98df2ef5 100644 --- a/Source/ThirdParty/freetype/internal/services/svsfnt.h +++ b/Source/ThirdParty/freetype/internal/services/svsfnt.h @@ -4,7 +4,7 @@ * * The FreeType SFNT table loading service (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVSFNT_H_ #define SVSFNT_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_TRUETYPE_TABLES_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svttcmap.h b/Source/ThirdParty/freetype/internal/services/svttcmap.h index 0fcb81371..5f9eb02d6 100644 --- a/Source/ThirdParty/freetype/internal/services/svttcmap.h +++ b/Source/ThirdParty/freetype/internal/services/svttcmap.h @@ -4,7 +4,7 @@ * * The FreeType TrueType/sfnt cmap extra information service. * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * Masatake YAMATO, Redhat K.K., * David Turner, Robert Wilhelm, and Werner Lemberg. * @@ -22,8 +22,8 @@ #ifndef SVTTCMAP_H_ #define SVTTCMAP_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_TRUETYPE_TABLES_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svtteng.h b/Source/ThirdParty/freetype/internal/services/svtteng.h index a852f5c6f..ad577cb29 100644 --- a/Source/ThirdParty/freetype/internal/services/svtteng.h +++ b/Source/ThirdParty/freetype/internal/services/svtteng.h @@ -4,7 +4,7 @@ * * The FreeType TrueType engine query service (specification). 
* - * Copyright (C) 2006-2019 by + * Copyright (C) 2006-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVTTENG_H_ #define SVTTENG_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_MODULE_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svttglyf.h b/Source/ThirdParty/freetype/internal/services/svttglyf.h index c8798771f..ca6fff744 100644 --- a/Source/ThirdParty/freetype/internal/services/svttglyf.h +++ b/Source/ThirdParty/freetype/internal/services/svttglyf.h @@ -4,7 +4,7 @@ * * The FreeType TrueType glyph service. * - * Copyright (C) 2007-2019 by + * Copyright (C) 2007-2023 by * David Turner. * * This file is part of the FreeType project, and may only be used, @@ -18,8 +18,8 @@ #ifndef SVTTGLYF_H_ #define SVTTGLYF_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_TRUETYPE_TABLES_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/services/svwinfnt.h b/Source/ThirdParty/freetype/internal/services/svwinfnt.h index 38ee02096..002923f8c 100644 --- a/Source/ThirdParty/freetype/internal/services/svwinfnt.h +++ b/Source/ThirdParty/freetype/internal/services/svwinfnt.h @@ -4,7 +4,7 @@ * * The FreeType Windows FNT/FONT service (specification). * - * Copyright (C) 2003-2019 by + * Copyright (C) 2003-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -19,8 +19,8 @@ #ifndef SVWINFNT_H_ #define SVWINFNT_H_ -#include FT_INTERNAL_SERVICE_H -#include FT_WINFONTS_H +#include +#include FT_BEGIN_HEADER diff --git a/Source/ThirdParty/freetype/internal/sfnt.h b/Source/ThirdParty/freetype/internal/sfnt.h index 225f40df6..a2d4e15ba 100644 --- a/Source/ThirdParty/freetype/internal/sfnt.h +++ b/Source/ThirdParty/freetype/internal/sfnt.h @@ -4,7 +4,7 @@ * * High-level 'sfnt' driver interface (specification). 
* - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -20,9 +20,9 @@ #define SFNT_H_ -#include -#include FT_INTERNAL_DRIVER_H -#include FT_INTERNAL_TRUETYPE_TYPES_H +#include +#include +#include FT_BEGIN_HEADER @@ -311,6 +311,33 @@ FT_BEGIN_HEADER TT_SBit_MetricsRec *ametrics ); + /************************************************************************** + * + * @functype: + * TT_Load_Svg_Doc_Func + * + * @description: + * Scan the SVG document list to find the document containing the glyph + * that has the ID 'glyph*XXX*', where *XXX* is the value of + * `glyph_index` as a decimal integer. + * + * @inout: + * glyph :: + * The glyph slot from which pointers to the SVG document list is to be + * grabbed. The results are stored back in the slot. + * + * @input: + * glyph_index :: + * The index of the glyph that is to be looked up. + * + * @return: + * FreeType error code. 0 means success. + */ + typedef FT_Error + (*TT_Load_Svg_Doc_Func)( FT_GlyphSlot glyph, + FT_UInt glyph_index ); + + /************************************************************************** * * @functype: @@ -524,6 +551,170 @@ FT_BEGIN_HEADER FT_LayerIterator* iterator ); + /************************************************************************** + * + * @functype: + * TT_Get_Color_Glyph_Paint_Func + * + * @description: + * Find the root @FT_OpaquePaint object for a given glyph ID. + * + * @input: + * face :: + * The target face object. + * + * base_glyph :: + * The glyph index the colored glyph layers are associated with. + * + * @output: + * paint :: + * The root @FT_OpaquePaint object. + * + * @return: + * Value~1 if everything is OK. If no color glyph is found, or the root + * paint could not be retrieved, value~0 gets returned. In case of an + * error, value~0 is returned also. 
+ */ + typedef FT_Bool + ( *TT_Get_Color_Glyph_Paint_Func )( TT_Face face, + FT_UInt base_glyph, + FT_Color_Root_Transform root_transform, + FT_OpaquePaint *paint ); + + + /************************************************************************** + * + * @functype: + * TT_Get_Color_Glyph_ClipBox_Func + * + * @description: + * Search for a 'COLR' v1 clip box for the specified `base_glyph` and + * fill the `clip_box` parameter with the 'COLR' v1 'ClipBox' information + * if one is found. + * + * @input: + * face :: + * A handle to the parent face object. + * + * base_glyph :: + * The glyph index for which to retrieve the clip box. + * + * @output: + * clip_box :: + * The clip box for the requested `base_glyph` if one is found. The + * clip box is computed taking scale and transformations configured on + * the @FT_Face into account. @FT_ClipBox contains @FT_Vector values + * in 26.6 format. + * + * @note: + * To retrieve the clip box in font units, reset scale to units-per-em + * and remove transforms configured using @FT_Set_Transform. + * + * @return: + * Value~1 if a ClipBox is found. If no clip box is found or an + * error occured, value~0 is returned. + */ + typedef FT_Bool + ( *TT_Get_Color_Glyph_ClipBox_Func )( TT_Face face, + FT_UInt base_glyph, + FT_ClipBox* clip_box ); + + + /************************************************************************** + * + * @functype: + * TT_Get_Paint_Layers_Func + * + * @description: + * Access the layers of a `PaintColrLayers` table. + * + * @input: + * face :: + * The target face object. + * + * @inout: + * iterator :: + * The @FT_LayerIterator from an @FT_PaintColrLayers object, for which + * the layers are to be retrieved. The internal state of the iterator + * is incremented after one call to this function for retrieving one + * layer. + * + * @output: + * paint :: + * The root @FT_OpaquePaint object referencing the actual paint table. + * + * @return: + * Value~1 if everything is OK. 
Value~0 gets returned when the paint + * object can not be retrieved or any other error occurs. + */ + typedef FT_Bool + ( *TT_Get_Paint_Layers_Func )( TT_Face face, + FT_LayerIterator* iterator, + FT_OpaquePaint *paint ); + + + /************************************************************************** + * + * @functype: + * TT_Get_Colorline_Stops_Func + * + * @description: + * Get the gradient and solid fill information for a given glyph. + * + * @input: + * face :: + * The target face object. + * + * @inout: + * iterator :: + * An @FT_ColorStopIterator object. For the first call you should set + * `iterator->p` to `NULL`. For all following calls, simply use the + * same object again. + * + * @output: + * color_stop :: + * Color index and alpha value for the retrieved color stop. + * + * @return: + * Value~1 if everything is OK. If there are no more color stops, + * value~0 gets returned. In case of an error, value~0 is returned + * also. + */ + typedef FT_Bool + ( *TT_Get_Colorline_Stops_Func )( TT_Face face, + FT_ColorStop *color_stop, + FT_ColorStopIterator* iterator ); + + + /************************************************************************** + * + * @functype: + * TT_Get_Paint_Func + * + * @description: + * Get the paint details for a given @FT_OpaquePaint object. + * + * @input: + * face :: + * The target face object. + * + * opaque_paint :: + * The @FT_OpaquePaint object. + * + * @output: + * paint :: + * An @FT_COLR_Paint object holding the details on `opaque_paint`. + * + * @return: + * Value~1 if everything is OK. Value~0 if no details can be found for + * this paint or any other error occured. 
+ */ + typedef FT_Bool + ( *TT_Get_Paint_Func )( TT_Face face, + FT_OpaquePaint opaque_paint, + FT_COLR_Paint *paint ); + + /************************************************************************** * * @functype: @@ -709,73 +900,83 @@ FT_BEGIN_HEADER */ typedef struct SFNT_Interface_ { - TT_Loader_GotoTableFunc goto_table; + TT_Loader_GotoTableFunc goto_table; - TT_Init_Face_Func init_face; - TT_Load_Face_Func load_face; - TT_Done_Face_Func done_face; - FT_Module_Requester get_interface; + TT_Init_Face_Func init_face; + TT_Load_Face_Func load_face; + TT_Done_Face_Func done_face; + FT_Module_Requester get_interface; - TT_Load_Any_Func load_any; + TT_Load_Any_Func load_any; /* these functions are called by `load_face' but they can also */ /* be called from external modules, if there is a need to do so */ - TT_Load_Table_Func load_head; - TT_Load_Metrics_Func load_hhea; - TT_Load_Table_Func load_cmap; - TT_Load_Table_Func load_maxp; - TT_Load_Table_Func load_os2; - TT_Load_Table_Func load_post; + TT_Load_Table_Func load_head; + TT_Load_Metrics_Func load_hhea; + TT_Load_Table_Func load_cmap; + TT_Load_Table_Func load_maxp; + TT_Load_Table_Func load_os2; + TT_Load_Table_Func load_post; - TT_Load_Table_Func load_name; - TT_Free_Table_Func free_name; + TT_Load_Table_Func load_name; + TT_Free_Table_Func free_name; /* this field was called `load_kerning' up to version 2.1.10 */ - TT_Load_Table_Func load_kern; + TT_Load_Table_Func load_kern; - TT_Load_Table_Func load_gasp; - TT_Load_Table_Func load_pclt; + TT_Load_Table_Func load_gasp; + TT_Load_Table_Func load_pclt; /* see `ttload.h'; this field was called `load_bitmap_header' up to */ /* version 2.1.10 */ - TT_Load_Table_Func load_bhed; + TT_Load_Table_Func load_bhed; - TT_Load_SBit_Image_Func load_sbit_image; + TT_Load_SBit_Image_Func load_sbit_image; /* see `ttpost.h' */ - TT_Get_PS_Name_Func get_psname; - TT_Free_Table_Func free_psnames; + TT_Get_PS_Name_Func get_psname; + TT_Free_Table_Func free_psnames; /* starting 
here, the structure differs from version 2.1.7 */ /* this field was introduced in version 2.1.8, named `get_psname' */ - TT_Face_GetKerningFunc get_kerning; + TT_Face_GetKerningFunc get_kerning; /* new elements introduced after version 2.1.10 */ /* load the font directory, i.e., the offset table and */ /* the table directory */ - TT_Load_Table_Func load_font_dir; - TT_Load_Metrics_Func load_hmtx; + TT_Load_Table_Func load_font_dir; + TT_Load_Metrics_Func load_hmtx; - TT_Load_Table_Func load_eblc; - TT_Free_Table_Func free_eblc; + TT_Load_Table_Func load_eblc; + TT_Free_Table_Func free_eblc; TT_Set_SBit_Strike_Func set_sbit_strike; TT_Load_Strike_Metrics_Func load_strike_metrics; - TT_Load_Table_Func load_cpal; - TT_Load_Table_Func load_colr; - TT_Free_Table_Func free_cpal; - TT_Free_Table_Func free_colr; - TT_Set_Palette_Func set_palette; - TT_Get_Colr_Layer_Func get_colr_layer; - TT_Blend_Colr_Func colr_blend; + TT_Load_Table_Func load_cpal; + TT_Load_Table_Func load_colr; + TT_Free_Table_Func free_cpal; + TT_Free_Table_Func free_colr; + TT_Set_Palette_Func set_palette; + TT_Get_Colr_Layer_Func get_colr_layer; + TT_Get_Color_Glyph_Paint_Func get_colr_glyph_paint; + TT_Get_Color_Glyph_ClipBox_Func get_color_glyph_clipbox; + TT_Get_Paint_Layers_Func get_paint_layers; + TT_Get_Colorline_Stops_Func get_colorline_stops; + TT_Get_Paint_Func get_paint; + TT_Blend_Colr_Func colr_blend; - TT_Get_Metrics_Func get_metrics; + TT_Get_Metrics_Func get_metrics; - TT_Get_Name_Func get_name; - TT_Get_Name_ID_Func get_name_id; + TT_Get_Name_Func get_name; + TT_Get_Name_ID_Func get_name_id; + + /* OpenType SVG Support */ + TT_Load_Table_Func load_svg; + TT_Free_Table_Func free_svg; + TT_Load_Svg_Doc_Func load_svg_doc; } SFNT_Interface; @@ -820,10 +1021,18 @@ FT_BEGIN_HEADER free_colr_, \ set_palette_, \ get_colr_layer_, \ + get_colr_glyph_paint_, \ + get_color_glyph_clipbox, \ + get_paint_layers_, \ + get_colorline_stops_, \ + get_paint_, \ colr_blend_, \ get_metrics_, \ get_name_, 
\ - get_name_id_ ) \ + get_name_id_, \ + load_svg_, \ + free_svg_, \ + load_svg_doc_ ) \ static const SFNT_Interface class_ = \ { \ goto_table_, \ @@ -860,10 +1069,18 @@ FT_BEGIN_HEADER free_colr_, \ set_palette_, \ get_colr_layer_, \ + get_colr_glyph_paint_, \ + get_color_glyph_clipbox, \ + get_paint_layers_, \ + get_colorline_stops_, \ + get_paint_, \ colr_blend_, \ get_metrics_, \ get_name_, \ - get_name_id_ \ + get_name_id_, \ + load_svg_, \ + free_svg_, \ + load_svg_doc_ \ }; diff --git a/Source/ThirdParty/freetype/internal/svginterface.h b/Source/ThirdParty/freetype/internal/svginterface.h new file mode 100644 index 000000000..f464b2c05 --- /dev/null +++ b/Source/ThirdParty/freetype/internal/svginterface.h @@ -0,0 +1,46 @@ +/**************************************************************************** + * + * svginterface.h + * + * Interface of ot-svg module (specification only). + * + * Copyright (C) 2022-2023 by + * David Turner, Robert Wilhelm, Werner Lemberg, and Moazin Khatti. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. 
+ * + */ + + +#ifndef SVGINTERFACE_H_ +#define SVGINTERFACE_H_ + +#include +#include + + +FT_BEGIN_HEADER + + typedef FT_Error + (*Preset_Bitmap_Func)( FT_Module module, + FT_GlyphSlot slot, + FT_Bool cache ); + + typedef struct SVG_Interface_ + { + Preset_Bitmap_Func preset_slot; + + } SVG_Interface; + + typedef SVG_Interface* SVG_Service; + +FT_END_HEADER + +#endif /* SVGINTERFACE_H_ */ + + +/* END */ diff --git a/Source/ThirdParty/freetype/internal/t1types.h b/Source/ThirdParty/freetype/internal/t1types.h index e197a1afc..b9c94398f 100644 --- a/Source/ThirdParty/freetype/internal/t1types.h +++ b/Source/ThirdParty/freetype/internal/t1types.h @@ -5,7 +5,7 @@ * Basic Type1/Type2 type definitions and interface (specification * only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -21,12 +21,11 @@ #define T1TYPES_H_ -#include -#include FT_TYPE1_TABLES_H -#include FT_INTERNAL_POSTSCRIPT_HINTS_H -#include FT_INTERNAL_SERVICE_H -#include FT_INTERNAL_HASH_H -#include FT_SERVICE_POSTSCRIPT_CMAPS_H +#include +#include +#include +#include +#include FT_BEGIN_HEADER @@ -76,8 +75,8 @@ FT_BEGIN_HEADER FT_Int code_first; FT_Int code_last; - FT_UShort* char_index; - FT_String** char_name; + FT_UShort* char_index; + const FT_String** char_name; } T1_EncodingRec, *T1_Encoding; @@ -173,8 +172,8 @@ FT_BEGIN_HEADER { FT_Bool IsCIDFont; FT_BBox FontBBox; - FT_Fixed Ascender; - FT_Fixed Descender; + FT_Fixed Ascender; /* optional, mind the zero */ + FT_Fixed Descender; /* optional, mind the zero */ AFM_TrackKern TrackKerns; /* free if non-NULL */ FT_UInt NumTrackKern; AFM_KernPair KernPairs; /* free if non-NULL */ @@ -202,30 +201,30 @@ FT_BEGIN_HEADER typedef struct T1_FaceRec_ { - FT_FaceRec root; - T1_FontRec type1; - const void* psnames; - const void* psaux; - const void* afm_data; - FT_CharMapRec charmaprecs[2]; - FT_CharMap charmaps[2]; + 
FT_FaceRec root; + T1_FontRec type1; + const void* psnames; + const void* psaux; + const void* afm_data; + FT_CharMapRec charmaprecs[2]; + FT_CharMap charmaps[2]; /* support for Multiple Masters fonts */ - PS_Blend blend; + PS_Blend blend; /* undocumented, optional: indices of subroutines that express */ /* the NormalizeDesignVector and the ConvertDesignVector procedure, */ /* respectively, as Type 2 charstrings; -1 if keywords not present */ - FT_Int ndv_idx; - FT_Int cdv_idx; + FT_Int ndv_idx; + FT_Int cdv_idx; /* undocumented, optional: has the same meaning as len_buildchar */ /* for Type 2 fonts; manipulated by othersubrs 19, 24, and 25 */ - FT_UInt len_buildchar; - FT_Long* buildchar; + FT_UInt len_buildchar; + FT_Long* buildchar; /* since version 2.1 - interface to PostScript hinter */ - const void* pshinter; + const void* pshinter; } T1_FaceRec; diff --git a/Source/ThirdParty/freetype/internal/tttypes.h b/Source/ThirdParty/freetype/internal/tttypes.h index 5e9f40ec3..b9788c783 100644 --- a/Source/ThirdParty/freetype/internal/tttypes.h +++ b/Source/ThirdParty/freetype/internal/tttypes.h @@ -5,7 +5,7 @@ * Basic SFNT/TrueType type definitions and interface (specification * only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -21,13 +21,12 @@ #define TTTYPES_H_ -#include -#include FT_TRUETYPE_TABLES_H -#include FT_INTERNAL_OBJECTS_H -#include FT_COLOR_H +#include +#include +#include #ifdef TT_CONFIG_OPTION_GX_VAR_SUPPORT -#include FT_MULTIPLE_MASTERS_H +#include #endif @@ -150,81 +149,6 @@ FT_BEGIN_HEADER } TT_TableRec, *TT_Table; - /************************************************************************** - * - * @struct: - * WOFF_HeaderRec - * - * @description: - * WOFF file format header. 
- * - * @fields: - * See - * - * https://www.w3.org/TR/WOFF/#WOFFHeader - */ - typedef struct WOFF_HeaderRec_ - { - FT_ULong signature; - FT_ULong flavor; - FT_ULong length; - FT_UShort num_tables; - FT_UShort reserved; - FT_ULong totalSfntSize; - FT_UShort majorVersion; - FT_UShort minorVersion; - FT_ULong metaOffset; - FT_ULong metaLength; - FT_ULong metaOrigLength; - FT_ULong privOffset; - FT_ULong privLength; - - } WOFF_HeaderRec, *WOFF_Header; - - - /************************************************************************** - * - * @struct: - * WOFF_TableRec - * - * @description: - * This structure describes a given table of a WOFF font. - * - * @fields: - * Tag :: - * A four-bytes tag describing the table. - * - * Offset :: - * The offset of the table from the start of the WOFF font in its - * resource. - * - * CompLength :: - * Compressed table length (in bytes). - * - * OrigLength :: - * Uncompressed table length (in bytes). - * - * CheckSum :: - * The table checksum. This value can be ignored. - * - * OrigOffset :: - * The uncompressed table file offset. This value gets computed while - * constructing the (uncompressed) SFNT header. It is not contained in - * the WOFF file. - */ - typedef struct WOFF_TableRec_ - { - FT_ULong Tag; /* table ID */ - FT_ULong Offset; /* table file offset */ - FT_ULong CompLength; /* compressed table length */ - FT_ULong OrigLength; /* uncompressed table length */ - FT_ULong CheckSum; /* uncompressed checksum */ - - FT_ULong OrigOffset; /* uncompressed table file offset */ - /* (not in the WOFF file) */ - } WOFF_TableRec, *WOFF_Table; - - /************************************************************************** * * @struct: @@ -855,13 +779,15 @@ FT_BEGIN_HEADER /************************************************************************** * * @struct: - * TT_Post_20Rec + * TT_Post_NamesRec * * @description: - * Postscript names sub-table, format 2.0. Stores the PS name of each - * glyph in the font face. 
+ * Postscript names table, either format 2.0 or 2.5. * * @fields: + * loaded :: + * A flag to indicate whether the PS names are loaded. + * * num_glyphs :: * The number of named glyphs in the table. * @@ -874,68 +800,13 @@ FT_BEGIN_HEADER * glyph_names :: * The PS names not in Mac Encoding. */ - typedef struct TT_Post_20Rec_ + typedef struct TT_Post_NamesRec_ { + FT_Bool loaded; FT_UShort num_glyphs; FT_UShort num_names; FT_UShort* glyph_indices; - FT_Char** glyph_names; - - } TT_Post_20Rec, *TT_Post_20; - - - /************************************************************************** - * - * @struct: - * TT_Post_25Rec - * - * @description: - * Postscript names sub-table, format 2.5. Stores the PS name of each - * glyph in the font face. - * - * @fields: - * num_glyphs :: - * The number of glyphs in the table. - * - * offsets :: - * An array of signed offsets in a normal Mac Postscript name encoding. - */ - typedef struct TT_Post_25_ - { - FT_UShort num_glyphs; - FT_Char* offsets; - - } TT_Post_25Rec, *TT_Post_25; - - - /************************************************************************** - * - * @struct: - * TT_Post_NamesRec - * - * @description: - * Postscript names table, either format 2.0 or 2.5. - * - * @fields: - * loaded :: - * A flag to indicate whether the PS names are loaded. - * - * format_20 :: - * The sub-table used for format 2.0. - * - * format_25 :: - * The sub-table used for format 2.5. - */ - typedef struct TT_Post_NamesRec_ - { - FT_Bool loaded; - - union - { - TT_Post_20Rec format_20; - TT_Post_25Rec format_25; - - } names; + FT_Byte** glyph_names; } TT_Post_NamesRec, *TT_Post_Names; @@ -1329,12 +1200,16 @@ FT_BEGIN_HEADER * mm :: * A pointer to the Multiple Masters service. * - * var :: - * A pointer to the Metrics Variations service. + * tt_var :: + * A pointer to the Metrics Variations service for the "truetype" + * driver. * - * hdmx :: - * The face's horizontal device metrics ('hdmx' table). 
This table is - * optional in TrueType/OpenType fonts. + * face_var :: + * A pointer to the Metrics Variations service for this `TT_Face`'s + * driver. + * + * psaux :: + * A pointer to the PostScript Auxiliary service. * * gasp :: * The grid-fitting and scaling properties table ('gasp'). This table @@ -1395,8 +1270,10 @@ FT_BEGIN_HEADER * * cvt :: * The face's original control value table. Coordinates are expressed - * in unscaled font units. Comes from the 'cvt~' table. Ignored for - * Type 2 fonts. + * in unscaled font units (in 26.6 format). Comes from the 'cvt~' + * table. Ignored for Type 2 fonts. + * + * If varied by the `CVAR' table, non-integer values are possible. * * interpreter :: * A pointer to the TrueType bytecode interpreters field is also used @@ -1438,6 +1315,12 @@ FT_BEGIN_HEADER * var_postscript_prefix_len :: * The length of the `var_postscript_prefix` string. * + * var_default_named_instance :: + * The index of the default named instance. + * + * non_var_style_name :: + * The non-variation style name, used as a backup. + * * horz_metrics_size :: * The size of the 'hmtx' table. * @@ -1446,7 +1329,7 @@ FT_BEGIN_HEADER * * num_locations :: * The number of glyph locations in this TrueType file. This should be - * identical to the number of glyphs. Ignored for Type 2 fonts. + * one more than the number of glyphs. Ignored for Type 2 fonts. * * glyph_locations :: * An array of longs. These are offsets to glyph data within the @@ -1464,8 +1347,8 @@ FT_BEGIN_HEADER * hdmx_record_size :: * The size of a single hdmx record. * - * hdmx_record_sizes :: - * An array holding the ppem sizes available in the 'hdmx' table. + * hdmx_records :: + * A array of pointers to the 'hdmx' table records sorted by ppem. * * sbit_table :: * A pointer to the font's embedded bitmap location table. @@ -1484,14 +1367,6 @@ FT_BEGIN_HEADER * A mapping between the strike indices exposed by the API and the * indices used in the font's sbit table. 
* - * cpal :: - * A pointer to data related to the 'CPAL' table. `NULL` if the table - * is not available. - * - * colr :: - * A pointer to data related to the 'COLR' table. `NULL` if the table - * is not available. - * * kern_table :: * A pointer to the 'kern' table. * @@ -1519,19 +1394,23 @@ FT_BEGIN_HEADER * vert_metrics_offset :: * The file offset of the 'vmtx' table. * - * sph_found_func_flags :: - * Flags identifying special bytecode functions (used by the v38 - * implementation of the bytecode interpreter). - * - * sph_compatibility_mode :: - * This flag is set if we are in ClearType backward compatibility mode - * (used by the v38 implementation of the bytecode interpreter). - * * ebdt_start :: * The file offset of the sbit data table (CBDT, bdat, etc.). * * ebdt_size :: * The size of the sbit data table. + * + * cpal :: + * A pointer to data related to the 'CPAL' table. `NULL` if the table + * is not available. + * + * colr :: + * A pointer to data related to the 'COLR' table. `NULL` if the table + * is not available. + * + * svg :: + * A pointer to data related to the 'SVG' table. `NULL` if the table + * is not available. */ typedef struct TT_FaceRec_ { @@ -1582,8 +1461,14 @@ FT_BEGIN_HEADER void* mm; /* a typeless pointer to the FT_Service_MetricsVariationsRec table */ - /* used to handle the HVAR, VVAR, and MVAR OpenType tables */ - void* var; + /* used to handle the HVAR, VVAR, and MVAR OpenType tables by the */ + /* "truetype" driver */ + void* tt_var; + + /* a typeless pointer to the FT_Service_MetricsVariationsRec table */ + /* used to handle the HVAR, VVAR, and MVAR OpenType tables by this */ + /* TT_Face's driver */ + void* face_var; /* since 2.13.1 */ #endif /* a typeless pointer to the PostScript Aux service */ @@ -1633,7 +1518,7 @@ FT_BEGIN_HEADER /* the original, unscaled, control value table */ FT_ULong cvt_size; - FT_Short* cvt; + FT_Int32* cvt; /* A pointer to the bytecode interpreter to use. 
This is also */ /* used to hook the debugger for the `ttdebug' utility. */ @@ -1665,6 +1550,9 @@ FT_BEGIN_HEADER const char* var_postscript_prefix; /* since 2.7.2 */ FT_UInt var_postscript_prefix_len; /* since 2.7.2 */ + FT_UInt var_default_named_instance; /* since 2.13.1 */ + + const char* non_var_style_name; /* since 2.13.1 */ #endif /* since version 2.2 */ @@ -1672,14 +1560,14 @@ FT_BEGIN_HEADER FT_ULong horz_metrics_size; FT_ULong vert_metrics_size; - FT_ULong num_locations; /* in broken TTF, gid > 0xFFFF */ + FT_ULong num_locations; /* up to 0xFFFF + 1 */ FT_Byte* glyph_locations; FT_Byte* hdmx_table; FT_ULong hdmx_table_size; FT_UInt hdmx_record_count; FT_ULong hdmx_record_size; - FT_Byte* hdmx_record_sizes; + FT_Byte** hdmx_records; FT_Byte* sbit_table; FT_ULong sbit_table_size; @@ -1701,13 +1589,6 @@ FT_BEGIN_HEADER FT_ULong horz_metrics_offset; FT_ULong vert_metrics_offset; -#ifdef TT_SUPPORT_SUBPIXEL_HINTING_INFINALITY - /* since 2.4.12 */ - FT_ULong sph_found_func_flags; /* special functions found */ - /* for this face */ - FT_Bool sph_compatibility_mode; -#endif /* TT_SUPPORT_SUBPIXEL_HINTING_INFINALITY */ - #ifdef TT_CONFIG_OPTION_EMBEDDED_BITMAPS /* since 2.7 */ FT_ULong ebdt_start; /* either `CBDT', `EBDT', or `bdat' */ @@ -1718,6 +1599,9 @@ FT_BEGIN_HEADER void* cpal; void* colr; + /* since 2.12 */ + void* svg; + } TT_FaceRec; @@ -1808,7 +1692,7 @@ FT_BEGIN_HEADER FT_UInt glyph_index; FT_Stream stream; - FT_Int byte_len; + FT_UInt byte_len; FT_Short n_contours; FT_BBox bbox; @@ -1843,6 +1727,9 @@ FT_BEGIN_HEADER /* since version 2.6.2 */ FT_ListRec composites; + /* since version 2.11.2 */ + FT_Byte* widthp; + } TT_LoaderRec; diff --git a/Source/ThirdParty/freetype/internal/wofftypes.h b/Source/ThirdParty/freetype/internal/wofftypes.h new file mode 100644 index 000000000..0c1d8eeaf --- /dev/null +++ b/Source/ThirdParty/freetype/internal/wofftypes.h @@ -0,0 +1,312 @@ +/**************************************************************************** + * + 
* wofftypes.h + * + * Basic WOFF/WOFF2 type definitions and interface (specification + * only). + * + * Copyright (C) 1996-2023 by + * David Turner, Robert Wilhelm, and Werner Lemberg. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. + * + */ + + +#ifndef WOFFTYPES_H_ +#define WOFFTYPES_H_ + + +#include +#include + + +FT_BEGIN_HEADER + + + /************************************************************************** + * + * @struct: + * WOFF_HeaderRec + * + * @description: + * WOFF file format header. + * + * @fields: + * See + * + * https://www.w3.org/TR/WOFF/#WOFFHeader + */ + typedef struct WOFF_HeaderRec_ + { + FT_ULong signature; + FT_ULong flavor; + FT_ULong length; + FT_UShort num_tables; + FT_UShort reserved; + FT_ULong totalSfntSize; + FT_UShort majorVersion; + FT_UShort minorVersion; + FT_ULong metaOffset; + FT_ULong metaLength; + FT_ULong metaOrigLength; + FT_ULong privOffset; + FT_ULong privLength; + + } WOFF_HeaderRec, *WOFF_Header; + + + /************************************************************************** + * + * @struct: + * WOFF_TableRec + * + * @description: + * This structure describes a given table of a WOFF font. + * + * @fields: + * Tag :: + * A four-bytes tag describing the table. + * + * Offset :: + * The offset of the table from the start of the WOFF font in its + * resource. + * + * CompLength :: + * Compressed table length (in bytes). + * + * OrigLength :: + * Uncompressed table length (in bytes). + * + * CheckSum :: + * The table checksum. This value can be ignored. + * + * OrigOffset :: + * The uncompressed table file offset. This value gets computed while + * constructing the (uncompressed) SFNT header. It is not contained in + * the WOFF file. 
+ */ + typedef struct WOFF_TableRec_ + { + FT_Tag Tag; /* table ID */ + FT_ULong Offset; /* table file offset */ + FT_ULong CompLength; /* compressed table length */ + FT_ULong OrigLength; /* uncompressed table length */ + FT_ULong CheckSum; /* uncompressed checksum */ + + FT_ULong OrigOffset; /* uncompressed table file offset */ + /* (not in the WOFF file) */ + } WOFF_TableRec, *WOFF_Table; + + + /************************************************************************** + * + * @struct: + * WOFF2_TtcFontRec + * + * @description: + * Metadata for a TTC font entry in WOFF2. + * + * @fields: + * flavor :: + * TTC font flavor. + * + * num_tables :: + * Number of tables in TTC, indicating number of elements in + * `table_indices`. + * + * table_indices :: + * Array of table indices for each TTC font. + */ + typedef struct WOFF2_TtcFontRec_ + { + FT_ULong flavor; + FT_UShort num_tables; + FT_UShort* table_indices; + + } WOFF2_TtcFontRec, *WOFF2_TtcFont; + + + /************************************************************************** + * + * @struct: + * WOFF2_HeaderRec + * + * @description: + * WOFF2 file format header. + * + * @fields: + * See + * + * https://www.w3.org/TR/WOFF2/#woff20Header + * + * @note: + * We don't care about the fields `reserved`, `majorVersion` and + * `minorVersion`, so they are not included. The `totalSfntSize` field + * does not necessarily represent the actual size of the uncompressed + * SFNT font stream, so that is used as a reference value instead. 
+ */ + typedef struct WOFF2_HeaderRec_ + { + FT_ULong signature; + FT_ULong flavor; + FT_ULong length; + FT_UShort num_tables; + FT_ULong totalSfntSize; + FT_ULong totalCompressedSize; + FT_ULong metaOffset; + FT_ULong metaLength; + FT_ULong metaOrigLength; + FT_ULong privOffset; + FT_ULong privLength; + + FT_ULong uncompressed_size; /* uncompressed brotli stream size */ + FT_ULong compressed_offset; /* compressed stream offset */ + FT_ULong header_version; /* version of original TTC Header */ + FT_UShort num_fonts; /* number of fonts in TTC */ + FT_ULong actual_sfnt_size; /* actual size of sfnt stream */ + + WOFF2_TtcFont ttc_fonts; /* metadata for fonts in a TTC */ + + } WOFF2_HeaderRec, *WOFF2_Header; + + + /************************************************************************** + * + * @struct: + * WOFF2_TableRec + * + * @description: + * This structure describes a given table of a WOFF2 font. + * + * @fields: + * See + * + * https://www.w3.org/TR/WOFF2/#table_dir_format + */ + typedef struct WOFF2_TableRec_ + { + FT_Byte FlagByte; /* table type and flags */ + FT_Tag Tag; /* table file offset */ + FT_ULong dst_length; /* uncompressed table length */ + FT_ULong TransformLength; /* transformed length */ + + FT_ULong flags; /* calculated flags */ + FT_ULong src_offset; /* compressed table offset */ + FT_ULong src_length; /* compressed table length */ + FT_ULong dst_offset; /* uncompressed table offset */ + + } WOFF2_TableRec, *WOFF2_Table; + + + /************************************************************************** + * + * @struct: + * WOFF2_InfoRec + * + * @description: + * Metadata for WOFF2 font that may be required for reconstruction of + * sfnt tables. + * + * @fields: + * header_checksum :: + * Checksum of SFNT offset table. + * + * num_glyphs :: + * Number of glyphs in the font. + * + * num_hmetrics :: + * `numberOfHMetrics` field in the 'hhea' table. + * + * x_mins :: + * `xMin` values of glyph bounding box. 
+ * + * glyf_table :: + * A pointer to the `glyf' table record. + * + * loca_table :: + * A pointer to the `loca' table record. + * + * head_table :: + * A pointer to the `head' table record. + */ + typedef struct WOFF2_InfoRec_ + { + FT_ULong header_checksum; + FT_UShort num_glyphs; + FT_UShort num_hmetrics; + FT_Short* x_mins; + + WOFF2_Table glyf_table; + WOFF2_Table loca_table; + WOFF2_Table head_table; + + } WOFF2_InfoRec, *WOFF2_Info; + + + /************************************************************************** + * + * @struct: + * WOFF2_SubstreamRec + * + * @description: + * This structure stores information about a substream in the transformed + * 'glyf' table in a WOFF2 stream. + * + * @fields: + * start :: + * Beginning of the substream relative to uncompressed table stream. + * + * offset :: + * Offset of the substream relative to uncompressed table stream. + * + * size :: + * Size of the substream. + */ + typedef struct WOFF2_SubstreamRec_ + { + FT_ULong start; + FT_ULong offset; + FT_ULong size; + + } WOFF2_SubstreamRec, *WOFF2_Substream; + + + /************************************************************************** + * + * @struct: + * WOFF2_PointRec + * + * @description: + * This structure stores information about a point in the transformed + * 'glyf' table in a WOFF2 stream. + * + * @fields: + * x :: + * x-coordinate of point. + * + * y :: + * y-coordinate of point. + * + * on_curve :: + * Set if point is on-curve. 
+ */ + typedef struct WOFF2_PointRec_ + { + FT_Int x; + FT_Int y; + FT_Bool on_curve; + + } WOFF2_PointRec, *WOFF2_Point; + + +FT_END_HEADER + +#endif /* WOFFTYPES_H_ */ + + +/* END */ diff --git a/Source/ThirdParty/freetype/otsvg.h b/Source/ThirdParty/freetype/otsvg.h new file mode 100644 index 000000000..bfe9a6ab7 --- /dev/null +++ b/Source/ThirdParty/freetype/otsvg.h @@ -0,0 +1,336 @@ +/**************************************************************************** + * + * otsvg.h + * + * Interface for OT-SVG support related things (specification). + * + * Copyright (C) 2022-2023 by + * David Turner, Robert Wilhelm, Werner Lemberg, and Moazin Khatti. + * + * This file is part of the FreeType project, and may only be used, + * modified, and distributed under the terms of the FreeType project + * license, LICENSE.TXT. By continuing to use, modify, or distribute + * this file you indicate that you have read the license and + * understand and accept it fully. + * + */ + + +#ifndef OTSVG_H_ +#define OTSVG_H_ + +#include + +#ifdef FREETYPE_H +#error "freetype.h of FreeType 1 has been loaded!" +#error "Please fix the directory search order for header files" +#error "so that freetype.h of FreeType 2 is found first." +#endif + + +FT_BEGIN_HEADER + + + /************************************************************************** + * + * @section: + * svg_fonts + * + * @title: + * OpenType SVG Fonts + * + * @abstract: + * OT-SVG API between FreeType and an external SVG rendering library. + * + * @description: + * This section describes the four hooks necessary to render SVG + * 'documents' that are contained in an OpenType font's 'SVG~' table. + * + * For more information on the implementation, see our standard hooks + * based on 'librsvg' in the [FreeType Demo + * Programs](https://gitlab.freedesktop.org/freetype/freetype-demos) + * repository. 
+ * + */ + + + /************************************************************************** + * + * @functype: + * SVG_Lib_Init_Func + * + * @description: + * A callback that is called when the first OT-SVG glyph is rendered in + * the lifetime of an @FT_Library object. In a typical implementation, + * one would want to allocate a structure and point the `data_pointer` + * to it and perform any library initializations that might be needed. + * + * @inout: + * data_pointer :: + * The SVG rendering module stores a pointer variable that can be used + * by clients to store any data that needs to be shared across + * different hooks. `data_pointer` is essentially a pointer to that + * pointer such that it can be written to as well as read from. + * + * @return: + * FreeType error code. 0 means success. + * + * @since: + * 2.12 + */ + typedef FT_Error + (*SVG_Lib_Init_Func)( FT_Pointer *data_pointer ); + + + /************************************************************************** + * + * @functype: + * SVG_Lib_Free_Func + * + * @description: + * A callback that is called when the `ot-svg` module is being freed. + * It is only called if the init hook was called earlier. This means + * that neither the init nor the free hook is called if no OT-SVG glyph + * is rendered. + * + * In a typical implementation, one would want to free any state + * structure that was allocated in the init hook and perform any + * library-related closure that might be needed. + * + * @inout: + * data_pointer :: + * The SVG rendering module stores a pointer variable that can be used + * by clients to store any data that needs to be shared across + * different hooks. `data_pointer` is essentially a pointer to that + * pointer such that it can be written to as well as read from. 
+ * + * @since: + * 2.12 + */ + typedef void + (*SVG_Lib_Free_Func)( FT_Pointer *data_pointer ); + + + /************************************************************************** + * + * @functype: + * SVG_Lib_Render_Func + * + * @description: + * A callback that is called to render an OT-SVG glyph. This callback + * hook is called right after the preset hook @SVG_Lib_Preset_Slot_Func + * has been called with `cache` set to `TRUE`. The data necessary to + * render is available through the handle @FT_SVG_Document, which is set + * in the `other` field of @FT_GlyphSlotRec. + * + * The render hook is expected to render the SVG glyph to the bitmap + * buffer that is allocated already at `slot->bitmap.buffer`. It also + * sets the `num_grays` value as well as `slot->format`. + * + * @input: + * slot :: + * The slot to render. + * + * @inout: + * data_pointer :: + * The SVG rendering module stores a pointer variable that can be used + * by clients to store any data that needs to be shared across + * different hooks. `data_pointer` is essentially a pointer to that + * pointer such that it can be written to as well as read from. + * + * @return: + * FreeType error code. 0 means success. + * + * @since: + * 2.12 + */ + typedef FT_Error + (*SVG_Lib_Render_Func)( FT_GlyphSlot slot, + FT_Pointer *data_pointer ); + + + /************************************************************************** + * + * @functype: + * SVG_Lib_Preset_Slot_Func + * + * @description: + * A callback that is called to preset the glyph slot. It is called from + * two places. + * + * 1. When `FT_Load_Glyph` needs to preset the glyph slot. + * + * 2. Right before the `svg` module calls the render callback hook. + * + * When it is the former, the argument `cache` is set to `FALSE`. When + * it is the latter, the argument `cache` is set to `TRUE`. 
This + * distinction has been made because many calculations that are necessary + * for presetting a glyph slot are the same needed later for the render + * callback hook. Thus, if `cache` is `TRUE`, the hook can _cache_ those + * calculations in a memory block referenced by the state pointer. + * + * This hook is expected to preset the slot by setting parameters such as + * `bitmap_left`, `bitmap_top`, `width`, `rows`, `pitch`, and + * `pixel_mode`. It is also expected to set all the metrics for the slot + * including the vertical advance if it is not already set. Typically, + * fonts have horizontal advances but not vertical ones. If those are + * available, they had already been set, otherwise they have to be + * estimated and set manually. The hook must take into account the + * transformations that have been set, and translate the transformation + * matrices into the SVG coordinate system, as the original matrix is + * intended for the TTF/CFF coordinate system. + * + * @input: + * slot :: + * The glyph slot that has the SVG document loaded. + * + * cache :: + * See description. + * + * @inout: + * data_pointer :: + * The SVG rendering module stores a pointer variable that can be used + * by clients to store any data that needs to be shared across + * different hooks. `data_pointer` is essentially a pointer to that + * pointer such that it can be written to as well as read from. + * + * @return: + * FreeType error code. 0 means success. + * + * @since: + * 2.12 + */ + typedef FT_Error + (*SVG_Lib_Preset_Slot_Func)( FT_GlyphSlot slot, + FT_Bool cache, + FT_Pointer *state ); + + + /************************************************************************** + * + * @struct: + * SVG_RendererHooks + * + * @description: + * A structure that stores the four hooks needed to render OT-SVG glyphs + * properly. The structure is publicly used to set the hooks via the + * @svg-hooks driver property. + * + * The behavior of each hook is described in its documentation. 
One + * thing to note is that the preset hook and the render hook often need + * to do the same operations; therefore, it's better to cache the + * intermediate data in a state structure to avoid calculating it twice. + * For example, in the preset hook one can draw the glyph on a recorder + * surface and later create a bitmap surface from it in the render hook. + * + * All four hooks must be non-NULL. + * + * @fields: + * init_svg :: + * The initialization hook. + * + * free_svg :: + * The cleanup hook. + * + * render_hook :: + * The render hook. + * + * preset_slot :: + * The preset hook. + * + * @since: + * 2.12 + */ + typedef struct SVG_RendererHooks_ + { + SVG_Lib_Init_Func init_svg; + SVG_Lib_Free_Func free_svg; + SVG_Lib_Render_Func render_svg; + + SVG_Lib_Preset_Slot_Func preset_slot; + + } SVG_RendererHooks; + + + /************************************************************************** + * + * @struct: + * FT_SVG_DocumentRec + * + * @description: + * A structure that models one SVG document. + * + * @fields: + * svg_document :: + * A pointer to the SVG document. + * + * svg_document_length :: + * The length of `svg_document`. + * + * metrics :: + * A metrics object storing the size information. + * + * units_per_EM :: + * The size of the EM square. + * + * start_glyph_id :: + * The first glyph ID in the glyph range covered by this document. + * + * end_glyph_id :: + * The last glyph ID in the glyph range covered by this document. + * + * transform :: + * A 2x2 transformation matrix to apply to the glyph while rendering + * it. + * + * delta :: + * The translation to apply to the glyph while rendering. + * + * @note: + * When an @FT_GlyphSlot object `slot` is passed down to a renderer, the + * renderer can only access the `metrics` and `units_per_EM` fields via + * `slot->face`. However, when @FT_Glyph_To_Bitmap sets up a dummy + * object, it has no way to set a `face` object. 
Thus, metrics + * information and `units_per_EM` (which is necessary for OT-SVG) has to + * be stored separately. + * + * @since: + * 2.12 + */ + typedef struct FT_SVG_DocumentRec_ + { + FT_Byte* svg_document; + FT_ULong svg_document_length; + + FT_Size_Metrics metrics; + FT_UShort units_per_EM; + + FT_UShort start_glyph_id; + FT_UShort end_glyph_id; + + FT_Matrix transform; + FT_Vector delta; + + } FT_SVG_DocumentRec; + + + /************************************************************************** + * + * @type: + * FT_SVG_Document + * + * @description: + * A handle to an @FT_SVG_DocumentRec object. + * + * @since: + * 2.12 + */ + typedef struct FT_SVG_DocumentRec_* FT_SVG_Document; + + +FT_END_HEADER + +#endif /* OTSVG_H_ */ + + +/* END */ diff --git a/Source/ThirdParty/freetype/t1tables.h b/Source/ThirdParty/freetype/t1tables.h index 645e64572..1aecfbbd9 100644 --- a/Source/ThirdParty/freetype/t1tables.h +++ b/Source/ThirdParty/freetype/t1tables.h @@ -5,7 +5,7 @@ * Basic Type 1/Type 2 tables definitions and interface (specification * only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -21,8 +21,7 @@ #define T1TABLES_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" 
@@ -361,7 +360,7 @@ FT_BEGIN_HEADER FT_UInt num_subrs; FT_ULong subrmap_offset; - FT_Int sd_bytes; + FT_UInt sd_bytes; } CID_FaceDictRec; @@ -416,11 +415,11 @@ FT_BEGIN_HEADER FT_ULong xuid[16]; FT_ULong cidmap_offset; - FT_Int fd_bytes; - FT_Int gd_bytes; + FT_UInt fd_bytes; + FT_UInt gd_bytes; FT_ULong cid_count; - FT_Int num_dicts; + FT_UInt num_dicts; CID_FaceDict font_dicts; FT_ULong data_offset; @@ -454,22 +453,22 @@ FT_BEGIN_HEADER /************************************************************************** * * @function: - * FT_Has_PS_Glyph_Names + * FT_Has_PS_Glyph_Names * * @description: - * Return true if a given face provides reliable PostScript glyph names. - * This is similar to using the @FT_HAS_GLYPH_NAMES macro, except that - * certain fonts (mostly TrueType) contain incorrect glyph name tables. + * Return true if a given face provides reliable PostScript glyph names. + * This is similar to using the @FT_HAS_GLYPH_NAMES macro, except that + * certain fonts (mostly TrueType) contain incorrect glyph name tables. * - * When this function returns true, the caller is sure that the glyph - * names returned by @FT_Get_Glyph_Name are reliable. + * When this function returns true, the caller is sure that the glyph + * names returned by @FT_Get_Glyph_Name are reliable. * * @input: - * face :: - * face handle + * face :: + * face handle * * @return: - * Boolean. True if glyph names are reliable. + * Boolean. True if glyph names are reliable. * */ FT_EXPORT( FT_Int ) @@ -479,30 +478,40 @@ FT_BEGIN_HEADER /************************************************************************** * * @function: - * FT_Get_PS_Font_Info + * FT_Get_PS_Font_Info * * @description: - * Retrieve the @PS_FontInfoRec structure corresponding to a given - * PostScript font. + * Retrieve the @PS_FontInfoRec structure corresponding to a given + * PostScript font. * * @input: - * face :: - * PostScript face handle. + * face :: + * PostScript face handle. 
* * @output: - * afont_info :: - * Output font info structure pointer. + * afont_info :: + * A pointer to a @PS_FontInfoRec object. * * @return: - * FreeType error code. 0~means success. + * FreeType error code. 0~means success. * * @note: - * String pointers within the @PS_FontInfoRec structure are owned by the - * face and don't need to be freed by the caller. Missing entries in - * the font's FontInfo dictionary are represented by `NULL` pointers. + * String pointers within the @PS_FontInfoRec structure are owned by the + * face and don't need to be freed by the caller. Missing entries in the + * font's FontInfo dictionary are represented by `NULL` pointers. * - * If the font's format is not PostScript-based, this function will - * return the `FT_Err_Invalid_Argument` error code. + * The following font formats support this feature: 'Type~1', 'Type~42', + * 'CFF', 'CID~Type~1'. For other font formats this function returns the + * `FT_Err_Invalid_Argument` error code. + * + * @example: + * ``` + * PS_FontInfoRec font_info; + * + * + * error = FT_Get_PS_Font_Info( face, &font_info ); + * ... + * ``` * */ FT_EXPORT( FT_Error ) @@ -513,29 +522,39 @@ FT_BEGIN_HEADER /************************************************************************** * * @function: - * FT_Get_PS_Font_Private + * FT_Get_PS_Font_Private * * @description: - * Retrieve the @PS_PrivateRec structure corresponding to a given - * PostScript font. + * Retrieve the @PS_PrivateRec structure corresponding to a given + * PostScript font. * * @input: - * face :: - * PostScript face handle. + * face :: + * PostScript face handle. * * @output: - * afont_private :: - * Output private dictionary structure pointer. + * afont_private :: + * A pointer to a @PS_PrivateRec object. * * @return: - * FreeType error code. 0~means success. + * FreeType error code. 0~means success. * * @note: - * The string pointers within the @PS_PrivateRec structure are owned by - * the face and don't need to be freed by the caller. 
+ * The string pointers within the @PS_PrivateRec structure are owned by + * the face and don't need to be freed by the caller. * - * If the font's format is not PostScript-based, this function returns - * the `FT_Err_Invalid_Argument` error code. + * Only the 'Type~1' font format supports this feature. For other font + * formats this function returns the `FT_Err_Invalid_Argument` error + * code. + * + * @example: + * ``` + * PS_PrivateRec font_private; + * + * + * error = FT_Get_PS_Font_Private( face, &font_private ); + * ... + * ``` * */ FT_EXPORT( FT_Error ) @@ -694,67 +713,67 @@ FT_BEGIN_HEADER /************************************************************************** * * @function: - * FT_Get_PS_Font_Value + * FT_Get_PS_Font_Value * * @description: - * Retrieve the value for the supplied key from a PostScript font. + * Retrieve the value for the supplied key from a PostScript font. * * @input: - * face :: - * PostScript face handle. + * face :: + * PostScript face handle. * - * key :: - * An enumeration value representing the dictionary key to retrieve. + * key :: + * An enumeration value representing the dictionary key to retrieve. * - * idx :: - * For array values, this specifies the index to be returned. + * idx :: + * For array values, this specifies the index to be returned. * - * value :: - * A pointer to memory into which to write the value. + * value :: + * A pointer to memory into which to write the value. * - * valen_len :: - * The size, in bytes, of the memory supplied for the value. + * valen_len :: + * The size, in bytes, of the memory supplied for the value. * * @output: - * value :: - * The value matching the above key, if it exists. + * value :: + * The value matching the above key, if it exists. * * @return: - * The amount of memory (in bytes) required to hold the requested value - * (if it exists, -1 otherwise). + * The amount of memory (in bytes) required to hold the requested value + * (if it exists, -1 otherwise). 
* * @note: - * The values returned are not pointers into the internal structures of - * the face, but are 'fresh' copies, so that the memory containing them - * belongs to the calling application. This also enforces the - * 'read-only' nature of these values, i.e., this function cannot be - * used to manipulate the face. + * The values returned are not pointers into the internal structures of + * the face, but are 'fresh' copies, so that the memory containing them + * belongs to the calling application. This also enforces the + * 'read-only' nature of these values, i.e., this function cannot be + * used to manipulate the face. * - * `value` is a void pointer because the values returned can be of - * various types. + * `value` is a void pointer because the values returned can be of + * various types. * - * If either `value` is `NULL` or `value_len` is too small, just the - * required memory size for the requested entry is returned. + * If either `value` is `NULL` or `value_len` is too small, just the + * required memory size for the requested entry is returned. * - * The `idx` parameter is used, not only to retrieve elements of, for - * example, the FontMatrix or FontBBox, but also to retrieve name keys - * from the CharStrings dictionary, and the charstrings themselves. It - * is ignored for atomic values. + * The `idx` parameter is used, not only to retrieve elements of, for + * example, the FontMatrix or FontBBox, but also to retrieve name keys + * from the CharStrings dictionary, and the charstrings themselves. It + * is ignored for atomic values. * - * `PS_DICT_BLUE_SCALE` returns a value that is scaled up by 1000. To - * get the value as in the font stream, you need to divide by 65536000.0 - * (to remove the FT_Fixed scale, and the x1000 scale). + * `PS_DICT_BLUE_SCALE` returns a value that is scaled up by 1000. To + * get the value as in the font stream, you need to divide by 65536000.0 + * (to remove the FT_Fixed scale, and the x1000 scale). 
* - * IMPORTANT: Only key/value pairs read by the FreeType interpreter can - * be retrieved. So, for example, PostScript procedures such as NP, ND, - * and RD are not available. Arbitrary keys are, obviously, not be - * available either. + * IMPORTANT: Only key/value pairs read by the FreeType interpreter can + * be retrieved. So, for example, PostScript procedures such as NP, ND, + * and RD are not available. Arbitrary keys are, obviously, not be + * available either. * - * If the font's format is not PostScript-based, this function returns - * the `FT_Err_Invalid_Argument` error code. + * If the font's format is not PostScript-based, this function returns + * the `FT_Err_Invalid_Argument` error code. * * @since: - * 2.4.8 + * 2.4.8 * */ FT_EXPORT( FT_Long ) diff --git a/Source/ThirdParty/freetype/ttnameid.h b/Source/ThirdParty/freetype/ttnameid.h index cc677de75..e31c68b9b 100644 --- a/Source/ThirdParty/freetype/ttnameid.h +++ b/Source/ThirdParty/freetype/ttnameid.h @@ -4,7 +4,7 @@ * * TrueType name ID definitions (specification only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -20,7 +20,6 @@ #define TTNAMEID_H_ -#include FT_BEGIN_HEADER @@ -592,7 +591,7 @@ FT_BEGIN_HEADER #define TT_MS_LANGID_MALAY_MALAYSIA 0x043E #define TT_MS_LANGID_MALAY_BRUNEI_DARUSSALAM 0x083E #define TT_MS_LANGID_KAZAKH_KAZAKHSTAN 0x043F -#define TT_MS_LANGID_KYRGYZ_KYRGYZSTAN /* Cyrillic*/ 0x0440 +#define TT_MS_LANGID_KYRGYZ_KYRGYZSTAN /* Cyrillic */ 0x0440 #define TT_MS_LANGID_KISWAHILI_KENYA 0x0441 #define TT_MS_LANGID_TURKMEN_TURKMENISTAN 0x0442 #define TT_MS_LANGID_UZBEK_UZBEKISTAN_LATIN 0x0443 diff --git a/Source/ThirdParty/freetype/tttables.h b/Source/ThirdParty/freetype/tttables.h index d04f81021..a9f60e762 100644 --- a/Source/ThirdParty/freetype/tttables.h +++ b/Source/ThirdParty/freetype/tttables.h @@ -5,7 +5,7 @@ * Basic SFNT/TrueType tables definitions and interface * (specification only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. * * This file is part of the FreeType project, and may only be used, @@ -21,8 +21,7 @@ #define TTTABLES_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -425,8 +424,8 @@ FT_BEGIN_HEADER /* only version 5 and higher: */ - FT_UShort usLowerOpticalPointSize; /* in twips (1/20th points) */ - FT_UShort usUpperOpticalPointSize; /* in twips (1/20th points) */ + FT_UShort usLowerOpticalPointSize; /* in twips (1/20 points) */ + FT_UShort usUpperOpticalPointSize; /* in twips (1/20 points) */ } TT_OS2; diff --git a/Source/ThirdParty/freetype/tttags.h b/Source/ThirdParty/freetype/tttags.h index bd0986eff..9bf4fca23 100644 --- a/Source/ThirdParty/freetype/tttags.h +++ b/Source/ThirdParty/freetype/tttags.h @@ -4,7 +4,7 @@ * * Tags for TrueType and OpenType tables (specification only). * - * Copyright (C) 1996-2019 by + * Copyright (C) 1996-2023 by * David Turner, Robert Wilhelm, and Werner Lemberg. 
* * This file is part of the FreeType project, and may only be used, @@ -20,8 +20,7 @@ #define TTAGS_H_ -#include -#include FT_FREETYPE_H +#include #ifdef FREETYPE_H #error "freetype.h of FreeType 1 has been loaded!" @@ -96,6 +95,7 @@ FT_BEGIN_HEADER #define TTAG_sbix FT_MAKE_TAG( 's', 'b', 'i', 'x' ) #define TTAG_sfnt FT_MAKE_TAG( 's', 'f', 'n', 't' ) #define TTAG_SING FT_MAKE_TAG( 'S', 'I', 'N', 'G' ) +#define TTAG_SVG FT_MAKE_TAG( 'S', 'V', 'G', ' ' ) #define TTAG_trak FT_MAKE_TAG( 't', 'r', 'a', 'k' ) #define TTAG_true FT_MAKE_TAG( 't', 'r', 'u', 'e' ) #define TTAG_ttc FT_MAKE_TAG( 't', 't', 'c', ' ' ) @@ -107,6 +107,7 @@ FT_BEGIN_HEADER #define TTAG_vmtx FT_MAKE_TAG( 'v', 'm', 't', 'x' ) #define TTAG_VVAR FT_MAKE_TAG( 'V', 'V', 'A', 'R' ) #define TTAG_wOFF FT_MAKE_TAG( 'w', 'O', 'F', 'F' ) +#define TTAG_wOF2 FT_MAKE_TAG( 'w', 'O', 'F', '2' ) /* used by "Keyboard.dfont" on legacy Mac OS X */ #define TTAG_0xA5kbd FT_MAKE_TAG( 0xA5, 'k', 'b', 'd' ) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs index c2a69cc48..4d0d0857e 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs @@ -70,7 +70,7 @@ namespace Flax.Deps.Dependencies // Get the source if (!File.Exists(packagePath)) - Downloader.DownloadFileFromUrlToPath("https://sourceforge.net/projects/freetype/files/freetype2/2.10.0/ft2100.zip/download", packagePath); + Downloader.DownloadFileFromUrlToPath("https://sourceforge.net/projects/freetype/files/freetype2/2.13.2/ft2132.zip/download", packagePath); using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) { var newRoot = Path.Combine(root, archive.Entries.First().FullName); @@ -99,15 +99,22 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: { - // Fix the MSVC project settings for Windows - PatchWindowsTargetPlatformVersion(vcxprojPath, vcxprojContents, "8.1", "140"); - - // Build 
for Win64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var filename in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "objs", "x64", configurationMsvc, filename), Path.Combine(depsFolder, filename)); + // Patch the RuntimeLibrary value + File.WriteAllText(vcxprojPath, vcxprojContents); + // Build for Windows + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + { + Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, architecture.ToString(), + new Dictionary() { + { "WindowsTargetPlatformVersion", "10.0" }, + { "PlatformToolset", "v143" }, + //{ "RuntimeLibrary", "MultiThreadedDLL" } + }); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var filename in binariesToCopyMsvc) + Utilities.FileCopy(Path.Combine(root, "objs", architecture.ToString(), configurationMsvc, filename), Path.Combine(depsFolder, filename)); + } break; } case TargetPlatform.UWP: @@ -286,18 +293,25 @@ namespace Flax.Deps.Dependencies Utilities.FileCopy(src, dst); } - // Setup headers directory - SetupDirectory(dstIncludePath, true); - - // Deploy header files and restore files - Utilities.DirectoryCopy(srcIncludePath, dstIncludePath, true, true); - Utilities.FileCopy(Path.Combine(root, "include", "ft2build.h"), Path.Combine(dstIncludePath, "ft2build.h")); - Utilities.FileCopy(Path.Combine(root, "docs", "LICENSE.TXT"), Path.Combine(dstIncludePath, "LICENSE.TXT")); - foreach (var filename in filesToKeep) + try { - var src = Path.Combine(options.IntermediateFolder, filename + ".tmp"); - var dst = Path.Combine(dstIncludePath, filename); - Utilities.FileCopy(src, dst); + // Setup headers directory + SetupDirectory(dstIncludePath, true); + + // Deploy header files and restore files + Utilities.DirectoryCopy(srcIncludePath, dstIncludePath, true, true); + 
Utilities.FileCopy(Path.Combine(root, "include", "ft2build.h"), Path.Combine(dstIncludePath, "ft2build.h")); + Utilities.FileCopy(Path.Combine(root, "LICENSE.TXT"), Path.Combine(dstIncludePath, "LICENSE.TXT")); + Utilities.FileCopy(Path.Combine(root, "docs", "FTL.TXT"), Path.Combine(dstIncludePath, "FTL.TXT")); + } + finally + { + foreach (var filename in filesToKeep) + { + var src = Path.Combine(options.IntermediateFolder, filename + ".tmp"); + var dst = Path.Combine(dstIncludePath, filename); + Utilities.FileCopy(src, dst); + } } } } From 8ab462f72c86d16db0a6f99f565b3f658f533781 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:22:36 +0300 Subject: [PATCH 113/292] Update curl to 7.88.1 --- Source/ThirdParty/curl/curl License.txt | 2 +- Source/ThirdParty/curl/curl.h | 1288 +++++++++++------ Source/ThirdParty/curl/curlver.h | 26 +- Source/ThirdParty/curl/easy.h | 23 +- Source/ThirdParty/curl/header.h | 74 + Source/ThirdParty/curl/mprintf.h | 14 +- Source/ThirdParty/curl/multi.h | 111 +- Source/ThirdParty/curl/options.h | 70 + Source/ThirdParty/curl/stdcheaders.h | 12 +- Source/ThirdParty/curl/system.h | 117 +- Source/ThirdParty/curl/typecheck-gcc.h | 614 ++++---- Source/ThirdParty/curl/urlapi.h | 41 +- Source/ThirdParty/curl/websockets.h | 84 ++ .../Flax.Build/Deps/Dependencies/curl.cs | 29 +- 14 files changed, 1587 insertions(+), 918 deletions(-) create mode 100644 Source/ThirdParty/curl/header.h create mode 100644 Source/ThirdParty/curl/options.h create mode 100644 Source/ThirdParty/curl/websockets.h diff --git a/Source/ThirdParty/curl/curl License.txt b/Source/ThirdParty/curl/curl License.txt index 3528bd756..d1eab3eb9 100644 --- a/Source/ThirdParty/curl/curl License.txt +++ b/Source/ThirdParty/curl/curl License.txt @@ -1,6 +1,6 @@ COPYRIGHT AND PERMISSION NOTICE -Copyright (c) 1996 - 2019, Daniel Stenberg, , and many +Copyright (c) 1996 - 2023, Daniel Stenberg, , and many contributors, see the THANKS file. All rights reserved. 
diff --git a/Source/ThirdParty/curl/curl.h b/Source/ThirdParty/curl/curl.h index 86a24184a..8cc0b6ffe 100644 --- a/Source/ThirdParty/curl/curl.h +++ b/Source/ThirdParty/curl/curl.h @@ -1,5 +1,5 @@ -#ifndef __CURL_CURL_H -#define __CURL_CURL_H +#ifndef CURLINC_CURL_H +#define CURLINC_CURL_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2019, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,37 +20,52 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ /* * If you have libcurl problems, all docs and details are found here: - * https://curl.haxx.se/libcurl/ - * - * curl-library mailing list subscription and unsubscription web interface: - * https://cool.haxx.se/mailman/listinfo/curl-library/ + * https://curl.se/libcurl/ */ #ifdef CURL_NO_OLDIES #define CURL_STRICTER #endif +/* Compile-time deprecation macros. */ +#if defined(__GNUC__) && (__GNUC__ >= 6) && \ + !defined(__INTEL_COMPILER) && \ + !defined(CURL_DISABLE_DEPRECATION) && !defined(BUILDING_LIBCURL) +#define CURL_DEPRECATED(version, message) \ + __attribute__((deprecated("since " # version ". 
" message))) +#define CURL_IGNORE_DEPRECATION(statements) \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") \ + statements \ + _Pragma("GCC diagnostic pop") +#else +#define CURL_DEPRECATED(version, message) +#define CURL_IGNORE_DEPRECATION(statements) statements +#endif + #include "curlver.h" /* libcurl version defines */ #include "system.h" /* determine things run-time */ /* - * Define WIN32 when build target is Win32 API + * Define CURL_WIN32 when build target is Win32 API */ -#if (defined(_WIN32) || defined(__WIN32__)) && \ - !defined(WIN32) && !defined(__SYMBIAN32__) -#define WIN32 +#if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) && \ + !defined(__SYMBIAN32__) +#define CURL_WIN32 #endif #include #include -#if defined(__FreeBSD__) && (__FreeBSD__ >= 2) -/* Needed for __FreeBSD_version symbol definition */ +#if (defined(__FreeBSD__) && (__FreeBSD__ >= 2)) || defined(__MidnightBSD__) +/* Needed for __FreeBSD_version or __MidnightBSD_version symbol definition */ #include #endif @@ -58,7 +73,7 @@ #include #include -#if defined(WIN32) && !defined(_WIN32_WCE) && !defined(__CYGWIN__) +#if defined(CURL_WIN32) && !defined(_WIN32_WCE) && !defined(__CYGWIN__) #if !(defined(_WINSOCKAPI_) || defined(_WINSOCK_H) || \ defined(__LWIP_OPT_H__) || defined(LWIP_HDR_OPT_H)) /* The check above prevents the winsock2 inclusion if winsock.h already was @@ -74,23 +89,21 @@ #if defined(_AIX) || defined(__NOVELL_LIBC__) || defined(__NetBSD__) || \ defined(__minix) || defined(__SYMBIAN32__) || defined(__INTEGRITY) || \ defined(ANDROID) || defined(__ANDROID__) || defined(__OpenBSD__) || \ - defined(__CYGWIN__) || \ - (defined(__FreeBSD_version) && (__FreeBSD_version < 800000)) + defined(__CYGWIN__) || defined(AMIGA) || defined(__NuttX__) || \ + (defined(__FreeBSD_version) && (__FreeBSD_version < 800000)) || \ + (defined(__MidnightBSD_version) && (__MidnightBSD_version < 100000)) || \ + defined(__sun__) || 
defined(__serenity__) #include #endif -#if !defined(WIN32) && !defined(_WIN32_WCE) +#if !defined(CURL_WIN32) && !defined(_WIN32_WCE) #include #endif -#if !defined(WIN32) && !defined(__WATCOMC__) && !defined(__VXWORKS__) +#if !defined(CURL_WIN32) #include #endif -#ifdef __BEOS__ -#include -#endif - /* Compatibility for non-Clang compilers */ #ifndef __has_declspec_attribute # define __has_declspec_attribute(x) 0 @@ -114,7 +127,7 @@ typedef void CURLSH; #ifdef CURL_STATICLIB # define CURL_EXTERN -#elif defined(WIN32) || defined(_WIN32) || defined(__SYMBIAN32__) || \ +#elif defined(CURL_WIN32) || defined(__SYMBIAN32__) || \ (__has_declspec_attribute(dllexport) && \ __has_declspec_attribute(dllimport)) # if defined(BUILDING_LIBCURL) @@ -130,7 +143,7 @@ typedef void CURLSH; #ifndef curl_socket_typedef /* socket typedef */ -#if defined(WIN32) && !defined(__LWIP_OPT_H__) && !defined(LWIP_HDR_OPT_H) +#if defined(CURL_WIN32) && !defined(__LWIP_OPT_H__) && !defined(LWIP_HDR_OPT_H) typedef SOCKET curl_socket_t; #define CURL_SOCKET_BAD INVALID_SOCKET #else @@ -148,13 +161,15 @@ typedef enum { CURLSSLBACKEND_NSS = 3, CURLSSLBACKEND_OBSOLETE4 = 4, /* Was QSOSSL. */ CURLSSLBACKEND_GSKIT = 5, - CURLSSLBACKEND_POLARSSL = 6, + CURLSSLBACKEND_POLARSSL CURL_DEPRECATED(7.69.0, "") = 6, CURLSSLBACKEND_WOLFSSL = 7, CURLSSLBACKEND_SCHANNEL = 8, CURLSSLBACKEND_SECURETRANSPORT = 9, - CURLSSLBACKEND_AXTLS = 10, /* never used since 7.63.0 */ + CURLSSLBACKEND_AXTLS CURL_DEPRECATED(7.61.0, "") = 10, CURLSSLBACKEND_MBEDTLS = 11, - CURLSSLBACKEND_MESALINK = 12 + CURLSSLBACKEND_MESALINK CURL_DEPRECATED(7.82.0, "") = 12, + CURLSSLBACKEND_BEARSSL = 13, + CURLSSLBACKEND_RUSTLS = 14 } curl_sslbackend; /* aliases for library clones and renames */ @@ -209,16 +224,22 @@ struct curl_httppost { set. Added in 7.46.0 */ }; -/* This is the CURLOPT_PROGRESSFUNCTION callback proto. 
It is now considered - deprecated but was the only choice up until 7.31.0 */ + +/* This is a return code for the progress callback that, when returned, will + signal libcurl to continue executing the default progress function */ +#define CURL_PROGRESSFUNC_CONTINUE 0x10000001 + +/* This is the CURLOPT_PROGRESSFUNCTION callback prototype. It is now + considered deprecated but was the only choice up until 7.31.0 */ typedef int (*curl_progress_callback)(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow); -/* This is the CURLOPT_XFERINFOFUNCTION callback proto. It was introduced in - 7.32.0, it avoids floating point and provides more detailed information. */ +/* This is the CURLOPT_XFERINFOFUNCTION callback prototype. It was introduced + in 7.32.0, avoids the use of floating point numbers and provides more + detailed information. */ typedef int (*curl_xferinfo_callback)(void *clientp, curl_off_t dltotal, curl_off_t dlnow, @@ -227,7 +248,7 @@ typedef int (*curl_xferinfo_callback)(void *clientp, #ifndef CURL_MAX_READ_SIZE /* The maximum receive buffer size configurable via CURLOPT_BUFFERSIZE. */ -#define CURL_MAX_READ_SIZE 524288 +#define CURL_MAX_READ_SIZE (10*1024*1024) #endif #ifndef CURL_MAX_WRITE_SIZE @@ -251,6 +272,10 @@ typedef int (*curl_xferinfo_callback)(void *clientp, will signal libcurl to pause receiving on the current transfer. */ #define CURL_WRITEFUNC_PAUSE 0x10000001 +/* This is a magic return code for the write callback that, when returned, + will signal an error from the callback. */ +#define CURL_WRITEFUNC_ERROR 0xFFFFFFFF + typedef size_t (*curl_write_callback)(char *buffer, size_t size, size_t nitems, @@ -283,14 +308,11 @@ typedef enum { #define CURLFINFOFLAG_KNOWN_SIZE (1<<6) #define CURLFINFOFLAG_KNOWN_HLINKCOUNT (1<<7) -/* Content of this structure depends on information which is known and is - achievable (e.g. by FTP LIST parsing). 
Please see the url_easy_setopt(3) man - page for callbacks returning this structure -- some fields are mandatory, - some others are optional. The FLAG field has special meaning. */ +/* Information about a single file, used when doing FTP wildcard matching */ struct curl_fileinfo { char *filename; curlfiletype filetype; - time_t time; + time_t time; /* always zero! */ unsigned int perm; int uid; int gid; @@ -366,7 +388,7 @@ typedef int (*curl_seek_callback)(void *instream, #define CURL_READFUNC_PAUSE 0x10000001 /* Return code for when the trailing headers' callback has terminated - without any errors*/ + without any errors */ #define CURL_TRAILERFUNC_OK 0 /* Return code for when was an error in the trailing header's list and we want to abort the request */ @@ -448,7 +470,7 @@ typedef void *(*curl_calloc_callback)(size_t nmemb, size_t size); #define CURL_DID_MEMORY_FUNC_TYPEDEFS #endif -/* the kind of data that is passed to information_callback*/ +/* the kind of data that is passed to information_callback */ typedef enum { CURLINFO_TEXT = 0, CURLINFO_HEADER_IN, /* 1 */ @@ -467,6 +489,20 @@ typedef int (*curl_debug_callback) size_t size, /* size of the data pointed to */ void *userptr); /* whatever the user please */ +/* This is the CURLOPT_PREREQFUNCTION callback prototype. */ +typedef int (*curl_prereq_callback)(void *clientp, + char *conn_primary_ip, + char *conn_local_ip, + int conn_primary_port, + int conn_local_port); + +/* Return code for when the pre-request callback has terminated without + any errors */ +#define CURL_PREREQFUNC_OK 0 +/* Return code for when the pre-request callback wants to abort the + request */ +#define CURL_PREREQFUNC_ABORT 1 + /* All possible error codes from all sorts of curl functions. Future versions may return other values, stay prepared. 
@@ -511,10 +547,6 @@ typedef enum { CURLE_UPLOAD_FAILED, /* 25 - failed upload "command" */ CURLE_READ_ERROR, /* 26 - couldn't open/read from file */ CURLE_OUT_OF_MEMORY, /* 27 */ - /* Note: CURLE_OUT_OF_MEMORY may sometimes indicate a conversion error - instead of a memory allocation error if CURL_DOES_CONVERSIONS - is defined - */ CURLE_OPERATION_TIMEDOUT, /* 28 - the timeout time was reached */ CURLE_OBSOLETE29, /* 29 - NOT USED */ CURLE_FTP_PORT_FAILED, /* 30 - FTP PORT operation failed */ @@ -536,7 +568,7 @@ typedef enum { CURLE_OBSOLETE46, /* 46 - NOT USED */ CURLE_TOO_MANY_REDIRECTS, /* 47 - catch endless re-direct loops */ CURLE_UNKNOWN_OPTION, /* 48 - User specified an unknown option */ - CURLE_TELNET_OPTION_SYNTAX, /* 49 - Malformed telnet option */ + CURLE_SETOPT_OPTION_SYNTAX, /* 49 - Malformed setopt option */ CURLE_OBSOLETE50, /* 50 - NOT USED */ CURLE_OBSOLETE51, /* 51 - NOT USED */ CURLE_GOT_NOTHING, /* 52 - when this is a specific error */ @@ -551,7 +583,7 @@ typedef enum { CURLE_PEER_FAILED_VERIFICATION, /* 60 - peer's certificate or fingerprint wasn't verified fine */ CURLE_BAD_CONTENT_ENCODING, /* 61 - Unrecognized/bad encoding */ - CURLE_LDAP_INVALID_URL, /* 62 - Invalid LDAP URL */ + CURLE_OBSOLETE62, /* 62 - NOT IN USE since 7.82.0 */ CURLE_FILESIZE_EXCEEDED, /* 63 - Maximum file size exceeded */ CURLE_USE_SSL_FAILED, /* 64 - Requested FTP SSL level failed */ CURLE_SEND_FAIL_REWIND, /* 65 - Sending the data requires a rewind @@ -566,12 +598,8 @@ typedef enum { CURLE_TFTP_UNKNOWNID, /* 72 - Unknown transfer ID */ CURLE_REMOTE_FILE_EXISTS, /* 73 - File already exists */ CURLE_TFTP_NOSUCHUSER, /* 74 - No such user */ - CURLE_CONV_FAILED, /* 75 - conversion failed */ - CURLE_CONV_REQD, /* 76 - caller must register conversion - callbacks using curl_easy_setopt options - CURLOPT_CONV_FROM_NETWORK_FUNCTION, - CURLOPT_CONV_TO_NETWORK_FUNCTION, and - CURLOPT_CONV_FROM_UTF8_FUNCTION */ + CURLE_OBSOLETE75, /* 75 - NOT IN USE since 7.82.0 */ + 
CURLE_OBSOLETE76, /* 76 - NOT IN USE since 7.82.0 */ CURLE_SSL_CACERT_BADFILE, /* 77 - could not load CACERT file, missing or wrong format */ CURLE_REMOTE_FILE_NOT_FOUND, /* 78 - remote file not found */ @@ -602,6 +630,13 @@ typedef enum { */ CURLE_RECURSIVE_API_CALL, /* 93 - an api function was called from inside a callback */ + CURLE_AUTH_ERROR, /* 94 - an authentication function returned an + error */ + CURLE_HTTP3, /* 95 - An HTTP/3 layer problem */ + CURLE_QUIC_CONNECT_ERROR, /* 96 - QUIC connection error */ + CURLE_PROXY, /* 97 - proxy handshake error */ + CURLE_SSL_CLIENTCERT, /* 98 - client-side certificate required */ + CURLE_UNRECOVERABLE_POLL, /* 99 - poll/select returned fatal error */ CURL_LAST /* never use! */ } CURLcode; @@ -625,6 +660,9 @@ typedef enum { /* The following were added in 7.21.5, April 2011 */ #define CURLE_UNKNOWN_TELNET_OPTION CURLE_UNKNOWN_OPTION +/* Added for 7.78.0 */ +#define CURLE_TELNET_OPTION_SYNTAX CURLE_SETOPT_OPTION_SYNTAX + /* The following were added in 7.17.1 */ /* These are scheduled to disappear by 2009 */ #define CURLE_SSL_PEER_CERTIFICATE CURLE_PEER_FAILED_VERIFICATION @@ -656,13 +694,14 @@ typedef enum { /* The following were added earlier */ #define CURLE_OPERATION_TIMEOUTED CURLE_OPERATION_TIMEDOUT - #define CURLE_HTTP_NOT_FOUND CURLE_HTTP_RETURNED_ERROR #define CURLE_HTTP_PORT_FAILED CURLE_INTERFACE_FAILED #define CURLE_FTP_COULDNT_STOR_FILE CURLE_UPLOAD_FAILED - #define CURLE_FTP_PARTIAL_FILE CURLE_PARTIAL_FILE #define CURLE_FTP_BAD_DOWNLOAD_RESUME CURLE_BAD_DOWNLOAD_RESUME +#define CURLE_LDAP_INVALID_URL CURLE_OBSOLETE62 +#define CURLE_CONV_REQD CURLE_OBSOLETE76 +#define CURLE_CONV_FAILED CURLE_OBSOLETE75 /* This was the error code 50 in 7.7.3 and a few earlier versions, this is no longer used by libcurl but is instead #defined here only to not @@ -679,14 +718,58 @@ typedef enum { #define CURLOPT_WRITEINFO CURLOPT_OBSOLETE40 #define CURLOPT_CLOSEPOLICY CURLOPT_OBSOLETE72 -#endif /*!CURL_NO_OLDIES*/ +#endif /* 
!CURL_NO_OLDIES */ + +/* + * Proxy error codes. Returned in CURLINFO_PROXY_ERROR if CURLE_PROXY was + * return for the transfers. + */ +typedef enum { + CURLPX_OK, + CURLPX_BAD_ADDRESS_TYPE, + CURLPX_BAD_VERSION, + CURLPX_CLOSED, + CURLPX_GSSAPI, + CURLPX_GSSAPI_PERMSG, + CURLPX_GSSAPI_PROTECTION, + CURLPX_IDENTD, + CURLPX_IDENTD_DIFFER, + CURLPX_LONG_HOSTNAME, + CURLPX_LONG_PASSWD, + CURLPX_LONG_USER, + CURLPX_NO_AUTH, + CURLPX_RECV_ADDRESS, + CURLPX_RECV_AUTH, + CURLPX_RECV_CONNECT, + CURLPX_RECV_REQACK, + CURLPX_REPLY_ADDRESS_TYPE_NOT_SUPPORTED, + CURLPX_REPLY_COMMAND_NOT_SUPPORTED, + CURLPX_REPLY_CONNECTION_REFUSED, + CURLPX_REPLY_GENERAL_SERVER_FAILURE, + CURLPX_REPLY_HOST_UNREACHABLE, + CURLPX_REPLY_NETWORK_UNREACHABLE, + CURLPX_REPLY_NOT_ALLOWED, + CURLPX_REPLY_TTL_EXPIRED, + CURLPX_REPLY_UNASSIGNED, + CURLPX_REQUEST_FAILED, + CURLPX_RESOLVE_HOST, + CURLPX_SEND_AUTH, + CURLPX_SEND_CONNECT, + CURLPX_SEND_REQUEST, + CURLPX_UNKNOWN_FAIL, + CURLPX_UNKNOWN_MODE, + CURLPX_USER_REJECTED, + CURLPX_LAST /* never use */ +} CURLproxycode; /* This prototype applies to all conversion callbacks */ typedef CURLcode (*curl_conv_callback)(char *buffer, size_t length); typedef CURLcode (*curl_ssl_ctx_callback)(CURL *curl, /* easy handle */ - void *ssl_ctx, /* actually an - OpenSSL SSL_CTX */ + void *ssl_ctx, /* actually an OpenSSL + or WolfSSL SSL_CTX, + or an mbedTLS + mbedtls_ssl_config */ void *userptr); typedef enum { @@ -734,6 +817,7 @@ typedef enum { #define CURLAUTH_DIGEST_IE (((unsigned long)1)<<4) #define CURLAUTH_NTLM_WB (((unsigned long)1)<<5) #define CURLAUTH_BEARER (((unsigned long)1)<<6) +#define CURLAUTH_AWS_SIGV4 (((unsigned long)1)<<7) #define CURLAUTH_ONLY (((unsigned long)1)<<31) #define CURLAUTH_ANY (~CURLAUTH_DIGEST_IE) #define CURLAUTH_ANYSAFE (~(CURLAUTH_BASIC|CURLAUTH_DIGEST_IE)) @@ -764,7 +848,7 @@ enum curl_khtype { }; struct curl_khkey { - const char *key; /* points to a zero-terminated string encoded with base64 + const char *key; /* points to a 
null-terminated string encoded with base64 if len is zero, otherwise to the "raw" data */ size_t len; enum curl_khtype keytype; @@ -776,9 +860,10 @@ enum curl_khstat { CURLKHSTAT_FINE_ADD_TO_FILE, CURLKHSTAT_FINE, CURLKHSTAT_REJECT, /* reject the connection, return an error */ - CURLKHSTAT_DEFER, /* do not accept it, but we can't answer right now so - this causes a CURLE_DEFER error but otherwise the + CURLKHSTAT_DEFER, /* do not accept it, but we can't answer right now. + Causes a CURLE_PEER_FAILED_VERIFICATION error but the connection will be left intact etc */ + CURLKHSTAT_FINE_REPLACE, /* accept and replace the wrong key */ CURLKHSTAT_LAST /* not for use, only a marker for last-in-list */ }; @@ -795,7 +880,18 @@ typedef int const struct curl_khkey *knownkey, /* known */ const struct curl_khkey *foundkey, /* found */ enum curl_khmatch, /* libcurl's view on the keys */ - void *clientp); /* custom pointer passed from app */ + void *clientp); /* custom pointer passed with */ + /* CURLOPT_SSH_KEYDATA */ + +typedef int + (*curl_sshhostkeycallback) (void *clientp,/* custom pointer passed */ + /* with CURLOPT_SSH_HOSTKEYDATA */ + int keytype, /* CURLKHTYPE */ + const char *key, /* hostkey to check */ + size_t keylen); /* length of the key */ + /* return CURLE_OK to accept */ + /* or something else to refuse */ + /* parameter for the CURLOPT_USE_SSL option */ typedef enum { @@ -819,6 +915,23 @@ typedef enum { SSL backends where such behavior is present. */ #define CURLSSLOPT_NO_REVOKE (1<<1) +/* - NO_PARTIALCHAIN tells libcurl to *NOT* accept a partial certificate chain + if possible. The OpenSSL backend has this ability. */ +#define CURLSSLOPT_NO_PARTIALCHAIN (1<<2) + +/* - REVOKE_BEST_EFFORT tells libcurl to ignore certificate revocation offline + checks and ignore missing revocation list for those SSL backends where such + behavior is present. 
*/ +#define CURLSSLOPT_REVOKE_BEST_EFFORT (1<<3) + +/* - CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + operating system. Currently implemented under MS-Windows. */ +#define CURLSSLOPT_NATIVE_CA (1<<4) + +/* - CURLSSLOPT_AUTO_CLIENT_CERT tells libcurl to automatically locate and use + a client certificate for authentication. (Schannel) */ +#define CURLSSLOPT_AUTO_CLIENT_CERT (1<<5) + /* The default connection attempt delay in milliseconds for happy eyeballs. CURLOPT_HAPPY_EYEBALLS_TIMEOUT_MS.3 and happy-eyeballs-timeout-ms.d document this value, keep them in sync. */ @@ -839,7 +952,7 @@ typedef enum { #define CURLFTPSSL_ALL CURLUSESSL_ALL #define CURLFTPSSL_LAST CURLUSESSL_LAST #define curl_ftpssl curl_usessl -#endif /*!CURL_NO_OLDIES*/ +#endif /* !CURL_NO_OLDIES */ /* parameter for the CURLOPT_FTP_SSL_CCC option */ typedef enum { @@ -882,14 +995,44 @@ typedef enum { #define CURLHEADER_SEPARATE (1<<0) /* CURLALTSVC_* are bits for the CURLOPT_ALTSVC_CTRL option */ -#define CURLALTSVC_IMMEDIATELY (1<<0) -#define CURLALTSVC_ALTUSED (1<<1) #define CURLALTSVC_READONLYFILE (1<<2) #define CURLALTSVC_H1 (1<<3) #define CURLALTSVC_H2 (1<<4) #define CURLALTSVC_H3 (1<<5) -/* CURLPROTO_ defines are for the CURLOPT_*PROTOCOLS options */ + +struct curl_hstsentry { + char *name; + size_t namelen; + unsigned int includeSubDomains:1; + char expire[18]; /* YYYYMMDD HH:MM:SS [null-terminated] */ +}; + +struct curl_index { + size_t index; /* the provided entry's "index" or count */ + size_t total; /* total number of entries to save */ +}; + +typedef enum { + CURLSTS_OK, + CURLSTS_DONE, + CURLSTS_FAIL +} CURLSTScode; + +typedef CURLSTScode (*curl_hstsread_callback)(CURL *easy, + struct curl_hstsentry *e, + void *userp); +typedef CURLSTScode (*curl_hstswrite_callback)(CURL *easy, + struct curl_hstsentry *e, + struct curl_index *i, + void *userp); + +/* CURLHSTS_* are bits for the CURLOPT_HSTS option */ +#define CURLHSTS_ENABLE (long)(1<<0) +#define 
CURLHSTS_READONLYFILE (long)(1<<1) + +/* The CURLPROTO_ defines below are for the **deprecated** CURLOPT_*PROTOCOLS + options. Do not use. */ #define CURLPROTO_HTTP (1<<0) #define CURLPROTO_HTTPS (1<<1) #define CURLPROTO_FTP (1<<2) @@ -918,87 +1061,86 @@ typedef enum { #define CURLPROTO_GOPHER (1<<25) #define CURLPROTO_SMB (1<<26) #define CURLPROTO_SMBS (1<<27) +#define CURLPROTO_MQTT (1<<28) +#define CURLPROTO_GOPHERS (1<<29) #define CURLPROTO_ALL (~0) /* enable everything */ /* long may be 32 or 64 bits, but we should never depend on anything else but 32 */ #define CURLOPTTYPE_LONG 0 #define CURLOPTTYPE_OBJECTPOINT 10000 -#define CURLOPTTYPE_STRINGPOINT 10000 #define CURLOPTTYPE_FUNCTIONPOINT 20000 #define CURLOPTTYPE_OFF_T 30000 +#define CURLOPTTYPE_BLOB 40000 /* *STRINGPOINT is an alias for OBJECTPOINT to allow tools to extract the string options from the header file */ -/* name is uppercase CURLOPT_, - type is one of the defined CURLOPTTYPE_ - number is unique identifier */ -#ifdef CINIT -#undef CINIT -#endif -#ifdef CURL_ISOCPP -#define CINIT(na,t,nu) CURLOPT_ ## na = CURLOPTTYPE_ ## t + nu -#else -/* The macro "##" is ISO C, we assume pre-ISO C doesn't support it. 
*/ -#define LONG CURLOPTTYPE_LONG -#define OBJECTPOINT CURLOPTTYPE_OBJECTPOINT -#define STRINGPOINT CURLOPTTYPE_OBJECTPOINT -#define FUNCTIONPOINT CURLOPTTYPE_FUNCTIONPOINT -#define OFF_T CURLOPTTYPE_OFF_T -#define CINIT(name,type,number) CURLOPT_/**/name = type + number -#endif +#define CURLOPT(na,t,nu) na = t + nu +#define CURLOPTDEPRECATED(na,t,nu,v,m) na CURL_DEPRECATED(v,m) = t + nu + +/* CURLOPT aliases that make no run-time difference */ + +/* 'char *' argument to a string with a trailing zero */ +#define CURLOPTTYPE_STRINGPOINT CURLOPTTYPE_OBJECTPOINT + +/* 'struct curl_slist *' argument */ +#define CURLOPTTYPE_SLISTPOINT CURLOPTTYPE_OBJECTPOINT + +/* 'void *' argument passed untouched to callback */ +#define CURLOPTTYPE_CBPOINT CURLOPTTYPE_OBJECTPOINT + +/* 'long' argument with a set of values/bitmask */ +#define CURLOPTTYPE_VALUES CURLOPTTYPE_LONG /* - * This macro-mania below setups the CURLOPT_[what] enum, to be used with - * curl_easy_setopt(). The first argument in the CINIT() macro is the [what] - * word. + * All CURLOPT_* values. */ typedef enum { /* This is the FILE * or void * the regular output should be written to. */ - CINIT(WRITEDATA, OBJECTPOINT, 1), + CURLOPT(CURLOPT_WRITEDATA, CURLOPTTYPE_CBPOINT, 1), /* The full URL to get/put */ - CINIT(URL, STRINGPOINT, 2), + CURLOPT(CURLOPT_URL, CURLOPTTYPE_STRINGPOINT, 2), /* Port number to connect to, if other than default. */ - CINIT(PORT, LONG, 3), + CURLOPT(CURLOPT_PORT, CURLOPTTYPE_LONG, 3), /* Name of proxy to use. */ - CINIT(PROXY, STRINGPOINT, 4), + CURLOPT(CURLOPT_PROXY, CURLOPTTYPE_STRINGPOINT, 4), /* "user:password;options" to use when fetching. */ - CINIT(USERPWD, STRINGPOINT, 5), + CURLOPT(CURLOPT_USERPWD, CURLOPTTYPE_STRINGPOINT, 5), /* "user:password" to use with proxy. */ - CINIT(PROXYUSERPWD, STRINGPOINT, 6), + CURLOPT(CURLOPT_PROXYUSERPWD, CURLOPTTYPE_STRINGPOINT, 6), /* Range to get, specified as an ASCII string. 
*/ - CINIT(RANGE, STRINGPOINT, 7), + CURLOPT(CURLOPT_RANGE, CURLOPTTYPE_STRINGPOINT, 7), /* not used */ /* Specified file stream to upload from (use as input): */ - CINIT(READDATA, OBJECTPOINT, 9), + CURLOPT(CURLOPT_READDATA, CURLOPTTYPE_CBPOINT, 9), /* Buffer to receive error messages in, must be at least CURL_ERROR_SIZE * bytes big. */ - CINIT(ERRORBUFFER, OBJECTPOINT, 10), + CURLOPT(CURLOPT_ERRORBUFFER, CURLOPTTYPE_OBJECTPOINT, 10), /* Function that will be called to store the output (instead of fwrite). The * parameters will use fwrite() syntax, make sure to follow them. */ - CINIT(WRITEFUNCTION, FUNCTIONPOINT, 11), + CURLOPT(CURLOPT_WRITEFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 11), /* Function that will be called to read the input (instead of fread). The * parameters will use fread() syntax, make sure to follow them. */ - CINIT(READFUNCTION, FUNCTIONPOINT, 12), + CURLOPT(CURLOPT_READFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 12), /* Time-out the read operation after this amount of seconds */ - CINIT(TIMEOUT, LONG, 13), + CURLOPT(CURLOPT_TIMEOUT, CURLOPTTYPE_LONG, 13), - /* If the CURLOPT_INFILE is used, this can be used to inform libcurl about + /* If CURLOPT_READDATA is used, this can be used to inform libcurl about * how large the file being sent really is. That allows better error * checking and better verifies that the upload was successful. -1 means * unknown size. @@ -1007,20 +1149,20 @@ typedef enum { * which takes an off_t type, allowing platforms with larger off_t * sizes to handle larger files. See below for INFILESIZE_LARGE. */ - CINIT(INFILESIZE, LONG, 14), + CURLOPT(CURLOPT_INFILESIZE, CURLOPTTYPE_LONG, 14), /* POST static input fields. 
*/ - CINIT(POSTFIELDS, OBJECTPOINT, 15), + CURLOPT(CURLOPT_POSTFIELDS, CURLOPTTYPE_OBJECTPOINT, 15), /* Set the referrer page (needed by some CGIs) */ - CINIT(REFERER, STRINGPOINT, 16), + CURLOPT(CURLOPT_REFERER, CURLOPTTYPE_STRINGPOINT, 16), /* Set the FTP PORT string (interface name, named or numerical IP address) Use i.e '-' to use default address. */ - CINIT(FTPPORT, STRINGPOINT, 17), + CURLOPT(CURLOPT_FTPPORT, CURLOPTTYPE_STRINGPOINT, 17), /* Set the User-Agent string (examined by some CGIs) */ - CINIT(USERAGENT, STRINGPOINT, 18), + CURLOPT(CURLOPT_USERAGENT, CURLOPTTYPE_STRINGPOINT, 18), /* If the download receives less than "low speed limit" bytes/second * during "low speed time" seconds, the operations is aborted. @@ -1029,10 +1171,10 @@ typedef enum { */ /* Set the "low speed limit" */ - CINIT(LOW_SPEED_LIMIT, LONG, 19), + CURLOPT(CURLOPT_LOW_SPEED_LIMIT, CURLOPTTYPE_LONG, 19), /* Set the "low speed time" */ - CINIT(LOW_SPEED_TIME, LONG, 20), + CURLOPT(CURLOPT_LOW_SPEED_TIME, CURLOPTTYPE_LONG, 20), /* Set the continuation offset. * @@ -1040,48 +1182,49 @@ typedef enum { * off_t types, allowing for large file offsets on platforms which * use larger-than-32-bit off_t's. Look below for RESUME_FROM_LARGE. */ - CINIT(RESUME_FROM, LONG, 21), + CURLOPT(CURLOPT_RESUME_FROM, CURLOPTTYPE_LONG, 21), /* Set cookie in request: */ - CINIT(COOKIE, STRINGPOINT, 22), + CURLOPT(CURLOPT_COOKIE, CURLOPTTYPE_STRINGPOINT, 22), /* This points to a linked list of headers, struct curl_slist kind. 
This list is also used for RTSP (in spite of its name) */ - CINIT(HTTPHEADER, OBJECTPOINT, 23), + CURLOPT(CURLOPT_HTTPHEADER, CURLOPTTYPE_SLISTPOINT, 23), /* This points to a linked list of post entries, struct curl_httppost */ - CINIT(HTTPPOST, OBJECTPOINT, 24), + CURLOPTDEPRECATED(CURLOPT_HTTPPOST, CURLOPTTYPE_OBJECTPOINT, 24, + 7.56.0, "Use CURLOPT_MIMEPOST"), /* name of the file keeping your private SSL-certificate */ - CINIT(SSLCERT, STRINGPOINT, 25), + CURLOPT(CURLOPT_SSLCERT, CURLOPTTYPE_STRINGPOINT, 25), /* password for the SSL or SSH private key */ - CINIT(KEYPASSWD, STRINGPOINT, 26), + CURLOPT(CURLOPT_KEYPASSWD, CURLOPTTYPE_STRINGPOINT, 26), /* send TYPE parameter? */ - CINIT(CRLF, LONG, 27), + CURLOPT(CURLOPT_CRLF, CURLOPTTYPE_LONG, 27), /* send linked-list of QUOTE commands */ - CINIT(QUOTE, OBJECTPOINT, 28), + CURLOPT(CURLOPT_QUOTE, CURLOPTTYPE_SLISTPOINT, 28), /* send FILE * or void * to store headers to, if you use a callback it is simply passed to the callback unmodified */ - CINIT(HEADERDATA, OBJECTPOINT, 29), + CURLOPT(CURLOPT_HEADERDATA, CURLOPTTYPE_CBPOINT, 29), /* point to a file to read the initial cookies from, also enables "cookie awareness" */ - CINIT(COOKIEFILE, STRINGPOINT, 31), + CURLOPT(CURLOPT_COOKIEFILE, CURLOPTTYPE_STRINGPOINT, 31), /* What version to specifically try to use. See CURL_SSLVERSION defines below. */ - CINIT(SSLVERSION, LONG, 32), + CURLOPT(CURLOPT_SSLVERSION, CURLOPTTYPE_VALUES, 32), /* What kind of HTTP time condition to use, see defines */ - CINIT(TIMECONDITION, LONG, 33), + CURLOPT(CURLOPT_TIMECONDITION, CURLOPTTYPE_VALUES, 33), /* Time to use with the above condition. 
Specified in number of seconds since 1 Jan 1970 */ - CINIT(TIMEVALUE, LONG, 34), + CURLOPT(CURLOPT_TIMEVALUE, CURLOPTTYPE_LONG, 34), /* 35 = OBSOLETE */ @@ -1089,37 +1232,59 @@ typedef enum { HTTP: DELETE, TRACE and others FTP: to use a different list command */ - CINIT(CUSTOMREQUEST, STRINGPOINT, 36), + CURLOPT(CURLOPT_CUSTOMREQUEST, CURLOPTTYPE_STRINGPOINT, 36), /* FILE handle to use instead of stderr */ - CINIT(STDERR, OBJECTPOINT, 37), + CURLOPT(CURLOPT_STDERR, CURLOPTTYPE_OBJECTPOINT, 37), /* 38 is not used */ /* send linked-list of post-transfer QUOTE commands */ - CINIT(POSTQUOTE, OBJECTPOINT, 39), + CURLOPT(CURLOPT_POSTQUOTE, CURLOPTTYPE_SLISTPOINT, 39), - CINIT(OBSOLETE40, OBJECTPOINT, 40), /* OBSOLETE, do not use! */ + /* OBSOLETE, do not use! */ + CURLOPT(CURLOPT_OBSOLETE40, CURLOPTTYPE_OBJECTPOINT, 40), - CINIT(VERBOSE, LONG, 41), /* talk a lot */ - CINIT(HEADER, LONG, 42), /* throw the header out too */ - CINIT(NOPROGRESS, LONG, 43), /* shut off the progress meter */ - CINIT(NOBODY, LONG, 44), /* use HEAD to get http document */ - CINIT(FAILONERROR, LONG, 45), /* no output on http error codes >= 400 */ - CINIT(UPLOAD, LONG, 46), /* this is an upload */ - CINIT(POST, LONG, 47), /* HTTP POST method */ - CINIT(DIRLISTONLY, LONG, 48), /* bare names when listing directories */ + /* talk a lot */ + CURLOPT(CURLOPT_VERBOSE, CURLOPTTYPE_LONG, 41), - CINIT(APPEND, LONG, 50), /* Append instead of overwrite on upload! 
*/ + /* throw the header out too */ + CURLOPT(CURLOPT_HEADER, CURLOPTTYPE_LONG, 42), + + /* shut off the progress meter */ + CURLOPT(CURLOPT_NOPROGRESS, CURLOPTTYPE_LONG, 43), + + /* use HEAD to get http document */ + CURLOPT(CURLOPT_NOBODY, CURLOPTTYPE_LONG, 44), + + /* no output on http error codes >= 400 */ + CURLOPT(CURLOPT_FAILONERROR, CURLOPTTYPE_LONG, 45), + + /* this is an upload */ + CURLOPT(CURLOPT_UPLOAD, CURLOPTTYPE_LONG, 46), + + /* HTTP POST method */ + CURLOPT(CURLOPT_POST, CURLOPTTYPE_LONG, 47), + + /* bare names when listing directories */ + CURLOPT(CURLOPT_DIRLISTONLY, CURLOPTTYPE_LONG, 48), + + /* Append instead of overwrite on upload! */ + CURLOPT(CURLOPT_APPEND, CURLOPTTYPE_LONG, 50), /* Specify whether to read the user+password from the .netrc or the URL. * This must be one of the CURL_NETRC_* enums below. */ - CINIT(NETRC, LONG, 51), + CURLOPT(CURLOPT_NETRC, CURLOPTTYPE_VALUES, 51), - CINIT(FOLLOWLOCATION, LONG, 52), /* use Location: Luke! */ + /* use Location: Luke! */ + CURLOPT(CURLOPT_FOLLOWLOCATION, CURLOPTTYPE_LONG, 52), - CINIT(TRANSFERTEXT, LONG, 53), /* transfer data in text/ASCII format */ - CINIT(PUT, LONG, 54), /* HTTP PUT */ + /* transfer data in text/ASCII format */ + CURLOPT(CURLOPT_TRANSFERTEXT, CURLOPTTYPE_LONG, 53), + + /* HTTP PUT */ + CURLOPTDEPRECATED(CURLOPT_PUT, CURLOPTTYPE_LONG, 54, + 7.12.1, "Use CURLOPT_UPLOAD"), /* 55 = OBSOLETE */ @@ -1127,265 +1292,270 @@ typedef enum { * Function that will be called instead of the internal progress display * function. This function should be defined as the curl_progress_callback * prototype defines. 
*/ - CINIT(PROGRESSFUNCTION, FUNCTIONPOINT, 56), + CURLOPTDEPRECATED(CURLOPT_PROGRESSFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 56, + 7.32.0, "Use CURLOPT_XFERINFOFUNCTION"), /* Data passed to the CURLOPT_PROGRESSFUNCTION and CURLOPT_XFERINFOFUNCTION callbacks */ - CINIT(PROGRESSDATA, OBJECTPOINT, 57), -#define CURLOPT_XFERINFODATA CURLOPT_PROGRESSDATA + CURLOPT(CURLOPT_XFERINFODATA, CURLOPTTYPE_CBPOINT, 57), +#define CURLOPT_PROGRESSDATA CURLOPT_XFERINFODATA /* We want the referrer field set automatically when following locations */ - CINIT(AUTOREFERER, LONG, 58), + CURLOPT(CURLOPT_AUTOREFERER, CURLOPTTYPE_LONG, 58), /* Port of the proxy, can be set in the proxy string as well with: "[host]:[port]" */ - CINIT(PROXYPORT, LONG, 59), + CURLOPT(CURLOPT_PROXYPORT, CURLOPTTYPE_LONG, 59), /* size of the POST input data, if strlen() is not good to use */ - CINIT(POSTFIELDSIZE, LONG, 60), + CURLOPT(CURLOPT_POSTFIELDSIZE, CURLOPTTYPE_LONG, 60), - /* tunnel non-http operations through a HTTP proxy */ - CINIT(HTTPPROXYTUNNEL, LONG, 61), + /* tunnel non-http operations through an HTTP proxy */ + CURLOPT(CURLOPT_HTTPPROXYTUNNEL, CURLOPTTYPE_LONG, 61), /* Set the interface string to use as outgoing network interface */ - CINIT(INTERFACE, STRINGPOINT, 62), + CURLOPT(CURLOPT_INTERFACE, CURLOPTTYPE_STRINGPOINT, 62), /* Set the krb4/5 security level, this also enables krb4/5 awareness. This * is a string, 'clear', 'safe', 'confidential' or 'private'. If the string * is set but doesn't match one of these, 'private' will be used. */ - CINIT(KRBLEVEL, STRINGPOINT, 63), + CURLOPT(CURLOPT_KRBLEVEL, CURLOPTTYPE_STRINGPOINT, 63), /* Set if we should verify the peer in ssl handshake, set 1 to verify. 
*/ - CINIT(SSL_VERIFYPEER, LONG, 64), + CURLOPT(CURLOPT_SSL_VERIFYPEER, CURLOPTTYPE_LONG, 64), /* The CApath or CAfile used to validate the peer certificate this option is used only if SSL_VERIFYPEER is true */ - CINIT(CAINFO, STRINGPOINT, 65), + CURLOPT(CURLOPT_CAINFO, CURLOPTTYPE_STRINGPOINT, 65), /* 66 = OBSOLETE */ /* 67 = OBSOLETE */ /* Maximum number of http redirects to follow */ - CINIT(MAXREDIRS, LONG, 68), + CURLOPT(CURLOPT_MAXREDIRS, CURLOPTTYPE_LONG, 68), /* Pass a long set to 1 to get the date of the requested document (if possible)! Pass a zero to shut it off. */ - CINIT(FILETIME, LONG, 69), + CURLOPT(CURLOPT_FILETIME, CURLOPTTYPE_LONG, 69), /* This points to a linked list of telnet options */ - CINIT(TELNETOPTIONS, OBJECTPOINT, 70), + CURLOPT(CURLOPT_TELNETOPTIONS, CURLOPTTYPE_SLISTPOINT, 70), /* Max amount of cached alive connections */ - CINIT(MAXCONNECTS, LONG, 71), + CURLOPT(CURLOPT_MAXCONNECTS, CURLOPTTYPE_LONG, 71), - CINIT(OBSOLETE72, LONG, 72), /* OBSOLETE, do not use! */ + /* OBSOLETE, do not use! */ + CURLOPT(CURLOPT_OBSOLETE72, CURLOPTTYPE_LONG, 72), /* 73 = OBSOLETE */ /* Set to explicitly use a new connection for the upcoming transfer. Do not use this unless you're absolutely sure of this, as it makes the operation slower and is less friendly for the network. */ - CINIT(FRESH_CONNECT, LONG, 74), + CURLOPT(CURLOPT_FRESH_CONNECT, CURLOPTTYPE_LONG, 74), /* Set to explicitly forbid the upcoming transfer's connection to be re-used when done. Do not use this unless you're absolutely sure of this, as it makes the operation slower and is less friendly for the network. */ - CINIT(FORBID_REUSE, LONG, 75), + CURLOPT(CURLOPT_FORBID_REUSE, CURLOPTTYPE_LONG, 75), /* Set to a file name that contains random data for libcurl to use to seed the random engine when doing SSL connects. 
*/ - CINIT(RANDOM_FILE, STRINGPOINT, 76), + CURLOPTDEPRECATED(CURLOPT_RANDOM_FILE, CURLOPTTYPE_STRINGPOINT, 76, + 7.84.0, "Serves no purpose anymore"), /* Set to the Entropy Gathering Daemon socket pathname */ - CINIT(EGDSOCKET, STRINGPOINT, 77), + CURLOPTDEPRECATED(CURLOPT_EGDSOCKET, CURLOPTTYPE_STRINGPOINT, 77, + 7.84.0, "Serves no purpose anymore"), /* Time-out connect operations after this amount of seconds, if connects are OK within this time, then fine... This only aborts the connect phase. */ - CINIT(CONNECTTIMEOUT, LONG, 78), + CURLOPT(CURLOPT_CONNECTTIMEOUT, CURLOPTTYPE_LONG, 78), /* Function that will be called to store headers (instead of fwrite). The * parameters will use fwrite() syntax, make sure to follow them. */ - CINIT(HEADERFUNCTION, FUNCTIONPOINT, 79), + CURLOPT(CURLOPT_HEADERFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 79), /* Set this to force the HTTP request to get back to GET. Only really usable if POST, PUT or a custom request have been used first. */ - CINIT(HTTPGET, LONG, 80), + CURLOPT(CURLOPT_HTTPGET, CURLOPTTYPE_LONG, 80), /* Set if we should verify the Common name from the peer certificate in ssl * handshake, set 1 to check existence, 2 to ensure that it matches the * provided hostname. */ - CINIT(SSL_VERIFYHOST, LONG, 81), + CURLOPT(CURLOPT_SSL_VERIFYHOST, CURLOPTTYPE_LONG, 81), /* Specify which file name to write all known cookies in after completed operation. Set file name to "-" (dash) to make it go to stdout. */ - CINIT(COOKIEJAR, STRINGPOINT, 82), + CURLOPT(CURLOPT_COOKIEJAR, CURLOPTTYPE_STRINGPOINT, 82), /* Specify which SSL ciphers to use */ - CINIT(SSL_CIPHER_LIST, STRINGPOINT, 83), + CURLOPT(CURLOPT_SSL_CIPHER_LIST, CURLOPTTYPE_STRINGPOINT, 83), /* Specify which HTTP version to use! This must be set to one of the CURL_HTTP_VERSION* enums set below. */ - CINIT(HTTP_VERSION, LONG, 84), + CURLOPT(CURLOPT_HTTP_VERSION, CURLOPTTYPE_VALUES, 84), /* Specifically switch on or off the FTP engine's use of the EPSV command. 
By default, that one will always be attempted before the more traditional PASV command. */ - CINIT(FTP_USE_EPSV, LONG, 85), + CURLOPT(CURLOPT_FTP_USE_EPSV, CURLOPTTYPE_LONG, 85), /* type of the file keeping your SSL-certificate ("DER", "PEM", "ENG") */ - CINIT(SSLCERTTYPE, STRINGPOINT, 86), + CURLOPT(CURLOPT_SSLCERTTYPE, CURLOPTTYPE_STRINGPOINT, 86), /* name of the file keeping your private SSL-key */ - CINIT(SSLKEY, STRINGPOINT, 87), + CURLOPT(CURLOPT_SSLKEY, CURLOPTTYPE_STRINGPOINT, 87), /* type of the file keeping your private SSL-key ("DER", "PEM", "ENG") */ - CINIT(SSLKEYTYPE, STRINGPOINT, 88), + CURLOPT(CURLOPT_SSLKEYTYPE, CURLOPTTYPE_STRINGPOINT, 88), /* crypto engine for the SSL-sub system */ - CINIT(SSLENGINE, STRINGPOINT, 89), + CURLOPT(CURLOPT_SSLENGINE, CURLOPTTYPE_STRINGPOINT, 89), /* set the crypto engine for the SSL-sub system as default the param has no meaning... */ - CINIT(SSLENGINE_DEFAULT, LONG, 90), + CURLOPT(CURLOPT_SSLENGINE_DEFAULT, CURLOPTTYPE_LONG, 90), /* Non-zero value means to use the global dns cache */ - CINIT(DNS_USE_GLOBAL_CACHE, LONG, 91), /* DEPRECATED, do not use! */ + /* DEPRECATED, do not use! 
*/ + CURLOPTDEPRECATED(CURLOPT_DNS_USE_GLOBAL_CACHE, CURLOPTTYPE_LONG, 91, + 7.11.1, "Use CURLOPT_SHARE"), /* DNS cache timeout */ - CINIT(DNS_CACHE_TIMEOUT, LONG, 92), + CURLOPT(CURLOPT_DNS_CACHE_TIMEOUT, CURLOPTTYPE_LONG, 92), /* send linked-list of pre-transfer QUOTE commands */ - CINIT(PREQUOTE, OBJECTPOINT, 93), + CURLOPT(CURLOPT_PREQUOTE, CURLOPTTYPE_SLISTPOINT, 93), /* set the debug function */ - CINIT(DEBUGFUNCTION, FUNCTIONPOINT, 94), + CURLOPT(CURLOPT_DEBUGFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 94), /* set the data for the debug function */ - CINIT(DEBUGDATA, OBJECTPOINT, 95), + CURLOPT(CURLOPT_DEBUGDATA, CURLOPTTYPE_CBPOINT, 95), /* mark this as start of a cookie session */ - CINIT(COOKIESESSION, LONG, 96), + CURLOPT(CURLOPT_COOKIESESSION, CURLOPTTYPE_LONG, 96), /* The CApath directory used to validate the peer certificate this option is used only if SSL_VERIFYPEER is true */ - CINIT(CAPATH, STRINGPOINT, 97), + CURLOPT(CURLOPT_CAPATH, CURLOPTTYPE_STRINGPOINT, 97), /* Instruct libcurl to use a smaller receive buffer */ - CINIT(BUFFERSIZE, LONG, 98), + CURLOPT(CURLOPT_BUFFERSIZE, CURLOPTTYPE_LONG, 98), /* Instruct libcurl to not use any signal/alarm handlers, even when using timeouts. This option is useful for multi-threaded applications. See libcurl-the-guide for more background information. */ - CINIT(NOSIGNAL, LONG, 99), + CURLOPT(CURLOPT_NOSIGNAL, CURLOPTTYPE_LONG, 99), /* Provide a CURLShare for mutexing non-ts data */ - CINIT(SHARE, OBJECTPOINT, 100), + CURLOPT(CURLOPT_SHARE, CURLOPTTYPE_OBJECTPOINT, 100), /* indicates type of proxy. accepted values are CURLPROXY_HTTP (default), CURLPROXY_HTTPS, CURLPROXY_SOCKS4, CURLPROXY_SOCKS4A and CURLPROXY_SOCKS5. */ - CINIT(PROXYTYPE, LONG, 101), + CURLOPT(CURLOPT_PROXYTYPE, CURLOPTTYPE_VALUES, 101), /* Set the Accept-Encoding string. Use this to tell a server you would like the response to be compressed. 
Before 7.21.6, this was known as CURLOPT_ENCODING */ - CINIT(ACCEPT_ENCODING, STRINGPOINT, 102), + CURLOPT(CURLOPT_ACCEPT_ENCODING, CURLOPTTYPE_STRINGPOINT, 102), /* Set pointer to private data */ - CINIT(PRIVATE, OBJECTPOINT, 103), + CURLOPT(CURLOPT_PRIVATE, CURLOPTTYPE_OBJECTPOINT, 103), /* Set aliases for HTTP 200 in the HTTP Response header */ - CINIT(HTTP200ALIASES, OBJECTPOINT, 104), + CURLOPT(CURLOPT_HTTP200ALIASES, CURLOPTTYPE_SLISTPOINT, 104), /* Continue to send authentication (user+password) when following locations, even when hostname changed. This can potentially send off the name and password to whatever host the server decides. */ - CINIT(UNRESTRICTED_AUTH, LONG, 105), + CURLOPT(CURLOPT_UNRESTRICTED_AUTH, CURLOPTTYPE_LONG, 105), /* Specifically switch on or off the FTP engine's use of the EPRT command ( it also disables the LPRT attempt). By default, those ones will always be attempted before the good old traditional PORT command. */ - CINIT(FTP_USE_EPRT, LONG, 106), + CURLOPT(CURLOPT_FTP_USE_EPRT, CURLOPTTYPE_LONG, 106), /* Set this to a bitmask value to enable the particular authentications methods you like. Use this in combination with CURLOPT_USERPWD. Note that setting multiple bits may cause extra network round-trips. */ - CINIT(HTTPAUTH, LONG, 107), + CURLOPT(CURLOPT_HTTPAUTH, CURLOPTTYPE_VALUES, 107), - /* Set the ssl context callback function, currently only for OpenSSL ssl_ctx - in second argument. The function must be matching the - curl_ssl_ctx_callback proto. */ - CINIT(SSL_CTX_FUNCTION, FUNCTIONPOINT, 108), + /* Set the ssl context callback function, currently only for OpenSSL or + WolfSSL ssl_ctx, or mbedTLS mbedtls_ssl_config in the second argument. + The function must match the curl_ssl_ctx_callback prototype. 
*/ + CURLOPT(CURLOPT_SSL_CTX_FUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 108), /* Set the userdata for the ssl context callback function's third argument */ - CINIT(SSL_CTX_DATA, OBJECTPOINT, 109), + CURLOPT(CURLOPT_SSL_CTX_DATA, CURLOPTTYPE_CBPOINT, 109), /* FTP Option that causes missing dirs to be created on the remote server. In 7.19.4 we introduced the convenience enums for this option using the CURLFTP_CREATE_DIR prefix. */ - CINIT(FTP_CREATE_MISSING_DIRS, LONG, 110), + CURLOPT(CURLOPT_FTP_CREATE_MISSING_DIRS, CURLOPTTYPE_LONG, 110), /* Set this to a bitmask value to enable the particular authentications methods you like. Use this in combination with CURLOPT_PROXYUSERPWD. Note that setting multiple bits may cause extra network round-trips. */ - CINIT(PROXYAUTH, LONG, 111), + CURLOPT(CURLOPT_PROXYAUTH, CURLOPTTYPE_VALUES, 111), - /* FTP option that changes the timeout, in seconds, associated with - getting a response. This is different from transfer timeout time and - essentially places a demand on the FTP server to acknowledge commands - in a timely manner. */ - CINIT(FTP_RESPONSE_TIMEOUT, LONG, 112), -#define CURLOPT_SERVER_RESPONSE_TIMEOUT CURLOPT_FTP_RESPONSE_TIMEOUT + /* Option that changes the timeout, in seconds, associated with getting a + response. This is different from transfer timeout time and essentially + places a demand on the server to acknowledge commands in a timely + manner. For FTP, SMTP, IMAP and POP3. */ + CURLOPT(CURLOPT_SERVER_RESPONSE_TIMEOUT, CURLOPTTYPE_LONG, 112), /* Set this option to one of the CURL_IPRESOLVE_* defines (see below) to - tell libcurl to resolve names to those IP versions only. This only has - affect on systems with support for more than one, i.e IPv4 _and_ IPv6. */ - CINIT(IPRESOLVE, LONG, 113), + tell libcurl to use those IP versions only. This only has effect on + systems with support for more than one, i.e IPv4 _and_ IPv6. 
*/ + CURLOPT(CURLOPT_IPRESOLVE, CURLOPTTYPE_VALUES, 113), /* Set this option to limit the size of a file that will be downloaded from an HTTP or FTP server. Note there is also _LARGE version which adds large file support for platforms which have larger off_t sizes. See MAXFILESIZE_LARGE below. */ - CINIT(MAXFILESIZE, LONG, 114), + CURLOPT(CURLOPT_MAXFILESIZE, CURLOPTTYPE_LONG, 114), /* See the comment for INFILESIZE above, but in short, specifies * the size of the file being uploaded. -1 means unknown. */ - CINIT(INFILESIZE_LARGE, OFF_T, 115), + CURLOPT(CURLOPT_INFILESIZE_LARGE, CURLOPTTYPE_OFF_T, 115), - /* Sets the continuation offset. There is also a LONG version of this; - * look above for RESUME_FROM. + /* Sets the continuation offset. There is also a CURLOPTTYPE_LONG version + * of this; look above for RESUME_FROM. */ - CINIT(RESUME_FROM_LARGE, OFF_T, 116), + CURLOPT(CURLOPT_RESUME_FROM_LARGE, CURLOPTTYPE_OFF_T, 116), /* Sets the maximum size of data that will be downloaded from * an HTTP or FTP server. See MAXFILESIZE above for the LONG version. */ - CINIT(MAXFILESIZE_LARGE, OFF_T, 117), + CURLOPT(CURLOPT_MAXFILESIZE_LARGE, CURLOPTTYPE_OFF_T, 117), /* Set this option to the file name of your .netrc file you want libcurl to parse (using the CURLOPT_NETRC option). If not set, libcurl will do a poor attempt to find the user's home directory and check for a .netrc file in there. 
*/ - CINIT(NETRC_FILE, STRINGPOINT, 118), + CURLOPT(CURLOPT_NETRC_FILE, CURLOPTTYPE_STRINGPOINT, 118), /* Enable SSL/TLS for FTP, pick one of: CURLUSESSL_TRY - try using SSL, proceed anyway otherwise CURLUSESSL_CONTROL - SSL for the control connection or fail CURLUSESSL_ALL - SSL for all communication or fail */ - CINIT(USE_SSL, LONG, 119), + CURLOPT(CURLOPT_USE_SSL, CURLOPTTYPE_VALUES, 119), /* The _LARGE version of the standard POSTFIELDSIZE option */ - CINIT(POSTFIELDSIZE_LARGE, OFF_T, 120), + CURLOPT(CURLOPT_POSTFIELDSIZE_LARGE, CURLOPTTYPE_OFF_T, 120), /* Enable/disable the TCP Nagle algorithm */ - CINIT(TCP_NODELAY, LONG, 121), + CURLOPT(CURLOPT_TCP_NODELAY, CURLOPTTYPE_LONG, 121), /* 122 OBSOLETE, used in 7.12.3. Gone in 7.13.0 */ /* 123 OBSOLETE. Gone in 7.16.0 */ @@ -1405,143 +1575,151 @@ typedef enum { CURLFTPAUTH_SSL - try "AUTH SSL" first, then TLS CURLFTPAUTH_TLS - try "AUTH TLS" first, then SSL */ - CINIT(FTPSSLAUTH, LONG, 129), + CURLOPT(CURLOPT_FTPSSLAUTH, CURLOPTTYPE_VALUES, 129), - CINIT(IOCTLFUNCTION, FUNCTIONPOINT, 130), - CINIT(IOCTLDATA, OBJECTPOINT, 131), + CURLOPTDEPRECATED(CURLOPT_IOCTLFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 130, + 7.18.0, "Use CURLOPT_SEEKFUNCTION"), + CURLOPTDEPRECATED(CURLOPT_IOCTLDATA, CURLOPTTYPE_CBPOINT, 131, + 7.18.0, "Use CURLOPT_SEEKDATA"), /* 132 OBSOLETE. Gone in 7.16.0 */ /* 133 OBSOLETE. 
Gone in 7.16.0 */ - /* zero terminated string for pass on to the FTP server when asked for + /* null-terminated string for pass on to the FTP server when asked for "account" info */ - CINIT(FTP_ACCOUNT, STRINGPOINT, 134), + CURLOPT(CURLOPT_FTP_ACCOUNT, CURLOPTTYPE_STRINGPOINT, 134), /* feed cookie into cookie engine */ - CINIT(COOKIELIST, STRINGPOINT, 135), + CURLOPT(CURLOPT_COOKIELIST, CURLOPTTYPE_STRINGPOINT, 135), /* ignore Content-Length */ - CINIT(IGNORE_CONTENT_LENGTH, LONG, 136), + CURLOPT(CURLOPT_IGNORE_CONTENT_LENGTH, CURLOPTTYPE_LONG, 136), /* Set to non-zero to skip the IP address received in a 227 PASV FTP server response. Typically used for FTP-SSL purposes but is not restricted to that. libcurl will then instead use the same IP address it used for the control connection. */ - CINIT(FTP_SKIP_PASV_IP, LONG, 137), + CURLOPT(CURLOPT_FTP_SKIP_PASV_IP, CURLOPTTYPE_LONG, 137), /* Select "file method" to use when doing FTP, see the curl_ftpmethod above. */ - CINIT(FTP_FILEMETHOD, LONG, 138), + CURLOPT(CURLOPT_FTP_FILEMETHOD, CURLOPTTYPE_VALUES, 138), /* Local port number to bind the socket to */ - CINIT(LOCALPORT, LONG, 139), + CURLOPT(CURLOPT_LOCALPORT, CURLOPTTYPE_LONG, 139), /* Number of ports to try, including the first one set with LOCALPORT. Thus, setting it to 1 will make no additional attempts but the first. 
*/ - CINIT(LOCALPORTRANGE, LONG, 140), + CURLOPT(CURLOPT_LOCALPORTRANGE, CURLOPTTYPE_LONG, 140), /* no transfer, set up connection and let application use the socket by extracting it with CURLINFO_LASTSOCKET */ - CINIT(CONNECT_ONLY, LONG, 141), + CURLOPT(CURLOPT_CONNECT_ONLY, CURLOPTTYPE_LONG, 141), /* Function that will be called to convert from the network encoding (instead of using the iconv calls in libcurl) */ - CINIT(CONV_FROM_NETWORK_FUNCTION, FUNCTIONPOINT, 142), + CURLOPTDEPRECATED(CURLOPT_CONV_FROM_NETWORK_FUNCTION, + CURLOPTTYPE_FUNCTIONPOINT, 142, + 7.82.0, "Serves no purpose anymore"), /* Function that will be called to convert to the network encoding (instead of using the iconv calls in libcurl) */ - CINIT(CONV_TO_NETWORK_FUNCTION, FUNCTIONPOINT, 143), + CURLOPTDEPRECATED(CURLOPT_CONV_TO_NETWORK_FUNCTION, + CURLOPTTYPE_FUNCTIONPOINT, 143, + 7.82.0, "Serves no purpose anymore"), /* Function that will be called to convert from UTF8 (instead of using the iconv calls in libcurl) Note that this is used only for SSL certificate processing */ - CINIT(CONV_FROM_UTF8_FUNCTION, FUNCTIONPOINT, 144), + CURLOPTDEPRECATED(CURLOPT_CONV_FROM_UTF8_FUNCTION, + CURLOPTTYPE_FUNCTIONPOINT, 144, + 7.82.0, "Serves no purpose anymore"), /* if the connection proceeds too quickly then need to slow it down */ /* limit-rate: maximum number of bytes per second to send or receive */ - CINIT(MAX_SEND_SPEED_LARGE, OFF_T, 145), - CINIT(MAX_RECV_SPEED_LARGE, OFF_T, 146), + CURLOPT(CURLOPT_MAX_SEND_SPEED_LARGE, CURLOPTTYPE_OFF_T, 145), + CURLOPT(CURLOPT_MAX_RECV_SPEED_LARGE, CURLOPTTYPE_OFF_T, 146), /* Pointer to command string to send if USER/PASS fails. 
*/ - CINIT(FTP_ALTERNATIVE_TO_USER, STRINGPOINT, 147), + CURLOPT(CURLOPT_FTP_ALTERNATIVE_TO_USER, CURLOPTTYPE_STRINGPOINT, 147), /* callback function for setting socket options */ - CINIT(SOCKOPTFUNCTION, FUNCTIONPOINT, 148), - CINIT(SOCKOPTDATA, OBJECTPOINT, 149), + CURLOPT(CURLOPT_SOCKOPTFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 148), + CURLOPT(CURLOPT_SOCKOPTDATA, CURLOPTTYPE_CBPOINT, 149), /* set to 0 to disable session ID re-use for this transfer, default is enabled (== 1) */ - CINIT(SSL_SESSIONID_CACHE, LONG, 150), + CURLOPT(CURLOPT_SSL_SESSIONID_CACHE, CURLOPTTYPE_LONG, 150), /* allowed SSH authentication methods */ - CINIT(SSH_AUTH_TYPES, LONG, 151), + CURLOPT(CURLOPT_SSH_AUTH_TYPES, CURLOPTTYPE_VALUES, 151), /* Used by scp/sftp to do public/private key authentication */ - CINIT(SSH_PUBLIC_KEYFILE, STRINGPOINT, 152), - CINIT(SSH_PRIVATE_KEYFILE, STRINGPOINT, 153), + CURLOPT(CURLOPT_SSH_PUBLIC_KEYFILE, CURLOPTTYPE_STRINGPOINT, 152), + CURLOPT(CURLOPT_SSH_PRIVATE_KEYFILE, CURLOPTTYPE_STRINGPOINT, 153), /* Send CCC (Clear Command Channel) after authentication */ - CINIT(FTP_SSL_CCC, LONG, 154), + CURLOPT(CURLOPT_FTP_SSL_CCC, CURLOPTTYPE_LONG, 154), /* Same as TIMEOUT and CONNECTTIMEOUT, but with ms resolution */ - CINIT(TIMEOUT_MS, LONG, 155), - CINIT(CONNECTTIMEOUT_MS, LONG, 156), + CURLOPT(CURLOPT_TIMEOUT_MS, CURLOPTTYPE_LONG, 155), + CURLOPT(CURLOPT_CONNECTTIMEOUT_MS, CURLOPTTYPE_LONG, 156), /* set to zero to disable the libcurl's decoding and thus pass the raw body data to the application even when it is encoded/compressed */ - CINIT(HTTP_TRANSFER_DECODING, LONG, 157), - CINIT(HTTP_CONTENT_DECODING, LONG, 158), + CURLOPT(CURLOPT_HTTP_TRANSFER_DECODING, CURLOPTTYPE_LONG, 157), + CURLOPT(CURLOPT_HTTP_CONTENT_DECODING, CURLOPTTYPE_LONG, 158), /* Permission used when creating new files and directories on the remote server for protocols that support it, SFTP/SCP/FILE */ - CINIT(NEW_FILE_PERMS, LONG, 159), - CINIT(NEW_DIRECTORY_PERMS, LONG, 160), + 
CURLOPT(CURLOPT_NEW_FILE_PERMS, CURLOPTTYPE_LONG, 159), + CURLOPT(CURLOPT_NEW_DIRECTORY_PERMS, CURLOPTTYPE_LONG, 160), - /* Set the behaviour of POST when redirecting. Values must be set to one + /* Set the behavior of POST when redirecting. Values must be set to one of CURL_REDIR* defines below. This used to be called CURLOPT_POST301 */ - CINIT(POSTREDIR, LONG, 161), + CURLOPT(CURLOPT_POSTREDIR, CURLOPTTYPE_VALUES, 161), /* used by scp/sftp to verify the host's public key */ - CINIT(SSH_HOST_PUBLIC_KEY_MD5, STRINGPOINT, 162), + CURLOPT(CURLOPT_SSH_HOST_PUBLIC_KEY_MD5, CURLOPTTYPE_STRINGPOINT, 162), /* Callback function for opening socket (instead of socket(2)). Optionally, callback is able change the address or refuse to connect returning CURL_SOCKET_BAD. The callback should have type curl_opensocket_callback */ - CINIT(OPENSOCKETFUNCTION, FUNCTIONPOINT, 163), - CINIT(OPENSOCKETDATA, OBJECTPOINT, 164), + CURLOPT(CURLOPT_OPENSOCKETFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 163), + CURLOPT(CURLOPT_OPENSOCKETDATA, CURLOPTTYPE_CBPOINT, 164), /* POST volatile input fields. 
*/ - CINIT(COPYPOSTFIELDS, OBJECTPOINT, 165), + CURLOPT(CURLOPT_COPYPOSTFIELDS, CURLOPTTYPE_OBJECTPOINT, 165), /* set transfer mode (;type=) when doing FTP via an HTTP proxy */ - CINIT(PROXY_TRANSFER_MODE, LONG, 166), + CURLOPT(CURLOPT_PROXY_TRANSFER_MODE, CURLOPTTYPE_LONG, 166), /* Callback function for seeking in the input stream */ - CINIT(SEEKFUNCTION, FUNCTIONPOINT, 167), - CINIT(SEEKDATA, OBJECTPOINT, 168), + CURLOPT(CURLOPT_SEEKFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 167), + CURLOPT(CURLOPT_SEEKDATA, CURLOPTTYPE_CBPOINT, 168), /* CRL file */ - CINIT(CRLFILE, STRINGPOINT, 169), + CURLOPT(CURLOPT_CRLFILE, CURLOPTTYPE_STRINGPOINT, 169), /* Issuer certificate */ - CINIT(ISSUERCERT, STRINGPOINT, 170), + CURLOPT(CURLOPT_ISSUERCERT, CURLOPTTYPE_STRINGPOINT, 170), /* (IPv6) Address scope */ - CINIT(ADDRESS_SCOPE, LONG, 171), + CURLOPT(CURLOPT_ADDRESS_SCOPE, CURLOPTTYPE_LONG, 171), /* Collect certificate chain info and allow it to get retrievable with CURLINFO_CERTINFO after the transfer is complete. */ - CINIT(CERTINFO, LONG, 172), + CURLOPT(CURLOPT_CERTINFO, CURLOPTTYPE_LONG, 172), /* "name" and "pwd" to use when fetching. */ - CINIT(USERNAME, STRINGPOINT, 173), - CINIT(PASSWORD, STRINGPOINT, 174), + CURLOPT(CURLOPT_USERNAME, CURLOPTTYPE_STRINGPOINT, 173), + CURLOPT(CURLOPT_PASSWORD, CURLOPTTYPE_STRINGPOINT, 174), /* "name" and "pwd" to use with Proxy when fetching. */ - CINIT(PROXYUSERNAME, STRINGPOINT, 175), - CINIT(PROXYPASSWORD, STRINGPOINT, 176), + CURLOPT(CURLOPT_PROXYUSERNAME, CURLOPTTYPE_STRINGPOINT, 175), + CURLOPT(CURLOPT_PROXYPASSWORD, CURLOPTTYPE_STRINGPOINT, 176), /* Comma separated list of hostnames defining no-proxy zones. These should match both hostnames directly, and hostnames within a domain. For @@ -1550,103 +1728,107 @@ typedef enum { implementations of this, .local.com will be considered to be the same as local.com. A single * is the only valid wildcard, and effectively disables the use of proxy. 
*/ - CINIT(NOPROXY, STRINGPOINT, 177), + CURLOPT(CURLOPT_NOPROXY, CURLOPTTYPE_STRINGPOINT, 177), /* block size for TFTP transfers */ - CINIT(TFTP_BLKSIZE, LONG, 178), + CURLOPT(CURLOPT_TFTP_BLKSIZE, CURLOPTTYPE_LONG, 178), /* Socks Service */ - CINIT(SOCKS5_GSSAPI_SERVICE, STRINGPOINT, 179), /* DEPRECATED, do not use! */ + /* DEPRECATED, do not use! */ + CURLOPTDEPRECATED(CURLOPT_SOCKS5_GSSAPI_SERVICE, + CURLOPTTYPE_STRINGPOINT, 179, + 7.49.0, "Use CURLOPT_PROXY_SERVICE_NAME"), /* Socks Service */ - CINIT(SOCKS5_GSSAPI_NEC, LONG, 180), + CURLOPT(CURLOPT_SOCKS5_GSSAPI_NEC, CURLOPTTYPE_LONG, 180), /* set the bitmask for the protocols that are allowed to be used for the transfer, which thus helps the app which takes URLs from users or other external inputs and want to restrict what protocol(s) to deal with. Defaults to CURLPROTO_ALL. */ - CINIT(PROTOCOLS, LONG, 181), + CURLOPTDEPRECATED(CURLOPT_PROTOCOLS, CURLOPTTYPE_LONG, 181, + 7.85.0, "Use CURLOPT_PROTOCOLS_STR"), /* set the bitmask for the protocols that libcurl is allowed to follow to, as a subset of the CURLOPT_PROTOCOLS ones. That means the protocol needs - to be set in both bitmasks to be allowed to get redirected to. Defaults - to all protocols except FILE and SCP. */ - CINIT(REDIR_PROTOCOLS, LONG, 182), + to be set in both bitmasks to be allowed to get redirected to. 
*/ + CURLOPTDEPRECATED(CURLOPT_REDIR_PROTOCOLS, CURLOPTTYPE_LONG, 182, + 7.85.0, "Use CURLOPT_REDIR_PROTOCOLS_STR"), /* set the SSH knownhost file name to use */ - CINIT(SSH_KNOWNHOSTS, STRINGPOINT, 183), + CURLOPT(CURLOPT_SSH_KNOWNHOSTS, CURLOPTTYPE_STRINGPOINT, 183), /* set the SSH host key callback, must point to a curl_sshkeycallback function */ - CINIT(SSH_KEYFUNCTION, FUNCTIONPOINT, 184), + CURLOPT(CURLOPT_SSH_KEYFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 184), /* set the SSH host key callback custom pointer */ - CINIT(SSH_KEYDATA, OBJECTPOINT, 185), + CURLOPT(CURLOPT_SSH_KEYDATA, CURLOPTTYPE_CBPOINT, 185), /* set the SMTP mail originator */ - CINIT(MAIL_FROM, STRINGPOINT, 186), + CURLOPT(CURLOPT_MAIL_FROM, CURLOPTTYPE_STRINGPOINT, 186), /* set the list of SMTP mail receiver(s) */ - CINIT(MAIL_RCPT, OBJECTPOINT, 187), + CURLOPT(CURLOPT_MAIL_RCPT, CURLOPTTYPE_SLISTPOINT, 187), /* FTP: send PRET before PASV */ - CINIT(FTP_USE_PRET, LONG, 188), + CURLOPT(CURLOPT_FTP_USE_PRET, CURLOPTTYPE_LONG, 188), /* RTSP request method (OPTIONS, SETUP, PLAY, etc...) */ - CINIT(RTSP_REQUEST, LONG, 189), + CURLOPT(CURLOPT_RTSP_REQUEST, CURLOPTTYPE_VALUES, 189), /* The RTSP session identifier */ - CINIT(RTSP_SESSION_ID, STRINGPOINT, 190), + CURLOPT(CURLOPT_RTSP_SESSION_ID, CURLOPTTYPE_STRINGPOINT, 190), /* The RTSP stream URI */ - CINIT(RTSP_STREAM_URI, STRINGPOINT, 191), + CURLOPT(CURLOPT_RTSP_STREAM_URI, CURLOPTTYPE_STRINGPOINT, 191), /* The Transport: header to use in RTSP requests */ - CINIT(RTSP_TRANSPORT, STRINGPOINT, 192), + CURLOPT(CURLOPT_RTSP_TRANSPORT, CURLOPTTYPE_STRINGPOINT, 192), /* Manually initialize the client RTSP CSeq for this handle */ - CINIT(RTSP_CLIENT_CSEQ, LONG, 193), + CURLOPT(CURLOPT_RTSP_CLIENT_CSEQ, CURLOPTTYPE_LONG, 193), /* Manually initialize the server RTSP CSeq for this handle */ - CINIT(RTSP_SERVER_CSEQ, LONG, 194), + CURLOPT(CURLOPT_RTSP_SERVER_CSEQ, CURLOPTTYPE_LONG, 194), /* The stream to pass to INTERLEAVEFUNCTION. 
*/ - CINIT(INTERLEAVEDATA, OBJECTPOINT, 195), + CURLOPT(CURLOPT_INTERLEAVEDATA, CURLOPTTYPE_CBPOINT, 195), /* Let the application define a custom write method for RTP data */ - CINIT(INTERLEAVEFUNCTION, FUNCTIONPOINT, 196), + CURLOPT(CURLOPT_INTERLEAVEFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 196), /* Turn on wildcard matching */ - CINIT(WILDCARDMATCH, LONG, 197), + CURLOPT(CURLOPT_WILDCARDMATCH, CURLOPTTYPE_LONG, 197), /* Directory matching callback called before downloading of an individual file (chunk) started */ - CINIT(CHUNK_BGN_FUNCTION, FUNCTIONPOINT, 198), + CURLOPT(CURLOPT_CHUNK_BGN_FUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 198), /* Directory matching callback called after the file (chunk) was downloaded, or skipped */ - CINIT(CHUNK_END_FUNCTION, FUNCTIONPOINT, 199), + CURLOPT(CURLOPT_CHUNK_END_FUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 199), /* Change match (fnmatch-like) callback for wildcard matching */ - CINIT(FNMATCH_FUNCTION, FUNCTIONPOINT, 200), + CURLOPT(CURLOPT_FNMATCH_FUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 200), /* Let the application define custom chunk data pointer */ - CINIT(CHUNK_DATA, OBJECTPOINT, 201), + CURLOPT(CURLOPT_CHUNK_DATA, CURLOPTTYPE_CBPOINT, 201), /* FNMATCH_FUNCTION user pointer */ - CINIT(FNMATCH_DATA, OBJECTPOINT, 202), + CURLOPT(CURLOPT_FNMATCH_DATA, CURLOPTTYPE_CBPOINT, 202), /* send linked-list of name:port:address sets */ - CINIT(RESOLVE, OBJECTPOINT, 203), + CURLOPT(CURLOPT_RESOLVE, CURLOPTTYPE_SLISTPOINT, 203), /* Set a username for authenticated TLS */ - CINIT(TLSAUTH_USERNAME, STRINGPOINT, 204), + CURLOPT(CURLOPT_TLSAUTH_USERNAME, CURLOPTTYPE_STRINGPOINT, 204), /* Set a password for authenticated TLS */ - CINIT(TLSAUTH_PASSWORD, STRINGPOINT, 205), + CURLOPT(CURLOPT_TLSAUTH_PASSWORD, CURLOPTTYPE_STRINGPOINT, 205), /* Set authentication type for authenticated TLS */ - CINIT(TLSAUTH_TYPE, STRINGPOINT, 206), + CURLOPT(CURLOPT_TLSAUTH_TYPE, CURLOPTTYPE_STRINGPOINT, 206), /* Set to 1 to enable the "TE:" header in HTTP requests to ask for 
compressed transfer-encoded responses. Set to 0 to disable the use of TE: @@ -1658,265 +1840,368 @@ typedef enum { option is set to 1. */ - CINIT(TRANSFER_ENCODING, LONG, 207), + CURLOPT(CURLOPT_TRANSFER_ENCODING, CURLOPTTYPE_LONG, 207), /* Callback function for closing socket (instead of close(2)). The callback should have type curl_closesocket_callback */ - CINIT(CLOSESOCKETFUNCTION, FUNCTIONPOINT, 208), - CINIT(CLOSESOCKETDATA, OBJECTPOINT, 209), + CURLOPT(CURLOPT_CLOSESOCKETFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 208), + CURLOPT(CURLOPT_CLOSESOCKETDATA, CURLOPTTYPE_CBPOINT, 209), /* allow GSSAPI credential delegation */ - CINIT(GSSAPI_DELEGATION, LONG, 210), + CURLOPT(CURLOPT_GSSAPI_DELEGATION, CURLOPTTYPE_VALUES, 210), /* Set the name servers to use for DNS resolution */ - CINIT(DNS_SERVERS, STRINGPOINT, 211), + CURLOPT(CURLOPT_DNS_SERVERS, CURLOPTTYPE_STRINGPOINT, 211), /* Time-out accept operations (currently for FTP only) after this amount of milliseconds. */ - CINIT(ACCEPTTIMEOUT_MS, LONG, 212), + CURLOPT(CURLOPT_ACCEPTTIMEOUT_MS, CURLOPTTYPE_LONG, 212), /* Set TCP keepalive */ - CINIT(TCP_KEEPALIVE, LONG, 213), + CURLOPT(CURLOPT_TCP_KEEPALIVE, CURLOPTTYPE_LONG, 213), /* non-universal keepalive knobs (Linux, AIX, HP-UX, more) */ - CINIT(TCP_KEEPIDLE, LONG, 214), - CINIT(TCP_KEEPINTVL, LONG, 215), + CURLOPT(CURLOPT_TCP_KEEPIDLE, CURLOPTTYPE_LONG, 214), + CURLOPT(CURLOPT_TCP_KEEPINTVL, CURLOPTTYPE_LONG, 215), /* Enable/disable specific SSL features with a bitmask, see CURLSSLOPT_* */ - CINIT(SSL_OPTIONS, LONG, 216), + CURLOPT(CURLOPT_SSL_OPTIONS, CURLOPTTYPE_VALUES, 216), /* Set the SMTP auth originator */ - CINIT(MAIL_AUTH, STRINGPOINT, 217), + CURLOPT(CURLOPT_MAIL_AUTH, CURLOPTTYPE_STRINGPOINT, 217), /* Enable/disable SASL initial response */ - CINIT(SASL_IR, LONG, 218), + CURLOPT(CURLOPT_SASL_IR, CURLOPTTYPE_LONG, 218), /* Function that will be called instead of the internal progress display * function. 
This function should be defined as the curl_xferinfo_callback * prototype defines. (Deprecates CURLOPT_PROGRESSFUNCTION) */ - CINIT(XFERINFOFUNCTION, FUNCTIONPOINT, 219), + CURLOPT(CURLOPT_XFERINFOFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 219), /* The XOAUTH2 bearer token */ - CINIT(XOAUTH2_BEARER, STRINGPOINT, 220), + CURLOPT(CURLOPT_XOAUTH2_BEARER, CURLOPTTYPE_STRINGPOINT, 220), /* Set the interface string to use as outgoing network * interface for DNS requests. * Only supported by the c-ares DNS backend */ - CINIT(DNS_INTERFACE, STRINGPOINT, 221), + CURLOPT(CURLOPT_DNS_INTERFACE, CURLOPTTYPE_STRINGPOINT, 221), /* Set the local IPv4 address to use for outgoing DNS requests. * Only supported by the c-ares DNS backend */ - CINIT(DNS_LOCAL_IP4, STRINGPOINT, 222), + CURLOPT(CURLOPT_DNS_LOCAL_IP4, CURLOPTTYPE_STRINGPOINT, 222), /* Set the local IPv6 address to use for outgoing DNS requests. * Only supported by the c-ares DNS backend */ - CINIT(DNS_LOCAL_IP6, STRINGPOINT, 223), + CURLOPT(CURLOPT_DNS_LOCAL_IP6, CURLOPTTYPE_STRINGPOINT, 223), /* Set authentication options directly */ - CINIT(LOGIN_OPTIONS, STRINGPOINT, 224), + CURLOPT(CURLOPT_LOGIN_OPTIONS, CURLOPTTYPE_STRINGPOINT, 224), /* Enable/disable TLS NPN extension (http2 over ssl might fail without) */ - CINIT(SSL_ENABLE_NPN, LONG, 225), + CURLOPTDEPRECATED(CURLOPT_SSL_ENABLE_NPN, CURLOPTTYPE_LONG, 225, + 7.86.0, "Has no function"), /* Enable/disable TLS ALPN extension (http2 over ssl might fail without) */ - CINIT(SSL_ENABLE_ALPN, LONG, 226), + CURLOPT(CURLOPT_SSL_ENABLE_ALPN, CURLOPTTYPE_LONG, 226), - /* Time to wait for a response to a HTTP request containing an + /* Time to wait for a response to an HTTP request containing an * Expect: 100-continue header before sending the data anyway. 
*/ - CINIT(EXPECT_100_TIMEOUT_MS, LONG, 227), + CURLOPT(CURLOPT_EXPECT_100_TIMEOUT_MS, CURLOPTTYPE_LONG, 227), /* This points to a linked list of headers used for proxy requests only, struct curl_slist kind */ - CINIT(PROXYHEADER, OBJECTPOINT, 228), + CURLOPT(CURLOPT_PROXYHEADER, CURLOPTTYPE_SLISTPOINT, 228), /* Pass in a bitmask of "header options" */ - CINIT(HEADEROPT, LONG, 229), + CURLOPT(CURLOPT_HEADEROPT, CURLOPTTYPE_VALUES, 229), /* The public key in DER form used to validate the peer public key this option is used only if SSL_VERIFYPEER is true */ - CINIT(PINNEDPUBLICKEY, STRINGPOINT, 230), + CURLOPT(CURLOPT_PINNEDPUBLICKEY, CURLOPTTYPE_STRINGPOINT, 230), /* Path to Unix domain socket */ - CINIT(UNIX_SOCKET_PATH, STRINGPOINT, 231), + CURLOPT(CURLOPT_UNIX_SOCKET_PATH, CURLOPTTYPE_STRINGPOINT, 231), /* Set if we should verify the certificate status. */ - CINIT(SSL_VERIFYSTATUS, LONG, 232), + CURLOPT(CURLOPT_SSL_VERIFYSTATUS, CURLOPTTYPE_LONG, 232), /* Set if we should enable TLS false start. 
*/ - CINIT(SSL_FALSESTART, LONG, 233), + CURLOPT(CURLOPT_SSL_FALSESTART, CURLOPTTYPE_LONG, 233), /* Do not squash dot-dot sequences */ - CINIT(PATH_AS_IS, LONG, 234), + CURLOPT(CURLOPT_PATH_AS_IS, CURLOPTTYPE_LONG, 234), /* Proxy Service Name */ - CINIT(PROXY_SERVICE_NAME, STRINGPOINT, 235), + CURLOPT(CURLOPT_PROXY_SERVICE_NAME, CURLOPTTYPE_STRINGPOINT, 235), /* Service Name */ - CINIT(SERVICE_NAME, STRINGPOINT, 236), + CURLOPT(CURLOPT_SERVICE_NAME, CURLOPTTYPE_STRINGPOINT, 236), /* Wait/don't wait for pipe/mutex to clarify */ - CINIT(PIPEWAIT, LONG, 237), + CURLOPT(CURLOPT_PIPEWAIT, CURLOPTTYPE_LONG, 237), /* Set the protocol used when curl is given a URL without a protocol */ - CINIT(DEFAULT_PROTOCOL, STRINGPOINT, 238), + CURLOPT(CURLOPT_DEFAULT_PROTOCOL, CURLOPTTYPE_STRINGPOINT, 238), /* Set stream weight, 1 - 256 (default is 16) */ - CINIT(STREAM_WEIGHT, LONG, 239), + CURLOPT(CURLOPT_STREAM_WEIGHT, CURLOPTTYPE_LONG, 239), /* Set stream dependency on another CURL handle */ - CINIT(STREAM_DEPENDS, OBJECTPOINT, 240), + CURLOPT(CURLOPT_STREAM_DEPENDS, CURLOPTTYPE_OBJECTPOINT, 240), /* Set E-xclusive stream dependency on another CURL handle */ - CINIT(STREAM_DEPENDS_E, OBJECTPOINT, 241), + CURLOPT(CURLOPT_STREAM_DEPENDS_E, CURLOPTTYPE_OBJECTPOINT, 241), /* Do not send any tftp option requests to the server */ - CINIT(TFTP_NO_OPTIONS, LONG, 242), + CURLOPT(CURLOPT_TFTP_NO_OPTIONS, CURLOPTTYPE_LONG, 242), /* Linked-list of host:port:connect-to-host:connect-to-port, overrides the URL's host:port (only for the network layer) */ - CINIT(CONNECT_TO, OBJECTPOINT, 243), + CURLOPT(CURLOPT_CONNECT_TO, CURLOPTTYPE_SLISTPOINT, 243), /* Set TCP Fast Open */ - CINIT(TCP_FASTOPEN, LONG, 244), + CURLOPT(CURLOPT_TCP_FASTOPEN, CURLOPTTYPE_LONG, 244), /* Continue to send data if the server responds early with an * HTTP status code >= 300 */ - CINIT(KEEP_SENDING_ON_ERROR, LONG, 245), + CURLOPT(CURLOPT_KEEP_SENDING_ON_ERROR, CURLOPTTYPE_LONG, 245), /* The CApath or CAfile used to 
validate the proxy certificate this option is used only if PROXY_SSL_VERIFYPEER is true */ - CINIT(PROXY_CAINFO, STRINGPOINT, 246), + CURLOPT(CURLOPT_PROXY_CAINFO, CURLOPTTYPE_STRINGPOINT, 246), /* The CApath directory used to validate the proxy certificate this option is used only if PROXY_SSL_VERIFYPEER is true */ - CINIT(PROXY_CAPATH, STRINGPOINT, 247), + CURLOPT(CURLOPT_PROXY_CAPATH, CURLOPTTYPE_STRINGPOINT, 247), /* Set if we should verify the proxy in ssl handshake, set 1 to verify. */ - CINIT(PROXY_SSL_VERIFYPEER, LONG, 248), + CURLOPT(CURLOPT_PROXY_SSL_VERIFYPEER, CURLOPTTYPE_LONG, 248), /* Set if we should verify the Common name from the proxy certificate in ssl * handshake, set 1 to check existence, 2 to ensure that it matches * the provided hostname. */ - CINIT(PROXY_SSL_VERIFYHOST, LONG, 249), + CURLOPT(CURLOPT_PROXY_SSL_VERIFYHOST, CURLOPTTYPE_LONG, 249), /* What version to specifically try to use for proxy. See CURL_SSLVERSION defines below. */ - CINIT(PROXY_SSLVERSION, LONG, 250), + CURLOPT(CURLOPT_PROXY_SSLVERSION, CURLOPTTYPE_VALUES, 250), /* Set a username for authenticated TLS for proxy */ - CINIT(PROXY_TLSAUTH_USERNAME, STRINGPOINT, 251), + CURLOPT(CURLOPT_PROXY_TLSAUTH_USERNAME, CURLOPTTYPE_STRINGPOINT, 251), /* Set a password for authenticated TLS for proxy */ - CINIT(PROXY_TLSAUTH_PASSWORD, STRINGPOINT, 252), + CURLOPT(CURLOPT_PROXY_TLSAUTH_PASSWORD, CURLOPTTYPE_STRINGPOINT, 252), /* Set authentication type for authenticated TLS for proxy */ - CINIT(PROXY_TLSAUTH_TYPE, STRINGPOINT, 253), + CURLOPT(CURLOPT_PROXY_TLSAUTH_TYPE, CURLOPTTYPE_STRINGPOINT, 253), /* name of the file keeping your private SSL-certificate for proxy */ - CINIT(PROXY_SSLCERT, STRINGPOINT, 254), + CURLOPT(CURLOPT_PROXY_SSLCERT, CURLOPTTYPE_STRINGPOINT, 254), /* type of the file keeping your SSL-certificate ("DER", "PEM", "ENG") for proxy */ - CINIT(PROXY_SSLCERTTYPE, STRINGPOINT, 255), + CURLOPT(CURLOPT_PROXY_SSLCERTTYPE, CURLOPTTYPE_STRINGPOINT, 255), /* name of the file 
keeping your private SSL-key for proxy */ - CINIT(PROXY_SSLKEY, STRINGPOINT, 256), + CURLOPT(CURLOPT_PROXY_SSLKEY, CURLOPTTYPE_STRINGPOINT, 256), /* type of the file keeping your private SSL-key ("DER", "PEM", "ENG") for proxy */ - CINIT(PROXY_SSLKEYTYPE, STRINGPOINT, 257), + CURLOPT(CURLOPT_PROXY_SSLKEYTYPE, CURLOPTTYPE_STRINGPOINT, 257), /* password for the SSL private key for proxy */ - CINIT(PROXY_KEYPASSWD, STRINGPOINT, 258), + CURLOPT(CURLOPT_PROXY_KEYPASSWD, CURLOPTTYPE_STRINGPOINT, 258), /* Specify which SSL ciphers to use for proxy */ - CINIT(PROXY_SSL_CIPHER_LIST, STRINGPOINT, 259), + CURLOPT(CURLOPT_PROXY_SSL_CIPHER_LIST, CURLOPTTYPE_STRINGPOINT, 259), /* CRL file for proxy */ - CINIT(PROXY_CRLFILE, STRINGPOINT, 260), + CURLOPT(CURLOPT_PROXY_CRLFILE, CURLOPTTYPE_STRINGPOINT, 260), /* Enable/disable specific SSL features with a bitmask for proxy, see CURLSSLOPT_* */ - CINIT(PROXY_SSL_OPTIONS, LONG, 261), + CURLOPT(CURLOPT_PROXY_SSL_OPTIONS, CURLOPTTYPE_LONG, 261), /* Name of pre proxy to use. 
*/ - CINIT(PRE_PROXY, STRINGPOINT, 262), + CURLOPT(CURLOPT_PRE_PROXY, CURLOPTTYPE_STRINGPOINT, 262), /* The public key in DER form used to validate the proxy public key this option is used only if PROXY_SSL_VERIFYPEER is true */ - CINIT(PROXY_PINNEDPUBLICKEY, STRINGPOINT, 263), + CURLOPT(CURLOPT_PROXY_PINNEDPUBLICKEY, CURLOPTTYPE_STRINGPOINT, 263), /* Path to an abstract Unix domain socket */ - CINIT(ABSTRACT_UNIX_SOCKET, STRINGPOINT, 264), + CURLOPT(CURLOPT_ABSTRACT_UNIX_SOCKET, CURLOPTTYPE_STRINGPOINT, 264), /* Suppress proxy CONNECT response headers from user callbacks */ - CINIT(SUPPRESS_CONNECT_HEADERS, LONG, 265), + CURLOPT(CURLOPT_SUPPRESS_CONNECT_HEADERS, CURLOPTTYPE_LONG, 265), /* The request target, instead of extracted from the URL */ - CINIT(REQUEST_TARGET, STRINGPOINT, 266), + CURLOPT(CURLOPT_REQUEST_TARGET, CURLOPTTYPE_STRINGPOINT, 266), /* bitmask of allowed auth methods for connections to SOCKS5 proxies */ - CINIT(SOCKS5_AUTH, LONG, 267), + CURLOPT(CURLOPT_SOCKS5_AUTH, CURLOPTTYPE_LONG, 267), /* Enable/disable SSH compression */ - CINIT(SSH_COMPRESSION, LONG, 268), + CURLOPT(CURLOPT_SSH_COMPRESSION, CURLOPTTYPE_LONG, 268), /* Post MIME data. */ - CINIT(MIMEPOST, OBJECTPOINT, 269), + CURLOPT(CURLOPT_MIMEPOST, CURLOPTTYPE_OBJECTPOINT, 269), /* Time to use with the CURLOPT_TIMECONDITION. Specified in number of seconds since 1 Jan 1970. */ - CINIT(TIMEVALUE_LARGE, OFF_T, 270), + CURLOPT(CURLOPT_TIMEVALUE_LARGE, CURLOPTTYPE_OFF_T, 270), /* Head start in milliseconds to give happy eyeballs. */ - CINIT(HAPPY_EYEBALLS_TIMEOUT_MS, LONG, 271), + CURLOPT(CURLOPT_HAPPY_EYEBALLS_TIMEOUT_MS, CURLOPTTYPE_LONG, 271), /* Function that will be called before a resolver request is made */ - CINIT(RESOLVER_START_FUNCTION, FUNCTIONPOINT, 272), + CURLOPT(CURLOPT_RESOLVER_START_FUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 272), /* User data to pass to the resolver start callback. 
*/ - CINIT(RESOLVER_START_DATA, OBJECTPOINT, 273), + CURLOPT(CURLOPT_RESOLVER_START_DATA, CURLOPTTYPE_CBPOINT, 273), /* send HAProxy PROXY protocol header? */ - CINIT(HAPROXYPROTOCOL, LONG, 274), + CURLOPT(CURLOPT_HAPROXYPROTOCOL, CURLOPTTYPE_LONG, 274), /* shuffle addresses before use when DNS returns multiple */ - CINIT(DNS_SHUFFLE_ADDRESSES, LONG, 275), + CURLOPT(CURLOPT_DNS_SHUFFLE_ADDRESSES, CURLOPTTYPE_LONG, 275), /* Specify which TLS 1.3 ciphers suites to use */ - CINIT(TLS13_CIPHERS, STRINGPOINT, 276), - CINIT(PROXY_TLS13_CIPHERS, STRINGPOINT, 277), + CURLOPT(CURLOPT_TLS13_CIPHERS, CURLOPTTYPE_STRINGPOINT, 276), + CURLOPT(CURLOPT_PROXY_TLS13_CIPHERS, CURLOPTTYPE_STRINGPOINT, 277), /* Disallow specifying username/login in URL. */ - CINIT(DISALLOW_USERNAME_IN_URL, LONG, 278), + CURLOPT(CURLOPT_DISALLOW_USERNAME_IN_URL, CURLOPTTYPE_LONG, 278), /* DNS-over-HTTPS URL */ - CINIT(DOH_URL, STRINGPOINT, 279), + CURLOPT(CURLOPT_DOH_URL, CURLOPTTYPE_STRINGPOINT, 279), /* Preferred buffer size to use for uploads */ - CINIT(UPLOAD_BUFFERSIZE, LONG, 280), + CURLOPT(CURLOPT_UPLOAD_BUFFERSIZE, CURLOPTTYPE_LONG, 280), /* Time in ms between connection upkeep calls for long-lived connections. */ - CINIT(UPKEEP_INTERVAL_MS, LONG, 281), + CURLOPT(CURLOPT_UPKEEP_INTERVAL_MS, CURLOPTTYPE_LONG, 281), /* Specify URL using CURL URL API. 
*/ - CINIT(CURLU, OBJECTPOINT, 282), + CURLOPT(CURLOPT_CURLU, CURLOPTTYPE_OBJECTPOINT, 282), /* add trailing data just after no more data is available */ - CINIT(TRAILERFUNCTION, FUNCTIONPOINT, 283), + CURLOPT(CURLOPT_TRAILERFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 283), /* pointer to be passed to HTTP_TRAILER_FUNCTION */ - CINIT(TRAILERDATA, OBJECTPOINT, 284), + CURLOPT(CURLOPT_TRAILERDATA, CURLOPTTYPE_CBPOINT, 284), /* set this to 1L to allow HTTP/0.9 responses or 0L to disallow */ - CINIT(HTTP09_ALLOWED, LONG, 285), + CURLOPT(CURLOPT_HTTP09_ALLOWED, CURLOPTTYPE_LONG, 285), /* alt-svc control bitmask */ - CINIT(ALTSVC_CTRL, LONG, 286), + CURLOPT(CURLOPT_ALTSVC_CTRL, CURLOPTTYPE_LONG, 286), /* alt-svc cache file name to possibly read from/write to */ - CINIT(ALTSVC, STRINGPOINT, 287), + CURLOPT(CURLOPT_ALTSVC, CURLOPTTYPE_STRINGPOINT, 287), + + /* maximum age (idle time) of a connection to consider it for reuse + * (in seconds) */ + CURLOPT(CURLOPT_MAXAGE_CONN, CURLOPTTYPE_LONG, 288), + + /* SASL authorization identity */ + CURLOPT(CURLOPT_SASL_AUTHZID, CURLOPTTYPE_STRINGPOINT, 289), + + /* allow RCPT TO command to fail for some recipients */ + CURLOPT(CURLOPT_MAIL_RCPT_ALLLOWFAILS, CURLOPTTYPE_LONG, 290), + + /* the private SSL-certificate as a "blob" */ + CURLOPT(CURLOPT_SSLCERT_BLOB, CURLOPTTYPE_BLOB, 291), + CURLOPT(CURLOPT_SSLKEY_BLOB, CURLOPTTYPE_BLOB, 292), + CURLOPT(CURLOPT_PROXY_SSLCERT_BLOB, CURLOPTTYPE_BLOB, 293), + CURLOPT(CURLOPT_PROXY_SSLKEY_BLOB, CURLOPTTYPE_BLOB, 294), + CURLOPT(CURLOPT_ISSUERCERT_BLOB, CURLOPTTYPE_BLOB, 295), + + /* Issuer certificate for proxy */ + CURLOPT(CURLOPT_PROXY_ISSUERCERT, CURLOPTTYPE_STRINGPOINT, 296), + CURLOPT(CURLOPT_PROXY_ISSUERCERT_BLOB, CURLOPTTYPE_BLOB, 297), + + /* the EC curves requested by the TLS client (RFC 8422, 5.1); + * OpenSSL support via 'set_groups'/'set_curves': + * https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set1_groups.html + */ + CURLOPT(CURLOPT_SSL_EC_CURVES, CURLOPTTYPE_STRINGPOINT, 298), + + 
/* HSTS bitmask */ + CURLOPT(CURLOPT_HSTS_CTRL, CURLOPTTYPE_LONG, 299), + /* HSTS file name */ + CURLOPT(CURLOPT_HSTS, CURLOPTTYPE_STRINGPOINT, 300), + + /* HSTS read callback */ + CURLOPT(CURLOPT_HSTSREADFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 301), + CURLOPT(CURLOPT_HSTSREADDATA, CURLOPTTYPE_CBPOINT, 302), + + /* HSTS write callback */ + CURLOPT(CURLOPT_HSTSWRITEFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 303), + CURLOPT(CURLOPT_HSTSWRITEDATA, CURLOPTTYPE_CBPOINT, 304), + + /* Parameters for V4 signature */ + CURLOPT(CURLOPT_AWS_SIGV4, CURLOPTTYPE_STRINGPOINT, 305), + + /* Same as CURLOPT_SSL_VERIFYPEER but for DoH (DNS-over-HTTPS) servers. */ + CURLOPT(CURLOPT_DOH_SSL_VERIFYPEER, CURLOPTTYPE_LONG, 306), + + /* Same as CURLOPT_SSL_VERIFYHOST but for DoH (DNS-over-HTTPS) servers. */ + CURLOPT(CURLOPT_DOH_SSL_VERIFYHOST, CURLOPTTYPE_LONG, 307), + + /* Same as CURLOPT_SSL_VERIFYSTATUS but for DoH (DNS-over-HTTPS) servers. */ + CURLOPT(CURLOPT_DOH_SSL_VERIFYSTATUS, CURLOPTTYPE_LONG, 308), + + /* The CA certificates as "blob" used to validate the peer certificate + this option is used only if SSL_VERIFYPEER is true */ + CURLOPT(CURLOPT_CAINFO_BLOB, CURLOPTTYPE_BLOB, 309), + + /* The CA certificates as "blob" used to validate the proxy certificate + this option is used only if PROXY_SSL_VERIFYPEER is true */ + CURLOPT(CURLOPT_PROXY_CAINFO_BLOB, CURLOPTTYPE_BLOB, 310), + + /* used by scp/sftp to verify the host's public key */ + CURLOPT(CURLOPT_SSH_HOST_PUBLIC_KEY_SHA256, CURLOPTTYPE_STRINGPOINT, 311), + + /* Function that will be called immediately before the initial request + is made on a connection (after any protocol negotiation step). 
*/ + CURLOPT(CURLOPT_PREREQFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 312), + + /* Data passed to the CURLOPT_PREREQFUNCTION callback */ + CURLOPT(CURLOPT_PREREQDATA, CURLOPTTYPE_CBPOINT, 313), + + /* maximum age (since creation) of a connection to consider it for reuse + * (in seconds) */ + CURLOPT(CURLOPT_MAXLIFETIME_CONN, CURLOPTTYPE_LONG, 314), + + /* Set MIME option flags. */ + CURLOPT(CURLOPT_MIME_OPTIONS, CURLOPTTYPE_LONG, 315), + + /* set the SSH host key callback, must point to a curl_sshkeycallback + function */ + CURLOPT(CURLOPT_SSH_HOSTKEYFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 316), + + /* set the SSH host key callback custom pointer */ + CURLOPT(CURLOPT_SSH_HOSTKEYDATA, CURLOPTTYPE_CBPOINT, 317), + + /* specify which protocols that are allowed to be used for the transfer, + which thus helps the app which takes URLs from users or other external + inputs and want to restrict what protocol(s) to deal with. Defaults to + all built-in protocols. */ + CURLOPT(CURLOPT_PROTOCOLS_STR, CURLOPTTYPE_STRINGPOINT, 318), + + /* specify which protocols that libcurl is allowed to follow directs to */ + CURLOPT(CURLOPT_REDIR_PROTOCOLS_STR, CURLOPTTYPE_STRINGPOINT, 319), + + /* websockets options */ + CURLOPT(CURLOPT_WS_OPTIONS, CURLOPTTYPE_LONG, 320), + + /* CA cache timeout */ + CURLOPT(CURLOPT_CA_CACHE_TIMEOUT, CURLOPTTYPE_LONG, 321), + + /* Can leak things, gonna exit() soon */ + CURLOPT(CURLOPT_QUICK_EXIT, CURLOPTTYPE_LONG, 322), CURLOPT_LASTENTRY /* the last unused */ } CURLoption; @@ -1943,6 +2228,9 @@ typedef enum { #define CURLOPT_SSLCERTPASSWD CURLOPT_KEYPASSWD #define CURLOPT_KRB4LEVEL CURLOPT_KRBLEVEL +/* */ +#define CURLOPT_FTP_RESPONSE_TIMEOUT CURLOPT_SERVER_RESPONSE_TIMEOUT + #else /* This is set if CURL_NO_OLDIES is defined at compile-time */ #undef CURLOPT_DNS_USE_GLOBAL_CACHE /* soon obsolete */ @@ -1952,12 +2240,12 @@ typedef enum { /* Below here follows defines for the CURLOPT_IPRESOLVE option. 
If a host name resolves addresses using more than one IP protocol version, this option might be handy to force libcurl to use a specific IP version. */ -#define CURL_IPRESOLVE_WHATEVER 0 /* default, resolves addresses to all IP +#define CURL_IPRESOLVE_WHATEVER 0 /* default, uses addresses to all IP versions that your system allows */ -#define CURL_IPRESOLVE_V4 1 /* resolve to IPv4 addresses */ -#define CURL_IPRESOLVE_V6 2 /* resolve to IPv6 addresses */ +#define CURL_IPRESOLVE_V4 1 /* uses only IPv4 addresses/connections */ +#define CURL_IPRESOLVE_V6 2 /* uses only IPv6 addresses/connections */ - /* three convenient "aliases" that follow the name scheme better */ + /* Convenient "aliases" */ #define CURLOPT_RTSPHEADER CURLOPT_HTTPHEADER /* These enums are for use with the CURLOPT_HTTP_VERSION option. */ @@ -1971,6 +2259,12 @@ enum { CURL_HTTP_VERSION_2TLS, /* use version 2 for HTTPS, version 1.1 for HTTP */ CURL_HTTP_VERSION_2_PRIOR_KNOWLEDGE, /* please use HTTP 2 without HTTP/1.1 Upgrade */ + CURL_HTTP_VERSION_3 = 30, /* Use HTTP/3, fallback to HTTP/2 or HTTP/1 if + needed. For HTTPS only. For HTTP, this option + makes libcurl return error. */ + CURL_HTTP_VERSION_3ONLY = 31, /* Use HTTP/3 without fallback. For HTTPS + only. For HTTP, this makes libcurl + return error. */ CURL_HTTP_VERSION_LAST /* *ILLEGAL* http version */ }; @@ -2064,7 +2358,7 @@ typedef enum { CURL_TIMECOND_LAST } curl_TimeCond; -/* Special size_t value signaling a zero-terminated string. */ +/* Special size_t value signaling a null-terminated string. */ #define CURL_ZERO_TERMINATED ((size_t) -1) /* curl_strequal() and curl_strnequal() are subject for removal in a future @@ -2073,8 +2367,11 @@ CURL_EXTERN int curl_strequal(const char *s1, const char *s2); CURL_EXTERN int curl_strnequal(const char *s1, const char *s2, size_t n); /* Mime/form handling support. */ -typedef struct curl_mime_s curl_mime; /* Mime context. */ -typedef struct curl_mimepart_s curl_mimepart; /* Mime part context. 
*/ +typedef struct curl_mime curl_mime; /* Mime context. */ +typedef struct curl_mimepart curl_mimepart; /* Mime part context. */ + +/* CURLMIMEOPT_ defines are for the CURLOPT_MIME_OPTIONS option. */ +#define CURLMIMEOPT_FORMESCAPE (1<<0) /* Use backslash-escaping for forms. */ /* * NAME curl_mime_init() @@ -2197,52 +2494,37 @@ CURL_EXTERN CURLcode curl_mime_headers(curl_mimepart *part, struct curl_slist *headers, int take_ownership); -/* Old form API. */ -/* name is uppercase CURLFORM_ */ -#ifdef CFINIT -#undef CFINIT -#endif - -#ifdef CURL_ISOCPP -#define CFINIT(name) CURLFORM_ ## name -#else -/* The macro "##" is ISO C, we assume pre-ISO C doesn't support it. */ -#define CFINIT(name) CURLFORM_/**/name -#endif - typedef enum { - CFINIT(NOTHING), /********* the first one is unused ************/ + /********* the first one is unused ************/ + CURLFORM_NOTHING CURL_DEPRECATED(7.56.0, ""), + CURLFORM_COPYNAME CURL_DEPRECATED(7.56.0, "Use curl_mime_name()"), + CURLFORM_PTRNAME CURL_DEPRECATED(7.56.0, "Use curl_mime_name()"), + CURLFORM_NAMELENGTH CURL_DEPRECATED(7.56.0, ""), + CURLFORM_COPYCONTENTS CURL_DEPRECATED(7.56.0, "Use curl_mime_data()"), + CURLFORM_PTRCONTENTS CURL_DEPRECATED(7.56.0, "Use curl_mime_data()"), + CURLFORM_CONTENTSLENGTH CURL_DEPRECATED(7.56.0, "Use curl_mime_data()"), + CURLFORM_FILECONTENT CURL_DEPRECATED(7.56.0, "Use curl_mime_data_cb()"), + CURLFORM_ARRAY CURL_DEPRECATED(7.56.0, ""), + CURLFORM_OBSOLETE, + CURLFORM_FILE CURL_DEPRECATED(7.56.0, "Use curl_mime_filedata()"), - /* */ - CFINIT(COPYNAME), - CFINIT(PTRNAME), - CFINIT(NAMELENGTH), - CFINIT(COPYCONTENTS), - CFINIT(PTRCONTENTS), - CFINIT(CONTENTSLENGTH), - CFINIT(FILECONTENT), - CFINIT(ARRAY), - CFINIT(OBSOLETE), - CFINIT(FILE), + CURLFORM_BUFFER CURL_DEPRECATED(7.56.0, "Use curl_mime_filename()"), + CURLFORM_BUFFERPTR CURL_DEPRECATED(7.56.0, "Use curl_mime_data()"), + CURLFORM_BUFFERLENGTH CURL_DEPRECATED(7.56.0, "Use curl_mime_data()"), - CFINIT(BUFFER), - CFINIT(BUFFERPTR), - 
CFINIT(BUFFERLENGTH), + CURLFORM_CONTENTTYPE CURL_DEPRECATED(7.56.0, "Use curl_mime_type()"), + CURLFORM_CONTENTHEADER CURL_DEPRECATED(7.56.0, "Use curl_mime_headers()"), + CURLFORM_FILENAME CURL_DEPRECATED(7.56.0, "Use curl_mime_filename()"), + CURLFORM_END, + CURLFORM_OBSOLETE2, - CFINIT(CONTENTTYPE), - CFINIT(CONTENTHEADER), - CFINIT(FILENAME), - CFINIT(END), - CFINIT(OBSOLETE2), - - CFINIT(STREAM), - CFINIT(CONTENTLEN), /* added in 7.46.0, provide a curl_off_t length */ + CURLFORM_STREAM CURL_DEPRECATED(7.56.0, "Use curl_mime_data_cb()"), + CURLFORM_CONTENTLEN /* added in 7.46.0, provide a curl_off_t length */ + CURL_DEPRECATED(7.56.0, "Use curl_mime_data()"), CURLFORM_LASTENTRY /* the last unused */ } CURLformoption; -#undef CFINIT /* done */ - /* structure to be used as parameter for CURLFORM_ARRAY */ struct curl_forms { CURLformoption option; @@ -2266,15 +2548,16 @@ struct curl_forms { * ***************************************************************************/ typedef enum { - CURL_FORMADD_OK, /* first, no error */ + CURL_FORMADD_OK CURL_DEPRECATED(7.56.0, ""), /* 1st, no error */ - CURL_FORMADD_MEMORY, - CURL_FORMADD_OPTION_TWICE, - CURL_FORMADD_NULL, - CURL_FORMADD_UNKNOWN_OPTION, - CURL_FORMADD_INCOMPLETE, - CURL_FORMADD_ILLEGAL_ARRAY, - CURL_FORMADD_DISABLED, /* libcurl was built with this disabled */ + CURL_FORMADD_MEMORY CURL_DEPRECATED(7.56.0, ""), + CURL_FORMADD_OPTION_TWICE CURL_DEPRECATED(7.56.0, ""), + CURL_FORMADD_NULL CURL_DEPRECATED(7.56.0, ""), + CURL_FORMADD_UNKNOWN_OPTION CURL_DEPRECATED(7.56.0, ""), + CURL_FORMADD_INCOMPLETE CURL_DEPRECATED(7.56.0, ""), + CURL_FORMADD_ILLEGAL_ARRAY CURL_DEPRECATED(7.56.0, ""), + /* libcurl was built with form api disabled */ + CURL_FORMADD_DISABLED CURL_DEPRECATED(7.56.0, ""), CURL_FORMADD_LAST /* last */ } CURLFORMcode; @@ -2288,9 +2571,10 @@ typedef enum { * adds one part that together construct a full post. Then use * CURLOPT_HTTPPOST to send it off to libcurl. 
*/ -CURL_EXTERN CURLFORMcode curl_formadd(struct curl_httppost **httppost, - struct curl_httppost **last_post, - ...); +CURL_EXTERN CURLFORMcode CURL_DEPRECATED(7.56.0, "Use curl_mime_init()") +curl_formadd(struct curl_httppost **httppost, + struct curl_httppost **last_post, + ...); /* * callback function for curl_formget() @@ -2313,8 +2597,9 @@ typedef size_t (*curl_formget_callback)(void *arg, const char *buf, * the curl_formget_callback function. * Returns 0 on success. */ -CURL_EXTERN int curl_formget(struct curl_httppost *form, void *arg, - curl_formget_callback append); +CURL_EXTERN int CURL_DEPRECATED(7.56.0, "") +curl_formget(struct curl_httppost *form, void *arg, + curl_formget_callback append); /* * NAME curl_formfree() * @@ -2322,7 +2607,8 @@ CURL_EXTERN int curl_formget(struct curl_httppost *form, void *arg, * * Free a multipart formpost previously built with curl_formadd(). */ -CURL_EXTERN void curl_formfree(struct curl_httppost *form); +CURL_EXTERN void CURL_DEPRECATED(7.56.0, "Use curl_mime_free()") +curl_formfree(struct curl_httppost *form); /* * NAME curl_getenv() @@ -2398,8 +2684,10 @@ CURL_EXTERN void curl_free(void *p); * * curl_global_init() should be invoked exactly once for each application that * uses libcurl and before any call of other libcurl functions. - * - * This function is not thread-safe! + + * This function is thread-safe if CURL_VERSION_THREADSAFE is set in the + * curl_version_info_data.features flag (fetch by curl_version_info()). + */ CURL_EXTERN CURLcode curl_global_init(long flags); @@ -2413,7 +2701,7 @@ CURL_EXTERN CURLcode curl_global_init(long flags); * initialize libcurl and set user defined memory management callback * functions. Users can implement memory management routines to check for * memory leaks, check for mis-use of the curl library etc. 
User registered - * callback routines with be invoked by this library instead of the system + * callback routines will be invoked by this library instead of the system * memory management routines like malloc, free etc. */ CURL_EXTERN CURLcode curl_global_init_mem(long flags, @@ -2465,10 +2753,11 @@ struct curl_slist { * subsequent attempt to change it will result in a CURLSSLSET_TOO_LATE. */ -typedef struct { +struct curl_ssl_backend { curl_sslbackend id; const char *name; -} curl_ssl_backend; +}; +typedef struct curl_ssl_backend curl_ssl_backend; typedef enum { CURLSSLSET_OK = 0, @@ -2488,8 +2777,8 @@ CURL_EXTERN CURLsslset curl_global_sslset(curl_sslbackend id, const char *name, * Appends a string to a linked list. If no list exists, it will be created * first. Returns the new list, after appending. */ -CURL_EXTERN struct curl_slist *curl_slist_append(struct curl_slist *, - const char *); +CURL_EXTERN struct curl_slist *curl_slist_append(struct curl_slist *list, + const char *data); /* * NAME curl_slist_free_all() @@ -2498,7 +2787,7 @@ CURL_EXTERN struct curl_slist *curl_slist_append(struct curl_slist *, * * free a previously built curl_slist. */ -CURL_EXTERN void curl_slist_free_all(struct curl_slist *); +CURL_EXTERN void curl_slist_free_all(struct curl_slist *list); /* * NAME curl_getdate() @@ -2511,8 +2800,8 @@ CURL_EXTERN void curl_slist_free_all(struct curl_slist *); */ CURL_EXTERN time_t curl_getdate(const char *p, const time_t *unused); -/* info about the certificate chain, only for OpenSSL builds. Asked - for with CURLOPT_CERTINFO / CURLINFO_CERTINFO */ +/* info about the certificate chain, only for OpenSSL, GnuTLS, Schannel, NSS + and GSKit builds. 
Asked for with CURLOPT_CERTINFO / CURLINFO_CERTINFO */ struct curl_certinfo { int num_of_certs; /* number of certificates with information */ struct curl_slist **certinfo; /* for each index in this array, there's a @@ -2546,22 +2835,35 @@ typedef enum { CURLINFO_NAMELOOKUP_TIME = CURLINFO_DOUBLE + 4, CURLINFO_CONNECT_TIME = CURLINFO_DOUBLE + 5, CURLINFO_PRETRANSFER_TIME = CURLINFO_DOUBLE + 6, - CURLINFO_SIZE_UPLOAD = CURLINFO_DOUBLE + 7, + CURLINFO_SIZE_UPLOAD CURL_DEPRECATED(7.55.0, "Use CURLINFO_SIZE_UPLOAD_T") + = CURLINFO_DOUBLE + 7, CURLINFO_SIZE_UPLOAD_T = CURLINFO_OFF_T + 7, - CURLINFO_SIZE_DOWNLOAD = CURLINFO_DOUBLE + 8, + CURLINFO_SIZE_DOWNLOAD + CURL_DEPRECATED(7.55.0, "Use CURLINFO_SIZE_DOWNLOAD_T") + = CURLINFO_DOUBLE + 8, CURLINFO_SIZE_DOWNLOAD_T = CURLINFO_OFF_T + 8, - CURLINFO_SPEED_DOWNLOAD = CURLINFO_DOUBLE + 9, + CURLINFO_SPEED_DOWNLOAD + CURL_DEPRECATED(7.55.0, "Use CURLINFO_SPEED_DOWNLOAD_T") + = CURLINFO_DOUBLE + 9, CURLINFO_SPEED_DOWNLOAD_T = CURLINFO_OFF_T + 9, - CURLINFO_SPEED_UPLOAD = CURLINFO_DOUBLE + 10, + CURLINFO_SPEED_UPLOAD + CURL_DEPRECATED(7.55.0, "Use CURLINFO_SPEED_UPLOAD_T") + = CURLINFO_DOUBLE + 10, CURLINFO_SPEED_UPLOAD_T = CURLINFO_OFF_T + 10, CURLINFO_HEADER_SIZE = CURLINFO_LONG + 11, CURLINFO_REQUEST_SIZE = CURLINFO_LONG + 12, CURLINFO_SSL_VERIFYRESULT = CURLINFO_LONG + 13, CURLINFO_FILETIME = CURLINFO_LONG + 14, CURLINFO_FILETIME_T = CURLINFO_OFF_T + 14, - CURLINFO_CONTENT_LENGTH_DOWNLOAD = CURLINFO_DOUBLE + 15, + CURLINFO_CONTENT_LENGTH_DOWNLOAD + CURL_DEPRECATED(7.55.0, + "Use CURLINFO_CONTENT_LENGTH_DOWNLOAD_T") + = CURLINFO_DOUBLE + 15, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T = CURLINFO_OFF_T + 15, - CURLINFO_CONTENT_LENGTH_UPLOAD = CURLINFO_DOUBLE + 16, + CURLINFO_CONTENT_LENGTH_UPLOAD + CURL_DEPRECATED(7.55.0, + "Use CURLINFO_CONTENT_LENGTH_UPLOAD_T") + = CURLINFO_DOUBLE + 16, CURLINFO_CONTENT_LENGTH_UPLOAD_T = CURLINFO_OFF_T + 16, CURLINFO_STARTTRANSFER_TIME = CURLINFO_DOUBLE + 17, CURLINFO_CONTENT_TYPE = CURLINFO_STRING 
+ 18, @@ -2575,7 +2877,8 @@ typedef enum { CURLINFO_NUM_CONNECTS = CURLINFO_LONG + 26, CURLINFO_SSL_ENGINES = CURLINFO_SLIST + 27, CURLINFO_COOKIELIST = CURLINFO_SLIST + 28, - CURLINFO_LASTSOCKET = CURLINFO_LONG + 29, + CURLINFO_LASTSOCKET CURL_DEPRECATED(7.45.0, "Use CURLINFO_ACTIVESOCKET") + = CURLINFO_LONG + 29, CURLINFO_FTP_ENTRY_PATH = CURLINFO_STRING + 30, CURLINFO_REDIRECT_URL = CURLINFO_STRING + 31, CURLINFO_PRIMARY_IP = CURLINFO_STRING + 32, @@ -2589,17 +2892,15 @@ typedef enum { CURLINFO_PRIMARY_PORT = CURLINFO_LONG + 40, CURLINFO_LOCAL_IP = CURLINFO_STRING + 41, CURLINFO_LOCAL_PORT = CURLINFO_LONG + 42, - CURLINFO_TLS_SESSION = CURLINFO_PTR + 43, + CURLINFO_TLS_SESSION CURL_DEPRECATED(7.48.0, "Use CURLINFO_TLS_SSL_PTR") + = CURLINFO_PTR + 43, CURLINFO_ACTIVESOCKET = CURLINFO_SOCKET + 44, CURLINFO_TLS_SSL_PTR = CURLINFO_PTR + 45, CURLINFO_HTTP_VERSION = CURLINFO_LONG + 46, CURLINFO_PROXY_SSL_VERIFYRESULT = CURLINFO_LONG + 47, - CURLINFO_PROTOCOL = CURLINFO_LONG + 48, + CURLINFO_PROTOCOL CURL_DEPRECATED(7.85.0, "Use CURLINFO_SCHEME") + = CURLINFO_LONG + 48, CURLINFO_SCHEME = CURLINFO_STRING + 49, - /* Fill in new entries below here! 
*/ - - /* Preferably these would be defined conditionally based on the - sizeof curl_off_t being 64-bits */ CURLINFO_TOTAL_TIME_T = CURLINFO_OFF_T + 50, CURLINFO_NAMELOOKUP_TIME_T = CURLINFO_OFF_T + 51, CURLINFO_CONNECT_TIME_T = CURLINFO_OFF_T + 52, @@ -2607,8 +2908,13 @@ typedef enum { CURLINFO_STARTTRANSFER_TIME_T = CURLINFO_OFF_T + 54, CURLINFO_REDIRECT_TIME_T = CURLINFO_OFF_T + 55, CURLINFO_APPCONNECT_TIME_T = CURLINFO_OFF_T + 56, - - CURLINFO_LASTONE = 56 + CURLINFO_RETRY_AFTER = CURLINFO_OFF_T + 57, + CURLINFO_EFFECTIVE_METHOD = CURLINFO_STRING + 58, + CURLINFO_PROXY_ERROR = CURLINFO_LONG + 59, + CURLINFO_REFERER = CURLINFO_STRING + 60, + CURLINFO_CAINFO = CURLINFO_STRING + 61, + CURLINFO_CAPATH = CURLINFO_STRING + 62, + CURLINFO_LASTONE = 62 } CURLINFO; /* CURLINFO_RESPONSE_CODE is the new name for the option previously known as @@ -2627,7 +2933,7 @@ typedef enum { CURLCLOSEPOLICY_LAST /* last, never use this */ } curl_closepolicy; -#define CURL_GLOBAL_SSL (1<<0) /* no purpose since since 7.57.0 */ +#define CURL_GLOBAL_SSL (1<<0) /* no purpose since 7.57.0 */ #define CURL_GLOBAL_WIN32 (1<<1) #define CURL_GLOBAL_ALL (CURL_GLOBAL_SSL|CURL_GLOBAL_WIN32) #define CURL_GLOBAL_NOTHING 0 @@ -2652,6 +2958,7 @@ typedef enum { CURL_LOCK_DATA_SSL_SESSION, CURL_LOCK_DATA_CONNECT, CURL_LOCK_DATA_PSL, + CURL_LOCK_DATA_HSTS, CURL_LOCK_DATA_LAST } curl_lock_data; @@ -2694,8 +3001,9 @@ typedef enum { } CURLSHoption; CURL_EXTERN CURLSH *curl_share_init(void); -CURL_EXTERN CURLSHcode curl_share_setopt(CURLSH *, CURLSHoption option, ...); -CURL_EXTERN CURLSHcode curl_share_cleanup(CURLSH *); +CURL_EXTERN CURLSHcode curl_share_setopt(CURLSH *share, CURLSHoption option, + ...); +CURL_EXTERN CURLSHcode curl_share_cleanup(CURLSH *share); /**************************************************************************** * Structures for querying information about the curl library at runtime. 
@@ -2707,6 +3015,12 @@ typedef enum { CURLVERSION_THIRD, CURLVERSION_FOURTH, CURLVERSION_FIFTH, + CURLVERSION_SIXTH, + CURLVERSION_SEVENTH, + CURLVERSION_EIGHTH, + CURLVERSION_NINTH, + CURLVERSION_TENTH, + CURLVERSION_ELEVENTH, CURLVERSION_LAST /* never actually use this */ } CURLversion; @@ -2715,9 +3029,9 @@ typedef enum { meant to be a built-in version number for what kind of struct the caller expects. If the struct ever changes, we redefine the NOW to another enum from above. */ -#define CURLVERSION_NOW CURLVERSION_FIFTH +#define CURLVERSION_NOW CURLVERSION_ELEVENTH -typedef struct { +struct curl_version_info_data { CURLversion age; /* age of the returned struct */ const char *version; /* LIBCURL_VERSION */ unsigned int version_num; /* LIBCURL_VERSION_NUM */ @@ -2744,12 +3058,39 @@ typedef struct { const char *libssh_version; /* human readable string */ /* These fields were added in CURLVERSION_FIFTH */ - unsigned int brotli_ver_num; /* Numeric Brotli version (MAJOR << 24) | (MINOR << 12) | PATCH */ const char *brotli_version; /* human readable string. */ -} curl_version_info_data; + /* These fields were added in CURLVERSION_SIXTH */ + unsigned int nghttp2_ver_num; /* Numeric nghttp2 version + (MAJOR << 16) | (MINOR << 8) | PATCH */ + const char *nghttp2_version; /* human readable string. */ + const char *quic_version; /* human readable quic (+ HTTP/3) library + + version or NULL */ + + /* These fields were added in CURLVERSION_SEVENTH */ + const char *cainfo; /* the built-in default CURLOPT_CAINFO, might + be NULL */ + const char *capath; /* the built-in default CURLOPT_CAPATH, might + be NULL */ + + /* These fields were added in CURLVERSION_EIGHTH */ + unsigned int zstd_ver_num; /* Numeric Zstd version + (MAJOR << 24) | (MINOR << 12) | PATCH */ + const char *zstd_version; /* human readable string. */ + + /* These fields were added in CURLVERSION_NINTH */ + const char *hyper_version; /* human readable string. 
*/ + + /* These fields were added in CURLVERSION_TENTH */ + const char *gsasl_version; /* human readable string. */ + + /* These fields were added in CURLVERSION_ELEVENTH */ + /* feature_names is terminated by an entry with a NULL feature name */ + const char * const *feature_names; +}; +typedef struct curl_version_info_data curl_version_info_data; #define CURL_VERSION_IPV6 (1<<0) /* IPv6-enabled */ #define CURL_VERSION_KERBEROS4 (1<<1) /* Kerberos V4 auth is supported @@ -2781,6 +3122,12 @@ typedef struct { #define CURL_VERSION_MULTI_SSL (1<<22) /* Multiple SSL backends available */ #define CURL_VERSION_BROTLI (1<<23) /* Brotli features are present. */ #define CURL_VERSION_ALTSVC (1<<24) /* Alt-Svc handling built-in */ +#define CURL_VERSION_HTTP3 (1<<25) /* HTTP3 support built-in */ +#define CURL_VERSION_ZSTD (1<<26) /* zstd features are present */ +#define CURL_VERSION_UNICODE (1<<27) /* Unicode support on Windows */ +#define CURL_VERSION_HSTS (1<<28) /* HSTS is supported */ +#define CURL_VERSION_GSASL (1<<29) /* libgsasl is supported */ +#define CURL_VERSION_THREADSAFE (1<<30) /* libcurl API is thread-safe */ /* * NAME curl_version_info() @@ -2835,7 +3182,7 @@ CURL_EXTERN CURLcode curl_easy_pause(CURL *handle, int bitmask); #define CURLPAUSE_CONT (CURLPAUSE_RECV_CONT|CURLPAUSE_SEND_CONT) #ifdef __cplusplus -} +} /* end of extern "C" */ #endif /* unfortunately, the easy.h and multi.h include files need options and info @@ -2843,6 +3190,9 @@ CURL_EXTERN CURLcode curl_easy_pause(CURL *handle, int bitmask); #include "easy.h" /* nothing in curl is fun without the easy stuff */ #include "multi.h" #include "urlapi.h" +#include "options.h" +#include "header.h" +#include "websockets.h" /* the typechecker doesn't work in C++ (yet) */ #if defined(__GNUC__) && defined(__GNUC_MINOR__) && \ @@ -2859,6 +3209,6 @@ CURL_EXTERN CURLcode curl_easy_pause(CURL *handle, int bitmask); #define curl_share_setopt(share,opt,param) curl_share_setopt(share,opt,param) #define 
curl_multi_setopt(handle,opt,param) curl_multi_setopt(handle,opt,param) #endif /* __STDC__ >= 1 */ -#endif /* gcc >= 4.3 && !__cplusplus */ +#endif /* gcc >= 4.3 && !__cplusplus && !CURL_DISABLE_TYPECHECK */ -#endif /* __CURL_CURL_H */ +#endif /* CURLINC_CURL_H */ diff --git a/Source/ThirdParty/curl/curlver.h b/Source/ThirdParty/curl/curlver.h index 9a4b9b02d..d59537120 100644 --- a/Source/ThirdParty/curl/curlver.h +++ b/Source/ThirdParty/curl/curlver.h @@ -1,5 +1,5 @@ -#ifndef __CURL_CURLVER_H -#define __CURL_CURLVER_H +#ifndef CURLINC_CURLVER_H +#define CURLINC_CURLVER_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2019, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,26 +20,28 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ /* This header file contains nothing but libcurl version info, generated by a script at release-time. This was made its own header file in 7.11.2 */ /* This is the global package copyright */ -#define LIBCURL_COPYRIGHT "1996 - 2019 Daniel Stenberg, ." +#define LIBCURL_COPYRIGHT "Daniel Stenberg, ." 
/* This is the version number of the libcurl package from which this header file origins: */ -#define LIBCURL_VERSION "7.64.1" +#define LIBCURL_VERSION "7.88.1" /* The numeric version number is also available "in parts" by using these defines: */ #define LIBCURL_VERSION_MAJOR 7 -#define LIBCURL_VERSION_MINOR 64 +#define LIBCURL_VERSION_MINOR 88 #define LIBCURL_VERSION_PATCH 1 /* This is the numeric version of the libcurl version number, meant for easier - parsing and comparions by programs. The LIBCURL_VERSION_NUM define will + parsing and comparisons by programs. The LIBCURL_VERSION_NUM define will always follow this syntax: 0xXXYYZZ @@ -57,7 +59,7 @@ CURL_VERSION_BITS() macro since curl's own configure script greps for it and needs it to contain the full number. */ -#define LIBCURL_VERSION_NUM 0x074001 +#define LIBCURL_VERSION_NUM 0x075801 /* * This is the date and time when the full source package was created. The @@ -68,10 +70,10 @@ * * "2007-11-23" */ -#define LIBCURL_TIMESTAMP "2019-03-27" +#define LIBCURL_TIMESTAMP "2023-02-20" -#define CURL_VERSION_BITS(x,y,z) ((x)<<16|(y)<<8|z) +#define CURL_VERSION_BITS(x,y,z) ((x)<<16|(y)<<8|(z)) #define CURL_AT_LEAST_VERSION(x,y,z) \ (LIBCURL_VERSION_NUM >= CURL_VERSION_BITS(x, y, z)) -#endif /* __CURL_CURLVER_H */ +#endif /* CURLINC_CURLVER_H */ diff --git a/Source/ThirdParty/curl/easy.h b/Source/ThirdParty/curl/easy.h index f42a8a969..394668a8f 100644 --- a/Source/ThirdParty/curl/easy.h +++ b/Source/ThirdParty/curl/easy.h @@ -1,5 +1,5 @@ -#ifndef __CURL_EASY_H -#define __CURL_EASY_H +#ifndef CURLINC_EASY_H +#define CURLINC_EASY_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2016, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. 
* * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,11 +20,24 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ #ifdef __cplusplus extern "C" { #endif +/* Flag bits in the curl_blob struct: */ +#define CURL_BLOB_COPY 1 /* tell libcurl to copy the data */ +#define CURL_BLOB_NOCOPY 0 /* tell libcurl to NOT copy the data */ + +struct curl_blob { + void *data; + size_t len; + unsigned int flags; /* bit 0 is defined, the rest are reserved and should be + left zeroes */ +}; + CURL_EXTERN CURL *curl_easy_init(void); CURL_EXTERN CURLcode curl_easy_setopt(CURL *curl, CURLoption option, ...); CURL_EXTERN CURLcode curl_easy_perform(CURL *curl); @@ -106,7 +119,7 @@ CURL_EXTERN CURLcode curl_easy_send(CURL *curl, const void *buffer, CURL_EXTERN CURLcode curl_easy_upkeep(CURL *curl); #ifdef __cplusplus -} +} /* end of extern "C" */ #endif #endif diff --git a/Source/ThirdParty/curl/header.h b/Source/ThirdParty/curl/header.h new file mode 100644 index 000000000..8df11e1e4 --- /dev/null +++ b/Source/ThirdParty/curl/header.h @@ -0,0 +1,74 @@ +#ifndef CURLINC_HEADER_H +#define CURLINC_HEADER_H +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) Daniel Stenberg, , et al. 
+ * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at https://curl.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + * SPDX-License-Identifier: curl + * + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +struct curl_header { + char *name; /* this might not use the same case */ + char *value; + size_t amount; /* number of headers using this name */ + size_t index; /* ... of this instance, 0 or higher */ + unsigned int origin; /* see bits below */ + void *anchor; /* handle privately used by libcurl */ +}; + +/* 'origin' bits */ +#define CURLH_HEADER (1<<0) /* plain server header */ +#define CURLH_TRAILER (1<<1) /* trailers */ +#define CURLH_CONNECT (1<<2) /* CONNECT headers */ +#define CURLH_1XX (1<<3) /* 1xx headers */ +#define CURLH_PSEUDO (1<<4) /* pseudo headers */ + +typedef enum { + CURLHE_OK, + CURLHE_BADINDEX, /* header exists but not with this index */ + CURLHE_MISSING, /* no such header exists */ + CURLHE_NOHEADERS, /* no headers at all exist (yet) */ + CURLHE_NOREQUEST, /* no request with this number was used */ + CURLHE_OUT_OF_MEMORY, /* out of memory while processing */ + CURLHE_BAD_ARGUMENT, /* a function argument was not okay */ + CURLHE_NOT_BUILT_IN /* if API was disabled in the build */ +} CURLHcode; + +CURL_EXTERN CURLHcode curl_easy_header(CURL *easy, + const char *name, + size_t index, + unsigned int origin, + int request, + struct curl_header **hout); + +CURL_EXTERN struct curl_header *curl_easy_nextheader(CURL *easy, + unsigned int origin, + int 
request, + struct curl_header *prev); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* CURLINC_HEADER_H */ diff --git a/Source/ThirdParty/curl/mprintf.h b/Source/ThirdParty/curl/mprintf.h index e20f546e1..e652a6520 100644 --- a/Source/ThirdParty/curl/mprintf.h +++ b/Source/ThirdParty/curl/mprintf.h @@ -1,5 +1,5 @@ -#ifndef __CURL_MPRINTF_H -#define __CURL_MPRINTF_H +#ifndef CURLINC_MPRINTF_H +#define CURLINC_MPRINTF_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2016, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,6 +20,8 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. 
* + * SPDX-License-Identifier: curl + * ***************************************************************************/ #include @@ -44,7 +46,7 @@ CURL_EXTERN char *curl_maprintf(const char *format, ...); CURL_EXTERN char *curl_mvaprintf(const char *format, va_list args); #ifdef __cplusplus -} +} /* end of extern "C" */ #endif -#endif /* __CURL_MPRINTF_H */ +#endif /* CURLINC_MPRINTF_H */ diff --git a/Source/ThirdParty/curl/multi.h b/Source/ThirdParty/curl/multi.h index b19dbaf79..30a3d9301 100644 --- a/Source/ThirdParty/curl/multi.h +++ b/Source/ThirdParty/curl/multi.h @@ -1,5 +1,5 @@ -#ifndef __CURL_MULTI_H -#define __CURL_MULTI_H +#ifndef CURLINC_MULTI_H +#define CURLINC_MULTI_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,6 +20,8 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ /* This is an "external" header file. Don't give away any internals here! 
@@ -72,6 +74,10 @@ typedef enum { attempted to get added - again */ CURLM_RECURSIVE_API_CALL, /* an api function was called from inside a callback */ + CURLM_WAKEUP_FAILURE, /* wakeup is unavailable or failed */ + CURLM_BAD_FUNCTION_ARGUMENT, /* function called with a bad parameter */ + CURLM_ABORTED_BY_CALLBACK, + CURLM_UNRECOVERABLE_POLL, CURLM_LAST } CURLMcode; @@ -118,7 +124,7 @@ struct curl_waitfd { /* * Name: curl_multi_init() * - * Desc: inititalize multi-style curl usage + * Desc: initialize multi-style curl usage * * Returns: a new CURLM handle to use in all 'curl_multi' functions. */ @@ -173,6 +179,29 @@ CURL_EXTERN CURLMcode curl_multi_wait(CURLM *multi_handle, int timeout_ms, int *ret); +/* + * Name: curl_multi_poll() + * + * Desc: Poll on all fds within a CURLM set as well as any + * additional fds passed to the function. + * + * Returns: CURLMcode type, general multi error code. + */ +CURL_EXTERN CURLMcode curl_multi_poll(CURLM *multi_handle, + struct curl_waitfd extra_fds[], + unsigned int extra_nfds, + int timeout_ms, + int *ret); + +/* + * Name: curl_multi_wakeup() + * + * Desc: wakes up a sleeping curl_multi_poll call. + * + * Returns: CURLMcode type, general multi error code. + */ +CURL_EXTERN CURLMcode curl_multi_wakeup(CURLM *multi_handle); + /* * Name: curl_multi_perform() * @@ -242,7 +271,7 @@ CURL_EXTERN CURLMsg *curl_multi_info_read(CURLM *multi_handle, * value into the equivalent human readable error string. This is * useful for printing meaningful error messages. * - * Returns: A pointer to a zero-terminated error message. + * Returns: A pointer to a null-terminated error message. 
*/ CURL_EXTERN const char *curl_multi_strerror(CURLMcode); @@ -289,16 +318,16 @@ typedef int (*curl_multi_timer_callback)(CURLM *multi, /* multi handle */ void *userp); /* private callback pointer */ -CURL_EXTERN CURLMcode curl_multi_socket(CURLM *multi_handle, curl_socket_t s, - int *running_handles); +CURL_EXTERN CURLMcode CURL_DEPRECATED(7.19.5, "Use curl_multi_socket_action()") +curl_multi_socket(CURLM *multi_handle, curl_socket_t s, int *running_handles); CURL_EXTERN CURLMcode curl_multi_socket_action(CURLM *multi_handle, curl_socket_t s, int ev_bitmask, int *running_handles); -CURL_EXTERN CURLMcode curl_multi_socket_all(CURLM *multi_handle, - int *running_handles); +CURL_EXTERN CURLMcode CURL_DEPRECATED(7.19.5, "Use curl_multi_socket_action()") +curl_multi_socket_all(CURLM *multi_handle, int *running_handles); #ifndef CURL_ALLOW_OLD_MULTI_SOCKET /* This macro below was added in 7.16.3 to push users who recompile to use @@ -319,68 +348,56 @@ CURL_EXTERN CURLMcode curl_multi_socket_all(CURLM *multi_handle, CURL_EXTERN CURLMcode curl_multi_timeout(CURLM *multi_handle, long *milliseconds); -#undef CINIT /* re-using the same name as in curl.h */ - -#ifdef CURL_ISOCPP -#define CINIT(name,type,num) CURLMOPT_ ## name = CURLOPTTYPE_ ## type + num -#else -/* The macro "##" is ISO C, we assume pre-ISO C doesn't support it. 
*/ -#define LONG CURLOPTTYPE_LONG -#define OBJECTPOINT CURLOPTTYPE_OBJECTPOINT -#define FUNCTIONPOINT CURLOPTTYPE_FUNCTIONPOINT -#define OFF_T CURLOPTTYPE_OFF_T -#define CINIT(name,type,number) CURLMOPT_/**/name = type + number -#endif - typedef enum { /* This is the socket callback function pointer */ - CINIT(SOCKETFUNCTION, FUNCTIONPOINT, 1), + CURLOPT(CURLMOPT_SOCKETFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 1), /* This is the argument passed to the socket callback */ - CINIT(SOCKETDATA, OBJECTPOINT, 2), + CURLOPT(CURLMOPT_SOCKETDATA, CURLOPTTYPE_OBJECTPOINT, 2), /* set to 1 to enable pipelining for this multi handle */ - CINIT(PIPELINING, LONG, 3), + CURLOPT(CURLMOPT_PIPELINING, CURLOPTTYPE_LONG, 3), /* This is the timer callback function pointer */ - CINIT(TIMERFUNCTION, FUNCTIONPOINT, 4), + CURLOPT(CURLMOPT_TIMERFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 4), /* This is the argument passed to the timer callback */ - CINIT(TIMERDATA, OBJECTPOINT, 5), + CURLOPT(CURLMOPT_TIMERDATA, CURLOPTTYPE_OBJECTPOINT, 5), /* maximum number of entries in the connection cache */ - CINIT(MAXCONNECTS, LONG, 6), + CURLOPT(CURLMOPT_MAXCONNECTS, CURLOPTTYPE_LONG, 6), /* maximum number of (pipelining) connections to one host */ - CINIT(MAX_HOST_CONNECTIONS, LONG, 7), + CURLOPT(CURLMOPT_MAX_HOST_CONNECTIONS, CURLOPTTYPE_LONG, 7), /* maximum number of requests in a pipeline */ - CINIT(MAX_PIPELINE_LENGTH, LONG, 8), + CURLOPT(CURLMOPT_MAX_PIPELINE_LENGTH, CURLOPTTYPE_LONG, 8), /* a connection with a content-length longer than this will not be considered for pipelining */ - CINIT(CONTENT_LENGTH_PENALTY_SIZE, OFF_T, 9), + CURLOPT(CURLMOPT_CONTENT_LENGTH_PENALTY_SIZE, CURLOPTTYPE_OFF_T, 9), /* a connection with a chunk length longer than this will not be considered for pipelining */ - CINIT(CHUNK_LENGTH_PENALTY_SIZE, OFF_T, 10), + CURLOPT(CURLMOPT_CHUNK_LENGTH_PENALTY_SIZE, CURLOPTTYPE_OFF_T, 10), - /* a list of site names(+port) that are blacklisted from - pipelining */ - CINIT(PIPELINING_SITE_BL, 
OBJECTPOINT, 11), + /* a list of site names(+port) that are blocked from pipelining */ + CURLOPT(CURLMOPT_PIPELINING_SITE_BL, CURLOPTTYPE_OBJECTPOINT, 11), - /* a list of server types that are blacklisted from - pipelining */ - CINIT(PIPELINING_SERVER_BL, OBJECTPOINT, 12), + /* a list of server types that are blocked from pipelining */ + CURLOPT(CURLMOPT_PIPELINING_SERVER_BL, CURLOPTTYPE_OBJECTPOINT, 12), /* maximum number of open connections in total */ - CINIT(MAX_TOTAL_CONNECTIONS, LONG, 13), + CURLOPT(CURLMOPT_MAX_TOTAL_CONNECTIONS, CURLOPTTYPE_LONG, 13), /* This is the server push callback function pointer */ - CINIT(PUSHFUNCTION, FUNCTIONPOINT, 14), + CURLOPT(CURLMOPT_PUSHFUNCTION, CURLOPTTYPE_FUNCTIONPOINT, 14), /* This is the argument passed to the server push callback */ - CINIT(PUSHDATA, OBJECTPOINT, 15), + CURLOPT(CURLMOPT_PUSHDATA, CURLOPTTYPE_OBJECTPOINT, 15), + + /* maximum number of concurrent streams to support on a connection */ + CURLOPT(CURLMOPT_MAX_CONCURRENT_STREAMS, CURLOPTTYPE_LONG, 16), CURLMOPT_LASTENTRY /* the last unused */ } CURLMoption; @@ -414,12 +431,14 @@ CURL_EXTERN CURLMcode curl_multi_assign(CURLM *multi_handle, * Name: curl_push_callback * * Desc: This callback gets called when a new stream is being pushed by the - * server. It approves or denies the new stream. + * server. It approves or denies the new stream. It can also decide + * to completely fail the connection. * - * Returns: CURL_PUSH_OK or CURL_PUSH_DENY. 
+ * Returns: CURL_PUSH_OK, CURL_PUSH_DENY or CURL_PUSH_ERROROUT */ -#define CURL_PUSH_OK 0 -#define CURL_PUSH_DENY 1 +#define CURL_PUSH_OK 0 +#define CURL_PUSH_DENY 1 +#define CURL_PUSH_ERROROUT 2 /* added in 7.72.0 */ struct curl_pushheaders; /* forward declaration only */ diff --git a/Source/ThirdParty/curl/options.h b/Source/ThirdParty/curl/options.h new file mode 100644 index 000000000..1ed76a95c --- /dev/null +++ b/Source/ThirdParty/curl/options.h @@ -0,0 +1,70 @@ +#ifndef CURLINC_OPTIONS_H +#define CURLINC_OPTIONS_H +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at https://curl.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + * SPDX-License-Identifier: curl + * + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + CURLOT_LONG, /* long (a range of values) */ + CURLOT_VALUES, /* (a defined set or bitmask) */ + CURLOT_OFF_T, /* curl_off_t (a range of values) */ + CURLOT_OBJECT, /* pointer (void *) */ + CURLOT_STRING, /* (char * to null-terminated buffer) */ + CURLOT_SLIST, /* (struct curl_slist *) */ + CURLOT_CBPTR, /* (void * passed as-is to a callback) */ + CURLOT_BLOB, /* blob (struct curl_blob *) */ + CURLOT_FUNCTION /* function pointer */ +} curl_easytype; + +/* Flag bits */ + +/* "alias" means it is provided for old programs to remain functional, + we prefer another name */ +#define CURLOT_FLAG_ALIAS (1<<0) + +/* The CURLOPTTYPE_* id ranges can still be used to figure out what type/size + to use for curl_easy_setopt() for the given id */ +struct curl_easyoption { + const char *name; + CURLoption id; + curl_easytype type; + unsigned int flags; +}; + +CURL_EXTERN const struct curl_easyoption * +curl_easy_option_by_name(const char *name); + +CURL_EXTERN const struct curl_easyoption * +curl_easy_option_by_id(CURLoption id); + +CURL_EXTERN const struct curl_easyoption * +curl_easy_option_next(const struct curl_easyoption *prev); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif +#endif /* CURLINC_OPTIONS_H */ diff --git a/Source/ThirdParty/curl/stdcheaders.h b/Source/ThirdParty/curl/stdcheaders.h index 027b6f421..7451aa305 100644 --- a/Source/ThirdParty/curl/stdcheaders.h +++ b/Source/ThirdParty/curl/stdcheaders.h @@ -1,5 +1,5 @@ -#ifndef __STDC_HEADERS_H -#define __STDC_HEADERS_H +#ifndef CURLINC_STDCHEADERS_H +#define CURLINC_STDCHEADERS_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2016, Daniel Stenberg, , et al. 
+ * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,6 +20,8 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ #include @@ -30,4 +32,4 @@ size_t fwrite(const void *, size_t, size_t, FILE *); int strcasecmp(const char *, const char *); int strncasecmp(const char *, const char *, size_t); -#endif /* __STDC_HEADERS_H */ +#endif /* CURLINC_STDCHEADERS_H */ diff --git a/Source/ThirdParty/curl/system.h b/Source/ThirdParty/curl/system.h index 1e555ec19..def773924 100644 --- a/Source/ThirdParty/curl/system.h +++ b/Source/ThirdParty/curl/system.h @@ -1,5 +1,5 @@ -#ifndef __CURL_SYSTEM_H -#define __CURL_SYSTEM_H +#ifndef CURLINC_SYSTEM_H +#define CURLINC_SYSTEM_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. 
* * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,6 +20,8 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ /* @@ -98,22 +100,6 @@ # define CURL_SUFFIX_CURL_OFF_TU UL # define CURL_TYPEOF_CURL_SOCKLEN_T int -#elif defined(__WATCOMC__) -# if defined(__386__) -# define CURL_TYPEOF_CURL_OFF_T __int64 -# define CURL_FORMAT_CURL_OFF_T "I64d" -# define CURL_FORMAT_CURL_OFF_TU "I64u" -# define CURL_SUFFIX_CURL_OFF_T i64 -# define CURL_SUFFIX_CURL_OFF_TU ui64 -# else -# define CURL_TYPEOF_CURL_OFF_T long -# define CURL_FORMAT_CURL_OFF_T "ld" -# define CURL_FORMAT_CURL_OFF_TU "lu" -# define CURL_SUFFIX_CURL_OFF_T L -# define CURL_SUFFIX_CURL_OFF_TU UL -# endif -# define CURL_TYPEOF_CURL_SOCKLEN_T int - #elif defined(__POCC__) # if (__POCC__ < 280) # define CURL_TYPEOF_CURL_OFF_T long @@ -137,15 +123,26 @@ # define CURL_TYPEOF_CURL_SOCKLEN_T int #elif defined(__LCC__) -# define CURL_TYPEOF_CURL_OFF_T long -# define CURL_FORMAT_CURL_OFF_T "ld" -# define CURL_FORMAT_CURL_OFF_TU "lu" -# define CURL_SUFFIX_CURL_OFF_T L -# define CURL_SUFFIX_CURL_OFF_TU UL -# define CURL_TYPEOF_CURL_SOCKLEN_T int +# if defined(__MCST__) /* MCST eLbrus Compiler Collection */ +# define CURL_TYPEOF_CURL_OFF_T long +# define CURL_FORMAT_CURL_OFF_T "ld" +# define CURL_FORMAT_CURL_OFF_TU "lu" +# define CURL_SUFFIX_CURL_OFF_T L +# define CURL_SUFFIX_CURL_OFF_TU UL +# define CURL_TYPEOF_CURL_SOCKLEN_T socklen_t +# define CURL_PULL_SYS_TYPES_H 1 +# define CURL_PULL_SYS_SOCKET_H 1 +# else /* Local (or Little) C Compiler */ +# define CURL_TYPEOF_CURL_OFF_T long +# define CURL_FORMAT_CURL_OFF_T "ld" +# define CURL_FORMAT_CURL_OFF_TU "lu" +# define CURL_SUFFIX_CURL_OFF_T L +# define CURL_SUFFIX_CURL_OFF_TU UL +# define 
CURL_TYPEOF_CURL_SOCKLEN_T int +# endif #elif defined(__SYMBIAN32__) -# if defined(__EABI__) /* Treat all ARM compilers equally */ +# if defined(__EABI__) /* Treat all ARM compilers equally */ # define CURL_TYPEOF_CURL_OFF_T long long # define CURL_FORMAT_CURL_OFF_T "lld" # define CURL_FORMAT_CURL_OFF_TU "llu" @@ -167,13 +164,33 @@ # endif # define CURL_TYPEOF_CURL_SOCKLEN_T unsigned int -#elif defined(__MWERKS__) +#elif defined(macintosh) +# include +# if TYPE_LONGLONG +# define CURL_TYPEOF_CURL_OFF_T long long +# define CURL_FORMAT_CURL_OFF_T "lld" +# define CURL_FORMAT_CURL_OFF_TU "llu" +# define CURL_SUFFIX_CURL_OFF_T LL +# define CURL_SUFFIX_CURL_OFF_TU ULL +# else +# define CURL_TYPEOF_CURL_OFF_T long +# define CURL_FORMAT_CURL_OFF_T "ld" +# define CURL_FORMAT_CURL_OFF_TU "lu" +# define CURL_SUFFIX_CURL_OFF_T L +# define CURL_SUFFIX_CURL_OFF_TU UL +# endif +# define CURL_TYPEOF_CURL_SOCKLEN_T unsigned int + +#elif defined(__TANDEM) +# if ! defined(__LP64) + /* Required for 32-bit NonStop builds only. 
*/ # define CURL_TYPEOF_CURL_OFF_T long long # define CURL_FORMAT_CURL_OFF_T "lld" # define CURL_FORMAT_CURL_OFF_TU "llu" # define CURL_SUFFIX_CURL_OFF_T LL # define CURL_SUFFIX_CURL_OFF_TU ULL # define CURL_TYPEOF_CURL_SOCKLEN_T int +# endif #elif defined(_WIN32_WCE) # define CURL_TYPEOF_CURL_OFF_T __int64 @@ -210,16 +227,14 @@ # define CURL_TYPEOF_CURL_SOCKLEN_T unsigned int #elif defined(__OS400__) -# if defined(__ILEC400__) -# define CURL_TYPEOF_CURL_OFF_T long long -# define CURL_FORMAT_CURL_OFF_T "lld" -# define CURL_FORMAT_CURL_OFF_TU "llu" -# define CURL_SUFFIX_CURL_OFF_T LL -# define CURL_SUFFIX_CURL_OFF_TU ULL -# define CURL_TYPEOF_CURL_SOCKLEN_T socklen_t -# define CURL_PULL_SYS_TYPES_H 1 -# define CURL_PULL_SYS_SOCKET_H 1 -# endif +# define CURL_TYPEOF_CURL_OFF_T long long +# define CURL_FORMAT_CURL_OFF_T "lld" +# define CURL_FORMAT_CURL_OFF_TU "llu" +# define CURL_SUFFIX_CURL_OFF_T LL +# define CURL_SUFFIX_CURL_OFF_TU ULL +# define CURL_TYPEOF_CURL_SOCKLEN_T socklen_t +# define CURL_PULL_SYS_TYPES_H 1 +# define CURL_PULL_SYS_SOCKET_H 1 #elif defined(__MVS__) # if defined(__IBMC__) || defined(__IBMCPP__) @@ -288,7 +303,6 @@ # define CURL_TYPEOF_CURL_SOCKLEN_T int #elif defined(__TINYC__) /* also known as tcc */ - # define CURL_TYPEOF_CURL_OFF_T long long # define CURL_FORMAT_CURL_OFF_T "lld" # define CURL_FORMAT_CURL_OFF_TU "llu" @@ -377,6 +391,7 @@ # define CURL_SUFFIX_CURL_OFF_TU ULL # elif defined(__LP64__) || \ defined(__x86_64__) || defined(__ppc64__) || defined(__sparc64__) || \ + defined(__e2k__) || \ (defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 8) || \ (defined(__LONG_MAX__) && __LONG_MAX__ == 9223372036854775807L) # define CURL_TYPEOF_CURL_OFF_T long @@ -473,21 +488,21 @@ */ #if defined(__BORLANDC__) && (__BORLANDC__ == 0x0551) -# define __CURL_OFF_T_C_HLPR2(x) x -# define __CURL_OFF_T_C_HLPR1(x) __CURL_OFF_T_C_HLPR2(x) -# define CURL_OFF_T_C(Val) __CURL_OFF_T_C_HLPR1(Val) ## \ - __CURL_OFF_T_C_HLPR1(CURL_SUFFIX_CURL_OFF_T) -# define 
CURL_OFF_TU_C(Val) __CURL_OFF_T_C_HLPR1(Val) ## \ - __CURL_OFF_T_C_HLPR1(CURL_SUFFIX_CURL_OFF_TU) +# define CURLINC_OFF_T_C_HLPR2(x) x +# define CURLINC_OFF_T_C_HLPR1(x) CURLINC_OFF_T_C_HLPR2(x) +# define CURL_OFF_T_C(Val) CURLINC_OFF_T_C_HLPR1(Val) ## \ + CURLINC_OFF_T_C_HLPR1(CURL_SUFFIX_CURL_OFF_T) +# define CURL_OFF_TU_C(Val) CURLINC_OFF_T_C_HLPR1(Val) ## \ + CURLINC_OFF_T_C_HLPR1(CURL_SUFFIX_CURL_OFF_TU) #else # ifdef CURL_ISOCPP -# define __CURL_OFF_T_C_HLPR2(Val,Suffix) Val ## Suffix +# define CURLINC_OFF_T_C_HLPR2(Val,Suffix) Val ## Suffix # else -# define __CURL_OFF_T_C_HLPR2(Val,Suffix) Val/**/Suffix +# define CURLINC_OFF_T_C_HLPR2(Val,Suffix) Val/**/Suffix # endif -# define __CURL_OFF_T_C_HLPR1(Val,Suffix) __CURL_OFF_T_C_HLPR2(Val,Suffix) -# define CURL_OFF_T_C(Val) __CURL_OFF_T_C_HLPR1(Val,CURL_SUFFIX_CURL_OFF_T) -# define CURL_OFF_TU_C(Val) __CURL_OFF_T_C_HLPR1(Val,CURL_SUFFIX_CURL_OFF_TU) +# define CURLINC_OFF_T_C_HLPR1(Val,Suffix) CURLINC_OFF_T_C_HLPR2(Val,Suffix) +# define CURL_OFF_T_C(Val) CURLINC_OFF_T_C_HLPR1(Val,CURL_SUFFIX_CURL_OFF_T) +# define CURL_OFF_TU_C(Val) CURLINC_OFF_T_C_HLPR1(Val,CURL_SUFFIX_CURL_OFF_TU) #endif -#endif /* __CURL_SYSTEM_H */ +#endif /* CURLINC_SYSTEM_H */ diff --git a/Source/ThirdParty/curl/typecheck-gcc.h b/Source/ThirdParty/curl/typecheck-gcc.h index 8018ea37f..bc8d7a78c 100644 --- a/Source/ThirdParty/curl/typecheck-gcc.h +++ b/Source/ThirdParty/curl/typecheck-gcc.h @@ -1,5 +1,5 @@ -#ifndef __CURL_TYPECHECK_GCC_H -#define __CURL_TYPECHECK_GCC_H +#ifndef CURLINC_TYPECHECK_GCC_H +#define CURLINC_TYPECHECK_GCC_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2019, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. 
The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,15 +20,17 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ /* wraps curl_easy_setopt() with typechecking */ /* To add a new kind of warning, add an - * if(_curl_is_sometype_option(_curl_opt)) - * if(!_curl_is_sometype(value)) + * if(curlcheck_sometype_option(_curl_opt)) + * if(!curlcheck_sometype(value)) * _curl_easy_setopt_err_sometype(); - * block and define _curl_is_sometype_option, _curl_is_sometype and + * block and define curlcheck_sometype_option, curlcheck_sometype and * _curl_easy_setopt_err_sometype below * * NOTE: We use two nested 'if' statements here instead of the && operator, in @@ -38,117 +40,119 @@ * To add an option that uses the same type as an existing option, you'll just * need to extend the appropriate _curl_*_option macro */ -#define curl_easy_setopt(handle, option, value) \ -__extension__ ({ \ - __typeof__(option) _curl_opt = option; \ - if(__builtin_constant_p(_curl_opt)) { \ - if(_curl_is_long_option(_curl_opt)) \ - if(!_curl_is_long(value)) \ - _curl_easy_setopt_err_long(); \ - if(_curl_is_off_t_option(_curl_opt)) \ - if(!_curl_is_off_t(value)) \ - _curl_easy_setopt_err_curl_off_t(); \ - if(_curl_is_string_option(_curl_opt)) \ - if(!_curl_is_string(value)) \ - _curl_easy_setopt_err_string(); \ - if(_curl_is_write_cb_option(_curl_opt)) \ - if(!_curl_is_write_cb(value)) \ - _curl_easy_setopt_err_write_callback(); \ - if((_curl_opt) == CURLOPT_RESOLVER_START_FUNCTION) \ - if(!_curl_is_resolver_start_callback(value)) \ - _curl_easy_setopt_err_resolver_start_callback(); \ - 
if((_curl_opt) == CURLOPT_READFUNCTION) \ - if(!_curl_is_read_cb(value)) \ - _curl_easy_setopt_err_read_cb(); \ - if((_curl_opt) == CURLOPT_IOCTLFUNCTION) \ - if(!_curl_is_ioctl_cb(value)) \ - _curl_easy_setopt_err_ioctl_cb(); \ - if((_curl_opt) == CURLOPT_SOCKOPTFUNCTION) \ - if(!_curl_is_sockopt_cb(value)) \ - _curl_easy_setopt_err_sockopt_cb(); \ - if((_curl_opt) == CURLOPT_OPENSOCKETFUNCTION) \ - if(!_curl_is_opensocket_cb(value)) \ - _curl_easy_setopt_err_opensocket_cb(); \ - if((_curl_opt) == CURLOPT_PROGRESSFUNCTION) \ - if(!_curl_is_progress_cb(value)) \ - _curl_easy_setopt_err_progress_cb(); \ - if((_curl_opt) == CURLOPT_DEBUGFUNCTION) \ - if(!_curl_is_debug_cb(value)) \ - _curl_easy_setopt_err_debug_cb(); \ - if((_curl_opt) == CURLOPT_SSL_CTX_FUNCTION) \ - if(!_curl_is_ssl_ctx_cb(value)) \ - _curl_easy_setopt_err_ssl_ctx_cb(); \ - if(_curl_is_conv_cb_option(_curl_opt)) \ - if(!_curl_is_conv_cb(value)) \ - _curl_easy_setopt_err_conv_cb(); \ - if((_curl_opt) == CURLOPT_SEEKFUNCTION) \ - if(!_curl_is_seek_cb(value)) \ - _curl_easy_setopt_err_seek_cb(); \ - if(_curl_is_cb_data_option(_curl_opt)) \ - if(!_curl_is_cb_data(value)) \ - _curl_easy_setopt_err_cb_data(); \ - if((_curl_opt) == CURLOPT_ERRORBUFFER) \ - if(!_curl_is_error_buffer(value)) \ - _curl_easy_setopt_err_error_buffer(); \ - if((_curl_opt) == CURLOPT_STDERR) \ - if(!_curl_is_FILE(value)) \ - _curl_easy_setopt_err_FILE(); \ - if(_curl_is_postfields_option(_curl_opt)) \ - if(!_curl_is_postfields(value)) \ - _curl_easy_setopt_err_postfields(); \ - if((_curl_opt) == CURLOPT_HTTPPOST) \ - if(!_curl_is_arr((value), struct curl_httppost)) \ - _curl_easy_setopt_err_curl_httpost(); \ - if((_curl_opt) == CURLOPT_MIMEPOST) \ - if(!_curl_is_ptr((value), curl_mime)) \ - _curl_easy_setopt_err_curl_mimepost(); \ - if(_curl_is_slist_option(_curl_opt)) \ - if(!_curl_is_arr((value), struct curl_slist)) \ - _curl_easy_setopt_err_curl_slist(); \ - if((_curl_opt) == CURLOPT_SHARE) \ - if(!_curl_is_ptr((value), 
CURLSH)) \ - _curl_easy_setopt_err_CURLSH(); \ - } \ - curl_easy_setopt(handle, _curl_opt, value); \ -}) +#define curl_easy_setopt(handle, option, value) \ + __extension__({ \ + CURLoption _curl_opt = (option); \ + if(__builtin_constant_p(_curl_opt)) { \ + CURL_IGNORE_DEPRECATION( \ + if(curlcheck_long_option(_curl_opt)) \ + if(!curlcheck_long(value)) \ + _curl_easy_setopt_err_long(); \ + if(curlcheck_off_t_option(_curl_opt)) \ + if(!curlcheck_off_t(value)) \ + _curl_easy_setopt_err_curl_off_t(); \ + if(curlcheck_string_option(_curl_opt)) \ + if(!curlcheck_string(value)) \ + _curl_easy_setopt_err_string(); \ + if(curlcheck_write_cb_option(_curl_opt)) \ + if(!curlcheck_write_cb(value)) \ + _curl_easy_setopt_err_write_callback(); \ + if((_curl_opt) == CURLOPT_RESOLVER_START_FUNCTION) \ + if(!curlcheck_resolver_start_callback(value)) \ + _curl_easy_setopt_err_resolver_start_callback(); \ + if((_curl_opt) == CURLOPT_READFUNCTION) \ + if(!curlcheck_read_cb(value)) \ + _curl_easy_setopt_err_read_cb(); \ + if((_curl_opt) == CURLOPT_IOCTLFUNCTION) \ + if(!curlcheck_ioctl_cb(value)) \ + _curl_easy_setopt_err_ioctl_cb(); \ + if((_curl_opt) == CURLOPT_SOCKOPTFUNCTION) \ + if(!curlcheck_sockopt_cb(value)) \ + _curl_easy_setopt_err_sockopt_cb(); \ + if((_curl_opt) == CURLOPT_OPENSOCKETFUNCTION) \ + if(!curlcheck_opensocket_cb(value)) \ + _curl_easy_setopt_err_opensocket_cb(); \ + if((_curl_opt) == CURLOPT_PROGRESSFUNCTION) \ + if(!curlcheck_progress_cb(value)) \ + _curl_easy_setopt_err_progress_cb(); \ + if((_curl_opt) == CURLOPT_DEBUGFUNCTION) \ + if(!curlcheck_debug_cb(value)) \ + _curl_easy_setopt_err_debug_cb(); \ + if((_curl_opt) == CURLOPT_SSL_CTX_FUNCTION) \ + if(!curlcheck_ssl_ctx_cb(value)) \ + _curl_easy_setopt_err_ssl_ctx_cb(); \ + if(curlcheck_conv_cb_option(_curl_opt)) \ + if(!curlcheck_conv_cb(value)) \ + _curl_easy_setopt_err_conv_cb(); \ + if((_curl_opt) == CURLOPT_SEEKFUNCTION) \ + if(!curlcheck_seek_cb(value)) \ + _curl_easy_setopt_err_seek_cb(); \ + 
if(curlcheck_cb_data_option(_curl_opt)) \ + if(!curlcheck_cb_data(value)) \ + _curl_easy_setopt_err_cb_data(); \ + if((_curl_opt) == CURLOPT_ERRORBUFFER) \ + if(!curlcheck_error_buffer(value)) \ + _curl_easy_setopt_err_error_buffer(); \ + if((_curl_opt) == CURLOPT_STDERR) \ + if(!curlcheck_FILE(value)) \ + _curl_easy_setopt_err_FILE(); \ + if(curlcheck_postfields_option(_curl_opt)) \ + if(!curlcheck_postfields(value)) \ + _curl_easy_setopt_err_postfields(); \ + if((_curl_opt) == CURLOPT_HTTPPOST) \ + if(!curlcheck_arr((value), struct curl_httppost)) \ + _curl_easy_setopt_err_curl_httpost(); \ + if((_curl_opt) == CURLOPT_MIMEPOST) \ + if(!curlcheck_ptr((value), curl_mime)) \ + _curl_easy_setopt_err_curl_mimepost(); \ + if(curlcheck_slist_option(_curl_opt)) \ + if(!curlcheck_arr((value), struct curl_slist)) \ + _curl_easy_setopt_err_curl_slist(); \ + if((_curl_opt) == CURLOPT_SHARE) \ + if(!curlcheck_ptr((value), CURLSH)) \ + _curl_easy_setopt_err_CURLSH(); \ + ) \ + } \ + curl_easy_setopt(handle, _curl_opt, value); \ + }) /* wraps curl_easy_getinfo() with typechecking */ -/* FIXME: don't allow const pointers */ -#define curl_easy_getinfo(handle, info, arg) \ -__extension__ ({ \ - __typeof__(info) _curl_info = info; \ - if(__builtin_constant_p(_curl_info)) { \ - if(_curl_is_string_info(_curl_info)) \ - if(!_curl_is_arr((arg), char *)) \ - _curl_easy_getinfo_err_string(); \ - if(_curl_is_long_info(_curl_info)) \ - if(!_curl_is_arr((arg), long)) \ - _curl_easy_getinfo_err_long(); \ - if(_curl_is_double_info(_curl_info)) \ - if(!_curl_is_arr((arg), double)) \ - _curl_easy_getinfo_err_double(); \ - if(_curl_is_slist_info(_curl_info)) \ - if(!_curl_is_arr((arg), struct curl_slist *)) \ - _curl_easy_getinfo_err_curl_slist(); \ - if(_curl_is_tlssessioninfo_info(_curl_info)) \ - if(!_curl_is_arr((arg), struct curl_tlssessioninfo *)) \ - _curl_easy_getinfo_err_curl_tlssesssioninfo(); \ - if(_curl_is_certinfo_info(_curl_info)) \ - if(!_curl_is_arr((arg), struct curl_certinfo 
*)) \ - _curl_easy_getinfo_err_curl_certinfo(); \ - if(_curl_is_socket_info(_curl_info)) \ - if(!_curl_is_arr((arg), curl_socket_t)) \ - _curl_easy_getinfo_err_curl_socket(); \ - if(_curl_is_off_t_info(_curl_info)) \ - if(!_curl_is_arr((arg), curl_off_t)) \ - _curl_easy_getinfo_err_curl_off_t(); \ - } \ - curl_easy_getinfo(handle, _curl_info, arg); \ -}) +#define curl_easy_getinfo(handle, info, arg) \ + __extension__({ \ + CURLINFO _curl_info = (info); \ + if(__builtin_constant_p(_curl_info)) { \ + CURL_IGNORE_DEPRECATION( \ + if(curlcheck_string_info(_curl_info)) \ + if(!curlcheck_arr((arg), char *)) \ + _curl_easy_getinfo_err_string(); \ + if(curlcheck_long_info(_curl_info)) \ + if(!curlcheck_arr((arg), long)) \ + _curl_easy_getinfo_err_long(); \ + if(curlcheck_double_info(_curl_info)) \ + if(!curlcheck_arr((arg), double)) \ + _curl_easy_getinfo_err_double(); \ + if(curlcheck_slist_info(_curl_info)) \ + if(!curlcheck_arr((arg), struct curl_slist *)) \ + _curl_easy_getinfo_err_curl_slist(); \ + if(curlcheck_tlssessioninfo_info(_curl_info)) \ + if(!curlcheck_arr((arg), struct curl_tlssessioninfo *)) \ + _curl_easy_getinfo_err_curl_tlssesssioninfo(); \ + if(curlcheck_certinfo_info(_curl_info)) \ + if(!curlcheck_arr((arg), struct curl_certinfo *)) \ + _curl_easy_getinfo_err_curl_certinfo(); \ + if(curlcheck_socket_info(_curl_info)) \ + if(!curlcheck_arr((arg), curl_socket_t)) \ + _curl_easy_getinfo_err_curl_socket(); \ + if(curlcheck_off_t_info(_curl_info)) \ + if(!curlcheck_arr((arg), curl_off_t)) \ + _curl_easy_getinfo_err_curl_off_t(); \ + ) \ + } \ + curl_easy_getinfo(handle, _curl_info, arg); \ + }) -/* TODO: typechecking for curl_share_setopt() and curl_multi_setopt(), - * for now just make sure that the functions are called with three - * arguments +/* + * For now, just make sure that the functions are called with three arguments */ #define curl_share_setopt(share,opt,param) curl_share_setopt(share,opt,param) #define curl_multi_setopt(handle,opt,param) 
curl_multi_setopt(handle,opt,param) @@ -158,83 +162,83 @@ __extension__ ({ \ * functions */ /* To define a new warning, use _CURL_WARNING(identifier, "message") */ -#define _CURL_WARNING(id, message) \ - static void __attribute__((__warning__(message))) \ - __attribute__((__unused__)) __attribute__((__noinline__)) \ +#define CURLWARNING(id, message) \ + static void __attribute__((__warning__(message))) \ + __attribute__((__unused__)) __attribute__((__noinline__)) \ id(void) { __asm__(""); } -_CURL_WARNING(_curl_easy_setopt_err_long, +CURLWARNING(_curl_easy_setopt_err_long, "curl_easy_setopt expects a long argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_curl_off_t, +CURLWARNING(_curl_easy_setopt_err_curl_off_t, "curl_easy_setopt expects a curl_off_t argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_string, +CURLWARNING(_curl_easy_setopt_err_string, "curl_easy_setopt expects a " "string ('char *' or char[]) argument for this option" ) -_CURL_WARNING(_curl_easy_setopt_err_write_callback, +CURLWARNING(_curl_easy_setopt_err_write_callback, "curl_easy_setopt expects a curl_write_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_resolver_start_callback, +CURLWARNING(_curl_easy_setopt_err_resolver_start_callback, "curl_easy_setopt expects a " "curl_resolver_start_callback argument for this option" ) -_CURL_WARNING(_curl_easy_setopt_err_read_cb, +CURLWARNING(_curl_easy_setopt_err_read_cb, "curl_easy_setopt expects a curl_read_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_ioctl_cb, +CURLWARNING(_curl_easy_setopt_err_ioctl_cb, "curl_easy_setopt expects a curl_ioctl_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_sockopt_cb, +CURLWARNING(_curl_easy_setopt_err_sockopt_cb, "curl_easy_setopt expects a curl_sockopt_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_opensocket_cb, +CURLWARNING(_curl_easy_setopt_err_opensocket_cb, "curl_easy_setopt expects a 
" "curl_opensocket_callback argument for this option" ) -_CURL_WARNING(_curl_easy_setopt_err_progress_cb, +CURLWARNING(_curl_easy_setopt_err_progress_cb, "curl_easy_setopt expects a curl_progress_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_debug_cb, +CURLWARNING(_curl_easy_setopt_err_debug_cb, "curl_easy_setopt expects a curl_debug_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_ssl_ctx_cb, +CURLWARNING(_curl_easy_setopt_err_ssl_ctx_cb, "curl_easy_setopt expects a curl_ssl_ctx_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_conv_cb, +CURLWARNING(_curl_easy_setopt_err_conv_cb, "curl_easy_setopt expects a curl_conv_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_seek_cb, +CURLWARNING(_curl_easy_setopt_err_seek_cb, "curl_easy_setopt expects a curl_seek_callback argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_cb_data, +CURLWARNING(_curl_easy_setopt_err_cb_data, "curl_easy_setopt expects a " "private data pointer as argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_error_buffer, +CURLWARNING(_curl_easy_setopt_err_error_buffer, "curl_easy_setopt expects a " "char buffer of CURL_ERROR_SIZE as argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_FILE, +CURLWARNING(_curl_easy_setopt_err_FILE, "curl_easy_setopt expects a 'FILE *' argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_postfields, +CURLWARNING(_curl_easy_setopt_err_postfields, "curl_easy_setopt expects a 'void *' or 'char *' argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_curl_httpost, +CURLWARNING(_curl_easy_setopt_err_curl_httpost, "curl_easy_setopt expects a 'struct curl_httppost *' " "argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_curl_mimepost, +CURLWARNING(_curl_easy_setopt_err_curl_mimepost, "curl_easy_setopt expects a 'curl_mime *' " "argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_curl_slist, 
+CURLWARNING(_curl_easy_setopt_err_curl_slist, "curl_easy_setopt expects a 'struct curl_slist *' argument for this option") -_CURL_WARNING(_curl_easy_setopt_err_CURLSH, +CURLWARNING(_curl_easy_setopt_err_CURLSH, "curl_easy_setopt expects a CURLSH* argument for this option") -_CURL_WARNING(_curl_easy_getinfo_err_string, +CURLWARNING(_curl_easy_getinfo_err_string, "curl_easy_getinfo expects a pointer to 'char *' for this info") -_CURL_WARNING(_curl_easy_getinfo_err_long, +CURLWARNING(_curl_easy_getinfo_err_long, "curl_easy_getinfo expects a pointer to long for this info") -_CURL_WARNING(_curl_easy_getinfo_err_double, +CURLWARNING(_curl_easy_getinfo_err_double, "curl_easy_getinfo expects a pointer to double for this info") -_CURL_WARNING(_curl_easy_getinfo_err_curl_slist, +CURLWARNING(_curl_easy_getinfo_err_curl_slist, "curl_easy_getinfo expects a pointer to 'struct curl_slist *' for this info") -_CURL_WARNING(_curl_easy_getinfo_err_curl_tlssesssioninfo, +CURLWARNING(_curl_easy_getinfo_err_curl_tlssesssioninfo, "curl_easy_getinfo expects a pointer to " "'struct curl_tlssessioninfo *' for this info") -_CURL_WARNING(_curl_easy_getinfo_err_curl_certinfo, +CURLWARNING(_curl_easy_getinfo_err_curl_certinfo, "curl_easy_getinfo expects a pointer to " "'struct curl_certinfo *' for this info") -_CURL_WARNING(_curl_easy_getinfo_err_curl_socket, +CURLWARNING(_curl_easy_getinfo_err_curl_socket, "curl_easy_getinfo expects a pointer to curl_socket_t for this info") -_CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, +CURLWARNING(_curl_easy_getinfo_err_curl_off_t, "curl_easy_getinfo expects a pointer to curl_off_t for this info") /* groups of curl_easy_setops options that take the same type of argument */ @@ -246,14 +250,14 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, */ /* evaluates to true if option takes a long argument */ -#define _curl_is_long_option(option) \ +#define curlcheck_long_option(option) \ (0 < (option) && (option) < CURLOPTTYPE_OBJECTPOINT) -#define 
_curl_is_off_t_option(option) \ - ((option) > CURLOPTTYPE_OFF_T) +#define curlcheck_off_t_option(option) \ + (((option) > CURLOPTTYPE_OFF_T) && ((option) < CURLOPTTYPE_BLOB)) /* evaluates to true if option takes a char* argument */ -#define _curl_is_string_option(option) \ +#define curlcheck_string_option(option) \ ((option) == CURLOPT_ABSTRACT_UNIX_SOCKET || \ (option) == CURLOPT_ACCEPT_ENCODING || \ (option) == CURLOPT_ALTSVC || \ @@ -272,9 +276,10 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, (option) == CURLOPT_DNS_SERVERS || \ (option) == CURLOPT_DOH_URL || \ (option) == CURLOPT_EGDSOCKET || \ - (option) == CURLOPT_FTPPORT || \ (option) == CURLOPT_FTP_ACCOUNT || \ (option) == CURLOPT_FTP_ALTERNATIVE_TO_USER || \ + (option) == CURLOPT_FTPPORT || \ + (option) == CURLOPT_HSTS || \ (option) == CURLOPT_INTERFACE || \ (option) == CURLOPT_ISSUERCERT || \ (option) == CURLOPT_KEYPASSWD || \ @@ -287,33 +292,40 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, (option) == CURLOPT_PASSWORD || \ (option) == CURLOPT_PINNEDPUBLICKEY || \ (option) == CURLOPT_PRE_PROXY || \ + (option) == CURLOPT_PROTOCOLS_STR || \ (option) == CURLOPT_PROXY || \ - (option) == CURLOPT_PROXYPASSWORD || \ - (option) == CURLOPT_PROXYUSERNAME || \ - (option) == CURLOPT_PROXYUSERPWD || \ (option) == CURLOPT_PROXY_CAINFO || \ (option) == CURLOPT_PROXY_CAPATH || \ (option) == CURLOPT_PROXY_CRLFILE || \ + (option) == CURLOPT_PROXY_ISSUERCERT || \ (option) == CURLOPT_PROXY_KEYPASSWD || \ (option) == CURLOPT_PROXY_PINNEDPUBLICKEY || \ (option) == CURLOPT_PROXY_SERVICE_NAME || \ + (option) == CURLOPT_PROXY_SSL_CIPHER_LIST || \ (option) == CURLOPT_PROXY_SSLCERT || \ (option) == CURLOPT_PROXY_SSLCERTTYPE || \ (option) == CURLOPT_PROXY_SSLKEY || \ (option) == CURLOPT_PROXY_SSLKEYTYPE || \ - (option) == CURLOPT_PROXY_SSL_CIPHER_LIST || \ + (option) == CURLOPT_PROXY_TLS13_CIPHERS || \ (option) == CURLOPT_PROXY_TLSAUTH_PASSWORD || \ - (option) == CURLOPT_PROXY_TLSAUTH_USERNAME || \ (option) == 
CURLOPT_PROXY_TLSAUTH_TYPE || \ + (option) == CURLOPT_PROXY_TLSAUTH_USERNAME || \ + (option) == CURLOPT_PROXYPASSWORD || \ + (option) == CURLOPT_PROXYUSERNAME || \ + (option) == CURLOPT_PROXYUSERPWD || \ (option) == CURLOPT_RANDOM_FILE || \ (option) == CURLOPT_RANGE || \ + (option) == CURLOPT_REDIR_PROTOCOLS_STR || \ (option) == CURLOPT_REFERER || \ + (option) == CURLOPT_REQUEST_TARGET || \ (option) == CURLOPT_RTSP_SESSION_ID || \ (option) == CURLOPT_RTSP_STREAM_URI || \ (option) == CURLOPT_RTSP_TRANSPORT || \ + (option) == CURLOPT_SASL_AUTHZID || \ (option) == CURLOPT_SERVICE_NAME || \ (option) == CURLOPT_SOCKS5_GSSAPI_SERVICE || \ (option) == CURLOPT_SSH_HOST_PUBLIC_KEY_MD5 || \ + (option) == CURLOPT_SSH_HOST_PUBLIC_KEY_SHA256 || \ (option) == CURLOPT_SSH_KNOWNHOSTS || \ (option) == CURLOPT_SSH_PRIVATE_KEYFILE || \ (option) == CURLOPT_SSH_PUBLIC_KEYFILE || \ @@ -323,6 +335,7 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, (option) == CURLOPT_SSLKEY || \ (option) == CURLOPT_SSLKEYTYPE || \ (option) == CURLOPT_SSL_CIPHER_LIST || \ + (option) == CURLOPT_TLS13_CIPHERS || \ (option) == CURLOPT_TLSAUTH_PASSWORD || \ (option) == CURLOPT_TLSAUTH_TYPE || \ (option) == CURLOPT_TLSAUTH_USERNAME || \ @@ -330,32 +343,36 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, (option) == CURLOPT_URL || \ (option) == CURLOPT_USERAGENT || \ (option) == CURLOPT_USERNAME || \ + (option) == CURLOPT_AWS_SIGV4 || \ (option) == CURLOPT_USERPWD || \ (option) == CURLOPT_XOAUTH2_BEARER || \ + (option) == CURLOPT_SSL_EC_CURVES || \ 0) /* evaluates to true if option takes a curl_write_callback argument */ -#define _curl_is_write_cb_option(option) \ - ((option) == CURLOPT_HEADERFUNCTION || \ +#define curlcheck_write_cb_option(option) \ + ((option) == CURLOPT_HEADERFUNCTION || \ (option) == CURLOPT_WRITEFUNCTION) /* evaluates to true if option takes a curl_conv_callback argument */ -#define _curl_is_conv_cb_option(option) \ - ((option) == CURLOPT_CONV_TO_NETWORK_FUNCTION || \ - (option) == 
CURLOPT_CONV_FROM_NETWORK_FUNCTION || \ +#define curlcheck_conv_cb_option(option) \ + ((option) == CURLOPT_CONV_TO_NETWORK_FUNCTION || \ + (option) == CURLOPT_CONV_FROM_NETWORK_FUNCTION || \ (option) == CURLOPT_CONV_FROM_UTF8_FUNCTION) /* evaluates to true if option takes a data argument to pass to a callback */ -#define _curl_is_cb_data_option(option) \ +#define curlcheck_cb_data_option(option) \ ((option) == CURLOPT_CHUNK_DATA || \ (option) == CURLOPT_CLOSESOCKETDATA || \ (option) == CURLOPT_DEBUGDATA || \ (option) == CURLOPT_FNMATCH_DATA || \ (option) == CURLOPT_HEADERDATA || \ + (option) == CURLOPT_HSTSREADDATA || \ + (option) == CURLOPT_HSTSWRITEDATA || \ (option) == CURLOPT_INTERLEAVEDATA || \ (option) == CURLOPT_IOCTLDATA || \ (option) == CURLOPT_OPENSOCKETDATA || \ - (option) == CURLOPT_PRIVATE || \ + (option) == CURLOPT_PREREQDATA || \ (option) == CURLOPT_PROGRESSDATA || \ (option) == CURLOPT_READDATA || \ (option) == CURLOPT_SEEKDATA || \ @@ -364,17 +381,18 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, (option) == CURLOPT_SSL_CTX_DATA || \ (option) == CURLOPT_WRITEDATA || \ (option) == CURLOPT_RESOLVER_START_DATA || \ - (option) == CURLOPT_CURLU || \ + (option) == CURLOPT_TRAILERDATA || \ + (option) == CURLOPT_SSH_HOSTKEYDATA || \ 0) /* evaluates to true if option takes a POST data argument (void* or char*) */ -#define _curl_is_postfields_option(option) \ +#define curlcheck_postfields_option(option) \ ((option) == CURLOPT_POSTFIELDS || \ (option) == CURLOPT_COPYPOSTFIELDS || \ 0) /* evaluates to true if option takes a struct curl_slist * argument */ -#define _curl_is_slist_option(option) \ +#define curlcheck_slist_option(option) \ ((option) == CURLOPT_HTTP200ALIASES || \ (option) == CURLOPT_HTTPHEADER || \ (option) == CURLOPT_MAIL_RCPT || \ @@ -384,45 +402,47 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, (option) == CURLOPT_QUOTE || \ (option) == CURLOPT_RESOLVE || \ (option) == CURLOPT_TELNETOPTIONS || \ + (option) == CURLOPT_CONNECT_TO || \ 
0) /* groups of curl_easy_getinfo infos that take the same type of argument */ /* evaluates to true if info expects a pointer to char * argument */ -#define _curl_is_string_info(info) \ - (CURLINFO_STRING < (info) && (info) < CURLINFO_LONG) +#define curlcheck_string_info(info) \ + (CURLINFO_STRING < (info) && (info) < CURLINFO_LONG && \ + (info) != CURLINFO_PRIVATE) /* evaluates to true if info expects a pointer to long argument */ -#define _curl_is_long_info(info) \ +#define curlcheck_long_info(info) \ (CURLINFO_LONG < (info) && (info) < CURLINFO_DOUBLE) /* evaluates to true if info expects a pointer to double argument */ -#define _curl_is_double_info(info) \ +#define curlcheck_double_info(info) \ (CURLINFO_DOUBLE < (info) && (info) < CURLINFO_SLIST) /* true if info expects a pointer to struct curl_slist * argument */ -#define _curl_is_slist_info(info) \ +#define curlcheck_slist_info(info) \ (((info) == CURLINFO_SSL_ENGINES) || ((info) == CURLINFO_COOKIELIST)) /* true if info expects a pointer to struct curl_tlssessioninfo * argument */ -#define _curl_is_tlssessioninfo_info(info) \ +#define curlcheck_tlssessioninfo_info(info) \ (((info) == CURLINFO_TLS_SSL_PTR) || ((info) == CURLINFO_TLS_SESSION)) /* true if info expects a pointer to struct curl_certinfo * argument */ -#define _curl_is_certinfo_info(info) ((info) == CURLINFO_CERTINFO) +#define curlcheck_certinfo_info(info) ((info) == CURLINFO_CERTINFO) /* true if info expects a pointer to struct curl_socket_t argument */ -#define _curl_is_socket_info(info) \ +#define curlcheck_socket_info(info) \ (CURLINFO_SOCKET < (info) && (info) < CURLINFO_OFF_T) /* true if info expects a pointer to curl_off_t argument */ -#define _curl_is_off_t_info(info) \ +#define curlcheck_off_t_info(info) \ (CURLINFO_OFF_T < (info)) -/* typecheck helpers -- check whether given expression has requested type*/ +/* typecheck helpers -- check whether given expression has requested type */ -/* For pointers, you can use the 
_curl_is_ptr/_curl_is_arr macros, +/* For pointers, you can use the curlcheck_ptr/curlcheck_arr macros, * otherwise define a new macro. Search for __builtin_types_compatible_p * in the GCC manual. * NOTE: these macros MUST NOT EVALUATE their arguments! The argument is @@ -432,35 +452,35 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, */ /* XXX: should evaluate to true if expr is a pointer */ -#define _curl_is_any_ptr(expr) \ +#define curlcheck_any_ptr(expr) \ (sizeof(expr) == sizeof(void *)) /* evaluates to true if expr is NULL */ /* XXX: must not evaluate expr, so this check is not accurate */ -#define _curl_is_NULL(expr) \ +#define curlcheck_NULL(expr) \ (__builtin_types_compatible_p(__typeof__(expr), __typeof__(NULL))) /* evaluates to true if expr is type*, const type* or NULL */ -#define _curl_is_ptr(expr, type) \ - (_curl_is_NULL(expr) || \ - __builtin_types_compatible_p(__typeof__(expr), type *) || \ +#define curlcheck_ptr(expr, type) \ + (curlcheck_NULL(expr) || \ + __builtin_types_compatible_p(__typeof__(expr), type *) || \ __builtin_types_compatible_p(__typeof__(expr), const type *)) /* evaluates to true if expr is one of type[], type*, NULL or const type* */ -#define _curl_is_arr(expr, type) \ - (_curl_is_ptr((expr), type) || \ +#define curlcheck_arr(expr, type) \ + (curlcheck_ptr((expr), type) || \ __builtin_types_compatible_p(__typeof__(expr), type [])) /* evaluates to true if expr is a string */ -#define _curl_is_string(expr) \ - (_curl_is_arr((expr), char) || \ - _curl_is_arr((expr), signed char) || \ - _curl_is_arr((expr), unsigned char)) +#define curlcheck_string(expr) \ + (curlcheck_arr((expr), char) || \ + curlcheck_arr((expr), signed char) || \ + curlcheck_arr((expr), unsigned char)) /* evaluates to true if expr is a long (no matter the signedness) * XXX: for now, int is also accepted (and therefore short and char, which * are promoted to int when passed to a variadic function) */ -#define _curl_is_long(expr) \ +#define curlcheck_long(expr) \ 
(__builtin_types_compatible_p(__typeof__(expr), long) || \ __builtin_types_compatible_p(__typeof__(expr), signed long) || \ __builtin_types_compatible_p(__typeof__(expr), unsigned long) || \ @@ -475,63 +495,59 @@ _CURL_WARNING(_curl_easy_getinfo_err_curl_off_t, __builtin_types_compatible_p(__typeof__(expr), unsigned char)) /* evaluates to true if expr is of type curl_off_t */ -#define _curl_is_off_t(expr) \ +#define curlcheck_off_t(expr) \ (__builtin_types_compatible_p(__typeof__(expr), curl_off_t)) /* evaluates to true if expr is abuffer suitable for CURLOPT_ERRORBUFFER */ /* XXX: also check size of an char[] array? */ -#define _curl_is_error_buffer(expr) \ - (_curl_is_NULL(expr) || \ - __builtin_types_compatible_p(__typeof__(expr), char *) || \ +#define curlcheck_error_buffer(expr) \ + (curlcheck_NULL(expr) || \ + __builtin_types_compatible_p(__typeof__(expr), char *) || \ __builtin_types_compatible_p(__typeof__(expr), char[])) /* evaluates to true if expr is of type (const) void* or (const) FILE* */ #if 0 -#define _curl_is_cb_data(expr) \ - (_curl_is_ptr((expr), void) || \ - _curl_is_ptr((expr), FILE)) +#define curlcheck_cb_data(expr) \ + (curlcheck_ptr((expr), void) || \ + curlcheck_ptr((expr), FILE)) #else /* be less strict */ -#define _curl_is_cb_data(expr) \ - _curl_is_any_ptr(expr) +#define curlcheck_cb_data(expr) \ + curlcheck_any_ptr(expr) #endif /* evaluates to true if expr is of type FILE* */ -#define _curl_is_FILE(expr) \ - (_curl_is_NULL(expr) || \ +#define curlcheck_FILE(expr) \ + (curlcheck_NULL(expr) || \ (__builtin_types_compatible_p(__typeof__(expr), FILE *))) /* evaluates to true if expr can be passed as POST data (void* or char*) */ -#define _curl_is_postfields(expr) \ - (_curl_is_ptr((expr), void) || \ - _curl_is_arr((expr), char) || \ - _curl_is_arr((expr), unsigned char)) +#define curlcheck_postfields(expr) \ + (curlcheck_ptr((expr), void) || \ + curlcheck_arr((expr), char) || \ + curlcheck_arr((expr), unsigned char)) -/* FIXME: the whole 
callback checking is messy... - * The idea is to tolerate char vs. void and const vs. not const - * pointers in arguments at least - */ /* helper: __builtin_types_compatible_p distinguishes between functions and * function pointers, hide it */ -#define _curl_callback_compatible(func, type) \ - (__builtin_types_compatible_p(__typeof__(func), type) || \ +#define curlcheck_cb_compatible(func, type) \ + (__builtin_types_compatible_p(__typeof__(func), type) || \ __builtin_types_compatible_p(__typeof__(func) *, type)) /* evaluates to true if expr is of type curl_resolver_start_callback */ -#define _curl_is_resolver_start_callback(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_resolver_start_callback)) +#define curlcheck_resolver_start_callback(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_resolver_start_callback)) /* evaluates to true if expr is of type curl_read_callback or "similar" */ -#define _curl_is_read_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), __typeof__(fread) *) || \ - _curl_callback_compatible((expr), curl_read_callback) || \ - _curl_callback_compatible((expr), _curl_read_callback1) || \ - _curl_callback_compatible((expr), _curl_read_callback2) || \ - _curl_callback_compatible((expr), _curl_read_callback3) || \ - _curl_callback_compatible((expr), _curl_read_callback4) || \ - _curl_callback_compatible((expr), _curl_read_callback5) || \ - _curl_callback_compatible((expr), _curl_read_callback6)) +#define curlcheck_read_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), __typeof__(fread) *) || \ + curlcheck_cb_compatible((expr), curl_read_callback) || \ + curlcheck_cb_compatible((expr), _curl_read_callback1) || \ + curlcheck_cb_compatible((expr), _curl_read_callback2) || \ + curlcheck_cb_compatible((expr), _curl_read_callback3) || \ + curlcheck_cb_compatible((expr), _curl_read_callback4) || \ + curlcheck_cb_compatible((expr), _curl_read_callback5) || 
\ + curlcheck_cb_compatible((expr), _curl_read_callback6)) typedef size_t (*_curl_read_callback1)(char *, size_t, size_t, void *); typedef size_t (*_curl_read_callback2)(char *, size_t, size_t, const void *); typedef size_t (*_curl_read_callback3)(char *, size_t, size_t, FILE *); @@ -540,16 +556,16 @@ typedef size_t (*_curl_read_callback5)(void *, size_t, size_t, const void *); typedef size_t (*_curl_read_callback6)(void *, size_t, size_t, FILE *); /* evaluates to true if expr is of type curl_write_callback or "similar" */ -#define _curl_is_write_cb(expr) \ - (_curl_is_read_cb(expr) || \ - _curl_callback_compatible((expr), __typeof__(fwrite) *) || \ - _curl_callback_compatible((expr), curl_write_callback) || \ - _curl_callback_compatible((expr), _curl_write_callback1) || \ - _curl_callback_compatible((expr), _curl_write_callback2) || \ - _curl_callback_compatible((expr), _curl_write_callback3) || \ - _curl_callback_compatible((expr), _curl_write_callback4) || \ - _curl_callback_compatible((expr), _curl_write_callback5) || \ - _curl_callback_compatible((expr), _curl_write_callback6)) +#define curlcheck_write_cb(expr) \ + (curlcheck_read_cb(expr) || \ + curlcheck_cb_compatible((expr), __typeof__(fwrite) *) || \ + curlcheck_cb_compatible((expr), curl_write_callback) || \ + curlcheck_cb_compatible((expr), _curl_write_callback1) || \ + curlcheck_cb_compatible((expr), _curl_write_callback2) || \ + curlcheck_cb_compatible((expr), _curl_write_callback3) || \ + curlcheck_cb_compatible((expr), _curl_write_callback4) || \ + curlcheck_cb_compatible((expr), _curl_write_callback5) || \ + curlcheck_cb_compatible((expr), _curl_write_callback6)) typedef size_t (*_curl_write_callback1)(const char *, size_t, size_t, void *); typedef size_t (*_curl_write_callback2)(const char *, size_t, size_t, const void *); @@ -560,37 +576,37 @@ typedef size_t (*_curl_write_callback5)(const void *, size_t, size_t, typedef size_t (*_curl_write_callback6)(const void *, size_t, size_t, FILE *); /* 
evaluates to true if expr is of type curl_ioctl_callback or "similar" */ -#define _curl_is_ioctl_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_ioctl_callback) || \ - _curl_callback_compatible((expr), _curl_ioctl_callback1) || \ - _curl_callback_compatible((expr), _curl_ioctl_callback2) || \ - _curl_callback_compatible((expr), _curl_ioctl_callback3) || \ - _curl_callback_compatible((expr), _curl_ioctl_callback4)) +#define curlcheck_ioctl_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_ioctl_callback) || \ + curlcheck_cb_compatible((expr), _curl_ioctl_callback1) || \ + curlcheck_cb_compatible((expr), _curl_ioctl_callback2) || \ + curlcheck_cb_compatible((expr), _curl_ioctl_callback3) || \ + curlcheck_cb_compatible((expr), _curl_ioctl_callback4)) typedef curlioerr (*_curl_ioctl_callback1)(CURL *, int, void *); typedef curlioerr (*_curl_ioctl_callback2)(CURL *, int, const void *); typedef curlioerr (*_curl_ioctl_callback3)(CURL *, curliocmd, void *); typedef curlioerr (*_curl_ioctl_callback4)(CURL *, curliocmd, const void *); /* evaluates to true if expr is of type curl_sockopt_callback or "similar" */ -#define _curl_is_sockopt_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_sockopt_callback) || \ - _curl_callback_compatible((expr), _curl_sockopt_callback1) || \ - _curl_callback_compatible((expr), _curl_sockopt_callback2)) +#define curlcheck_sockopt_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_sockopt_callback) || \ + curlcheck_cb_compatible((expr), _curl_sockopt_callback1) || \ + curlcheck_cb_compatible((expr), _curl_sockopt_callback2)) typedef int (*_curl_sockopt_callback1)(void *, curl_socket_t, curlsocktype); typedef int (*_curl_sockopt_callback2)(const void *, curl_socket_t, curlsocktype); /* evaluates to true if expr is of type curl_opensocket_callback or "similar" */ -#define _curl_is_opensocket_cb(expr) \ - (_curl_is_NULL(expr) || \ 
- _curl_callback_compatible((expr), curl_opensocket_callback) || \ - _curl_callback_compatible((expr), _curl_opensocket_callback1) || \ - _curl_callback_compatible((expr), _curl_opensocket_callback2) || \ - _curl_callback_compatible((expr), _curl_opensocket_callback3) || \ - _curl_callback_compatible((expr), _curl_opensocket_callback4)) +#define curlcheck_opensocket_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_opensocket_callback) || \ + curlcheck_cb_compatible((expr), _curl_opensocket_callback1) || \ + curlcheck_cb_compatible((expr), _curl_opensocket_callback2) || \ + curlcheck_cb_compatible((expr), _curl_opensocket_callback3) || \ + curlcheck_cb_compatible((expr), _curl_opensocket_callback4)) typedef curl_socket_t (*_curl_opensocket_callback1) (void *, curlsocktype, struct curl_sockaddr *); typedef curl_socket_t (*_curl_opensocket_callback2) @@ -601,28 +617,28 @@ typedef curl_socket_t (*_curl_opensocket_callback4) (const void *, curlsocktype, const struct curl_sockaddr *); /* evaluates to true if expr is of type curl_progress_callback or "similar" */ -#define _curl_is_progress_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_progress_callback) || \ - _curl_callback_compatible((expr), _curl_progress_callback1) || \ - _curl_callback_compatible((expr), _curl_progress_callback2)) +#define curlcheck_progress_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_progress_callback) || \ + curlcheck_cb_compatible((expr), _curl_progress_callback1) || \ + curlcheck_cb_compatible((expr), _curl_progress_callback2)) typedef int (*_curl_progress_callback1)(void *, double, double, double, double); typedef int (*_curl_progress_callback2)(const void *, double, double, double, double); /* evaluates to true if expr is of type curl_debug_callback or "similar" */ -#define _curl_is_debug_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_debug_callback) || \ - 
_curl_callback_compatible((expr), _curl_debug_callback1) || \ - _curl_callback_compatible((expr), _curl_debug_callback2) || \ - _curl_callback_compatible((expr), _curl_debug_callback3) || \ - _curl_callback_compatible((expr), _curl_debug_callback4) || \ - _curl_callback_compatible((expr), _curl_debug_callback5) || \ - _curl_callback_compatible((expr), _curl_debug_callback6) || \ - _curl_callback_compatible((expr), _curl_debug_callback7) || \ - _curl_callback_compatible((expr), _curl_debug_callback8)) +#define curlcheck_debug_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_debug_callback) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback1) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback2) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback3) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback4) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback5) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback6) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback7) || \ + curlcheck_cb_compatible((expr), _curl_debug_callback8)) typedef int (*_curl_debug_callback1) (CURL *, curl_infotype, char *, size_t, void *); typedef int (*_curl_debug_callback2) (CURL *, @@ -642,17 +658,17 @@ typedef int (*_curl_debug_callback8) (CURL *, /* evaluates to true if expr is of type curl_ssl_ctx_callback or "similar" */ /* this is getting even messier... 
*/ -#define _curl_is_ssl_ctx_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_ssl_ctx_callback) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback1) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback2) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback3) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback4) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback5) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback6) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback7) || \ - _curl_callback_compatible((expr), _curl_ssl_ctx_callback8)) +#define curlcheck_ssl_ctx_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_ssl_ctx_callback) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback1) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback2) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback3) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback4) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback5) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback6) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback7) || \ + curlcheck_cb_compatible((expr), _curl_ssl_ctx_callback8)) typedef CURLcode (*_curl_ssl_ctx_callback1)(CURL *, void *, void *); typedef CURLcode (*_curl_ssl_ctx_callback2)(CURL *, void *, const void *); typedef CURLcode (*_curl_ssl_ctx_callback3)(CURL *, const void *, void *); @@ -662,11 +678,11 @@ typedef CURLcode (*_curl_ssl_ctx_callback4)(CURL *, const void *, /* hack: if we included OpenSSL's ssl.h, we know about SSL_CTX * this will of course break if we're included before OpenSSL headers... 
*/ -typedef CURLcode (*_curl_ssl_ctx_callback5)(CURL *, SSL_CTX, void *); -typedef CURLcode (*_curl_ssl_ctx_callback6)(CURL *, SSL_CTX, const void *); -typedef CURLcode (*_curl_ssl_ctx_callback7)(CURL *, const SSL_CTX, void *); -typedef CURLcode (*_curl_ssl_ctx_callback8)(CURL *, const SSL_CTX, - const void *); +typedef CURLcode (*_curl_ssl_ctx_callback5)(CURL *, SSL_CTX *, void *); +typedef CURLcode (*_curl_ssl_ctx_callback6)(CURL *, SSL_CTX *, const void *); +typedef CURLcode (*_curl_ssl_ctx_callback7)(CURL *, const SSL_CTX *, void *); +typedef CURLcode (*_curl_ssl_ctx_callback8)(CURL *, const SSL_CTX *, + const void *); #else typedef _curl_ssl_ctx_callback1 _curl_ssl_ctx_callback5; typedef _curl_ssl_ctx_callback1 _curl_ssl_ctx_callback6; @@ -675,26 +691,26 @@ typedef _curl_ssl_ctx_callback1 _curl_ssl_ctx_callback8; #endif /* evaluates to true if expr is of type curl_conv_callback or "similar" */ -#define _curl_is_conv_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_conv_callback) || \ - _curl_callback_compatible((expr), _curl_conv_callback1) || \ - _curl_callback_compatible((expr), _curl_conv_callback2) || \ - _curl_callback_compatible((expr), _curl_conv_callback3) || \ - _curl_callback_compatible((expr), _curl_conv_callback4)) +#define curlcheck_conv_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_conv_callback) || \ + curlcheck_cb_compatible((expr), _curl_conv_callback1) || \ + curlcheck_cb_compatible((expr), _curl_conv_callback2) || \ + curlcheck_cb_compatible((expr), _curl_conv_callback3) || \ + curlcheck_cb_compatible((expr), _curl_conv_callback4)) typedef CURLcode (*_curl_conv_callback1)(char *, size_t length); typedef CURLcode (*_curl_conv_callback2)(const char *, size_t length); typedef CURLcode (*_curl_conv_callback3)(void *, size_t length); typedef CURLcode (*_curl_conv_callback4)(const void *, size_t length); /* evaluates to true if expr is of type curl_seek_callback or "similar" */ 
-#define _curl_is_seek_cb(expr) \ - (_curl_is_NULL(expr) || \ - _curl_callback_compatible((expr), curl_seek_callback) || \ - _curl_callback_compatible((expr), _curl_seek_callback1) || \ - _curl_callback_compatible((expr), _curl_seek_callback2)) +#define curlcheck_seek_cb(expr) \ + (curlcheck_NULL(expr) || \ + curlcheck_cb_compatible((expr), curl_seek_callback) || \ + curlcheck_cb_compatible((expr), _curl_seek_callback1) || \ + curlcheck_cb_compatible((expr), _curl_seek_callback2)) typedef CURLcode (*_curl_seek_callback1)(void *, curl_off_t, int); typedef CURLcode (*_curl_seek_callback2)(const void *, curl_off_t, int); -#endif /* __CURL_TYPECHECK_GCC_H */ +#endif /* CURLINC_TYPECHECK_GCC_H */ diff --git a/Source/ThirdParty/curl/urlapi.h b/Source/ThirdParty/curl/urlapi.h index 850faa97a..b97b53475 100644 --- a/Source/ThirdParty/curl/urlapi.h +++ b/Source/ThirdParty/curl/urlapi.h @@ -1,5 +1,5 @@ -#ifndef __CURL_URLAPI_H -#define __CURL_URLAPI_H +#ifndef CURLINC_URLAPI_H +#define CURLINC_URLAPI_H /*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | @@ -7,11 +7,11 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 2018 - 2019, Daniel Stenberg, , et al. + * Copyright (C) Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms - * are also available at https://curl.haxx.se/docs/copyright.html. + * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is @@ -20,6 +20,8 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. 
* + * SPDX-License-Identifier: curl + * ***************************************************************************/ #include "curl.h" @@ -47,7 +49,21 @@ typedef enum { CURLUE_NO_HOST, /* 14 */ CURLUE_NO_PORT, /* 15 */ CURLUE_NO_QUERY, /* 16 */ - CURLUE_NO_FRAGMENT /* 17 */ + CURLUE_NO_FRAGMENT, /* 17 */ + CURLUE_NO_ZONEID, /* 18 */ + CURLUE_BAD_FILE_URL, /* 19 */ + CURLUE_BAD_FRAGMENT, /* 20 */ + CURLUE_BAD_HOSTNAME, /* 21 */ + CURLUE_BAD_IPV6, /* 22 */ + CURLUE_BAD_LOGIN, /* 23 */ + CURLUE_BAD_PASSWORD, /* 24 */ + CURLUE_BAD_PATH, /* 25 */ + CURLUE_BAD_QUERY, /* 26 */ + CURLUE_BAD_SCHEME, /* 27 */ + CURLUE_BAD_SLASHES, /* 28 */ + CURLUE_BAD_USER, /* 29 */ + CURLUE_LACKS_IDN, /* 30 */ + CURLUE_LAST } CURLUcode; typedef enum { @@ -60,7 +76,8 @@ typedef enum { CURLUPART_PORT, CURLUPART_PATH, CURLUPART_QUERY, - CURLUPART_FRAGMENT + CURLUPART_FRAGMENT, + CURLUPART_ZONEID /* added in 7.65.0 */ } CURLUPart; #define CURLU_DEFAULT_PORT (1<<0) /* return default port number */ @@ -76,6 +93,10 @@ typedef enum { #define CURLU_URLENCODE (1<<7) /* URL encode on set */ #define CURLU_APPENDQUERY (1<<8) /* append a form style part */ #define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */ +#define CURLU_NO_AUTHORITY (1<<10) /* Allow empty authority when the + scheme is unknown. */ +#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */ +#define CURLU_PUNYCODE (1<<12) /* get the host name in pynycode */ typedef struct Curl_URL CURLU; @@ -114,9 +135,15 @@ CURL_EXTERN CURLUcode curl_url_get(CURLU *handle, CURLUPart what, CURL_EXTERN CURLUcode curl_url_set(CURLU *handle, CURLUPart what, const char *part, unsigned int flags); +/* + * curl_url_strerror() turns a CURLUcode value into the equivalent human + * readable error string. This is useful for printing meaningful error + * messages. 
+ */ +CURL_EXTERN const char *curl_url_strerror(CURLUcode); #ifdef __cplusplus } /* end of extern "C" */ #endif -#endif +#endif /* CURLINC_URLAPI_H */ diff --git a/Source/ThirdParty/curl/websockets.h b/Source/ThirdParty/curl/websockets.h new file mode 100644 index 000000000..fd6a91654 --- /dev/null +++ b/Source/ThirdParty/curl/websockets.h @@ -0,0 +1,84 @@ +#ifndef CURLINC_WEBSOCKETS_H +#define CURLINC_WEBSOCKETS_H +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at https://curl.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + * SPDX-License-Identifier: curl + * + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +struct curl_ws_frame { + int age; /* zero */ + int flags; /* See the CURLWS_* defines */ + curl_off_t offset; /* the offset of this data into the frame */ + curl_off_t bytesleft; /* number of pending bytes left of the payload */ + size_t len; /* size of the current data chunk */ +}; + +/* flag bits */ +#define CURLWS_TEXT (1<<0) +#define CURLWS_BINARY (1<<1) +#define CURLWS_CONT (1<<2) +#define CURLWS_CLOSE (1<<3) +#define CURLWS_PING (1<<4) +#define CURLWS_OFFSET (1<<5) + +/* + * NAME curl_ws_recv() + * + * DESCRIPTION + * + * Receives data from the websocket connection. 
Use after successful + * curl_easy_perform() with CURLOPT_CONNECT_ONLY option. + */ +CURL_EXTERN CURLcode curl_ws_recv(CURL *curl, void *buffer, size_t buflen, + size_t *recv, + struct curl_ws_frame **metap); + +/* sendflags for curl_ws_send() */ +#define CURLWS_PONG (1<<6) + +/* + * NAME curl_ws_send() + * + * DESCRIPTION + * + * Sends data over the websocket connection. Use after successful + * curl_easy_perform() with CURLOPT_CONNECT_ONLY option. + */ +CURL_EXTERN CURLcode curl_ws_send(CURL *curl, const void *buffer, + size_t buflen, size_t *sent, + curl_off_t framesize, + unsigned int sendflags); + +/* bits for the CURLOPT_WS_OPTIONS bitmask: */ +#define CURLWS_RAW_MODE (1<<0) + +CURL_EXTERN struct curl_ws_frame *curl_ws_meta(CURL *curl); + +#ifdef __cplusplus +} +#endif + +#endif /* CURLINC_WEBSOCKETS_H */ diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs index 896df66bd..8492cbae7 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs @@ -46,12 +46,10 @@ namespace Flax.Deps.Dependencies { var root = options.IntermediateFolder; var packagePath = Path.Combine(root, "package.zip"); - var vcVersion = "VC14"; - var configuration = "LIB Release - DLL Windows SSPI"; + var configuration = "Release"; var binariesToCopyWin = new[] { "libcurl.lib", - "lib/libcurl.pdb", }; var filesToKeep = new[] { @@ -60,7 +58,7 @@ namespace Flax.Deps.Dependencies // Get the source if (!File.Exists(packagePath)) - Downloader.DownloadFileFromUrlToPath("https://curl.haxx.se/download/curl-7.64.1.zip", packagePath); + Downloader.DownloadFileFromUrlToPath("https://curl.haxx.se/download/curl-7.88.1.zip", packagePath); using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) { var newRoot = Path.Combine(root, archive.Entries.First().FullName); @@ -75,20 +73,17 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: { - var
vsSolutionPath = Path.Combine(root, "projects", "Windows", vcVersion, "curl-all.sln"); - var vcxprojPath = Path.Combine(root, "projects", "Windows", vcVersion, "lib", "libcurl.vcxproj"); - var vcxprojContents = File.ReadAllText(vcxprojPath); - vcxprojContents = vcxprojContents.Replace("MultiThreaded", "MultiThreadedDLL"); - vcxprojContents = vcxprojContents.Replace("MultiThreadedDebug", "MultiThreadedDebugDLL"); - vcxprojContents = vcxprojContents.Replace("true", "false"); - vcxprojContents = vcxprojContents.Replace("ProgramDatabase", ""); - File.WriteAllText(vcxprojPath, vcxprojContents); + // Build for Win64 and ARM64 + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) { - // Build for Win64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var filename in binariesToCopyWin) - Utilities.FileCopy(Path.Combine(root, "build", "Win64", vcVersion, configuration, filename), Path.Combine(depsFolder, Path.GetFileName(filename))); + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + var solutionPath = Path.Combine(buildDir, "CURL.sln"); + + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DBUILD_CURL_EXE=OFF -DBUILD_SHARED_LIBS=OFF -DCURL_STATIC_CRT=OFF"); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in binariesToCopyWin) + Utilities.FileCopy(Path.Combine(buildDir, "lib", configuration, file), Path.Combine(depsFolder, Path.GetFileName(file))); } break; } From 90e7d4076b36cab8e991422aea6d3028a99582c3 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:24:51 +0300 Subject: [PATCH 114/292] Update PhysX with Windows on ARM support --- Source/ThirdParty/PhysX/PxPhysics.h | 2 +- Source/ThirdParty/PhysX/PxPhysicsAPI.h | 2 +- 
.../ThirdParty/PhysX/PxPhysicsSerialization.h | 2 +- .../ThirdParty/PhysX/PxSimulationStatistics.h | 2 +- .../PhysX/extensions/PxParticleExt.h | 2 + Source/ThirdParty/PhysX/foundation/PxAoS.h | 2 + .../ThirdParty/PhysX/foundation/PxInlineAoS.h | 6 +- .../PhysX/foundation/PxPreprocessor.h | 2 +- .../ThirdParty/PhysX/foundation/PxVecMath.h | 2 + .../PhysX/foundation/unix/PxUnixIntrinsics.h | 1 + .../windows/neon/PxWindowsNeonAoS.h | 136 + .../windows/neon/PxWindowsNeonInlineAoS.h | 3656 +++++++++++++++++ .../PhysX/geomutils/PxContactPoint.h | 9 + .../Flax.Build/Deps/Dependencies/PhysX.cs | 15 + 14 files changed, 3833 insertions(+), 6 deletions(-) create mode 100644 Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonAoS.h create mode 100644 Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonInlineAoS.h diff --git a/Source/ThirdParty/PhysX/PxPhysics.h b/Source/ThirdParty/PhysX/PxPhysics.h index 6446856ec..21fe137f1 100644 --- a/Source/ThirdParty/PhysX/PxPhysics.h +++ b/Source/ThirdParty/PhysX/PxPhysics.h @@ -22,7 +22,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Copyright (c) 2008-2024 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. diff --git a/Source/ThirdParty/PhysX/PxPhysicsAPI.h b/Source/ThirdParty/PhysX/PxPhysicsAPI.h index 343a0099e..d66780805 100644 --- a/Source/ThirdParty/PhysX/PxPhysicsAPI.h +++ b/Source/ThirdParty/PhysX/PxPhysicsAPI.h @@ -22,7 +22,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Copyright (c) 2008-2024 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved. 
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. diff --git a/Source/ThirdParty/PhysX/PxPhysicsSerialization.h b/Source/ThirdParty/PhysX/PxPhysicsSerialization.h index 82a95f0be..e9383ab51 100644 --- a/Source/ThirdParty/PhysX/PxPhysicsSerialization.h +++ b/Source/ThirdParty/PhysX/PxPhysicsSerialization.h @@ -22,7 +22,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Copyright (c) 2008-2024 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. diff --git a/Source/ThirdParty/PhysX/PxSimulationStatistics.h b/Source/ThirdParty/PhysX/PxSimulationStatistics.h index baac037b2..2102ecd90 100644 --- a/Source/ThirdParty/PhysX/PxSimulationStatistics.h +++ b/Source/ThirdParty/PhysX/PxSimulationStatistics.h @@ -22,7 +22,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Copyright (c) 2008-2024 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
diff --git a/Source/ThirdParty/PhysX/extensions/PxParticleExt.h b/Source/ThirdParty/PhysX/extensions/PxParticleExt.h index 4ad9c5e80..d78eae2a5 100644 --- a/Source/ThirdParty/PhysX/extensions/PxParticleExt.h +++ b/Source/ThirdParty/PhysX/extensions/PxParticleExt.h @@ -275,7 +275,9 @@ class PxParticleAttachmentBuffer : public PxUserAllocated PxU32 mNumDeviceAttachments; PxU32 mNumDeviceFilters; +#if PX_SUPPORT_GPU_PHYSX PxCudaContextManager* mCudaContextManager; +#endif PxParticleSystem& mParticleSystem; diff --git a/Source/ThirdParty/PhysX/foundation/PxAoS.h b/Source/ThirdParty/PhysX/foundation/PxAoS.h index bb5e74807..03584e7a0 100644 --- a/Source/ThirdParty/PhysX/foundation/PxAoS.h +++ b/Source/ThirdParty/PhysX/foundation/PxAoS.h @@ -33,6 +33,8 @@ #if PX_WINDOWS && !PX_NEON #include "windows/PxWindowsAoS.h" +#elif PX_WINDOWS_FAMILY && PX_NEON +#include "windows/neon/PxWindowsNeonAoS.h" #elif(PX_UNIX_FAMILY || PX_PS4 || PX_PS5 || PX_SWITCH) #include "unix/PxUnixAoS.h" #else diff --git a/Source/ThirdParty/PhysX/foundation/PxInlineAoS.h b/Source/ThirdParty/PhysX/foundation/PxInlineAoS.h index f59cc5972..e415e11e3 100644 --- a/Source/ThirdParty/PhysX/foundation/PxInlineAoS.h +++ b/Source/ThirdParty/PhysX/foundation/PxInlineAoS.h @@ -31,9 +31,13 @@ #include "foundation/PxPreprocessor.h" -#if PX_WINDOWS +#if PX_WINDOWS_FAMILY #include "windows/PxWindowsTrigConstants.h" +#if PX_NEON +#include "windows/neon/PxWindowsNeonInlineAoS.h" +#else #include "windows/PxWindowsInlineAoS.h" +#endif #elif (PX_UNIX_FAMILY || PX_PS4 || PX_PS5 || PX_SWITCH) #include "unix/PxUnixTrigConstants.h" #include "unix/PxUnixInlineAoS.h" diff --git a/Source/ThirdParty/PhysX/foundation/PxPreprocessor.h b/Source/ThirdParty/PhysX/foundation/PxPreprocessor.h index 5dc1217d8..2b23aa222 100644 --- a/Source/ThirdParty/PhysX/foundation/PxPreprocessor.h +++ b/Source/ThirdParty/PhysX/foundation/PxPreprocessor.h @@ -143,7 +143,7 @@ SIMD defines #if defined(__i386__) || defined(_M_IX86) || 
defined(__x86_64__) || defined(_M_X64) || (defined (__EMSCRIPTEN__) && defined(__SSE2__)) #define PX_SSE2 1 #endif - #if defined(_M_ARM) || defined(__ARM_NEON__) || defined(__ARM_NEON) + #if defined(_M_ARM) || defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(_M_ARM64) #define PX_NEON 1 #endif #if defined(_M_PPC) || defined(__CELLOS_LV2__) diff --git a/Source/ThirdParty/PhysX/foundation/PxVecMath.h b/Source/ThirdParty/PhysX/foundation/PxVecMath.h index 729729e6b..3e5ed3ced 100644 --- a/Source/ThirdParty/PhysX/foundation/PxVecMath.h +++ b/Source/ThirdParty/PhysX/foundation/PxVecMath.h @@ -57,6 +57,8 @@ #define COMPILE_VECTOR_INTRINSICS 1 #elif PX_SWITCH #define COMPILE_VECTOR_INTRINSICS 1 +#elif PX_ARM_FAMILY && PX_NEON + #define COMPILE_VECTOR_INTRINSICS 1 #else #define COMPILE_VECTOR_INTRINSICS 0 #endif diff --git a/Source/ThirdParty/PhysX/foundation/unix/PxUnixIntrinsics.h b/Source/ThirdParty/PhysX/foundation/unix/PxUnixIntrinsics.h index cb529be5c..02104221a 100644 --- a/Source/ThirdParty/PhysX/foundation/unix/PxUnixIntrinsics.h +++ b/Source/ThirdParty/PhysX/foundation/unix/PxUnixIntrinsics.h @@ -58,6 +58,7 @@ Return the index of the highest set bit. Undefined for zero arg. */ PX_INLINE uint32_t PxHighestSetBitUnsafe(uint32_t v) { + return uint32_t(31 - __builtin_clz(v)); } diff --git a/Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonAoS.h b/Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonAoS.h new file mode 100644 index 000000000..d4f2c856a --- /dev/null +++ b/Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonAoS.h @@ -0,0 +1,136 @@ +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PXFOUNDATION_PXWINDOWSNEONAOS_H +#define PXFOUNDATION_PXWINDOWSNEONAOS_H + +// no includes here! this file should be included from PxcVecMath.h only!!! + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. 
+#endif + +// only ARM NEON compatible platforms should reach this +#include + +namespace physx +{ +namespace aos +{ + +typedef float32x2_t FloatV; +typedef float32x4_t Vec3V; +typedef float32x4_t Vec4V; +typedef uint32x4_t BoolV; +typedef float32x4_t QuatV; + +typedef uint32x4_t VecU32V; +typedef int32x4_t VecI32V; +typedef uint16x8_t VecU16V; +typedef int16x8_t VecI16V; +typedef uint8x16_t VecU8V; + +#define FloatVArg FloatV & +#define Vec3VArg Vec3V & +#define Vec4VArg Vec4V & +#define BoolVArg BoolV & +#define VecU32VArg VecU32V & +#define VecI32VArg VecI32V & +#define VecU16VArg VecU16V & +#define VecI16VArg VecI16V & +#define VecU8VArg VecU8V & +#define QuatVArg QuatV & + +// KS - TODO - make an actual VecCrossV type for NEON +#define VecCrossV Vec3V + +typedef VecI32V VecShiftV; +#define VecShiftVArg VecShiftV & + +PX_ALIGN_PREFIX(16) +struct Mat33V +{ + Mat33V() + { + } + Mat33V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec3V PX_ALIGN(16, col0); + Vec3V PX_ALIGN(16, col1); + Vec3V PX_ALIGN(16, col2); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat34V +{ + Mat34V() + { + } + Mat34V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2, const Vec3V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec3V PX_ALIGN(16, col0); + Vec3V PX_ALIGN(16, col1); + Vec3V PX_ALIGN(16, col2); + Vec3V PX_ALIGN(16, col3); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat43V +{ + Mat43V() + { + } + Mat43V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec4V PX_ALIGN(16, col0); + Vec4V PX_ALIGN(16, col1); + Vec4V PX_ALIGN(16, col2); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat44V +{ + Mat44V() + { + } + Mat44V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2, const Vec4V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec4V PX_ALIGN(16, col0); + Vec4V PX_ALIGN(16, col1); + Vec4V PX_ALIGN(16, col2); + Vec4V PX_ALIGN(16, col3); +} 
PX_ALIGN_SUFFIX(16); + +} // namespace aos +} // namespace physx + +#endif // PXFOUNDATION_PXWINDOWSNEONAOS_H diff --git a/Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonInlineAoS.h b/Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonInlineAoS.h new file mode 100644 index 000000000..eafb17d9f --- /dev/null +++ b/Source/ThirdParty/PhysX/foundation/windows/neon/PxWindowsNeonInlineAoS.h @@ -0,0 +1,3656 @@ +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PXFOUNDATION_PXWINDOWSNEONINLINEAOS_H +#define PXFOUNDATION_PXWINDOWSNEONINLINEAOS_H + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. +#endif + +namespace physx +{ +namespace aos +{ + +// improved estimates +#define VRECIPEQ recipq_newton<1> +#define VRECIPE recip_newton<1> +#define VRECIPSQRTEQ rsqrtq_newton<1> +#define VRECIPSQRTE rsqrt_newton<1> + +// "exact" +#define VRECIPQ recipq_newton<4> +#if PX_SWITCH +// StabilizationTests.AveragePoint needs more precision to succeed. +#define VRECIP recip_newton<5> +#else +#define VRECIP recip_newton<4> +#endif +#define VRECIPSQRTQ rsqrtq_newton<4> +#define VRECIPSQRT rsqrt_newton<4> + +#define VECMATH_AOS_EPSILON (1e-3f) + +////////////////////////////////////////////////////////////////////// +//Test that Vec3V and FloatV are legal +////////////////////////////////// + +#define FLOAT_COMPONENTS_EQUAL_THRESHOLD 0.01f +PX_FORCE_INLINE bool isValidFloatV(const FloatV a) +{ + /* + PX_ALIGN(16, PxF32) data[4]; + vst1_f32(reinterpret_cast(data), a); + return + PxU32* intData = reinterpret_cast(data); + return intData[0] == intData[1]; + */ + PX_ALIGN(16, PxF32) data[4]; + vst1_f32(reinterpret_cast(data), a); + const float32_t x = data[0]; + const float32_t y = data[1]; + + return (x == y); + + /*if (PxAbs(x - y) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + { + return true; + } + + if (PxAbs((x - y) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + { + return true; + } + + return false;*/ +} + +PX_FORCE_INLINE bool isValidVec3V(const Vec3V a) +{ + const float32_t w = vgetq_lane_f32(a, 3); + return (0.0f == w); + //const PxU32* intData = reinterpret_cast(&w); + //return *intData == 0; +} + +PX_FORCE_INLINE bool isAligned16(const void* a) +{ + return(0 == (size_t(a) & 0x0f)); +} + +#if PX_DEBUG +#define ASSERT_ISVALIDVEC3V(a) 
PX_ASSERT(isValidVec3V(a)) +#define ASSERT_ISVALIDFLOATV(a) PX_ASSERT(isValidFloatV(a)) +#define ASSERT_ISALIGNED16(a) PX_ASSERT(isAligned16(static_cast(a))) +#else +#define ASSERT_ISVALIDVEC3V(a) +#define ASSERT_ISVALIDFLOATV(a) +#define ASSERT_ISALIGNED16(a) +#endif + +namespace internalUnitNeonSimd +{ +PX_FORCE_INLINE PxU32 BAllTrue4_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32(vget_lane_u32(finalReduce, 0) == 0xffffFFFF); +} + +PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32((vget_lane_u32(finalReduce, 0) & 0xffFFff) == 0xffFFff); +} + +PX_FORCE_INLINE PxU32 BAnyTrue4_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32(vget_lane_u32(finalReduce, 0) != 0x0); +} + +PX_FORCE_INLINE PxU32 BAnyTrue3_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32((vget_lane_u32(finalReduce, 0) & 0xffFFff) != 0); +} +} + +namespace vecMathTests +{ +// PT: this function returns an invalid Vec3V (W!=0.0f) just for unit-testing 'isValidVec3V' +PX_FORCE_INLINE Vec3V getInvalidVec3V() +{ + PX_ALIGN(16, PxF32) data[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + return 
V4LoadA(data); +} + +PX_FORCE_INLINE bool allElementsEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vget_lane_u32(vceq_f32(a, b), 0) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec3V(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return V3AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec4V(const Vec4V a, const Vec4V b) +{ + return V4AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualBoolV(const BoolV a, const BoolV b) +{ + return internalUnitNeonSimd::BAllTrue4_R(vceqq_u32(a, b)) != 0; +} + +PX_FORCE_INLINE PxU32 V4U32AllEq(const VecU32V a, const VecU32V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsEqU32(a, b)); +} + +PX_FORCE_INLINE bool allElementsEqualVecU32V(const VecU32V a, const VecU32V b) +{ + return V4U32AllEq(a, b) != 0; +} + +PX_FORCE_INLINE BoolV V4IsEqI32(const VecI32V a, const VecI32V b) +{ + return vceqq_s32(a, b); +} + +PX_FORCE_INLINE PxU32 V4I32AllEq(const VecI32V a, const VecI32V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsEqI32(a, b)); +} + +PX_FORCE_INLINE bool allElementsEqualVecI32V(const VecI32V a, const VecI32V b) +{ + return V4I32AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsNearEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + + const float32x2_t c = vsub_f32(a, b); + const float32x2_t error = vdup_n_f32(VECMATH_AOS_EPSILON); +// absolute compare abs(error) > abs(c) + const uint32x2_t greater = vcagt_f32(error, c); + const uint32x2_t min = vpmin_u32(greater, greater); + return vget_lane_u32(min, 0) != 0x0; +} + +PX_FORCE_INLINE bool allElementsNearEqualVec3V(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + const float32x4_t c = vsubq_f32(a, b); + const float32x4_t error = vdupq_n_f32(VECMATH_AOS_EPSILON); +// absolute compare abs(error) > abs(c) + const uint32x4_t greater = vcagtq_f32(error, 
c); + return internalUnitNeonSimd::BAllTrue3_R(greater) != 0; +} + +PX_FORCE_INLINE bool allElementsNearEqualVec4V(const Vec4V a, const Vec4V b) +{ + const float32x4_t c = vsubq_f32(a, b); + const float32x4_t error = vdupq_n_f32(VECMATH_AOS_EPSILON); +// absolute compare abs(error) > abs(c) + const uint32x4_t greater = vcagtq_f32(error, c); + return internalUnitNeonSimd::BAllTrue4_R(greater) != 0x0; +} +} + +#if 0 // debugging printfs +#include +PX_FORCE_INLINE void printVec(const float32x4_t& v, const char* name) +{ + PX_ALIGN(16, float32_t) data[4]; + vst1q_f32(data, v); + printf("%s: (%f, %f, %f, %f)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const float32x2_t& v, const char* name) +{ + PX_ALIGN(16, float32_t) data[2]; + vst1_f32(data, v); + printf("%s: (%f, %f)\n", name, data[0], data[1]); +} + +PX_FORCE_INLINE void printVec(const uint32x4_t& v, const char* name) +{ + PX_ALIGN(16, uint32_t) data[4]; + vst1q_u32(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const uint16x8_t& v, const char* name) +{ + PX_ALIGN(16, uint16_t) data[8]; + vst1q_u16(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); +} + +PX_FORCE_INLINE void printVec(const int32x4_t& v, const char* name) +{ + PX_ALIGN(16, int32_t) data[4]; + vst1q_s32(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const int16x8_t& v, const char* name) +{ + PX_ALIGN(16, int16_t) data[8]; + vst1q_s16(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); +} + +PX_FORCE_INLINE void printVec(const uint16x4_t& v, const char* name) +{ + PX_ALIGN(16, uint16_t) data[4]; + vst1_u16(data, v); + printf("%s: (0x%x, 
0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const uint32x2_t& v, const char* name) +{ + PX_ALIGN(16, uint32_t) data[2]; + vst1_u32(data, v); + printf("%s: (0x%x, 0x%x)\n", name, data[0], data[1]); +} + +PX_FORCE_INLINE void printVar(const PxU32 v, const char* name) +{ + printf("%s: 0x%x\n", name, v); +} + +PX_FORCE_INLINE void printVar(const PxF32 v, const char* name) +{ + printf("%s: %f\n", name, v); +} + +#define PRINT_VAR(X) printVar((X), #X) +#define PRINT_VEC(X) printVec((X), #X) +#define PRINT_VEC_TITLE(TITLE, X) printVec((X), TITLE #X) +#endif // debugging printf + +///////////////////////////////////////////////////////////////////// +////FUNCTIONS USED ONLY FOR ASSERTS IN VECTORISED IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +PX_FORCE_INLINE bool isFiniteFloatV(const FloatV a) +{ + PX_ALIGN(16, PxF32) data[4]; + vst1_f32(reinterpret_cast(data), a); + return PxIsFinite(data[0]) && PxIsFinite(data[1]); +} + +PX_FORCE_INLINE bool isFiniteVec3V(const Vec3V a) +{ + PX_ALIGN(16, PxF32) data[4]; + vst1q_f32(reinterpret_cast(data), a); + return PxIsFinite(data[0]) && PxIsFinite(data[1]) && PxIsFinite(data[2]); +} + +PX_FORCE_INLINE bool isFiniteVec4V(const Vec4V a) +{ + PX_ALIGN(16, PxF32) data[4]; + vst1q_f32(reinterpret_cast(data), a); + return PxIsFinite(data[0]) && PxIsFinite(data[1]) && PxIsFinite(data[2]) && PxIsFinite(data[3]); +} + +PX_FORCE_INLINE bool hasZeroElementinFloatV(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return vget_lane_u32(vreinterpret_u32_f32(a), 0) == 0; +} + +PX_FORCE_INLINE bool hasZeroElementInVec3V(const Vec3V a) +{ + const uint32x2_t dLow = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t dMin = vpmin_u32(dLow, dLow); + + return vget_lane_u32(dMin, 0) == 0 || vgetq_lane_u32(vreinterpretq_u32_f32(a), 2) == 0; +} + +PX_FORCE_INLINE bool hasZeroElementInVec4V(const Vec4V a) +{ + const uint32x2_t dHigh = 
vget_high_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t dLow = vget_low_u32(vreinterpretq_u32_f32(a)); + + const uint32x2_t dMin = vmin_u32(dHigh, dLow); + const uint32x2_t pairMin = vpmin_u32(dMin, dMin); + return vget_lane_u32(pairMin, 0) == 0; +} + +///////////////////////////////////////////////////////////////////// +////VECTORISED FUNCTION IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +PX_FORCE_INLINE FloatV FLoad(const PxF32 f) +{ + return vdup_n_f32(reinterpret_cast(f)); +} + +PX_FORCE_INLINE FloatV FLoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(f); + return vld1_f32(reinterpret_cast(f)); +} + +PX_FORCE_INLINE Vec3V V3Load(const PxF32 f) +{ + PX_ALIGN(16, PxF32) data[4] = { f, f, f, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec4V V4Load(const PxF32 f) +{ + return vdupq_n_f32(reinterpret_cast(f)); +} + +PX_FORCE_INLINE BoolV BLoad(const bool f) +{ + const PxU32 i = static_cast(-(static_cast(f))); + return vdupq_n_u32(i); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxVec3& f) +{ + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadUnsafeA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxF32* f) +{ + ASSERT_ISALIGNED16(f); + PX_ALIGN(16, PxF32) data[4] = { f[0], f[1], f[2], 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxF32* f) +{ + PX_ALIGN(16, PxF32) data[4] = { f[0], f[1], f[2], 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V(Vec4V v) +{ + return vsetq_lane_f32(0.0f, v, 3); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V_WUndefined(Vec4V v) +{ + return v; +} + +PX_FORCE_INLINE Vec4V 
Vec4V_From_Vec3V(Vec3V f) +{ + return f; // ok if it is implemented as the same type. +} + +PX_FORCE_INLINE Vec4V Vec4V_From_FloatV(FloatV f) +{ + return vcombine_f32(f, f); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV(FloatV f) +{ + return Vec3V_From_Vec4V(Vec4V_From_FloatV(f)); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV_WUndefined(FloatV f) +{ + return Vec3V_From_Vec4V_WUndefined(Vec4V_From_FloatV(f)); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_PxVec3_WUndefined(const PxVec3& f) +{ + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Mat33V Mat33V_From_PxMat33(const PxMat33& m) +{ + return Mat33V(V3LoadU(m.column0), V3LoadU(m.column1), V3LoadU(m.column2)); +} + +PX_FORCE_INLINE void PxMat33_From_Mat33V(const Mat33V& m, PxMat33& out) +{ + V3StoreU(m.col0, out.column0); + V3StoreU(m.col1, out.column1); + V3StoreU(m.col2, out.column2); +} + +PX_FORCE_INLINE Vec4V V4LoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(f); + return vld1q_f32(reinterpret_cast(f)); +} + +PX_FORCE_INLINE void V4StoreA(Vec4V a, PxF32* f) +{ + ASSERT_ISALIGNED16(f); + vst1q_f32(reinterpret_cast(f), a); +} + +PX_FORCE_INLINE void V4StoreU(const Vec4V a, PxF32* f) +{ + PX_ALIGN(16, PxF32) f2[4]; + vst1q_f32(reinterpret_cast(f2), a); + f[0] = f2[0]; + f[1] = f2[1]; + f[2] = f2[2]; + f[3] = f2[3]; +} + +PX_FORCE_INLINE void BStoreA(const BoolV a, PxU32* u) +{ + ASSERT_ISALIGNED16(u); + vst1q_u32(reinterpret_cast(u), a); +} + +PX_FORCE_INLINE void U4StoreA(const VecU32V uv, PxU32* u) +{ + ASSERT_ISALIGNED16(u); + vst1q_u32(reinterpret_cast(u), uv); +} + +PX_FORCE_INLINE void I4StoreA(const VecI32V iv, PxI32* i) +{ + ASSERT_ISALIGNED16(i); + vst1q_s32(reinterpret_cast(i), iv); +} + +PX_FORCE_INLINE Vec4V V4LoadU(const PxF32* const f) +{ + return vld1q_f32(reinterpret_cast(f)); +} + +PX_FORCE_INLINE BoolV BLoad(const bool* const f) +{ + const PX_ALIGN(16, PxU32) b[4] = { static_cast(-static_cast(f[0])), + static_cast(-static_cast(f[1])), + 
static_cast(-static_cast(f[2])), + static_cast(-static_cast(f[3])) }; + return vld1q_u32(b); +} + +PX_FORCE_INLINE void FStore(const FloatV a, PxF32* PX_RESTRICT f) +{ + ASSERT_ISVALIDFLOATV(a); + // vst1q_lane_f32(f, a, 0); // causes vst1 alignment bug + *f = vget_lane_f32(a, 0); +} + +PX_FORCE_INLINE void Store_From_BoolV(const BoolV a, PxU32* PX_RESTRICT f) +{ + *f = vget_lane_u32(vget_low_u32(a), 0); +} + +PX_FORCE_INLINE void V3StoreA(const Vec3V a, PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32) f2[4]; + vst1q_f32(reinterpret_cast(f2), a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +PX_FORCE_INLINE void V3StoreU(const Vec3V a, PxVec3& f) +{ + PX_ALIGN(16, PxF32) f2[4]; + vst1q_f32(reinterpret_cast(f2), a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +////////////////////////////////// +// FLOATV +////////////////////////////////// + +PX_FORCE_INLINE FloatV FZero() +{ + return FLoad(0.0f); +} + +PX_FORCE_INLINE FloatV FOne() +{ + return FLoad(1.0f); +} + +PX_FORCE_INLINE FloatV FHalf() +{ + return FLoad(0.5f); +} + +PX_FORCE_INLINE FloatV FEps() +{ + return FLoad(PX_EPS_REAL); +} + +PX_FORCE_INLINE FloatV FEps6() +{ + return FLoad(1e-6f); +} + +PX_FORCE_INLINE FloatV FMax() +{ + return FLoad(PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV FNegMax() +{ + return FLoad(-PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV IZero() +{ + return vreinterpret_f32_u32(vdup_n_u32(0)); +} + +PX_FORCE_INLINE FloatV IOne() +{ + return vreinterpret_f32_u32(vdup_n_u32(1)); +} + +PX_FORCE_INLINE FloatV ITwo() +{ + return vreinterpret_f32_u32(vdup_n_u32(2)); +} + +PX_FORCE_INLINE FloatV IThree() +{ + return vreinterpret_f32_u32(vdup_n_u32(3)); +} + +PX_FORCE_INLINE FloatV IFour() +{ + return vreinterpret_f32_u32(vdup_n_u32(4)); +} + +PX_FORCE_INLINE FloatV FNeg(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return vneg_f32(f); +} + +PX_FORCE_INLINE FloatV FAdd(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vadd_f32(a, b); +} + 
+/// Returns a - b.
+PX_FORCE_INLINE FloatV FSub(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vsub_f32(a, b);
+}
+
+/// Returns a * b.
+PX_FORCE_INLINE FloatV FMul(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vmul_f32(a, b);
+}
+
+/// Reciprocal estimate of each lane of in, refined by n Newton-Raphson steps.
+/// The template parameter list was missing although the loop bound uses n.
+template <int n>
+PX_FORCE_INLINE float32x2_t recip_newton(const float32x2_t& in)
+{
+    float32x2_t recip = vrecpe_f32(in);
+    for(int i = 0; i < n; ++i)
+        recip = vmul_f32(recip, vrecps_f32(in, recip)); // recip *= (2 - in * recip)
+    return recip;
+}
+
+/// Four-lane variant of recip_newton.
+template <int n>
+PX_FORCE_INLINE float32x4_t recipq_newton(const float32x4_t& in)
+{
+    float32x4_t recip = vrecpeq_f32(in);
+    for(int i = 0; i < n; ++i)
+        recip = vmulq_f32(recip, vrecpsq_f32(recip, in)); // recip *= (2 - recip * in)
+    return recip;
+}
+
+/// Reciprocal square-root estimate of each lane of in, refined by n Newton-Raphson steps.
+template <int n>
+PX_FORCE_INLINE float32x2_t rsqrt_newton(const float32x2_t& in)
+{
+    float32x2_t rsqrt = vrsqrte_f32(in);
+    for(int i = 0; i < n; ++i)
+        rsqrt = vmul_f32(rsqrt, vrsqrts_f32(vmul_f32(rsqrt, rsqrt), in)); // rsqrt *= (3 - rsqrt^2 * in) / 2
+    return rsqrt;
+}
+
+/// Four-lane variant of rsqrt_newton.
+template <int n>
+PX_FORCE_INLINE float32x4_t rsqrtq_newton(const float32x4_t& in)
+{
+    float32x4_t rsqrt = vrsqrteq_f32(in);
+    for(int i = 0; i < n; ++i)
+        rsqrt = vmulq_f32(rsqrt, vrsqrtsq_f32(vmulq_f32(rsqrt, rsqrt), in)); // rsqrt *= (3 - rsqrt^2 * in) / 2
+    return rsqrt;
+}
+
+/// Returns a multiplied by VRECIP(b) (macro defined outside this chunk).
+PX_FORCE_INLINE FloatV FDiv(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vmul_f32(a, VRECIP(b));
+}
+
+/// Returns a multiplied by VRECIPE(b) (macro defined outside this chunk).
+PX_FORCE_INLINE FloatV FDivFast(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vmul_f32(a, VRECIPE(b));
+}
+
+/// Returns VRECIP(a).
+PX_FORCE_INLINE FloatV FRecip(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    return VRECIP(a);
+}
+
+/// Returns VRECIPE(a).
+PX_FORCE_INLINE FloatV FRecipFast(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    return VRECIPE(a);
+}
+
+/// Returns VRECIPSQRT(a).
+PX_FORCE_INLINE FloatV FRsqrt(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    return VRECIPSQRT(a);
+}
+
+/// Square root computed as a * VRECIPSQRT(a); when a equals zero, a itself is
+/// returned so the product with the reciprocal square root of zero is not used.
+PX_FORCE_INLINE FloatV FSqrt(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    return FSel(FIsEq(a, FZero()), a, vmul_f32(a, VRECIPSQRT(a)));
+}
+
+/// Returns VRECIPSQRTE(a) (macro defined outside this chunk).
+PX_FORCE_INLINE FloatV FRsqrtFast(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    return VRECIPSQRTE(a);
+}
+
+/// Returns c + a * b.
+PX_FORCE_INLINE FloatV FScaleAdd(const FloatV a, const FloatV b, const FloatV c)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    ASSERT_ISVALIDFLOATV(c);
+    return vmla_f32(c, a, b);
+}
+
+/// Returns c - a * b.
+PX_FORCE_INLINE FloatV FNegScaleSub(const FloatV a, const FloatV b, const FloatV c)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    ASSERT_ISVALIDFLOATV(c);
+    return vmls_f32(c, a, b);
+}
+
+/// Returns |a|.
+PX_FORCE_INLINE FloatV FAbs(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    return vabs_f32(a);
+}
+
+/// Bitwise select on the low half of c: bits of a where the mask is set, bits of b elsewhere.
+/// c is asserted to be all-true or all-false.
+PX_FORCE_INLINE FloatV FSel(const BoolV c, const FloatV a, const FloatV b)
+{
+    PX_ASSERT( vecMathTests::allElementsEqualBoolV(c, BTTTT()) ||
+               vecMathTests::allElementsEqualBoolV(c, BFFFF()));
+    ASSERT_ISVALIDFLOATV(vbsl_f32(vget_low_u32(c), a, b));
+    return vbsl_f32(vget_low_u32(c), a, b);
+}
+
+/// Lane-0 result of a > b, broadcast to all four mask lanes.
+PX_FORCE_INLINE BoolV FIsGrtr(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vdupq_lane_u32(vcgt_f32(a, b), 0);
+}
+
+/// Lane-0 result of a >= b, broadcast to all four mask lanes.
+PX_FORCE_INLINE BoolV FIsGrtrOrEq(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vdupq_lane_u32(vcge_f32(a, b), 0);
+}
+
+/// Lane-0 result of a == b, broadcast to all four mask lanes.
+PX_FORCE_INLINE BoolV FIsEq(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vdupq_lane_u32(vceq_f32(a, b), 0);
+}
+
+/// Lane-wise maximum of a and b (validity asserts are disabled here).
+PX_FORCE_INLINE FloatV FMax(const FloatV a, const FloatV b)
+{
+    //ASSERT_ISVALIDFLOATV(a);
+    //ASSERT_ISVALIDFLOATV(b);
+    return vmax_f32(a, b);
+}
+
+/// Lane-wise minimum of a and b (validity asserts are disabled here).
+PX_FORCE_INLINE FloatV FMin(const FloatV a, const FloatV b)
+{
+    //ASSERT_ISVALIDFLOATV(a);
+    //ASSERT_ISVALIDFLOATV(b);
+    return vmin_f32(a, b);
+}
+
+/// Returns max(min(a, maxV), minV); a itself is not validity-checked.
+PX_FORCE_INLINE FloatV FClamp(const FloatV a, const FloatV minV, const FloatV maxV)
+{
+    ASSERT_ISVALIDFLOATV(minV);
+    ASSERT_ISVALIDFLOATV(maxV);
+    return vmax_f32(vmin_f32(a, maxV), minV);
+}
+
+/// Lane-0 comparison mask of a > b (all bits set when true, 0 when false).
+PX_FORCE_INLINE PxU32 FAllGrtr(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vget_lane_u32(vcgt_f32(a, b), 0);
+}
+
+/// Lane-0 comparison mask of a >= b (all bits set when true, 0 when false).
+PX_FORCE_INLINE PxU32 FAllGrtrOrEq(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vget_lane_u32(vcge_f32(a, b), 0);
+}
+
+/// Lane-0 comparison mask of a == b (all bits set when true, 0 when false).
+PX_FORCE_INLINE PxU32 FAllEq(const FloatV a, const FloatV b)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vget_lane_u32(vceq_f32(a, b), 0);
+}
+
+/// Rounds each lane: adds 0.5, subtracts the sign bit (1 for negative inputs),
+/// then truncates through a float -> int32 -> float conversion.
+PX_FORCE_INLINE FloatV FRound(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+
+    // truncate(a + (0.5f - sign(a)))
+    const float32x2_t half = vdup_n_f32(0.5f);
+    const float32x2_t sign = vcvt_f32_u32((vshr_n_u32(vreinterpret_u32_f32(a), 31)));
+    const float32x2_t aPlusHalf = vadd_f32(a, half);
+    const float32x2_t aRound = vsub_f32(aPlusHalf, sign);
+    int32x2_t tmp = vcvt_s32_f32(aRound);
+    return vcvt_f32_s32(tmp);
+}
+
+/// Sine approximation: reduces a by the nearest multiple of 2*pi, then evaluates
+/// an odd polynomial up to V^23 with coefficients from g_PXSinCoefficients0..2.
+PX_FORCE_INLINE FloatV FSin(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+
+    // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI
+    const FloatV recipTwoPi = FLoadA(g_PXReciprocalTwoPi.f);
+    const FloatV twoPi = FLoadA(g_PXTwoPi.f);
+    const FloatV tmp = FMul(a, recipTwoPi);
+    const FloatV b = FRound(tmp);
+    const FloatV V1 = FNegScaleSub(twoPi, b, a);
+
+    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
+    // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
+    const FloatV V2 = FMul(V1, V1);
+    const FloatV V3 = FMul(V2, V1);
+    const FloatV V5 = FMul(V3, V2);
+    const FloatV V7 = FMul(V5, V2);
+    const FloatV V9 = FMul(V7, V2);
+    const FloatV V11 = FMul(V9, V2);
+    const FloatV V13 = FMul(V11, V2);
+    const FloatV V15 = FMul(V13, V2);
+    const FloatV V17 = FMul(V15, V2);
+    const FloatV V19 = FMul(V17, V2);
+    const FloatV V21 = FMul(V19, V2);
+    const FloatV V23 = FMul(V21, V2);
+
+    const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f);
+    const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f);
+    const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f);
+
+    // The X lane of sinCoefficients0 is not read; the V^1 term is added unscaled below.
+    const FloatV S1 = V4GetY(sinCoefficients0);
+    const FloatV S2 = V4GetZ(sinCoefficients0);
+    const FloatV S3 = V4GetW(sinCoefficients0);
+    const FloatV S4 = V4GetX(sinCoefficients1);
+    const FloatV S5 = V4GetY(sinCoefficients1);
+    const FloatV S6 = V4GetZ(sinCoefficients1);
+    const FloatV S7 = V4GetW(sinCoefficients1);
+    const FloatV S8 = V4GetX(sinCoefficients2);
+    const FloatV S9 = V4GetY(sinCoefficients2);
+    const FloatV S10 = V4GetZ(sinCoefficients2);
+    const FloatV S11 = V4GetW(sinCoefficients2);
+
+    FloatV Result;
+    Result = FScaleAdd(S1, V3, V1);
+    Result = FScaleAdd(S2, V5, Result);
+    Result = FScaleAdd(S3, V7, Result);
+    Result = FScaleAdd(S4, V9, Result);
+    Result = FScaleAdd(S5, V11, Result);
+    Result = FScaleAdd(S6, V13, Result);
+    Result = FScaleAdd(S7, V15, Result);
+    Result = FScaleAdd(S8, V17, Result);
+    Result = FScaleAdd(S9, V19, Result);
+    Result = FScaleAdd(S10, V21, Result);
+    Result = FScaleAdd(S11, V23, Result);
+
+    return Result;
+}
+
+/// Cosine approximation: reduces a by the nearest multiple of 2*pi, then evaluates
+/// an even polynomial up to V^22 with coefficients from g_PXCosCoefficients0..2.
+PX_FORCE_INLINE FloatV FCos(const FloatV a)
+{
+    ASSERT_ISVALIDFLOATV(a);
+
+    // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI
+    const FloatV recipTwoPi = FLoadA(g_PXReciprocalTwoPi.f);
+    const FloatV twoPi = FLoadA(g_PXTwoPi.f);
+    const FloatV tmp = FMul(a, recipTwoPi);
+    const FloatV b = FRound(tmp);
+    const FloatV V1 = FNegScaleSub(twoPi, b, a);
+
+    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
+    // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
+    const FloatV V2 = FMul(V1, V1);
+    const FloatV V4 = FMul(V2, V2);
+    const FloatV V6 = FMul(V4, V2);
+    const FloatV V8 = FMul(V4, V4);
+    const FloatV V10 = FMul(V6, V4);
+    const FloatV V12 = FMul(V6, V6);
+    const FloatV V14 = FMul(V8, V6);
+    const FloatV V16 = FMul(V8, V8);
+    const FloatV V18 = FMul(V10, V8);
+    const FloatV V20 = FMul(V10, V10);
+    const FloatV V22 = FMul(V12, V10);
+
+    const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f);
+    const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f);
+    const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f);
+
+    // The X lane of cosCoefficients0 is not read; the constant term is FOne() below.
+    const FloatV C1 = V4GetY(cosCoefficients0);
+    const FloatV C2 = V4GetZ(cosCoefficients0);
+    const FloatV C3 = V4GetW(cosCoefficients0);
+    const FloatV C4 = V4GetX(cosCoefficients1);
+    const FloatV C5 = V4GetY(cosCoefficients1);
+    const FloatV C6 = V4GetZ(cosCoefficients1);
+    const FloatV C7 = V4GetW(cosCoefficients1);
+    const FloatV C8 = V4GetX(cosCoefficients2);
+    const FloatV C9 = V4GetY(cosCoefficients2);
+    const FloatV C10 = V4GetZ(cosCoefficients2);
+    const FloatV C11 = V4GetW(cosCoefficients2);
+
+    FloatV Result;
+    Result = FScaleAdd(C1, V2, FOne());
+    Result = FScaleAdd(C2, V4, Result);
+    Result = FScaleAdd(C3, V6, Result);
+    Result = FScaleAdd(C4, V8, Result);
+    Result = FScaleAdd(C5, V10, Result);
+    Result = FScaleAdd(C6, V12, Result);
+    Result = FScaleAdd(C7, V14, Result);
+    Result = FScaleAdd(C8, V16, Result);
+    Result = FScaleAdd(C9, V18, Result);
+    Result = FScaleAdd(C10, V20, Result);
+    Result = FScaleAdd(C11, V22, Result);
+
+    return Result;
+}
+
+/// Tests a > max or min > a; the result is PxU32(!BAllEqFFFF(mask)).
+PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV min, const FloatV max)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(min);
+    ASSERT_ISVALIDFLOATV(max);
+
+    const BoolV c = BOr(FIsGrtr(a, max), FIsGrtr(min, a));
+    return PxU32(!BAllEqFFFF(c));
+}
+
+/// Tests a >= min and max >= a; the result is PxU32(BAllEqTTTT(mask)).
+PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV min, const FloatV max)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(min);
+    ASSERT_ISVALIDFLOATV(max);
+
+    const BoolV c = BAnd(FIsGrtrOrEq(a, min), FIsGrtrOrEq(max, a));
+    return PxU32(BAllEqTTTT(c));
+}
+
+/// Lane-0 mask of |a| > |bounds| (absolute compare).
+PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV bounds)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(bounds);
+    const uint32x2_t greater = vcagt_f32(a, bounds);
+    return vget_lane_u32(greater, 0);
+}
+
+/// Lane-0 mask of |bounds| >= |a| (absolute compare).
+PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV bounds)
+{
+    ASSERT_ISVALIDFLOATV(a);
+    ASSERT_ISVALIDFLOATV(bounds);
+    const uint32x2_t geq = vcage_f32(bounds, a);
+    return vget_lane_u32(geq, 0);
+}
+
+//////////////////////////////////
+// VEC3V
+//////////////////////////////////
+
+/// Builds (f[0], f[1], f[0], 0): the high half is f with its second lane masked to zero.
+PX_FORCE_INLINE Vec3V V3Splat(const FloatV f)
+{
+    ASSERT_ISVALIDFLOATV(f);
+
+    const uint32_t ui32[2] = { 0xffffFFFF, 0x0 };
+    const uint32x2_t mask = vld1_u32(ui32);
+    const uint32x2_t uHigh = vreinterpret_u32_f32(f);
+    const float32x2_t dHigh = vreinterpret_f32_u32(vand_u32(uHigh, mask));
+
+    return vcombine_f32(f, dHigh);
+}
+
+/// Builds (x[1], y[0], z[0], 0) from three FloatVs.
+PX_FORCE_INLINE Vec3V V3Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z)
+{
+    ASSERT_ISVALIDFLOATV(x);
+    ASSERT_ISVALIDFLOATV(y);
+    ASSERT_ISVALIDFLOATV(z);
+
+    const uint32_t ui32[2] = { 0xffffFFFF, 0x0 };
+    const uint32x2_t mask = vld1_u32(ui32);
+    const uint32x2_t dHigh = vand_u32(vreinterpret_u32_f32(z), mask);
+    const uint32x2_t dLow = vext_u32(vreinterpret_u32_f32(x), vreinterpret_u32_f32(y), 1);
+    return vreinterpretq_f32_u32(vcombine_u32(dLow, dHigh));
+}
+
+/// Returns (1, 0, 0, 0).
+PX_FORCE_INLINE Vec3V V3UnitX()
+{
+    const float f4[4] = { 1.0f, 0.0f, 0.0f, 0.0f };
+    const float32x4_t x = vld1q_f32(f4);
+    return x;
+}
+
+/// Returns (0, 1, 0, 0).
+PX_FORCE_INLINE Vec3V V3UnitY()
+{
+    const float f4[4] = { 0, 1.0f, 0, 0 };
+    const float32x4_t y = vld1q_f32(f4);
+    return y;
+}
+
+/// Returns (0, 0, 1, 0).
+PX_FORCE_INLINE Vec3V V3UnitZ()
+{
+    const float f4[4] = { 0, 0, 1.0f, 0 };
+    const float32x4_t z = vld1q_f32(f4);
+    return z;
+}
+
+/// Broadcasts lane x of f into both lanes of a FloatV.
+PX_FORCE_INLINE FloatV V3GetX(const Vec3V f)
+{
+    ASSERT_ISVALIDVEC3V(f);
+    const float32x2_t fLow = vget_low_f32(f);
+    return vdup_lane_f32(fLow, 0);
+}
+
+/// Broadcasts lane y of f into both lanes of a FloatV.
+PX_FORCE_INLINE FloatV V3GetY(const Vec3V f)
+{
+    ASSERT_ISVALIDVEC3V(f);
+    const float32x2_t fLow = vget_low_f32(f);
+    return vdup_lane_f32(fLow, 1);
+}
+
+/// Broadcasts lane z of f into both lanes of a FloatV.
+PX_FORCE_INLINE FloatV V3GetZ(const Vec3V f)
+{
+    ASSERT_ISVALIDVEC3V(f);
+    const float32x2_t fhigh = vget_high_f32(f);
+    return vdup_lane_f32(fhigh, 0);
+}
+
+/// Combines v and a splat of f through V4Sel with mask BFTTT() (both defined outside this chunk).
+PX_FORCE_INLINE Vec3V V3SetX(const Vec3V v, const FloatV f)
+{
+    ASSERT_ISVALIDVEC3V(v);
+    ASSERT_ISVALIDFLOATV(f);
+    return V4Sel(BFTTT(), v, vcombine_f32(f, f));
+}
+
+/// Combines v and a splat of f through V4Sel with mask BTFTT().
+PX_FORCE_INLINE Vec3V V3SetY(const Vec3V v, const FloatV f)
+{
+    ASSERT_ISVALIDVEC3V(v);
+    ASSERT_ISVALIDFLOATV(f);
+    return V4Sel(BTFTT(), v, vcombine_f32(f, f));
+}
+
+/// Combines v and a splat of f through V4Sel with mask BTTFT().
+PX_FORCE_INLINE Vec3V V3SetZ(const Vec3V v, const FloatV f)
+{
+    ASSERT_ISVALIDVEC3V(v);
+    ASSERT_ISVALIDFLOATV(f);
+    return V4Sel(BTTFT(), v, vcombine_f32(f, f));
+}
+
+/// Returns (a.x, b.x, c.x, 0).
+PX_FORCE_INLINE Vec3V V3ColX(const Vec3V a, const Vec3V b, const Vec3V c)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    ASSERT_ISVALIDVEC3V(c);
+
+    const float32x2_t aLow = vget_low_f32(a);
+    const float32x2_t bLow = vget_low_f32(b);
+    const float32x2_t cLow = vget_low_f32(c);
+    const float32x2_t zero = vdup_n_f32(0.0f);
+
+    const float32x2x2_t zipL = vzip_f32(aLow, bLow);
+    const float32x2x2_t zipH = vzip_f32(cLow, zero);
+
+    return vcombine_f32(zipL.val[0], zipH.val[0]);
+}
+
+/// Returns (a.y, b.y, c.y, 0).
+PX_FORCE_INLINE Vec3V V3ColY(const Vec3V a, const Vec3V b, const Vec3V c)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    ASSERT_ISVALIDVEC3V(c);
+
+    const float32x2_t aLow = vget_low_f32(a);
+    const float32x2_t bLow = vget_low_f32(b);
+    const float32x2_t cLow = vget_low_f32(c);
+    const float32x2_t zero = vdup_n_f32(0.0f);
+
+    const float32x2x2_t zipL = vzip_f32(aLow, bLow);
+    const float32x2x2_t zipH = vzip_f32(cLow, zero);
+
+    return vcombine_f32(zipL.val[1], zipH.val[1]);
+}
+
+/// Returns (a.z, b.z, c.z, c.w); the last lane is taken from c unchanged.
+PX_FORCE_INLINE Vec3V V3ColZ(const Vec3V a, const Vec3V b, const Vec3V c)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    ASSERT_ISVALIDVEC3V(c);
+
+    const float32x2_t aHi = vget_high_f32(a);
+    const float32x2_t bHi = vget_high_f32(b);
+    const float32x2_t cHi = vget_high_f32(c);
+
+    const float32x2x2_t zipL = vzip_f32(aHi, bHi);
+
+    return vcombine_f32(zipL.val[0], cHi);
+}
+
+/// Returns (0, 0, 0, 0).
+PX_FORCE_INLINE Vec3V V3Zero()
+{
+    return vdupq_n_f32(0.0f);
+}
+
+/// Returns V3Load(PX_EPS_REAL).
+PX_FORCE_INLINE Vec3V V3Eps()
+{
+    return V3Load(PX_EPS_REAL);
+}
+
+/// Returns V3Load(1.0f).
+PX_FORCE_INLINE Vec3V V3One()
+{
+    return V3Load(1.0f);
+}
+
+/// Returns -f with lane w forced to 0.
+PX_FORCE_INLINE Vec3V V3Neg(const Vec3V f)
+{
+    ASSERT_ISVALIDVEC3V(f);
+    const float32x4_t tmp = vnegq_f32(f);
+    return vsetq_lane_f32(0.0f, tmp, 3);
+}
+
+/// Returns a + b lane-wise.
+PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vaddq_f32(a, b);
+}
+
+/// Returns a + Vec3V_From_FloatV(b).
+PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const FloatV b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vaddq_f32(a, Vec3V_From_FloatV(b));
+}
+
+/// Returns a - b lane-wise.
+PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vsubq_f32(a, b);
+}
+
+/// Returns a - Vec3V_From_FloatV(b).
+PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const FloatV b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDFLOATV(b);
+    return vsubq_f32(a, Vec3V_From_FloatV(b));
+}
+
+/// Multiplies every lane of a by lane 0 of b, then forces lane w to 0.
+PX_FORCE_INLINE Vec3V V3Scale(const Vec3V a, const FloatV b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDFLOATV(b);
+    const float32x4_t tmp = vmulq_lane_f32(a, b, 0);
+    return vsetq_lane_f32(0.0f, tmp, 3);
+}
+
+/// Returns a * b lane-wise.
+PX_FORCE_INLINE Vec3V V3Mul(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vmulq_f32(a, b);
+}
+
+/// Multiplies every lane of a by lane 0 of VRECIP(b), then forces lane w to 0.
+PX_FORCE_INLINE Vec3V V3ScaleInv(const Vec3V a, const FloatV b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDFLOATV(b);
+    const float32x2_t invB = VRECIP(b);
+    const float32x4_t tmp = vmulq_lane_f32(a, invB, 0);
+    return vsetq_lane_f32(0.0f, tmp, 3);
+}
+
+/// Returns a * VRECIPQ(b); lane w of the reciprocal is zeroed before the multiply.
+PX_FORCE_INLINE Vec3V V3Div(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    float32x4_t invB = VRECIPQ(b);
+    invB = vsetq_lane_f32(0.0f, invB, 3);
+    return vmulq_f32(a, invB);
+}
+
+/// Same as V3ScaleInv but uses VRECIPE(b).
+PX_FORCE_INLINE Vec3V V3ScaleInvFast(const Vec3V a, const FloatV b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDFLOATV(b);
+    const float32x2_t invB = VRECIPE(b);
+    const float32x4_t tmp = vmulq_lane_f32(a, invB, 0);
+    return vsetq_lane_f32(0.0f, tmp, 3);
+}
+
+/// Same as V3Div but uses VRECIPEQ(b).
+PX_FORCE_INLINE Vec3V V3DivFast(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    float32x4_t invB = VRECIPEQ(b);
+    invB = vsetq_lane_f32(0.0f, invB, 3);
+    return vmulq_f32(a, invB);
+}
+
+/// Returns VRECIPQ(a) with lane w forced to 0.
+PX_FORCE_INLINE Vec3V V3Recip(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    const float32x4_t recipA = VRECIPQ(a);
+    return vsetq_lane_f32(0.0f, recipA, 3);
+}
+
+/// Returns VRECIPEQ(a) with lane w forced to 0.
+PX_FORCE_INLINE Vec3V V3RecipFast(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    const float32x4_t recipA = VRECIPEQ(a);
+    return vsetq_lane_f32(0.0f, recipA, 3);
+}
+
+/// Returns VRECIPSQRTQ(a) with lane w forced to 0.
+PX_FORCE_INLINE Vec3V V3Rsqrt(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    const float32x4_t rSqrA = VRECIPSQRTQ(a);
+    return vsetq_lane_f32(0.0f, rSqrA, 3);
+}
+
+/// Returns VRECIPSQRTEQ(a) with lane w forced to 0.
+PX_FORCE_INLINE Vec3V V3RsqrtFast(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    const float32x4_t rSqrA = VRECIPSQRTEQ(a);
+    return vsetq_lane_f32(0.0f, rSqrA, 3);
+}
+
+/// Returns c + a * b[0] with lane w forced to 0.
+PX_FORCE_INLINE Vec3V V3ScaleAdd(const Vec3V a, const FloatV b, const Vec3V c)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDFLOATV(b);
+    ASSERT_ISVALIDVEC3V(c);
+
+    float32x4_t tmp = vmlaq_lane_f32(c, a, b, 0);
+    // using vsetq_lane_f32 resulted in failures,
+    // probably related to a compiler bug on
+    // ndk r9d-win32, gcc 4.8, cardhu/shield
+
+    // code with issue
+    // return vsetq_lane_f32(0.0f, tmp, 3);
+
+    // workaround
+    float32x2_t w_z = vget_high_f32(tmp);
+    float32x2_t y_x = vget_low_f32(tmp);
+    w_z = vset_lane_f32(0.0f, w_z, 1);
+    return vcombine_f32(y_x, w_z);
+}
+
+/// Returns c - a * b[0] with lane w forced to 0.
+PX_FORCE_INLINE Vec3V V3NegScaleSub(const Vec3V a, const FloatV b, const Vec3V c)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDFLOATV(b);
+    ASSERT_ISVALIDVEC3V(c);
+
+    float32x4_t tmp = vmlsq_lane_f32(c, a, b, 0);
+    // using vsetq_lane_f32 resulted in failures,
+    // probably related to a compiler bug on
+    // ndk r9d-win32, gcc 4.8, cardhu/shield
+
+    // code with issue
+    // return vsetq_lane_f32(0.0f, tmp, 3);
+
+    // workaround
+    float32x2_t w_z = vget_high_f32(tmp);
+    float32x2_t y_x = vget_low_f32(tmp);
+    w_z = vset_lane_f32(0.0f, w_z, 1);
+    return vcombine_f32(y_x, w_z);
+}
+
+/// Returns c + a * b lane-wise.
+PX_FORCE_INLINE Vec3V V3MulAdd(const Vec3V a, const Vec3V b, const Vec3V c)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    ASSERT_ISVALIDVEC3V(c);
+    return vmlaq_f32(c, a, b);
+}
+
+/// Returns c - a * b lane-wise.
+PX_FORCE_INLINE Vec3V V3NegMulSub(const Vec3V a, const Vec3V b, const Vec3V c)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    ASSERT_ISVALIDVEC3V(c);
+    return vmlsq_f32(c, a, b);
+}
+
+/// Returns |a| lane-wise.
+PX_FORCE_INLINE Vec3V V3Abs(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    return vabsq_f32(a);
+}
+
+/// Dot product: sums all four lanes of a * b (the w lanes also contribute to
+/// the sum) and returns it in both lanes of a FloatV.
+PX_FORCE_INLINE FloatV V3Dot(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+
+    // const uint32x2_t mask = {0xffffFFFF, 0x0};
+    const float32x4_t tmp = vmulq_f32(a, b);
+
+    const float32x2_t low = vget_low_f32(tmp);
+    const float32x2_t high = vget_high_f32(tmp);
+    // const float32x2_t high = vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(high_), mask));
+
+    const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y}
+    const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z}
+
+    return sum0ZYX;
+}
+
+/// Cross product a x b; lane w of the result is masked to 0.
+PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+
+    const uint32_t ui32[2] = { 0xffffFFFF, 0x0 };
+    const uint32x2_t TF = vld1_u32(ui32);
+    const float32x2_t ay_ax = vget_low_f32(a); // d2
+    const float32x2_t aw_az = vget_high_f32(a); // d3
+    const float32x2_t by_bx = vget_low_f32(b); // d4
+    const float32x2_t bw_bz = vget_high_f32(b); // d5
+    // Hi, Lo
+    const float32x2_t bz_by = vext_f32(by_bx, bw_bz, 1); // bz, by
+    const float32x2_t az_ay = vext_f32(ay_ax, aw_az, 1); // az, ay
+
+    const float32x2_t azbx = vmul_f32(aw_az, by_bx); // 0, az*bx
+    const float32x2_t aybz_axby = vmul_f32(ay_ax, bz_by); // ay*bz, ax*by
+
+    const float32x2_t azbxSUBaxbz = vmls_f32(azbx, bw_bz, ay_ax); // 0, az*bx-ax*bz
+    const float32x2_t aybzSUBazby_axbySUBaybx = vmls_f32(aybz_axby, by_bx, az_ay); // ay*bz-az*by, ax*by-ay*bx
+
+    const float32x2_t retLow = vext_f32(aybzSUBazby_axbySUBaybx, azbxSUBaxbz, 1); // az*bx-ax*bz, ay*bz-az*by
+    const uint32x2_t retHigh = vand_u32(TF, vreinterpret_u32_f32(aybzSUBazby_axbySUBaybx)); // 0, ax*by-ay*bx
+
+    return vcombine_f32(retLow, vreinterpret_f32_u32(retHigh));
+}
+
+/// Returns a unchanged; no precomputation is done in this implementation.
+PX_FORCE_INLINE VecCrossV V3PrepareCross(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    return a;
+}
+
+/// Returns FSqrt of the sum of all four lanes of a * a.
+PX_FORCE_INLINE FloatV V3Length(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    // const uint32x2_t mask = {0xffffFFFF, 0x0};
+
+    const float32x4_t tmp = vmulq_f32(a, a);
+    const float32x2_t low = vget_low_f32(tmp);
+    const float32x2_t high = vget_high_f32(tmp);
+    // const float32x2_t high = vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(high_), mask));
+
+    const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y}
+    const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z}
+
+    return FSqrt(sum0ZYX);
+}
+
+/// Returns V3Dot(a, a).
+PX_FORCE_INLINE FloatV V3LengthSq(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    return V3Dot(a, a);
+}
+
+/// Returns a divided by its length (V3ScaleInv); the zero-length assert is disabled.
+PX_FORCE_INLINE Vec3V V3Normalize(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero()));
+    return V3ScaleInv(a, V3Length(a));
+}
+
+/// Returns a scaled by VRECIPSQRTE(dot(a, a)); the zero-length assert is disabled.
+PX_FORCE_INLINE Vec3V V3NormalizeFast(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero()));
+    return V3Scale(a, VRECIPSQRTE(V3Dot(a, a)));
+}
+
+/// Returns a / |a| when |a| > 0, otherwise unsafeReturnValue.
+PX_FORCE_INLINE Vec3V V3NormalizeSafe(const Vec3V a, const Vec3V unsafeReturnValue)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    const FloatV zero = vdup_n_f32(0.0f);
+    const FloatV length = V3Length(a);
+    const uint32x4_t isGreaterThanZero = FIsGrtr(length, zero);
+    return V3Sel(isGreaterThanZero, V3ScaleInv(a, length), unsafeReturnValue);
+}
+
+/// Bitwise select: bits of a where c is set, bits of b elsewhere.
+PX_FORCE_INLINE Vec3V V3Sel(const BoolV c, const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V( vbslq_f32(c, a, b));
+    return vbslq_f32(c, a, b);
+}
+
+/// Lane-wise mask of a > b.
+PX_FORCE_INLINE BoolV V3IsGrtr(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vcgtq_f32(a, b);
+}
+
+/// Lane-wise mask of a >= b.
+PX_FORCE_INLINE BoolV V3IsGrtrOrEq(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vcgeq_f32(a, b);
+}
+
+/// Lane-wise mask of a == b.
+PX_FORCE_INLINE BoolV V3IsEq(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vceqq_f32(a, b);
+}
+
+/// Lane-wise maximum of a and b.
+PX_FORCE_INLINE Vec3V V3Max(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vmaxq_f32(a, b);
+}
+
+/// Lane-wise minimum of a and b.
+PX_FORCE_INLINE Vec3V V3Min(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return vminq_f32(a, b);
+}
+
+/// Returns max(x, y, z) in both lanes; lane w is excluded by substituting z for it.
+PX_FORCE_INLINE FloatV V3ExtractMax(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    const float32x2_t low = vget_low_f32(a);
+    const float32x2_t high = vget_high_f32(a);
+
+    const float32x2_t zz = vdup_lane_f32(high, 0);
+    const float32x2_t max0 = vpmax_f32(zz, low);
+    const float32x2_t max1 = vpmax_f32(max0, max0);
+
+    return max1;
+}
+
+/// Returns min(x, y, z) in both lanes; lane w is excluded by substituting z for it.
+PX_FORCE_INLINE FloatV V3ExtractMin(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    const float32x2_t low = vget_low_f32(a);
+    const float32x2_t high = vget_high_f32(a);
+
+    const float32x2_t zz = vdup_lane_f32(high, 0);
+    const float32x2_t min0 = vpmin_f32(zz, low);
+    const float32x2_t min1 = vpmin_f32(min0, min0);
+
+    return min1;
+}
+
+// return (a >= 0.0f) ? 1.0f : -1.0f;
+PX_FORCE_INLINE Vec3V V3Sign(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    const Vec3V zero = V3Zero();
+    const Vec3V one = V3One();
+    const Vec3V none = V3Neg(one);
+    return V3Sel(V3IsGrtrOrEq(a, zero), one, none);
+}
+
+/// Returns max(min(a, maxV), minV) lane-wise; a itself is not validity-checked.
+PX_FORCE_INLINE Vec3V V3Clamp(const Vec3V a, const Vec3V minV, const Vec3V maxV)
+{
+    ASSERT_ISVALIDVEC3V(minV);
+    ASSERT_ISVALIDVEC3V(maxV);
+    return V3Max(V3Min(a, maxV), minV);
+}
+
+/// Reduces the lane mask of a > b with BAllTrue3_R (defined outside this chunk).
+PX_FORCE_INLINE PxU32 V3AllGrtr(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return internalUnitNeonSimd::BAllTrue3_R(V4IsGrtr(a, b));
+}
+
+/// Reduces the lane mask of a >= b with BAllTrue3_R.
+PX_FORCE_INLINE PxU32 V3AllGrtrOrEq(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return internalUnitNeonSimd::BAllTrue3_R(V4IsGrtrOrEq(a, b));
+}
+
+/// Reduces the lane mask of a == b with BAllTrue3_R.
+PX_FORCE_INLINE PxU32 V3AllEq(const Vec3V a, const Vec3V b)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(b);
+    return internalUnitNeonSimd::BAllTrue3_R(V4IsEq(a, b));
+}
+
+/// Rounds each lane: adds 0.5, subtracts the sign bit (1 for negative inputs),
+/// then truncates through a float -> int32 -> float conversion.
+PX_FORCE_INLINE Vec3V V3Round(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    // truncate(a + (0.5f - sign(a)))
+    const Vec3V half = V3Load(0.5f);
+    const float32x4_t sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(a), 31)));
+    const Vec3V aPlusHalf = V3Add(a, half);
+    const Vec3V aRound = V3Sub(aPlusHalf, sign);
+    return vcvtq_f32_s32(vcvtq_s32_f32(aRound));
+}
+
+/// Lane-wise sine approximation: reduces a by the nearest multiple of 2*pi, then
+/// evaluates an odd polynomial up to V^23 using g_PXSinCoefficients0..2.
+PX_FORCE_INLINE Vec3V V3Sin(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI
+    const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f);
+    const Vec4V twoPi = V4LoadA(g_PXTwoPi.f);
+    const Vec3V tmp = V4Mul(a, recipTwoPi);
+    const Vec3V b = V3Round(tmp);
+    const Vec3V V1 = V4NegMulSub(twoPi, b, a);
+
+    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
+    // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
+    const Vec3V V2 = V3Mul(V1, V1);
+    const Vec3V V3 = V3Mul(V2, V1);
+    const Vec3V V5 = V3Mul(V3, V2);
+    const Vec3V V7 = V3Mul(V5, V2);
+    const Vec3V V9 = V3Mul(V7, V2);
+    const Vec3V V11 = V3Mul(V9, V2);
+    const Vec3V V13 = V3Mul(V11, V2);
+    const Vec3V V15 = V3Mul(V13, V2);
+    const Vec3V V17 = V3Mul(V15, V2);
+    const Vec3V V19 = V3Mul(V17, V2);
+    const Vec3V V21 = V3Mul(V19, V2);
+    const Vec3V V23 = V3Mul(V21, V2);
+
+    const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f);
+    const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f);
+    const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f);
+
+    const FloatV S1 = V4GetY(sinCoefficients0);
+    const FloatV S2 = V4GetZ(sinCoefficients0);
+    const FloatV S3 = V4GetW(sinCoefficients0);
+    const FloatV S4 = V4GetX(sinCoefficients1);
+    const FloatV S5 = V4GetY(sinCoefficients1);
+    const FloatV S6 = V4GetZ(sinCoefficients1);
+    const FloatV S7 = V4GetW(sinCoefficients1);
+    const FloatV S8 = V4GetX(sinCoefficients2);
+    const FloatV S9 = V4GetY(sinCoefficients2);
+    const FloatV S10 = V4GetZ(sinCoefficients2);
+    const FloatV S11 = V4GetW(sinCoefficients2);
+
+    Vec3V Result;
+    Result = V4ScaleAdd(V3, S1, V1);
+    Result = V4ScaleAdd(V5, S2, Result);
+    Result = V4ScaleAdd(V7, S3, Result);
+    Result = V4ScaleAdd(V9, S4, Result);
+    Result = V4ScaleAdd(V11, S5, Result);
+    Result = V4ScaleAdd(V13, S6, Result);
+    Result = V4ScaleAdd(V15, S7, Result);
+    Result = V4ScaleAdd(V17, S8, Result);
+    Result = V4ScaleAdd(V19, S9, Result);
+    Result = V4ScaleAdd(V21, S10, Result);
+    Result = V4ScaleAdd(V23, S11, Result);
+
+    return Result;
+}
+
+/// Lane-wise cosine approximation: reduces a by the nearest multiple of 2*pi, then
+/// evaluates an even polynomial up to V^22 using g_PXCosCoefficients0..2.
+/// The result is passed through V4ClearW before returning.
+PX_FORCE_INLINE Vec3V V3Cos(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI
+    const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f);
+    const Vec4V twoPi = V4LoadA(g_PXTwoPi.f);
+    const Vec3V tmp = V4Mul(a, recipTwoPi);
+    const Vec3V b = V3Round(tmp);
+    const Vec3V V1 = V4NegMulSub(twoPi, b, a);
+
+    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
+    // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
+    const Vec3V V2 = V3Mul(V1, V1);
+    const Vec3V V4 = V3Mul(V2, V2);
+    const Vec3V V6 = V3Mul(V4, V2);
+    const Vec3V V8 = V3Mul(V4, V4);
+    const Vec3V V10 = V3Mul(V6, V4);
+    const Vec3V V12 = V3Mul(V6, V6);
+    const Vec3V V14 = V3Mul(V8, V6);
+    const Vec3V V16 = V3Mul(V8, V8);
+    const Vec3V V18 = V3Mul(V10, V8);
+    const Vec3V V20 = V3Mul(V10, V10);
+    const Vec3V V22 = V3Mul(V12, V10);
+
+    const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f);
+    const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f);
+    const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f);
+
+    const FloatV C1 = V4GetY(cosCoefficients0);
+    const FloatV C2 = V4GetZ(cosCoefficients0);
+    const FloatV C3 = V4GetW(cosCoefficients0);
+    const FloatV C4 = V4GetX(cosCoefficients1);
+    const FloatV C5 = V4GetY(cosCoefficients1);
+    const FloatV C6 = V4GetZ(cosCoefficients1);
+    const FloatV C7 = V4GetW(cosCoefficients1);
+    const FloatV C8 = V4GetX(cosCoefficients2);
+    const FloatV C9 = V4GetY(cosCoefficients2);
+    const FloatV C10 = V4GetZ(cosCoefficients2);
+    const FloatV C11 = V4GetW(cosCoefficients2);
+
+    Vec3V Result;
+    Result = V4ScaleAdd(V2, C1, V4One());
+    Result = V4ScaleAdd(V4, C2, Result);
+    Result = V4ScaleAdd(V6, C3, Result);
+    Result = V4ScaleAdd(V8, C4, Result);
+    Result = V4ScaleAdd(V10, C5, Result);
+    Result = V4ScaleAdd(V12, C6, Result);
+    Result = V4ScaleAdd(V14, C7, Result);
+    Result = V4ScaleAdd(V16, C8, Result);
+    Result = V4ScaleAdd(V18, C9, Result);
+    Result = V4ScaleAdd(V20, C10, Result);
+    Result = V4ScaleAdd(V22, C11, Result);
+
+    return V4ClearW(Result);
+}
+
+/// Returns (a.y, a.z, a.z, a.w).
+PX_FORCE_INLINE Vec3V V3PermYZZ(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    const float32x2_t xy = vget_low_f32(a);
+    const float32x2_t zw = vget_high_f32(a);
+    const float32x2_t yz = vext_f32(xy, zw, 1);
+    return vcombine_f32(yz, zw);
+}
+
+/// Returns (a.x, a.y, a.x, 0).
+PX_FORCE_INLINE Vec3V V3PermXYX(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    const uint32_t ui32[2] = { 0xffffFFFF, 0x0 };
+    const uint32x2_t mask = vld1_u32(ui32);
+    const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a));
+    const uint32x2_t xw = vand_u32(xy, mask);
+    return vreinterpretq_f32_u32(vcombine_u32(xy, xw));
+}
+
+/// Returns (a.y, a.z, a.x, 0).
+PX_FORCE_INLINE Vec3V V3PermYZX(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    const uint32_t ui32[2] = { 0xffffFFFF, 0x0 };
+    const uint32x2_t mask = vld1_u32(ui32);
+    const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a));
+    const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a));
+    const uint32x2_t yz = vext_u32(xy, zw, 1);
+    const uint32x2_t xw = vand_u32(xy, mask);
+    return vreinterpretq_f32_u32(vcombine_u32(yz, xw));
+}
+
+/// Returns (a.z, a.x, a.y, a.w).
+PX_FORCE_INLINE Vec3V V3PermZXY(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a));
+    const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a));
+    const uint32x2_t wz = vrev64_u32(zw);
+
+    const uint32x2_t zx = vext_u32(wz, xy, 1);
+    const uint32x2_t yw = vext_u32(xy, wz, 1);
+
+    return vreinterpretq_f32_u32(vcombine_u32(zx, yw));
+}
+
+/// Returns (a.z, a.z, a.y, a.w).
+PX_FORCE_INLINE Vec3V V3PermZZY(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a));
+    const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a));
+
+    const uint32x2_t wz = vrev64_u32(zw);
+    const uint32x2_t yw = vext_u32(xy, wz, 1);
+    const uint32x2_t zz = vdup_lane_u32(wz, 1);
+
+    return vreinterpretq_f32_u32(vcombine_u32(zz, yw));
+}
+
+/// Returns (a.y, a.x, a.x, 0).
+PX_FORCE_INLINE Vec3V V3PermYXX(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    const uint32_t ui32[2] = { 0xffffFFFF, 0x0 };
+    const uint32x2_t mask = vld1_u32(ui32);
+    const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a));
+    const uint32x2_t yx = vrev64_u32(xy);
+    const uint32x2_t xw = vand_u32(xy, mask);
+    return vreinterpretq_f32_u32(vcombine_u32(yx, xw));
+}
+
+/// Returns (v1.w, v1.z, v0.y, v1.w); the "Zero" lanes come from v1.w.
+PX_FORCE_INLINE Vec3V V3Perm_Zero_1Z_0Y(const Vec3V v0, const Vec3V v1)
+{
+    ASSERT_ISVALIDVEC3V(v0);
+    ASSERT_ISVALIDVEC3V(v1);
+
+    const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(v0));
+    const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(v1));
+    const uint32x2_t wz = vrev64_u32(zw);
+    const uint32x2_t yw = vext_u32(xy, wz, 1);
+
+    return vreinterpretq_f32_u32(vcombine_u32(wz, yw));
+}
+
+/// Returns (v0.z, v0.w, v1.x, 0); the "Zero" lane comes from v0.w.
+PX_FORCE_INLINE Vec3V V3Perm_0Z_Zero_1X(const Vec3V v0, const Vec3V v1)
+{
+    ASSERT_ISVALIDVEC3V(v0);
+    ASSERT_ISVALIDVEC3V(v1);
+
+    const uint32_t ui32[2] = { 0xffffFFFF, 0x0 };
+    const uint32x2_t mask = vld1_u32(ui32);
+    const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(v0));
+    const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(v1));
+    const uint32x2_t xw = vand_u32(xy, mask);
+
+    return vreinterpretq_f32_u32(vcombine_u32(zw, xw));
+}
+
+/// Returns (v1.y, v0.x, 0, 0).
+PX_FORCE_INLINE Vec3V V3Perm_1Y_0X_Zero(const Vec3V v0, const Vec3V v1)
+{
+    ASSERT_ISVALIDVEC3V(v0);
+    ASSERT_ISVALIDVEC3V(v1);
+
+    const uint32x2_t axy = vget_low_u32(vreinterpretq_u32_f32(v0));
+    const uint32x2_t bxy = vget_low_u32(vreinterpretq_u32_f32(v1));
+    const uint32x2_t byax = vext_u32(bxy, axy, 1);
+    const uint32x2_t ww = vdup_n_u32(0);
+
+    return vreinterpretq_f32_u32(vcombine_u32(byax, ww));
+}
+
+/// Sums all four lanes of a (lane w also contributes) and returns the sum in both lanes.
+PX_FORCE_INLINE FloatV V3SumElems(const Vec3V a)
+{
+    ASSERT_ISVALIDVEC3V(a);
+
+    // const uint32x2_t mask = {0xffffFFFF, 0x0};
+
+    const float32x2_t low = vget_low_f32(a);
+    const float32x2_t high = vget_high_f32(a);
+    // const float32x2_t high = vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(high_), mask));
+
+    const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y}
+    const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z}
+
+    return sum0ZYX;
+}
+
+/// Reduces the mask (a > max) | (min > a) with BAnyTrue3_R (defined outside this chunk).
+PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V min, const Vec3V max)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(min);
+    ASSERT_ISVALIDVEC3V(max);
+
+    const BoolV c = BOr(V3IsGrtr(a, max), V3IsGrtr(min, a));
+    return internalUnitNeonSimd::BAnyTrue3_R(c);
+}
+
+/// Reduces the mask (a >= min) & (max >= a) with BAllTrue4_R, so lane w takes part in the test.
+PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V min, const Vec3V max)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(min);
+    ASSERT_ISVALIDVEC3V(max);
+
+    const BoolV c = BAnd(V3IsGrtrOrEq(a, min), V3IsGrtrOrEq(max, a));
+    return internalUnitNeonSimd::BAllTrue4_R(c);
+}
+
+/// Reduces the mask |a| > bounds with BAnyTrue3_R.
+PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V bounds)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(bounds);
+
+    const BoolV greater = V3IsGrtr(V3Abs(a), bounds);
+    return internalUnitNeonSimd::BAnyTrue3_R(greater);
+}
+
+/// Reduces the mask bounds >= |a| with BAllTrue4_R, so lane w takes part in the test.
+PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V bounds)
+{
+    ASSERT_ISVALIDVEC3V(a);
+    ASSERT_ISVALIDVEC3V(bounds);
+
+    const BoolV greaterOrEq = V3IsGrtrOrEq(bounds, V3Abs(a));
+    return internalUnitNeonSimd::BAllTrue4_R(greaterOrEq);
+}
+
+/// Transposes the 3x3 matrix held in col0..col2 in place, using a zero fourth
+/// column so the w lane of each output is 0.
+PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2)
+{
+    ASSERT_ISVALIDVEC3V(col0);
+    ASSERT_ISVALIDVEC3V(col1);
+    ASSERT_ISVALIDVEC3V(col2);
+
+    Vec3V col3 = V3Zero();
+    const float32x4x2_t v0v1 = vzipq_f32(col0, col2);
+    const float32x4x2_t v2v3 = vzipq_f32(col1, col3);
+    const float32x4x2_t zip0 = vzipq_f32(v0v1.val[0], v2v3.val[0]);
+    const float32x4x2_t zip1 = vzipq_f32(v0v1.val[1], v2v3.val[1]);
+    col0 = zip0.val[0];
+    col1 = zip0.val[1];
+    col2 = zip1.val[0];
+    // col3 = zip1.val[1];
+}
+
+//////////////////////////////////
+// VEC4V
+//////////////////////////////////
+
+/// Builds a Vec4V whose low and high halves are both f.
+PX_FORCE_INLINE Vec4V V4Splat(const FloatV f)
+{
+    ASSERT_ISVALIDFLOATV(f);
+    return vcombine_f32(f, f);
+}
+
+/// Builds (v0[1], v1[0], v2[1], v3[0]) from the four FloatVs in floatVArray.
+PX_FORCE_INLINE Vec4V V4Merge(const FloatV* const floatVArray)
+{
+    ASSERT_ISVALIDFLOATV(floatVArray[0]);
+    ASSERT_ISVALIDFLOATV(floatVArray[1]);
+    ASSERT_ISVALIDFLOATV(floatVArray[2]);
+    ASSERT_ISVALIDFLOATV(floatVArray[3]);
+
+    const uint32x2_t xLow = vreinterpret_u32_f32(floatVArray[0]);
+    const uint32x2_t yLow = vreinterpret_u32_f32(floatVArray[1]);
+    const uint32x2_t zLow = vreinterpret_u32_f32(floatVArray[2]);
+    const uint32x2_t wLow = vreinterpret_u32_f32(floatVArray[3]);
+
+    const uint32x2_t dLow = vext_u32(xLow, yLow, 1);
+    const uint32x2_t dHigh = vext_u32(zLow, wLow, 1);
+
+    return vreinterpretq_f32_u32(vcombine_u32(dLow, dHigh));
+}
+
+/// Builds (x[1], y[0], z[1], w[0]) from four FloatVs.
+PX_FORCE_INLINE Vec4V V4Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w)
+{
+    ASSERT_ISVALIDFLOATV(x);
+    ASSERT_ISVALIDFLOATV(y);
+    ASSERT_ISVALIDFLOATV(z);
+    ASSERT_ISVALIDFLOATV(w);
+
+    const uint32x2_t xLow = vreinterpret_u32_f32(x);
+    const uint32x2_t yLow = vreinterpret_u32_f32(y);
+    const uint32x2_t zLow = vreinterpret_u32_f32(z);
+    const uint32x2_t wLow = vreinterpret_u32_f32(w);
+
+    const uint32x2_t dLow = vext_u32(xLow, yLow, 1);
+    const uint32x2_t dHigh = vext_u32(zLow, wLow, 1);
+
+    return vreinterpretq_f32_u32(vcombine_u32(dLow, dHigh));
+}
+
+/// Returns (x.w, y.w, z.w, w.w).
+PX_FORCE_INLINE Vec4V V4MergeW(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w)
+{
+    const float32x2_t xx = vget_high_f32(x);
+    const float32x2_t yy = vget_high_f32(y);
+    const float32x2_t zz = vget_high_f32(z);
+    const float32x2_t ww = vget_high_f32(w);
+
+    const float32x2x2_t zipL = vzip_f32(xx, yy);
+    const float32x2x2_t zipH = vzip_f32(zz, ww);
+
+    return vcombine_f32(zipL.val[1], zipH.val[1]);
+}
+
+/// Returns (x.z, y.z, z.z, w.z).
+PX_FORCE_INLINE Vec4V V4MergeZ(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w)
+{
+    const float32x2_t xx = vget_high_f32(x);
+    const float32x2_t yy = vget_high_f32(y);
+    const float32x2_t zz = vget_high_f32(z);
+    const float32x2_t ww = vget_high_f32(w);
+
+    const float32x2x2_t zipL = vzip_f32(xx, yy);
+    const float32x2x2_t zipH = vzip_f32(zz, ww);
+
+    return vcombine_f32(zipL.val[0], zipH.val[0]);
+}
+
+PX_FORCE_INLINE Vec4V V4MergeY(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w)
+{
+    const float32x2_t xx = vget_low_f32(x);
+    const float32x2_t yy = vget_low_f32(y);
+    const float32x2_t zz = vget_low_f32(z);
+    const float32x2_t ww = vget_low_f32(w);
+
+    const float32x2x2_t zipL = 
vzip_f32(xx, yy); + const float32x2x2_t zipH = vzip_f32(zz, ww); + + return vcombine_f32(zipL.val[1], zipH.val[1]); +} + +PX_FORCE_INLINE Vec4V V4MergeX(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const float32x2_t xx = vget_low_f32(x); + const float32x2_t yy = vget_low_f32(y); + const float32x2_t zz = vget_low_f32(z); + const float32x2_t ww = vget_low_f32(w); + + const float32x2x2_t zipL = vzip_f32(xx, yy); + const float32x2x2_t zipH = vzip_f32(zz, ww); + + return vcombine_f32(zipL.val[0], zipH.val[0]); +} + +PX_FORCE_INLINE Vec4V V4UnpackXY(const Vec4VArg a, const Vec4VArg b) +{ + return vzipq_f32(a, b).val[0]; +} + +PX_FORCE_INLINE Vec4V V4UnpackZW(const Vec4VArg a, const Vec4VArg b) +{ + return vzipq_f32(a, b).val[1]; +} + +PX_FORCE_INLINE Vec4V V4UnitW() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t zo = vext_f32(zeros, ones, 1); + return vcombine_f32(zeros, zo); +} + +PX_FORCE_INLINE Vec4V V4UnitX() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t oz = vext_f32(ones, zeros, 1); + return vcombine_f32(oz, zeros); +} + +PX_FORCE_INLINE Vec4V V4UnitY() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t zo = vext_f32(zeros, ones, 1); + return vcombine_f32(zo, zeros); +} + +PX_FORCE_INLINE Vec4V V4UnitZ() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t oz = vext_f32(ones, zeros, 1); + return vcombine_f32(zeros, oz); +} + +PX_FORCE_INLINE FloatV V4GetW(const Vec4V f) +{ + const float32x2_t fhigh = vget_high_f32(f); + return vdup_lane_f32(fhigh, 1); +} + +PX_FORCE_INLINE FloatV V4GetX(const Vec4V f) +{ + const float32x2_t fLow = vget_low_f32(f); + return vdup_lane_f32(fLow, 0); +} + +PX_FORCE_INLINE 
FloatV V4GetY(const Vec4V f) +{ + const float32x2_t fLow = vget_low_f32(f); + return vdup_lane_f32(fLow, 1); +} + +PX_FORCE_INLINE FloatV V4GetZ(const Vec4V f) +{ + const float32x2_t fhigh = vget_high_f32(f); + return vdup_lane_f32(fhigh, 0); +} + +PX_FORCE_INLINE Vec4V V4SetW(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTTF(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4SetX(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BFTTT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4SetY(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTFTT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4SetZ(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTFT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4ClearW(const Vec4V v) +{ + return V4Sel(BTTTF(), v, V4Zero()); +} + +PX_FORCE_INLINE Vec4V V4PermYXWZ(const Vec4V a) +{ + const float32x2_t xy = vget_low_f32(a); + const float32x2_t zw = vget_high_f32(a); + const float32x2_t yx = vext_f32(xy, xy, 1); + const float32x2_t wz = vext_f32(zw, zw, 1); + return vcombine_f32(yx, wz); +} + +PX_FORCE_INLINE Vec4V V4PermXZXZ(const Vec4V a) +{ + const float32x2_t xy = vget_low_f32(a); + const float32x2_t zw = vget_high_f32(a); + const float32x2x2_t xzyw = vzip_f32(xy, zw); + return vcombine_f32(xzyw.val[0], xzyw.val[0]); +} + +PX_FORCE_INLINE Vec4V V4PermYWYW(const Vec4V a) +{ + const float32x2_t xy = vget_low_f32(a); + const float32x2_t zw = vget_high_f32(a); + const float32x2x2_t xzyw = vzip_f32(xy, zw); + return vcombine_f32(xzyw.val[1], xzyw.val[1]); +} + +PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V a) +{ + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t yz = vext_u32(xy, zw, 1); + const uint32x2_t xw = vrev64_u32(vext_u32(zw, xy, 1)); + return vreinterpretq_f32_u32(vcombine_u32(yz, xw)); +} + 
+PX_FORCE_INLINE Vec4V V4PermZWXY(const Vec4V a) +{ + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + return vcombine_f32(high, low); +} + +template <PxU8 E0, PxU8 E1, PxU8 E2, PxU8 E3> +PX_FORCE_INLINE Vec4V V4Perm(const Vec4V V) +{ + static const uint32_t ControlElement[4] = + { +#if 1 + 0x03020100, // XM_SWIZZLE_X + 0x07060504, // XM_SWIZZLE_Y + 0x0B0A0908, // XM_SWIZZLE_Z + 0x0F0E0D0C, // XM_SWIZZLE_W +#else + 0x00010203, // XM_SWIZZLE_X + 0x04050607, // XM_SWIZZLE_Y + 0x08090A0B, // XM_SWIZZLE_Z + 0x0C0D0E0F, // XM_SWIZZLE_W +#endif + }; + + uint8x8x2_t tbl; + tbl.val[0] = vreinterpret_u8_f32(vget_low_f32(V)); + tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V)); + + uint8x8_t idx = + vcreate_u8(static_cast<uint64_t>(ControlElement[E0]) | (static_cast<uint64_t>(ControlElement[E1]) << 32)); + const uint8x8_t rL = vtbl2_u8(tbl, idx); + idx = vcreate_u8(static_cast<uint64_t>(ControlElement[E2]) | (static_cast<uint64_t>(ControlElement[E3]) << 32)); + const uint8x8_t rH = vtbl2_u8(tbl, idx); + return vreinterpretq_f32_u8(vcombine_u8(rL, rH)); +} + +// PT: this seems measurably slower than the hardcoded version +/*PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V a) +{ + return V4Perm<1, 2, 0, 3>(a); +}*/ + +PX_FORCE_INLINE Vec4V V4Zero() +{ + return vreinterpretq_f32_u32(vmovq_n_u32(0)); + // return vmovq_n_f32(0.0f); +} + +PX_FORCE_INLINE Vec4V V4One() +{ + return vmovq_n_f32(1.0f); +} + +PX_FORCE_INLINE Vec4V V4Eps() +{ + // return vmovq_n_f32(PX_EPS_REAL); + return V4Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec4V V4Neg(const Vec4V f) +{ + return vnegq_f32(f); +} + +PX_FORCE_INLINE Vec4V V4Add(const Vec4V a, const Vec4V b) +{ + return vaddq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Sub(const Vec4V a, const Vec4V b) +{ + return vsubq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Scale(const Vec4V a, const FloatV b) +{ + return vmulq_lane_f32(a, b, 0); +} + +PX_FORCE_INLINE Vec4V V4Mul(const Vec4V a, const Vec4V b) +{ + return vmulq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4ScaleInv(const Vec4V a, const FloatV b)
+{ + ASSERT_ISVALIDFLOATV(b); + const float32x2_t invB = VRECIP(b); + return vmulq_lane_f32(a, invB, 0); +} + +PX_FORCE_INLINE Vec4V V4Div(const Vec4V a, const Vec4V b) +{ + const float32x4_t invB = VRECIPQ(b); + return vmulq_f32(a, invB); +} + +PX_FORCE_INLINE Vec4V V4ScaleInvFast(const Vec4V a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(b); + const float32x2_t invB = VRECIPE(b); + return vmulq_lane_f32(a, invB, 0); +} + +PX_FORCE_INLINE Vec4V V4DivFast(const Vec4V a, const Vec4V b) +{ + const float32x4_t invB = VRECIPEQ(b); + return vmulq_f32(a, invB); +} + +PX_FORCE_INLINE Vec4V V4Recip(const Vec4V a) +{ + return VRECIPQ(a); +} + +PX_FORCE_INLINE Vec4V V4RecipFast(const Vec4V a) +{ + return VRECIPEQ(a); +} + +PX_FORCE_INLINE Vec4V V4Rsqrt(const Vec4V a) +{ + return VRECIPSQRTQ(a); +} + +PX_FORCE_INLINE Vec4V V4RsqrtFast(const Vec4V a) +{ + return VRECIPSQRTEQ(a); +} + +PX_FORCE_INLINE Vec4V V4Sqrt(const Vec4V a) +{ + return V4Sel(V4IsEq(a, V4Zero()), a, V4Mul(a, VRECIPSQRTQ(a))); +} + +PX_FORCE_INLINE Vec4V V4ScaleAdd(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return vmlaq_lane_f32(c, a, b, 0); +} + +PX_FORCE_INLINE Vec4V V4NegScaleSub(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return vmlsq_lane_f32(c, a, b, 0); +} + +PX_FORCE_INLINE Vec4V V4MulAdd(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return vmlaq_f32(c, a, b); +} + +PX_FORCE_INLINE Vec4V V4NegMulSub(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return vmlsq_f32(c, a, b); +} + +PX_FORCE_INLINE Vec4V V4Abs(const Vec4V a) +{ + return vabsq_f32(a); +} + +PX_FORCE_INLINE FloatV V4SumElements(const Vec4V a) +{ + const Vec4V xy = V4UnpackXY(a, a); // x,x,y,y + const Vec4V zw = V4UnpackZW(a, a); // z,z,w,w + const Vec4V xz_yw = V4Add(xy, zw); // x+z,x+z,y+w,y+w + const FloatV xz = V4GetX(xz_yw); // x+z + const FloatV yw = V4GetZ(xz_yw); // y+w + return FAdd(xz, yw); // sum +} + +PX_FORCE_INLINE FloatV V4Dot(const Vec4V a, const Vec4V 
b) +{ + const float32x4_t tmp = vmulq_f32(a, b); + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {z+w, x+y} + const float32x2_t sumWZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z+w, x+y+z+w} + return sumWZYX; +} + +PX_FORCE_INLINE FloatV V4Dot3(const Vec4V aa, const Vec4V bb) +{ + // PT: the V3Dot code relies on the fact that W=0 so we can't reuse it as-is, we need to clear W first. + // TODO: find a better implementation that does not need to clear W. + const Vec4V a = V4ClearW(aa); + const Vec4V b = V4ClearW(bb); + + const float32x4_t tmp = vmulq_f32(a, b); + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y} + const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z} + return sum0ZYX; +} + +PX_FORCE_INLINE Vec4V V4Cross(const Vec4V a, const Vec4V b) +{ + const uint32_t ui32[2] = { 0xffffFFFF, 0x0 }; + const uint32x2_t TF = vld1_u32(ui32); + const float32x2_t ay_ax = vget_low_f32(a); // d2 + const float32x2_t aw_az = vget_high_f32(a); // d3 + const float32x2_t by_bx = vget_low_f32(b); // d4 + const float32x2_t bw_bz = vget_high_f32(b); // d5 + // Hi, Lo + const float32x2_t bz_by = vext_f32(by_bx, bw_bz, 1); // bz, by + const float32x2_t az_ay = vext_f32(ay_ax, aw_az, 1); // az, ay + + const float32x2_t azbx = vmul_f32(aw_az, by_bx); // 0, az*bx + const float32x2_t aybz_axby = vmul_f32(ay_ax, bz_by); // ay*bz, ax*by + + const float32x2_t azbxSUBaxbz = vmls_f32(azbx, bw_bz, ay_ax); // 0, az*bx-ax*bz + const float32x2_t aybzSUBazby_axbySUBaybx = vmls_f32(aybz_axby, by_bx, az_ay); // ay*bz-az*by, ax*by-ay*bx + + const float32x2_t retLow = vext_f32(aybzSUBazby_axbySUBaybx, azbxSUBaxbz, 1); // az*bx-ax*bz, ay*bz-az*by + const uint32x2_t retHigh = vand_u32(TF, vreinterpret_u32_f32(aybzSUBazby_axbySUBaybx)); // 0, ax*by-ay*bx + + return 
vcombine_f32(retLow, vreinterpret_f32_u32(retHigh)); +} + +PX_FORCE_INLINE FloatV V4Length(const Vec4V a) +{ + const float32x4_t tmp = vmulq_f32(a, a); + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {z+w, x+y} + const float32x2_t sumWZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z+w, x+y+z+w} + return FSqrt(sumWZYX); +} + +PX_FORCE_INLINE FloatV V4LengthSq(const Vec4V a) +{ + return V4Dot(a, a); +} + +PX_FORCE_INLINE Vec4V V4Normalize(const Vec4V a) +{ + //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero())); + return V4ScaleInv(a, V4Length(a)); +} + +PX_FORCE_INLINE Vec4V V4NormalizeFast(const Vec4V a) +{ + //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero())); + return V4Scale(a, FRsqrtFast(V4Dot(a, a))); +} + +PX_FORCE_INLINE Vec4V V4NormalizeSafe(const Vec4V a, const Vec4V unsafeReturnValue) +{ + const FloatV zero = FZero(); + const FloatV length = V4Length(a); + const uint32x4_t isGreaterThanZero = FIsGrtr(length, zero); + return V4Sel(isGreaterThanZero, V4ScaleInv(a, length), unsafeReturnValue); +} + +PX_FORCE_INLINE BoolV V4IsEqU32(const VecU32V a, const VecU32V b) +{ + return vceqq_u32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Sel(const BoolV c, const Vec4V a, const Vec4V b) +{ + return vbslq_f32(c, a, b); +} + +PX_FORCE_INLINE BoolV V4IsGrtr(const Vec4V a, const Vec4V b) +{ + return vcgtq_f32(a, b); +} + +PX_FORCE_INLINE BoolV V4IsGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return vcgeq_f32(a, b); +} + +PX_FORCE_INLINE BoolV V4IsEq(const Vec4V a, const Vec4V b) +{ + return vceqq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Max(const Vec4V a, const Vec4V b) +{ + return vmaxq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Min(const Vec4V a, const Vec4V b) +{ + return vminq_f32(a, b); +} + +PX_FORCE_INLINE FloatV V4ExtractMax(const Vec4V a) +{ + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + + const float32x2_t max0 = vpmax_f32(high, low); + const
float32x2_t max1 = vpmax_f32(max0, max0); + + return max1; +} + +PX_FORCE_INLINE FloatV V4ExtractMin(const Vec4V a) +{ + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + + const float32x2_t min0 = vpmin_f32(high, low); + const float32x2_t min1 = vpmin_f32(min0, min0); + + return min1; +} + +PX_FORCE_INLINE Vec4V V4Clamp(const Vec4V a, const Vec4V minV, const Vec4V maxV) +{ + return V4Max(V4Min(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 V4AllGrtr(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq3(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue3_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllEq(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AnyGrtr3(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAnyTrue3_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE Vec4V V4Round(const Vec4V a) +{ + // truncate(a + (0.5f - sign(a))) + const Vec4V half = V4Load(0.5f); + const float32x4_t sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(a), 31))); + const Vec4V aPlusHalf = V4Add(a, half); + const Vec4V aRound = V4Sub(aPlusHalf, sign); + return vcvtq_f32_s32(vcvtq_s32_f32(aRound)); +} + +PX_FORCE_INLINE Vec4V V4Sin(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! 
(for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V3 = V4Mul(V2, V1); + const Vec4V V5 = V4Mul(V3, V2); + const Vec4V V7 = V4Mul(V5, V2); + const Vec4V V9 = V4Mul(V7, V2); + const Vec4V V11 = V4Mul(V9, V2); + const Vec4V V13 = V4Mul(V11, V2); + const Vec4V V15 = V4Mul(V13, V2); + const Vec4V V17 = V4Mul(V15, V2); + const Vec4V V19 = V4Mul(V17, V2); + const Vec4V V21 = V4Mul(V19, V2); + const Vec4V V23 = V4Mul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + Vec4V Result; + Result = V4ScaleAdd(V3, S1, V1); + Result = V4ScaleAdd(V5, S2, Result); + Result = V4ScaleAdd(V7, S3, Result); + Result = V4ScaleAdd(V9, S4, Result); + Result = V4ScaleAdd(V11, S5, Result); + Result = V4ScaleAdd(V13, S6, Result); + Result = V4ScaleAdd(V15, S7, Result); + Result = V4ScaleAdd(V17, S8, Result); + Result = V4ScaleAdd(V19, S9, Result); + Result = V4ScaleAdd(V21, S10, Result); + Result = V4ScaleAdd(V23, S11, Result); + + return Result; +} + +PX_FORCE_INLINE Vec4V V4Cos(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! 
- + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V4 = V4Mul(V2, V2); + const Vec4V V6 = V4Mul(V4, V2); + const Vec4V V8 = V4Mul(V4, V4); + const Vec4V V10 = V4Mul(V6, V4); + const Vec4V V12 = V4Mul(V6, V6); + const Vec4V V14 = V4Mul(V8, V6); + const Vec4V V16 = V4Mul(V8, V8); + const Vec4V V18 = V4Mul(V10, V8); + const Vec4V V20 = V4Mul(V10, V10); + const Vec4V V22 = V4Mul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + Vec4V Result; + Result = V4ScaleAdd(V2, C1, V4One()); + Result = V4ScaleAdd(V4, C2, Result); + Result = V4ScaleAdd(V6, C3, Result); + Result = V4ScaleAdd(V8, C4, Result); + Result = V4ScaleAdd(V10, C5, Result); + Result = V4ScaleAdd(V12, C6, Result); + Result = V4ScaleAdd(V14, C7, Result); + Result = V4ScaleAdd(V16, C8, Result); + Result = V4ScaleAdd(V18, C9, Result); + Result = V4ScaleAdd(V20, C10, Result); + Result = V4ScaleAdd(V22, C11, Result); + + return Result; +} + +PX_FORCE_INLINE void V4Transpose(Vec4V& col0, Vec4V& col1, Vec4V& col2, Vec4V& col3) +{ + const float32x4x2_t v0v1 = vzipq_f32(col0, col2); + const float32x4x2_t v2v3 = vzipq_f32(col1, col3); + const float32x4x2_t zip0 = vzipq_f32(v0v1.val[0], v2v3.val[0]); + const float32x4x2_t zip1 = vzipq_f32(v0v1.val[1], v2v3.val[1]); + col0 
= zip0.val[0]; + col1 = zip0.val[1]; + col2 = zip1.val[0]; + col3 = zip1.val[1]; +} + +////////////////////////////////// +// BoolV +////////////////////////////////// + +PX_FORCE_INLINE BoolV BFFFF() +{ + return vmovq_n_u32(0); +} + +PX_FORCE_INLINE BoolV BFFFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zeros, zo); +} + +PX_FORCE_INLINE BoolV BFFTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(zeros, oz); +} + +PX_FORCE_INLINE BoolV BFFTT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + return vcombine_u32(zeros, ones); +} + +PX_FORCE_INLINE BoolV BFTFF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zo, zeros); +} + +PX_FORCE_INLINE BoolV BFTFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zo, zo); +} + +PX_FORCE_INLINE BoolV BFTTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(zo, oz); +} + +PX_FORCE_INLINE BoolV BFTTT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zo, ones); +} + +PX_FORCE_INLINE BoolV BTFFF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + // const uint32x2_t zo = vext_u32(zeros, ones, 1); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, zeros); +}
+ +PX_FORCE_INLINE BoolV BTFFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, zo); +} + +PX_FORCE_INLINE BoolV BTFTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, oz); +} + +PX_FORCE_INLINE BoolV BTFTT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, ones); +} + +PX_FORCE_INLINE BoolV BTTFF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + return vcombine_u32(ones, zeros); +} + +PX_FORCE_INLINE BoolV BTTFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(ones, zo); +} + +PX_FORCE_INLINE BoolV BTTTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(ones, oz); +} + +PX_FORCE_INLINE BoolV BTTTT() +{ + return vmovq_n_u32(0xffffFFFF); +} + +PX_FORCE_INLINE BoolV BXMask() +{ + return BTFFF(); +} + +PX_FORCE_INLINE BoolV BYMask() +{ + return BFTFF(); +} + +PX_FORCE_INLINE BoolV BZMask() +{ + return BFFTF(); +} + +PX_FORCE_INLINE BoolV BWMask() +{ + return BFFFT(); +} + +PX_FORCE_INLINE BoolV BGetX(const BoolV f) +{ + const uint32x2_t fLow = vget_low_u32(f); + return vdupq_lane_u32(fLow, 0); +} + +PX_FORCE_INLINE BoolV BGetY(const BoolV f) +{ + const uint32x2_t fLow = vget_low_u32(f); + return vdupq_lane_u32(fLow, 1); +} + +PX_FORCE_INLINE BoolV BGetZ(const BoolV f) +{ + const uint32x2_t fHigh = vget_high_u32(f); + return vdupq_lane_u32(fHigh, 0); +} + +PX_FORCE_INLINE 
BoolV BGetW(const BoolV f) +{ + const uint32x2_t fHigh = vget_high_u32(f); + return vdupq_lane_u32(fHigh, 1); +} + +PX_FORCE_INLINE BoolV BSetX(const BoolV v, const BoolV f) +{ + return vbslq_u32(BFTTT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetY(const BoolV v, const BoolV f) +{ + return vbslq_u32(BTFTT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetZ(const BoolV v, const BoolV f) +{ + return vbslq_u32(BTTFT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetW(const BoolV v, const BoolV f) +{ + return vbslq_u32(BTTTF(), v, f); +} + +PX_FORCE_INLINE BoolV BAnd(const BoolV a, const BoolV b) +{ + return vandq_u32(a, b); +} + +PX_FORCE_INLINE BoolV BNot(const BoolV a) +{ + return vmvnq_u32(a); +} + +PX_FORCE_INLINE BoolV BAndNot(const BoolV a, const BoolV b) +{ + // return vbicq_u32(a, b); + return vandq_u32(a, vmvnq_u32(b)); +} + +PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b) +{ + return vorrq_u32(a, b); +} + +PX_FORCE_INLINE BoolV BAllTrue4(const BoolV a) +{ + const uint32x2_t allTrue = vmov_n_u32(0xffffFFFF); + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + const uint32x2_t result = vceq_u32(finalReduce, allTrue); + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE BoolV BAnyTrue4(const BoolV a) +{ + const uint32x2_t allTrue = vmov_n_u32(0xffffFFFF); + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + const uint32x2_t result = vtst_u32(finalReduce, allTrue); + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE BoolV BAllTrue3(const BoolV a) +{ + const uint32x2_t allTrue3 = vmov_n_u32(0x00ffFFFF); + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = 
vmovn_u32(a); + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + const uint32x2_t result = vceq_u32(vand_u32(finalReduce, allTrue3), allTrue3); + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE BoolV BAnyTrue3(const BoolV a) +{ + const uint32x2_t allTrue3 = vmov_n_u32(0x00ffFFFF); + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + const uint32x2_t result = vtst_u32(vand_u32(finalReduce, allTrue3), allTrue3); + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b) +{ + const BoolV bTest = vceqq_u32(a, b); + return internalUnitNeonSimd::BAllTrue4_R(bTest); +} + +PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a) +{ + return BAllEq(a, BTTTT()); +} + +PX_FORCE_INLINE PxU32 BAllEqFFFF(const BoolV a) +{ + return BAllEq(a, BFFFF()); +} + +PX_FORCE_INLINE PxU32 BGetBitMask(const BoolV a) +{ + static PX_ALIGN(16, const PxU32) bitMaskData[4] = { 1, 2, 4, 8 }; + const uint32x4_t bitMask = *(reinterpret_cast<const uint32x4_t*>(bitMaskData)); + const uint32x4_t t0 = vandq_u32(a, bitMask); + const uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); // Pairwise add (0 + 1), (2 + 3) + return PxU32(vget_lane_u32(vpadd_u32(t1, t1), 0)); +} + +////////////////////////////////// +// MAT33V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M33MulV3(const Mat33V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M33TrnspsMulV3(const Mat33V& a, const Vec3V b) +{ + const FloatV x = V3Dot(a.col0, b); + const
FloatV y = V3Dot(a.col1, b); + const FloatV z = V3Dot(a.col2, b); + return V3Merge(x, y, z); +} + +PX_FORCE_INLINE Vec3V M33MulV3AddV3(const Mat33V& A, const Vec3V b, const Vec3V c) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + Vec3V result = V3ScaleAdd(A.col0, x, c); + result = V3ScaleAdd(A.col1, y, result); + return V3ScaleAdd(A.col2, z, result); +} + +PX_FORCE_INLINE Mat33V M33MulM33(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(M33MulV3(a, b.col0), M33MulV3(a, b.col1), M33MulV3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Add(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Scale(const Mat33V& a, const FloatV& b) +{ + return Mat33V(V3Scale(a.col0, b), V3Scale(a.col1, b), V3Scale(a.col2, b)); +} + +PX_FORCE_INLINE Mat33V M33Inverse(const Mat33V& a) +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const BoolV btttf = BTTTF(); + + const Vec3V cross01 = V3Cross(a.col0, a.col1); + const Vec3V cross12 = V3Cross(a.col1, a.col2); + const Vec3V cross20 = V3Cross(a.col2, a.col0); + const FloatV dot = V3Dot(cross01, a.col2); + const FloatV invDet = FRecipFast(dot); + + const float32x4x2_t merge = vzipq_f32(cross12, cross01); + const float32x4_t mergeh = merge.val[0]; + const float32x4_t mergel = merge.val[1]; + + // const Vec3V colInv0 = XMVectorPermute(mergeh,cross20,PxPermuteControl(0,4,1,7)); + const float32x4_t colInv0_xxyy = vzipq_f32(mergeh, cross20).val[0]; + const float32x4_t colInv0 = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(colInv0_xxyy), btttf)); + + // const Vec3V colInv1 = XMVectorPermute(mergeh,cross20,PxPermuteControl(2,5,3,7)); + const float32x2_t zw0 = vget_high_f32(mergeh); + const float32x2_t xy1 = vget_low_f32(cross20); + const float32x2_t yzero1 = vext_f32(xy1, zeros, 1); + const float32x2x2_t merge1 = vzip_f32(zw0, yzero1); + const float32x4_t colInv1 = 
vcombine_f32(merge1.val[0], merge1.val[1]); + + // const Vec3V colInv2 = XMVectorPermute(mergel,cross20,PxPermuteControl(0,6,1,7)); + const float32x2_t x0y0 = vget_low_f32(mergel); + const float32x2_t z1w1 = vget_high_f32(cross20); + const float32x2x2_t merge2 = vzip_f32(x0y0, z1w1); + const float32x4_t colInv2 = vcombine_f32(merge2.val[0], merge2.val[1]); + + return Mat33V(vmulq_lane_f32(colInv0, invDet, 0), vmulq_lane_f32(colInv1, invDet, 0), + vmulq_lane_f32(colInv2, invDet, 0)); +} + +PX_FORCE_INLINE Mat33V M33Trnsps(const Mat33V& a) +{ + return Mat33V(V3Merge(V3GetX(a.col0), V3GetX(a.col1), V3GetX(a.col2)), + V3Merge(V3GetY(a.col0), V3GetY(a.col1), V3GetY(a.col2)), + V3Merge(V3GetZ(a.col0), V3GetZ(a.col1), V3GetZ(a.col2))); +} + +PX_FORCE_INLINE Mat33V M33Identity() +{ + return Mat33V(V3UnitX(), V3UnitY(), V3UnitZ()); +} + +PX_FORCE_INLINE Mat33V M33Sub(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Sub(a.col0, b.col0), V3Sub(a.col1, b.col1), V3Sub(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Neg(const Mat33V& a) +{ + return Mat33V(V3Neg(a.col0), V3Neg(a.col1), V3Neg(a.col2)); +} + +PX_FORCE_INLINE Mat33V M33Abs(const Mat33V& a) +{ + return Mat33V(V3Abs(a.col0), V3Abs(a.col1), V3Abs(a.col2)); +} + +PX_FORCE_INLINE Mat33V PromoteVec3V(const Vec3V v) +{ + const BoolV bTFFF = BTFFF(); + const BoolV bFTFF = BFTFF(); + const BoolV bFFTF = BFFTF(); // Z lane only, so col2 keeps just v.z + + const Vec3V zero = V3Zero(); + + return Mat33V(V3Sel(bTFFF, v, zero), V3Sel(bFTFF, v, zero), V3Sel(bFFTF, v, zero)); +} + +PX_FORCE_INLINE Mat33V M33Diagonal(const Vec3VArg d) +{ + const Vec3V x = V3Mul(V3UnitX(), d); + const Vec3V y = V3Mul(V3UnitY(), d); + const Vec3V z = V3Mul(V3UnitZ(), d); + return Mat33V(x, y, z); +} + +////////////////////////////////// +// MAT34V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M34MulV3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); +
const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + const Vec3V v0PlusV1Plusv2 = V3Add(v0PlusV1, v2); + return V3Add(v0PlusV1Plusv2, a.col3); +} + +PX_FORCE_INLINE Vec3V M34Mul33V3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M34TrnspsMul33V3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3Dot(a.col0, b); + const FloatV y = V3Dot(a.col1, b); + const FloatV z = V3Dot(a.col2, b); + return V3Merge(x, y, z); +} + +PX_FORCE_INLINE Mat34V M34MulM34(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2), M34MulV3(a, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34MulM33(const Mat34V& a, const Mat33V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M34Mul33MM34(const Mat34V& a, const Mat34V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat34V M34Add(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2), V3Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34Trnsps33(const Mat34V& a) +{ + return Mat33V(V3Merge(V3GetX(a.col0), V3GetX(a.col1), V3GetX(a.col2)), + V3Merge(V3GetY(a.col0), V3GetY(a.col1), V3GetY(a.col2)), + V3Merge(V3GetZ(a.col0), V3GetZ(a.col1), V3GetZ(a.col2))); +} + +////////////////////////////////// +// MAT44V +////////////////////////////////// + +PX_FORCE_INLINE Vec4V M44MulV4(const Mat44V& a, const Vec4V b) +{ + const FloatV x = V4GetX(b); + const FloatV y = V4GetY(b); + const FloatV z = V4GetZ(b); + const FloatV w = V4GetW(b); 
+ + const Vec4V v0 = V4Scale(a.col0, x); + const Vec4V v1 = V4Scale(a.col1, y); + const Vec4V v2 = V4Scale(a.col2, z); + const Vec4V v3 = V4Scale(a.col3, w); + const Vec4V v0PlusV1 = V4Add(v0, v1); + const Vec4V v0PlusV1Plusv2 = V4Add(v0PlusV1, v2); + return V4Add(v0PlusV1Plusv2, v3); +} + +PX_FORCE_INLINE Vec4V M44TrnspsMulV4(const Mat44V& a, const Vec4V b) +{ + return V4Merge(V4Dot(a.col0, b), V4Dot(a.col1, b), V4Dot(a.col2, b), V4Dot(a.col3, b)); +} + +PX_FORCE_INLINE Mat44V M44MulM44(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(M44MulV4(a, b.col0), M44MulV4(a, b.col1), M44MulV4(a, b.col2), M44MulV4(a, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Add(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(V4Add(a.col0, b.col0), V4Add(a.col1, b.col1), V4Add(a.col2, b.col2), V4Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Trnsps(const Mat44V& a) +{ + // asm volatile( + // "vzip.f32 %q0, %q2 \n\t" + // "vzip.f32 %q1, %q3 \n\t" + // "vzip.f32 %q0, %q1 \n\t" + // "vzip.f32 %q2, %q3 \n\t" + // : "+w" (a.col0), "+w" (a.col1), "+w" (a.col2), "+w" a.col3)); + + const float32x4x2_t v0v1 = vzipq_f32(a.col0, a.col2); + const float32x4x2_t v2v3 = vzipq_f32(a.col1, a.col3); + const float32x4x2_t zip0 = vzipq_f32(v0v1.val[0], v2v3.val[0]); + const float32x4x2_t zip1 = vzipq_f32(v0v1.val[1], v2v3.val[1]); + + return Mat44V(zip0.val[0], zip0.val[1], zip1.val[0], zip1.val[1]); +} + +PX_FORCE_INLINE Mat44V M44Inverse(const Mat44V& a) +{ + float32x4_t minor0, minor1, minor2, minor3; + float32x4_t row0, row1, row2, row3; + float32x4_t det, tmp1; + + tmp1 = vmovq_n_f32(0.0f); + row1 = vmovq_n_f32(0.0f); + row3 = vmovq_n_f32(0.0f); + + row0 = a.col0; + row1 = vextq_f32(a.col1, a.col1, 2); + row2 = a.col2; + row3 = vextq_f32(a.col3, a.col3, 2); + + tmp1 = vmulq_f32(row2, row3); + tmp1 = vrev64q_f32(tmp1); + minor0 = vmulq_f32(row1, tmp1); + minor1 = vmulq_f32(row0, tmp1); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor0 = vsubq_f32(vmulq_f32(row1, tmp1), minor0); + minor1 = 
vsubq_f32(vmulq_f32(row0, tmp1), minor1); + minor1 = vextq_f32(minor1, minor1, 2); + + tmp1 = vmulq_f32(row1, row2); + tmp1 = vrev64q_f32(tmp1); + minor0 = vaddq_f32(vmulq_f32(row3, tmp1), minor0); + minor3 = vmulq_f32(row0, tmp1); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor0 = vsubq_f32(minor0, vmulq_f32(row3, tmp1)); + minor3 = vsubq_f32(vmulq_f32(row0, tmp1), minor3); + minor3 = vextq_f32(minor3, minor3, 2); + + tmp1 = vmulq_f32(vextq_f32(row1, row1, 2), row3); + tmp1 = vrev64q_f32(tmp1); + row2 = vextq_f32(row2, row2, 2); + minor0 = vaddq_f32(vmulq_f32(row2, tmp1), minor0); + minor2 = vmulq_f32(row0, tmp1); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor0 = vsubq_f32(minor0, vmulq_f32(row2, tmp1)); + minor2 = vsubq_f32(vmulq_f32(row0, tmp1), minor2); + minor2 = vextq_f32(minor2, minor2, 2); + + tmp1 = vmulq_f32(row0, row1); + tmp1 = vrev64q_f32(tmp1); + minor2 = vaddq_f32(vmulq_f32(row3, tmp1), minor2); + minor3 = vsubq_f32(vmulq_f32(row2, tmp1), minor3); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor2 = vsubq_f32(vmulq_f32(row3, tmp1), minor2); + minor3 = vsubq_f32(minor3, vmulq_f32(row2, tmp1)); + + tmp1 = vmulq_f32(row0, row3); + tmp1 = vrev64q_f32(tmp1); + minor1 = vsubq_f32(minor1, vmulq_f32(row2, tmp1)); + minor2 = vaddq_f32(vmulq_f32(row1, tmp1), minor2); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor1 = vaddq_f32(vmulq_f32(row2, tmp1), minor1); + minor2 = vsubq_f32(minor2, vmulq_f32(row1, tmp1)); + + tmp1 = vmulq_f32(row0, row2); + tmp1 = vrev64q_f32(tmp1); + minor1 = vaddq_f32(vmulq_f32(row3, tmp1), minor1); + minor3 = vsubq_f32(minor3, vmulq_f32(row1, tmp1)); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor1 = vsubq_f32(minor1, vmulq_f32(row3, tmp1)); + minor3 = vaddq_f32(vmulq_f32(row1, tmp1), minor3); + + det = vmulq_f32(row0, minor0); + det = vaddq_f32(vextq_f32(det, det, 2), det); + det = vaddq_f32(vrev64q_f32(det), det); + det = vdupq_lane_f32(VRECIPE(vget_low_f32(det)), 0); + + minor0 = vmulq_f32(det, minor0); + minor1 = vmulq_f32(det, minor1); + minor2 = 
vmulq_f32(det, minor2); + minor3 = vmulq_f32(det, minor3); + Mat44V invTrans(minor0, minor1, minor2, minor3); + return M44Trnsps(invTrans); +} + +PX_FORCE_INLINE Vec4V V4LoadXYZW(const PxF32& x, const PxF32& y, const PxF32& z, const PxF32& w) +{ + const float f4[4] = { x, y, z, w }; + const float32x4_t ret = vld1q_f32(f4); + return ret; +} + +/* +PX_FORCE_INLINE VecU16V V4U32PK(VecU32V a, VecU32V b) +{ + return vcombine_u16(vqmovn_u32(a), vqmovn_u32(b)); +} +*/ + +PX_FORCE_INLINE VecU32V V4U32Sel(const BoolV c, const VecU32V a, const VecU32V b) +{ + return vbslq_u32(c, a, b); +} + +PX_FORCE_INLINE VecU32V V4U32or(VecU32V a, VecU32V b) +{ + return vorrq_u32(a, b); +} + +PX_FORCE_INLINE VecU32V V4U32xor(VecU32V a, VecU32V b) +{ + return veorq_u32(a, b); +} + +PX_FORCE_INLINE VecU32V V4U32and(VecU32V a, VecU32V b) +{ + return vandq_u32(a, b); +} + +PX_FORCE_INLINE VecU32V V4U32Andc(VecU32V a, VecU32V b) +{ + // return vbicq_u32(a, b); // creates gcc compiler bug in RTreeQueries.cpp + return vandq_u32(a, vmvnq_u32(b)); +} + +/* +PX_FORCE_INLINE VecU16V V4U16Or(VecU16V a, VecU16V b) +{ + return vorrq_u16(a, b); +} +*/ + +/* +PX_FORCE_INLINE VecU16V V4U16And(VecU16V a, VecU16V b) +{ + return vandq_u16(a, b); +} +*/ +/* +PX_FORCE_INLINE VecU16V V4U16Andc(VecU16V a, VecU16V b) +{ + return vbicq_u16(a, b); +} +*/ + +PX_FORCE_INLINE VecI32V I4LoadXYZW(const PxI32& x, const PxI32& y, const PxI32& z, const PxI32& w) +{ + const int32_t i4[4] = { x, y, z, w }; + const int32x4_t ret = vld1q_s32(i4); + return ret; +} + +PX_FORCE_INLINE VecI32V I4Load(const PxI32 i) +{ + return vdupq_n_s32(i); +} + +PX_FORCE_INLINE VecI32V I4LoadU(const PxI32* i) +{ + return vld1q_s32(i); +} + +PX_FORCE_INLINE VecI32V I4LoadA(const PxI32* i) +{ + return vld1q_s32(i); +} + +PX_FORCE_INLINE VecI32V VecI32V_Add(const VecI32VArg a, const VecI32VArg b) +{ + return vaddq_s32(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sub(const VecI32VArg a, const VecI32VArg b) +{ + return vsubq_s32(a, b); +} + 
+PX_FORCE_INLINE BoolV VecI32V_IsGrtr(const VecI32VArg a, const VecI32VArg b) +{ + return vcgtq_s32(a, b); +} + +PX_FORCE_INLINE BoolV VecI32V_IsEq(const VecI32VArg a, const VecI32VArg b) +{ + return vceqq_s32(a, b); +} + +PX_FORCE_INLINE VecI32V V4I32Sel(const BoolV c, const VecI32V a, const VecI32V b) +{ + return vbslq_s32(c, a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Zero() +{ + return vdupq_n_s32(0); +} + +PX_FORCE_INLINE VecI32V VecI32V_One() +{ + return vdupq_n_s32(1); +} + +PX_FORCE_INLINE VecI32V VecI32V_Two() +{ + return vdupq_n_s32(2); +} + +PX_FORCE_INLINE VecI32V VecI32V_MinusOne() +{ + return vdupq_n_s32(-1); +} + +PX_FORCE_INLINE VecU32V U4Zero() +{ + return U4Load(0); +} + +PX_FORCE_INLINE VecU32V U4One() +{ + return U4Load(1); +} + +PX_FORCE_INLINE VecU32V U4Two() +{ + return U4Load(2); +} + +PX_FORCE_INLINE VecShiftV VecI32V_PrepareShift(const VecI32VArg shift) +{ + return shift; +} + +PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const VecShiftVArg count) +{ + return vshlq_s32(a, count); +} + +PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const VecShiftVArg count) +{ + return vshlq_s32(a, VecI32V_Sub(I4Load(0), count)); +} + +PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const PxU32 count) +{ + const PxI32 i = (PxI32)count; + const int32x4_t shiftCount = vld1q_dup_s32(&i); + return vshlq_s32(a, shiftCount); +} + +PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const PxU32 count) +{ + const PxI32 i = -(PxI32)count; + const int32x4_t shiftCount = vld1q_dup_s32(&i); + return vshlq_s32(a, shiftCount); +} + +PX_FORCE_INLINE VecI32V VecI32V_And(const VecI32VArg a, const VecI32VArg b) +{ + return vandq_s32(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Or(const VecI32VArg a, const VecI32VArg b) +{ + return vorrq_s32(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetX(const VecI32VArg f) +{ + const int32x2_t fLow = vget_low_s32(f); + return vdupq_lane_s32(fLow, 0); +} + +PX_FORCE_INLINE VecI32V 
VecI32V_GetY(const VecI32VArg f) +{ + const int32x2_t fLow = vget_low_s32(f); + return vdupq_lane_s32(fLow, 1); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetZ(const VecI32VArg f) +{ + const int32x2_t fHigh = vget_high_s32(f); + return vdupq_lane_s32(fHigh, 0); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetW(const VecI32VArg f) +{ + const int32x2_t fHigh = vget_high_s32(f); + return vdupq_lane_s32(fHigh, 1); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sel(const BoolV c, const VecI32VArg a, const VecI32VArg b) +{ + return vbslq_s32(c, a, b); +} + +PX_FORCE_INLINE void PxI32_From_VecI32V(const VecI32VArg a, PxI32* i) +{ + *i = vgetq_lane_s32(a, 0); +} + +PX_FORCE_INLINE VecI32V VecI32V_Merge(const VecI32VArg a, const VecI32VArg b, const VecI32VArg c, const VecI32VArg d) +{ + const int32x2_t aLow = vget_low_s32(a); + const int32x2_t bLow = vget_low_s32(b); + const int32x2_t cLow = vget_low_s32(c); + const int32x2_t dLow = vget_low_s32(d); + + const int32x2_t low = vext_s32(aLow, bLow, 1); + const int32x2_t high = vext_s32(cLow, dLow, 1); + + return vcombine_s32(low, high); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_BoolV(const BoolVArg a) +{ + return vreinterpretq_s32_u32(a); +} + +PX_FORCE_INLINE VecU32V VecU32V_From_BoolV(const BoolVArg a) +{ + return a; +} + +/* +template PX_FORCE_INLINE VecI32V V4ISplat() +{ + return vdupq_n_s32(a); +} + +template PX_FORCE_INLINE VecU32V V4USplat() +{ + return vdupq_n_u32(a); +} +*/ + +/* +PX_FORCE_INLINE void V4U16StoreAligned(VecU16V val, VecU16V* address) +{ + vst1q_u16((uint16_t*)address, val); +} +*/ + +PX_FORCE_INLINE void V4U32StoreAligned(VecU32V val, VecU32V* address) +{ + vst1q_u32(reinterpret_cast(address), val); +} + +PX_FORCE_INLINE Vec4V V4LoadAligned(Vec4V* addr) +{ + return vld1q_f32(reinterpret_cast(addr)); +} + +PX_FORCE_INLINE Vec4V V4LoadUnaligned(Vec4V* addr) +{ + return vld1q_f32(reinterpret_cast(addr)); +} + +PX_FORCE_INLINE Vec4V V4Andc(const Vec4V a, const VecU32V b) +{ + return 
vreinterpretq_f32_u32(V4U32Andc(vreinterpretq_u32_f32(a), b)); +} + +PX_FORCE_INLINE VecU32V V4IsGrtrV32u(const Vec4V a, const Vec4V b) +{ + return V4IsGrtr(a, b); +} + +PX_FORCE_INLINE VecU16V V4U16LoadAligned(VecU16V* addr) +{ + return vld1q_u16(reinterpret_cast(addr)); +} + +PX_FORCE_INLINE VecU16V V4U16LoadUnaligned(VecU16V* addr) +{ + return vld1q_u16(reinterpret_cast(addr)); +} + +PX_FORCE_INLINE VecU16V V4U16CompareGt(VecU16V a, VecU16V b) +{ + return vcgtq_u16(a, b); +} + +PX_FORCE_INLINE VecU16V V4I16CompareGt(VecI16V a, VecI16V b) +{ + return vcgtq_s16(a, b); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecU32V(VecU32V a) +{ + return vcvtq_f32_u32(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecI32V(VecI32V a) +{ + return vcvtq_f32_s32(a); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_Vec4V(Vec4V a) +{ + return vcvtq_s32_f32(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecU32V(VecU32V a) +{ + return vreinterpretq_f32_u32(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecI32V(VecI32V a) +{ + return vreinterpretq_f32_s32(a); +} + +PX_FORCE_INLINE VecU32V VecU32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return vreinterpretq_u32_f32(a); +} + +PX_FORCE_INLINE VecI32V VecI32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return vreinterpretq_s32_f32(a); +} + +#if !PX_SWITCH +template +PX_FORCE_INLINE BoolV BSplatElement(BoolV a) +{ + if(index < 2) + { + return vdupq_lane_u32(vget_low_u32(a), index); + } + else if(index == 2) + { + return vdupq_lane_u32(vget_high_u32(a), 0); + } + else if(index == 3) + { + return vdupq_lane_u32(vget_high_u32(a), 1); + } +} +#else +//workaround for template compile issue +template PX_FORCE_INLINE BoolV BSplatElement(BoolV a); +template<> PX_FORCE_INLINE BoolV BSplatElement<0>(BoolV a) { return vdupq_lane_u32(vget_low_u32(a), 0); } +template<> PX_FORCE_INLINE BoolV BSplatElement<1>(BoolV a) { return vdupq_lane_u32(vget_low_u32(a), 1); } +template<> PX_FORCE_INLINE BoolV BSplatElement<2>(BoolV a) { return vdupq_lane_u32(vget_high_u32(a), 0); 
} +template<> PX_FORCE_INLINE BoolV BSplatElement<3>(BoolV a) { return vdupq_lane_u32(vget_high_u32(a), 1); } +#endif + +#if !PX_SWITCH +template +PX_FORCE_INLINE VecU32V V4U32SplatElement(VecU32V a) +{ + if(index < 2) + { + return vdupq_lane_u32(vget_low_u32(a), index); + } + else if(index == 2) + { + return vdupq_lane_u32(vget_high_u32(a), 0); + } + else if(index == 3) + { + return vdupq_lane_u32(vget_high_u32(a), 1); + } +} +#else +//workaround for template compile issue +template PX_FORCE_INLINE VecU32V V4U32SplatElement(VecU32V a); +template <> PX_FORCE_INLINE VecU32V V4U32SplatElement<0>(VecU32V a) { return vdupq_lane_u32(vget_low_u32(a), 0); } +template <> PX_FORCE_INLINE VecU32V V4U32SplatElement<1>(VecU32V a) { return vdupq_lane_u32(vget_low_u32(a), 1); } +template <> PX_FORCE_INLINE VecU32V V4U32SplatElement<2>(VecU32V a) { return vdupq_lane_u32(vget_high_u32(a), 0); } +template <> PX_FORCE_INLINE VecU32V V4U32SplatElement<3>(VecU32V a) { return vdupq_lane_u32(vget_high_u32(a), 1); } +#endif + +#if !PX_SWITCH +template +PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a) +{ + if(index == 0) + { + return vdupq_lane_f32(vget_low_f32(a), 0); + } + else if (index == 1) + { + return vdupq_lane_f32(vget_low_f32(a), 1); + } + else if(index == 2) + { + return vdupq_lane_f32(vget_high_f32(a), 0); + } + else if(index == 3) + { + return vdupq_lane_f32(vget_high_f32(a), 1); + } +} +#else +//workaround for template compile issue +template PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a); +template <> PX_FORCE_INLINE Vec4V V4SplatElement<0>(Vec4V a) { return vdupq_lane_f32(vget_low_f32(a), 0); } +template <> PX_FORCE_INLINE Vec4V V4SplatElement<1>(Vec4V a) { return vdupq_lane_f32(vget_low_f32(a), 1); } +template <> PX_FORCE_INLINE Vec4V V4SplatElement<2>(Vec4V a) { return vdupq_lane_f32(vget_high_f32(a), 0); } +template <> PX_FORCE_INLINE Vec4V V4SplatElement<3>(Vec4V a) { return vdupq_lane_f32(vget_high_f32(a), 1); } +#endif + +PX_FORCE_INLINE VecU32V U4LoadXYZW(PxU32 x, PxU32 
y, PxU32 z, PxU32 w) +{ + const uint32_t u4[4] = { x, y, z, w }; + const uint32x4_t ret = vld1q_u32(u4); + return ret; +} + +PX_FORCE_INLINE VecU32V U4Load(const PxU32 i) +{ + return vdupq_n_u32(i); +} + +PX_FORCE_INLINE VecU32V U4LoadU(const PxU32* i) +{ + return vld1q_u32(i); +} + +PX_FORCE_INLINE VecU32V U4LoadA(const PxU32* i) +{ + return vld1q_u32(i); +} + +PX_FORCE_INLINE Vec4V V4Ceil(const Vec4V in) +{ + const float32x4_t ones = vdupq_n_f32(1.0f); + const float32x4_t rdToZero = vcvtq_f32_s32(vcvtq_s32_f32(in)); + const float32x4_t rdToZeroPlusOne = vaddq_f32(rdToZero, ones); + const uint32x4_t gt = vcgtq_f32(in, rdToZero); + return vbslq_f32(gt, rdToZeroPlusOne, rdToZero); +} + +PX_FORCE_INLINE Vec4V V4Floor(const Vec4V in) +{ + const float32x4_t ones = vdupq_n_f32(1.0f); + const float32x4_t rdToZero = vcvtq_f32_s32(vcvtq_s32_f32(in)); + const float32x4_t rdToZeroMinusOne = vsubq_f32(rdToZero, ones); + const uint32x4_t lt = vcltq_f32(in, rdToZero); + return vbslq_f32(lt, rdToZeroMinusOne, rdToZero); +} + +PX_FORCE_INLINE VecU32V V4ConvertToU32VSaturate(const Vec4V in, PxU32 power) +{ + PX_ASSERT(power == 0 && "Non-zero power not supported in convertToU32VSaturate"); + PX_UNUSED(power); // prevent warning in release builds + + return vcvtq_u32_f32(in); +} + +PX_FORCE_INLINE void QuatGetMat33V(const QuatVArg q, Vec3V& column0, Vec3V& column1, Vec3V& column2) +{ + const FloatV one = FOne(); + const FloatV x = V4GetX(q); + const FloatV y = V4GetY(q); + const FloatV z = V4GetZ(q); + const FloatV w = V4GetW(q); + + const FloatV x2 = FAdd(x, x); + const FloatV y2 = FAdd(y, y); + const FloatV z2 = FAdd(z, z); + + const FloatV xx = FMul(x2, x); + const FloatV yy = FMul(y2, y); + const FloatV zz = FMul(z2, z); + + const FloatV xy = FMul(x2, y); + const FloatV xz = FMul(x2, z); + const FloatV xw = FMul(x2, w); + + const FloatV yz = FMul(y2, z); + const FloatV yw = FMul(y2, w); + const FloatV zw = FMul(z2, w); + + const FloatV v = FSub(one, xx); + + column0 = 
V3Merge(FSub(FSub(one, yy), zz), FAdd(xy, zw), FSub(xz, yw)); + column1 = V3Merge(FSub(xy, zw), FSub(v, zz), FAdd(yz, xw)); + column2 = V3Merge(FAdd(xz, yw), FSub(yz, xw), FSub(v, yy)); +} + +} // namespace aos +} // namespace physx + +#endif // PXFOUNDATION_PXWINDOWSNEONINLINEAOS_H diff --git a/Source/ThirdParty/PhysX/geomutils/PxContactPoint.h b/Source/ThirdParty/PhysX/geomutils/PxContactPoint.h index 9630bf255..cdd884e21 100644 --- a/Source/ThirdParty/PhysX/geomutils/PxContactPoint.h +++ b/Source/ThirdParty/PhysX/geomutils/PxContactPoint.h @@ -31,6 +31,11 @@ #include "foundation/PxVec3.h" +#if PX_VC +#pragma warning(push) +#pragma warning(disable : 4324) // structure was padded due to alignment specifier +#endif + #if !PX_DOXYGEN namespace physx { @@ -97,4 +102,8 @@ namespace physx } // namespace physx #endif +#if PX_VC +#pragma warning(pop) +#endif + #endif diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs index 734ba3929..f59fac426 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs @@ -92,6 +92,14 @@ namespace Flax.Deps.Dependencies var cmakeParams = presetXml["preset"]["CMakeParams"]; switch (targetPlatform) { + case TargetPlatform.Windows: + if (architecture == TargetArchitecture.ARM64) + { + // Windows ARM64 doesn't have GPU support, so avoid copying those DLLs around + ConfigureCmakeSwitch(cmakeSwitches, "PX_COPY_EXTERNAL_DLL", "OFF"); + ConfigureCmakeSwitch(cmakeParams, "PX_COPY_EXTERNAL_DLL", "OFF"); + } + break; case TargetPlatform.Android: ConfigureCmakeSwitch(cmakeParams, "CMAKE_INSTALL_PREFIX", $"install/android-{Configuration.AndroidPlatformApi}/PhysX"); ConfigureCmakeSwitch(cmakeParams, "ANDROID_NATIVE_API_LEVEL", $"android-{Configuration.AndroidPlatformApi}"); @@ -241,6 +249,12 @@ namespace Flax.Deps.Dependencies envVars.Add("PM_ANDROIDNDK_PATH", AndroidNdk.Instance.RootPath); } + // Update packman for old PhysX 
version (https://github.com/NVIDIA-Omniverse/PhysX/issues/229) + if (BuildPlatform == TargetPlatform.Windows) + Utilities.Run(Path.Combine(projectGenDir, "buildtools", "packman", "packman.cmd"), "update -y"); + else + Utilities.Run(Path.Combine(projectGenDir, "buildtools", "packman", "packman"), "update -y"); + // Print the PhysX version Log.Info("Building PhysX version " + File.ReadAllText(Path.Combine(root, "physx", "version.txt")) + " to " + binariesSubDir); @@ -375,6 +389,7 @@ namespace Flax.Deps.Dependencies case TargetPlatform.Windows: { Build(options, "vc17win64", platform, TargetArchitecture.x64); + Build(options, "vc17win-arm64", platform, TargetArchitecture.ARM64); break; } case TargetPlatform.Linux: From 24c645d8d2417af5da9f03c12e10661c3c1cbb7c Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:25:16 +0300 Subject: [PATCH 115/292] Build NvCloth for Windows on ARM --- Source/ThirdParty/NvCloth/ps/PsMathUtils.h | 2 +- Source/ThirdParty/NvCloth/ps/windows/PsWindowsIntrinsics.h | 2 +- Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Source/ThirdParty/NvCloth/ps/PsMathUtils.h b/Source/ThirdParty/NvCloth/ps/PsMathUtils.h index b2c4602c0..123114598 100644 --- a/Source/ThirdParty/NvCloth/ps/PsMathUtils.h +++ b/Source/ThirdParty/NvCloth/ps/PsMathUtils.h @@ -313,7 +313,7 @@ PX_FORCE_INLINE physx::PxI32 rand(const physx::PxI32 a, const physx::PxI32 b) */ PX_FORCE_INLINE physx::PxF32 rand(const physx::PxF32 a, const physx::PxF32 b) { - return a + (b - a) * ::rand() / RAND_MAX; + return a + (b - a) * ::rand() / (physx::PxF32)RAND_MAX; } //! 
\brief return angle between two vectors in radians diff --git a/Source/ThirdParty/NvCloth/ps/windows/PsWindowsIntrinsics.h b/Source/ThirdParty/NvCloth/ps/windows/PsWindowsIntrinsics.h index 5eaaac40f..f32b3802d 100644 --- a/Source/ThirdParty/NvCloth/ps/windows/PsWindowsIntrinsics.h +++ b/Source/ThirdParty/NvCloth/ps/windows/PsWindowsIntrinsics.h @@ -118,7 +118,7 @@ PX_FORCE_INLINE uint32_t PxCountLeadingZeros(uint32_t v) /*! Prefetch aligned cache size around \c ptr+offset. */ -#if !PX_ARM +#if !PX_ARM && !PX_A64 PX_FORCE_INLINE void PxPrefetchLine(const void* ptr, uint32_t offset = 0) { // cache line on X86/X64 is 64-bytes so a 128-byte prefetch would require 2 prefetches. diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs b/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs index 110e26f81..80852fb61 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs @@ -65,6 +65,7 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: Build(options, platform, TargetArchitecture.x64); + Build(options, platform, TargetArchitecture.ARM64); break; case TargetPlatform.XboxOne: case TargetPlatform.XboxScarlett: @@ -193,7 +194,7 @@ namespace Flax.Deps.Dependencies RunCmake(cmakeFolder, platform, architecture, " -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF " + cmakeArgs, envVars); // Run build - Utilities.Run("cmake", "--build . 
--config Release", null, cmakeFolder, Utilities.RunOptions.ThrowExceptionOnError, envVars); + BuildCmake(cmakeFolder, envVars); // Deploy binaries var libs = new[] From c8f951e6e85668a86b2fc5c524a52e79ea27ff6c Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Thu, 9 May 2024 20:30:24 +0300 Subject: [PATCH 116/292] Use VS2022 Win10 MSVC solutions in DirectX-related dependencies --- Source/ThirdParty/DirectXMesh/DirectXMesh.h | 308 ++++++++++++------ Source/ThirdParty/DirectXMesh/DirectXMesh.inl | 16 +- Source/ThirdParty/DirectXMesh/LICENSE | 34 +- Source/ThirdParty/UVAtlas/LICENSE | 34 +- Source/ThirdParty/UVAtlas/UVAtlas.h | 152 +++++---- .../Deps/Dependencies/DirectXMesh.cs | 19 +- .../Deps/Dependencies/DirectXTex.cs | 15 +- .../Flax.Build/Deps/Dependencies/UVAtlas.cs | 19 +- 8 files changed, 385 insertions(+), 212 deletions(-) diff --git a/Source/ThirdParty/DirectXMesh/DirectXMesh.h b/Source/ThirdParty/DirectXMesh/DirectXMesh.h index 9a8c72a3b..f3c3779dd 100644 --- a/Source/ThirdParty/DirectXMesh/DirectXMesh.h +++ b/Source/ThirdParty/DirectXMesh/DirectXMesh.h @@ -1,9 +1,9 @@ //------------------------------------------------------------------------------------- // DirectXMesh.h -// +// // DirectX Mesh Geometry Library // -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
// // http://go.microsoft.com/fwlink/?LinkID=324981 @@ -11,51 +11,63 @@ #pragma once +#include +#include #include #include #include +#include #include -#include - -#if !defined(__d3d11_h__) && !defined(__d3d11_x_h__) && !defined(__d3d12_h__) && !defined(__d3d12_x_h__) -#if defined(_XBOX_ONE) && defined(_TITLE) -#include +#ifdef _WIN32 +#if !defined(__d3d11_h__) && !defined(__d3d11_x_h__) && !defined(__d3d12_h__) && !defined(__d3d12_x_h__) && !defined(__XBOX_D3D12_X__) +#ifdef _GAMING_XBOX_SCARLETT +#include +#elif defined(_GAMING_XBOX) +#include +#elif defined(_XBOX_ONE) && defined(_TITLE) +#error This library no longer supports legacy Xbox One XDK #else #include #endif #endif +#else // !WIN32 +#include +#include +#endif -#include +#include +#include +#include -#define DIRECTX_MESH_VERSION 130 +#define DIRECTX_MESH_VERSION 166 namespace DirectX { //--------------------------------------------------------------------------------- // DXGI Format Utilities - bool __cdecl IsValidVB(_In_ DXGI_FORMAT fmt); - bool __cdecl IsValidIB(_In_ DXGI_FORMAT fmt); - size_t __cdecl BytesPerElement(_In_ DXGI_FORMAT fmt); + bool __cdecl IsValidVB(_In_ DXGI_FORMAT fmt) noexcept; + constexpr bool __cdecl IsValidIB(_In_ DXGI_FORMAT fmt) noexcept; + size_t __cdecl BytesPerElement(_In_ DXGI_FORMAT fmt) noexcept; //--------------------------------------------------------------------------------- // Input Layout Descriptor Utilities #if defined(__d3d11_h__) || defined(__d3d11_x_h__) - bool __cdecl IsValid(_In_reads_(nDecl) const D3D11_INPUT_ELEMENT_DESC* vbDecl, _In_ size_t nDecl); + bool __cdecl IsValid(_In_reads_(nDecl) const D3D11_INPUT_ELEMENT_DESC* vbDecl, _In_ size_t nDecl) noexcept; void __cdecl ComputeInputLayout( _In_reads_(nDecl) const D3D11_INPUT_ELEMENT_DESC* vbDecl, _In_ size_t nDecl, _Out_writes_opt_(nDecl) uint32_t* offsets, - _Out_writes_opt_(D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) uint32_t* strides); + _Out_writes_opt_(D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) uint32_t* 
strides) noexcept; #endif -#if defined(__d3d12_h__) || defined(__d3d12_x_h__) - bool __cdecl IsValid(const D3D12_INPUT_LAYOUT_DESC& vbDecl); +#if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) + bool __cdecl IsValid(const D3D12_INPUT_LAYOUT_DESC& vbDecl) noexcept; void __cdecl ComputeInputLayout( const D3D12_INPUT_LAYOUT_DESC& vbDecl, _Out_writes_opt_(vbDecl.NumElements) uint32_t* offsets, - _Out_writes_opt_(D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) uint32_t* strides); + _Out_writes_opt_(D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) uint32_t* strides) noexcept; #endif //--------------------------------------------------------------------------------- @@ -80,8 +92,8 @@ namespace DirectX { public: VBReader() noexcept(false); - VBReader(VBReader&& moveFrom) noexcept; - VBReader& operator= (VBReader&& moveFrom) noexcept; + VBReader(VBReader&&) noexcept; + VBReader& operator= (VBReader&&) noexcept; VBReader(VBReader const&) = delete; VBReader& operator= (VBReader const&) = delete; @@ -93,12 +105,12 @@ namespace DirectX // Does not support VB decls with D3D11_INPUT_PER_INSTANCE_DATA #endif - #if defined(__d3d12_h__) || defined(__d3d12_x_h__) + #if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) HRESULT __cdecl Initialize(const D3D12_INPUT_LAYOUT_DESC& vbDecl); // Does not support VB decls with D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA #endif - HRESULT __cdecl AddStream(_In_reads_bytes_(stride*nVerts) const void* vb, _In_ size_t nVerts, _In_ size_t inputSlot, _In_ size_t stride = 0); + HRESULT __cdecl AddStream(_In_reads_bytes_(stride*nVerts) const void* vb, _In_ size_t nVerts, _In_ size_t inputSlot, _In_ size_t stride = 0) noexcept; // Add vertex buffer to reader HRESULT __cdecl Read(_Out_writes_(count) XMVECTOR* buffer, _In_z_ const char* semanticName, _In_ unsigned int semanticIndex, _In_ size_t count, bool x2bias = false) const; @@ -110,7 +122,7 @@ namespace DirectX HRESULT __cdecl Read(_Out_writes_(count) 
XMFLOAT4* buffer, _In_z_ const char* semanticName, _In_ unsigned int semanticIndex, _In_ size_t count, bool x2bias = false) const; // Helpers for data extraction - void __cdecl Release(); + void __cdecl Release() noexcept; #if defined(__d3d11_h__) || defined(__d3d11_x_h__) const D3D11_INPUT_ELEMENT_DESC* GetElement(_In_z_ const char* semanticName, _In_ unsigned int semanticIndex) const @@ -121,7 +133,7 @@ namespace DirectX const D3D11_INPUT_ELEMENT_DESC* __cdecl GetElement11(_In_z_ const char* semanticName, _In_ unsigned int semanticIndex) const; #endif - #if defined(__d3d12_h__) || defined(__d3d12_x_h__) + #if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) const D3D12_INPUT_ELEMENT_DESC* __cdecl GetElement12(_In_z_ const char* semanticName, _In_ unsigned int semanticIndex) const; #endif @@ -136,8 +148,8 @@ namespace DirectX { public: VBWriter() noexcept(false); - VBWriter(VBWriter&& moveFrom) noexcept; - VBWriter& operator= (VBWriter&& moveFrom) noexcept; + VBWriter(VBWriter&&) noexcept; + VBWriter& operator= (VBWriter&&) noexcept; VBWriter(VBWriter const&) = delete; VBWriter& operator= (VBWriter const&) = delete; @@ -149,12 +161,12 @@ namespace DirectX // Does not support VB decls with D3D11_INPUT_PER_INSTANCE_DATA #endif - #if defined(__d3d12_h__) || defined(__d3d12_x_h__) + #if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) HRESULT __cdecl Initialize(const D3D12_INPUT_LAYOUT_DESC& vbDecl); // Does not support VB decls with D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA #endif - HRESULT __cdecl AddStream(_Out_writes_bytes_(stride*nVerts) void* vb, _In_ size_t nVerts, _In_ size_t inputSlot, _In_ size_t stride = 0); + HRESULT __cdecl AddStream(_Out_writes_bytes_(stride*nVerts) void* vb, _In_ size_t nVerts, _In_ size_t inputSlot, _In_ size_t stride = 0) noexcept; // Add vertex buffer to writer HRESULT __cdecl Write(_In_reads_(count) const XMVECTOR* buffer, _In_z_ const char* semanticName, _In_ unsigned int 
semanticIndex, _In_ size_t count, bool x2bias = false) const; @@ -166,7 +178,7 @@ namespace DirectX HRESULT __cdecl Write(_In_reads_(count) const XMFLOAT4* buffer, _In_z_ const char* semanticName, _In_ unsigned int semanticIndex, _In_ size_t count, bool x2bias = false) const; // Helpers for data insertion - void __cdecl Release(); + void __cdecl Release() noexcept; #if defined(__d3d11_h__) || defined(__d3d11_x_h__) const D3D11_INPUT_ELEMENT_DESC* __cdecl GetElement(_In_z_ const char* semanticName, _In_ unsigned int semanticIndex) const @@ -177,7 +189,7 @@ namespace DirectX const D3D11_INPUT_ELEMENT_DESC* __cdecl GetElement11(_In_z_ const char* semanticName, _In_ unsigned int semanticIndex) const; #endif - #if defined(__d3d12_h__) || defined(__d3d12_x_h__) + #if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) const D3D12_INPUT_ELEMENT_DESC* __cdecl GetElement12(_In_z_ const char* semanticName, _In_ unsigned int semanticIndex) const; #endif @@ -221,42 +233,42 @@ namespace DirectX _In_reads_(nFaces * 3) const uint16_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const uint32_t* pointRep, _In_reads_(nFaces * 3) const uint32_t* adjacency, _In_ size_t nVerts, - _Out_writes_(nFaces * 6) uint16_t* indicesAdj); + _Out_writes_(nFaces * 6) uint16_t* indicesAdj) noexcept; HRESULT __cdecl GenerateGSAdjacency( _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const uint32_t* pointRep, _In_reads_(nFaces * 3) const uint32_t* adjacency, _In_ size_t nVerts, - _Out_writes_(nFaces * 6) uint32_t* indicesAdj); + _Out_writes_(nFaces * 6) uint32_t* indicesAdj) noexcept; // Generates an IB suitable for Geometry Shader using D3D1x_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ //--------------------------------------------------------------------------------- // Normals, Tangents, and Bi-Tangents Computation - enum CNORM_FLAGS + enum CNORM_FLAGS : unsigned long { - CNORM_DEFAULT = 0x0, - // Default is to compute normals using 
weight-by-angle + CNORM_DEFAULT = 0x0, + // Default is to compute normals using weight-by-angle - CNORM_WEIGHT_BY_AREA = 0x1, - // Computes normals using weight-by-area + CNORM_WEIGHT_BY_AREA = 0x1, + // Computes normals using weight-by-area - CNORM_WEIGHT_EQUAL = 0x2, - // Compute normals with equal weights + CNORM_WEIGHT_EQUAL = 0x2, + // Compute normals with equal weights - CNORM_WIND_CW = 0x4, - // Vertices are clock-wise (defaults to CCW) + CNORM_WIND_CW = 0x4, + // Vertices are clock-wise (defaults to CCW) }; HRESULT __cdecl ComputeNormals( _In_reads_(nFaces * 3) const uint16_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, - _In_ DWORD flags, - _Out_writes_(nVerts) XMFLOAT3* normals); + _In_ CNORM_FLAGS flags, + _Out_writes_(nVerts) XMFLOAT3* normals) noexcept; HRESULT __cdecl ComputeNormals( _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, - _In_ DWORD flags, - _Out_writes_(nVerts) XMFLOAT3* normals); + _In_ CNORM_FLAGS flags, + _Out_writes_(nVerts) XMFLOAT3* normals) noexcept; // Computes vertex normals HRESULT __cdecl ComputeTangentFrame( @@ -265,73 +277,73 @@ namespace DirectX _In_reads_(nVerts) const XMFLOAT3* normals, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, _Out_writes_opt_(nVerts) XMFLOAT3* tangents, - _Out_writes_opt_(nVerts) XMFLOAT3* bitangents); + _Out_writes_opt_(nVerts) XMFLOAT3* bitangents) noexcept; HRESULT __cdecl ComputeTangentFrame( _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT3* normals, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, _Out_writes_opt_(nVerts) XMFLOAT3* tangents, - _Out_writes_opt_(nVerts) XMFLOAT3* bitangents); + _Out_writes_opt_(nVerts) XMFLOAT3* bitangents) noexcept; HRESULT __cdecl ComputeTangentFrame( _In_reads_(nFaces * 3) const uint16_t* 
indices, _In_ size_t nFaces, _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT3* normals, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, _Out_writes_opt_(nVerts) XMFLOAT4* tangents, - _Out_writes_opt_(nVerts) XMFLOAT3* bitangents); + _Out_writes_opt_(nVerts) XMFLOAT3* bitangents) noexcept; HRESULT __cdecl ComputeTangentFrame( _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT3* normals, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, _Out_writes_opt_(nVerts) XMFLOAT4* tangents, - _Out_writes_opt_(nVerts) XMFLOAT3* bitangents); + _Out_writes_opt_(nVerts) XMFLOAT3* bitangents) noexcept; HRESULT __cdecl ComputeTangentFrame( _In_reads_(nFaces * 3) const uint16_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT3* normals, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, - _Out_writes_(nVerts) XMFLOAT4* tangents); + _Out_writes_(nVerts) XMFLOAT4* tangents) noexcept; HRESULT __cdecl ComputeTangentFrame( _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT3* normals, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, - _Out_writes_(nVerts) XMFLOAT4* tangents); + _Out_writes_(nVerts) XMFLOAT4* tangents) noexcept; // Computes tangents and/or bi-tangents (optionally with handedness stored in .w) //--------------------------------------------------------------------------------- // Mesh clean-up and validation - enum VALIDATE_FLAGS + enum VALIDATE_FLAGS : unsigned long { - VALIDATE_DEFAULT = 0x0, + VALIDATE_DEFAULT = 0x0, - VALIDATE_BACKFACING = 0x1, - // Check for duplicate neighbor from triangle (requires adjacency) + VALIDATE_BACKFACING = 0x1, + // Check for duplicate neighbor from triangle (requires adjacency) - VALIDATE_BOWTIES = 
0x2, - // Check for two fans of triangles using the same vertex (requires adjacency) + VALIDATE_BOWTIES = 0x2, + // Check for two fans of triangles using the same vertex (requires adjacency) - VALIDATE_DEGENERATE = 0x4, - // Check for degenerate triangles + VALIDATE_DEGENERATE = 0x4, + // Check for degenerate triangles - VALIDATE_UNUSED = 0x8, - // Check for issues with 'unused' triangles + VALIDATE_UNUSED = 0x8, + // Check for issues with 'unused' triangles - VALIDATE_ASYMMETRIC_ADJ = 0x10, - // Checks that neighbors are symmetric (requires adjacency) + VALIDATE_ASYMMETRIC_ADJ = 0x10, + // Checks that neighbors are symmetric (requires adjacency) }; HRESULT __cdecl Validate( _In_reads_(nFaces * 3) const uint16_t* indices, _In_ size_t nFaces, _In_ size_t nVerts, _In_reads_opt_(nFaces * 3) const uint32_t* adjacency, - _In_ DWORD flags, _In_opt_ std::wstring* msgs = nullptr); + _In_ VALIDATE_FLAGS flags, _In_opt_ std::wstring* msgs = nullptr); HRESULT __cdecl Validate( _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, _In_ size_t nVerts, _In_reads_opt_(nFaces * 3) const uint32_t* adjacency, - _In_ DWORD flags, _In_opt_ std::wstring* msgs = nullptr); + _In_ VALIDATE_FLAGS flags, _In_opt_ std::wstring* msgs = nullptr); // Checks the mesh for common problems, return 'S_OK' if no problems were found HRESULT __cdecl Clean( @@ -361,6 +373,15 @@ namespace DirectX _In_ std::function weldTest); // Welds vertices together based on a test function + HRESULT __cdecl ConcatenateMesh( + _In_ size_t nFaces, + _In_ size_t nVerts, + _Out_writes_(nFaces) uint32_t* faceDestMap, + _Out_writes_(nVerts) uint32_t* vertexDestMap, + _Inout_ size_t& totalFaces, + _Inout_ size_t& totalVerts) noexcept; + // Merge meshes together + //--------------------------------------------------------------------------------- // Mesh Optimization @@ -369,17 +390,17 @@ namespace DirectX _Out_writes_(nFaces) uint32_t* faceRemap); // Reorders faces by attribute id - enum OPTFACES + enum 
OPTFACES : uint32_t { - OPTFACES_V_DEFAULT = 12, - OPTFACES_R_DEFAULT = 7, - // Default vertex cache size and restart threshold which is considered 'device independent' + OPTFACES_V_DEFAULT = 12, + OPTFACES_R_DEFAULT = 7, + // Default vertex cache size and restart threshold which is considered 'device independent' - OPTFACES_LRU_DEFAULT = 32, - // Default vertex cache size for the LRU algorithm + OPTFACES_LRU_DEFAULT = 32, + // Default vertex cache size for the LRU algorithm - OPTFACES_V_STRIPORDER = 0, - // Indicates no vertex cache optimization, only reordering into strips + OPTFACES_V_STRIPORDER = 0, + // Indicates no vertex cache optimization, only reordering into strips }; HRESULT __cdecl OptimizeFaces( @@ -432,10 +453,10 @@ namespace DirectX HRESULT __cdecl OptimizeVertices( _In_reads_(nFaces * 3) const uint16_t* indices, _In_ size_t nFaces, _In_ size_t nVerts, - _Out_writes_(nVerts) uint32_t* vertexRemap, _Out_opt_ size_t* trailingUnused = nullptr); + _Out_writes_(nVerts) uint32_t* vertexRemap, _Out_opt_ size_t* trailingUnused = nullptr) noexcept; HRESULT __cdecl OptimizeVertices( _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, _In_ size_t nVerts, - _Out_writes_(nVerts) uint32_t* vertexRemap, _Out_opt_ size_t* trailingUnused = nullptr); + _Out_writes_(nVerts) uint32_t* vertexRemap, _Out_opt_ size_t* trailingUnused = nullptr) noexcept; // Reorders vertices in order of use //--------------------------------------------------------------------------------- @@ -444,59 +465,59 @@ namespace DirectX HRESULT __cdecl ReorderIB( _In_reads_(nFaces * 3) const uint16_t* ibin, _In_ size_t nFaces, _In_reads_(nFaces) const uint32_t* faceRemap, - _Out_writes_(nFaces * 3) uint16_t* ibout); + _Out_writes_(nFaces * 3) uint16_t* ibout) noexcept; HRESULT __cdecl ReorderIB( _Inout_updates_all_(nFaces * 3) uint16_t* ib, _In_ size_t nFaces, - _In_reads_(nFaces) const uint32_t* faceRemap); + _In_reads_(nFaces) const uint32_t* faceRemap) noexcept; HRESULT __cdecl 
ReorderIB( _In_reads_(nFaces * 3) const uint32_t* ibin, _In_ size_t nFaces, _In_reads_(nFaces) const uint32_t* faceRemap, - _Out_writes_(nFaces * 3) uint32_t* ibout); + _Out_writes_(nFaces * 3) uint32_t* ibout) noexcept; HRESULT __cdecl ReorderIB( _Inout_updates_all_(nFaces * 3) uint32_t* ib, _In_ size_t nFaces, - _In_reads_(nFaces) const uint32_t* faceRemap); + _In_reads_(nFaces) const uint32_t* faceRemap) noexcept; // Applies a face remap reordering to an index buffer HRESULT __cdecl ReorderIBAndAdjacency( _In_reads_(nFaces * 3) const uint16_t* ibin, _In_ size_t nFaces, _In_reads_(nFaces * 3) const uint32_t* adjin, _In_reads_(nFaces) const uint32_t* faceRemap, - _Out_writes_(nFaces * 3) uint16_t* ibout, _Out_writes_(nFaces * 3) uint32_t* adjout); + _Out_writes_(nFaces * 3) uint16_t* ibout, _Out_writes_(nFaces * 3) uint32_t* adjout) noexcept; HRESULT __cdecl ReorderIBAndAdjacency( _Inout_updates_all_(nFaces * 3) uint16_t* ib, _In_ size_t nFaces, _Inout_updates_all_(nFaces * 3) uint32_t* adj, - _In_reads_(nFaces) const uint32_t* faceRemap); + _In_reads_(nFaces) const uint32_t* faceRemap) noexcept; HRESULT __cdecl ReorderIBAndAdjacency( _In_reads_(nFaces * 3) const uint32_t* ibin, _In_ size_t nFaces, _In_reads_(nFaces * 3) const uint32_t* adjin, _In_reads_(nFaces) const uint32_t* faceRemap, - _Out_writes_(nFaces * 3) uint32_t* ibout, _Out_writes_(nFaces * 3) uint32_t* adjout); + _Out_writes_(nFaces * 3) uint32_t* ibout, _Out_writes_(nFaces * 3) uint32_t* adjout) noexcept; HRESULT __cdecl ReorderIBAndAdjacency( _Inout_updates_all_(nFaces * 3) uint32_t* ib, _In_ size_t nFaces, _Inout_updates_all_(nFaces * 3) uint32_t* adj, - _In_reads_(nFaces) const uint32_t* faceRemap); + _In_reads_(nFaces) const uint32_t* faceRemap) noexcept; // Applies a face remap reordering to an index buffer and adjacency HRESULT __cdecl FinalizeIB( _In_reads_(nFaces * 3) const uint16_t* ibin, _In_ size_t nFaces, _In_reads_(nVerts) const uint32_t* vertexRemap, _In_ size_t nVerts, - 
_Out_writes_(nFaces * 3) uint16_t* ibout); + _Out_writes_(nFaces * 3) uint16_t* ibout) noexcept; HRESULT __cdecl FinalizeIB( _Inout_updates_all_(nFaces * 3) uint16_t* ib, _In_ size_t nFaces, - _In_reads_(nVerts) const uint32_t* vertexRemap, _In_ size_t nVerts); + _In_reads_(nVerts) const uint32_t* vertexRemap, _In_ size_t nVerts) noexcept; HRESULT __cdecl FinalizeIB( _In_reads_(nFaces * 3) const uint32_t* ibin, _In_ size_t nFaces, _In_reads_(nVerts) const uint32_t* vertexRemap, _In_ size_t nVerts, - _Out_writes_(nFaces * 3) uint32_t* ibout); + _Out_writes_(nFaces * 3) uint32_t* ibout) noexcept; HRESULT __cdecl FinalizeIB( _Inout_updates_all_(nFaces * 3) uint32_t* ib, _In_ size_t nFaces, - _In_reads_(nVerts) const uint32_t* vertexRemap, _In_ size_t nVerts); + _In_reads_(nVerts) const uint32_t* vertexRemap, _In_ size_t nVerts) noexcept; // Applies a vertex remap reordering to an index buffer HRESULT __cdecl FinalizeVB( _In_reads_bytes_(nVerts*stride) const void* vbin, _In_ size_t stride, _In_ size_t nVerts, _In_reads_opt_(nDupVerts) const uint32_t* dupVerts, _In_ size_t nDupVerts, _In_reads_opt_(nVerts + nDupVerts) const uint32_t* vertexRemap, - _Out_writes_bytes_((nVerts + nDupVerts)*stride) void* vbout); + _Out_writes_bytes_((nVerts + nDupVerts)*stride) void* vbout) noexcept; HRESULT __cdecl FinalizeVB( _Inout_updates_bytes_all_(nVerts*stride) void* vb, _In_ size_t stride, _In_ size_t nVerts, - _In_reads_(nVerts) const uint32_t* vertexRemap); + _In_reads_(nVerts) const uint32_t* vertexRemap) noexcept; // Applies a vertex remap and/or a vertex duplication set to a vertex buffer HRESULT __cdecl FinalizeVBAndPointReps( @@ -505,20 +526,125 @@ namespace DirectX _In_reads_opt_(nDupVerts) const uint32_t* dupVerts, _In_ size_t nDupVerts, _In_reads_opt_(nVerts + nDupVerts) const uint32_t* vertexRemap, _Out_writes_bytes_((nVerts + nDupVerts)*stride) void* vbout, - _Out_writes_(nVerts + nDupVerts) uint32_t* prout); + _Out_writes_(nVerts + nDupVerts) uint32_t* prout) noexcept; 
HRESULT __cdecl FinalizeVBAndPointReps( _Inout_updates_bytes_all_(nVerts*stride) void* vb, _In_ size_t stride, _In_ size_t nVerts, _Inout_updates_all_(nVerts) uint32_t* pointRep, - _In_reads_(nVerts) const uint32_t* vertexRemap); + _In_reads_(nVerts) const uint32_t* vertexRemap) noexcept; // Applies a vertex remap and/or a vertex duplication set to a vertex buffer and point representatives HRESULT __cdecl CompactVB( _In_reads_bytes_(nVerts*stride) const void* vbin, _In_ size_t stride, _In_ size_t nVerts, _In_ size_t trailingUnused, _In_reads_opt_(nVerts) const uint32_t* vertexRemap, - _Out_writes_bytes_((nVerts - trailingUnused)*stride) void* vbout); + _Out_writes_bytes_((nVerts - trailingUnused)*stride) void* vbout) noexcept; // Applies a vertex remap which contains a known number of unused entries at the end + //--------------------------------------------------------------------------------- + // Meshlet Generation + + constexpr size_t MESHLET_DEFAULT_MAX_VERTS = 128u; + constexpr size_t MESHLET_DEFAULT_MAX_PRIMS = 128u; + + constexpr size_t MESHLET_MINIMUM_SIZE = 32u; + constexpr size_t MESHLET_MAXIMUM_SIZE = 256u; + + enum MESHLET_FLAGS : unsigned long + { + MESHLET_DEFAULT = 0x0, + + MESHLET_WIND_CW = 0x1, + // Vertices are clock-wise (defaults to CCW) + }; + + struct Meshlet + { + uint32_t VertCount; + uint32_t VertOffset; + uint32_t PrimCount; + uint32_t PrimOffset; + }; + + struct MeshletTriangle + { + uint32_t i0 : 10; + uint32_t i1 : 10; + uint32_t i2 : 10; + }; + + struct CullData + { + DirectX::BoundingSphere BoundingSphere; // xyz = center, w = radius + DirectX::PackedVector::XMUBYTEN4 NormalCone; // xyz = axis, w = -cos(a + 90) + float ApexOffset; // apex = center - axis * offset + }; + + HRESULT __cdecl ComputeMeshlets( + _In_reads_(nFaces * 3) const uint16_t* indices, _In_ size_t nFaces, + _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, + _In_reads_opt_(nFaces * 3) const uint32_t* adjacency, + _Inout_ std::vector& meshlets, + 
_Inout_ std::vector& uniqueVertexIB, + _Inout_ std::vector& primitiveIndices, + _In_ size_t maxVerts = MESHLET_DEFAULT_MAX_VERTS, _In_ size_t maxPrims = MESHLET_DEFAULT_MAX_PRIMS); + HRESULT __cdecl ComputeMeshlets( + _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, + _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, + _In_reads_opt_(nFaces * 3) const uint32_t* adjacency, + _Inout_ std::vector& meshlets, + _Inout_ std::vector& uniqueVertexIB, + _Inout_ std::vector& primitiveIndices, + _In_ size_t maxVerts = MESHLET_DEFAULT_MAX_VERTS, _In_ size_t maxPrims = MESHLET_DEFAULT_MAX_PRIMS); + // Generates meshlets for a single subset mesh + + HRESULT __cdecl ComputeMeshlets( + _In_reads_(nFaces * 3) const uint16_t* indices, _In_ size_t nFaces, + _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, + _In_reads_(nSubsets) const std::pair* subsets, _In_ size_t nSubsets, + _In_reads_opt_(nFaces * 3) const uint32_t* adjacency, + _Inout_ std::vector& meshlets, + _Inout_ std::vector& uniqueVertexIB, + _Inout_ std::vector& primitiveIndices, + _Out_writes_(nSubsets) std::pair* meshletSubsets, + _In_ size_t maxVerts = MESHLET_DEFAULT_MAX_VERTS, _In_ size_t maxPrims = MESHLET_DEFAULT_MAX_PRIMS); + HRESULT __cdecl ComputeMeshlets( + _In_reads_(nFaces * 3) const uint32_t* indices, _In_ size_t nFaces, + _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, + _In_reads_(nSubsets) const std::pair* subsets, _In_ size_t nSubsets, + _In_reads_opt_(nFaces * 3) const uint32_t* adjacency, + _Inout_ std::vector& meshlets, + _Inout_ std::vector& uniqueVertexIB, + _Inout_ std::vector& primitiveIndices, + _Out_writes_(nSubsets) std::pair* meshletSubsets, + _In_ size_t maxVerts = MESHLET_DEFAULT_MAX_VERTS, _In_ size_t maxPrims = MESHLET_DEFAULT_MAX_PRIMS); + // Generates meshlets for a mesh with several face subsets + + HRESULT __cdecl ComputeCullData( + _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, + 
_In_reads_(nMeshlets) const Meshlet* meshlets, _In_ size_t nMeshlets, + _In_reads_(nVertIndices) const uint16_t* uniqueVertexIndices, _In_ size_t nVertIndices, + _In_reads_(nPrimIndices) const MeshletTriangle* primitiveIndices, _In_ size_t nPrimIndices, + _Out_writes_(nMeshlets) CullData* cullData, + _In_ MESHLET_FLAGS flags = MESHLET_DEFAULT) noexcept; + HRESULT __cdecl ComputeCullData( + _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, + _In_reads_(nMeshlets) const Meshlet* meshlets, _In_ size_t nMeshlets, + _In_reads_(nVertIndices) const uint32_t* uniqueVertexIndices, _In_ size_t nVertIndices, + _In_reads_(nPrimIndices) const MeshletTriangle* primitiveIndices, _In_ size_t nPrimIndices, + _Out_writes_(nMeshlets) CullData* cullData, + _In_ MESHLET_FLAGS flags = MESHLET_DEFAULT) noexcept; + // Computes culling data for each input meshlet + + //--------------------------------------------------------------------------------- +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" +#endif + #include "DirectXMesh.inl" +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + } // namespace diff --git a/Source/ThirdParty/DirectXMesh/DirectXMesh.inl b/Source/ThirdParty/DirectXMesh/DirectXMesh.inl index bed0f4ae6..7f9cbb37e 100644 --- a/Source/ThirdParty/DirectXMesh/DirectXMesh.inl +++ b/Source/ThirdParty/DirectXMesh/DirectXMesh.inl @@ -1,9 +1,9 @@ //------------------------------------------------------------------------------------- // DirectXMesh.inl -// +// // DirectX Mesh Geometry Library // -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
// // http://go.microsoft.com/fwlink/?LinkID=324981 @@ -11,17 +11,25 @@ #pragma once +//===================================================================================== +// Bitmask flags enumerator operators +//===================================================================================== +DEFINE_ENUM_FLAG_OPERATORS(CNORM_FLAGS); +DEFINE_ENUM_FLAG_OPERATORS(VALIDATE_FLAGS); +DEFINE_ENUM_FLAG_OPERATORS(MESHLET_FLAGS); + + //===================================================================================== // DXGI Format Utilities //===================================================================================== _Use_decl_annotations_ -inline bool __cdecl IsValidVB(DXGI_FORMAT fmt) +inline bool __cdecl IsValidVB(DXGI_FORMAT fmt) noexcept { return BytesPerElement(fmt) != 0; } _Use_decl_annotations_ -inline bool __cdecl IsValidIB(DXGI_FORMAT fmt) +constexpr bool __cdecl IsValidIB(DXGI_FORMAT fmt) noexcept { return (fmt == DXGI_FORMAT_R32_UINT || fmt == DXGI_FORMAT_R16_UINT) != 0; } diff --git a/Source/ThirdParty/DirectXMesh/LICENSE b/Source/ThirdParty/DirectXMesh/LICENSE index e8e78a94f..9e841e7a2 100644 --- a/Source/ThirdParty/DirectXMesh/LICENSE +++ b/Source/ThirdParty/DirectXMesh/LICENSE @@ -1,21 +1,21 @@ - The MIT License (MIT) + MIT License -Copyright (c) 2014-2019 Microsoft Corp + Copyright (c) Microsoft Corporation. 
-Permission is hereby granted, free of charge, to any person obtaining a copy of this -software and associated documentation files (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, copy, modify, -merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to the following -conditions: + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies -or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/Source/ThirdParty/UVAtlas/LICENSE b/Source/ThirdParty/UVAtlas/LICENSE index e8e78a94f..9e841e7a2 100644 --- a/Source/ThirdParty/UVAtlas/LICENSE +++ b/Source/ThirdParty/UVAtlas/LICENSE @@ -1,21 +1,21 @@ - The MIT License (MIT) + MIT License -Copyright (c) 2014-2019 Microsoft Corp + Copyright (c) Microsoft Corporation. -Permission is hereby granted, free of charge, to any person obtaining a copy of this -software and associated documentation files (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, copy, modify, -merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to the following -conditions: + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies -or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/Source/ThirdParty/UVAtlas/UVAtlas.h b/Source/ThirdParty/UVAtlas/UVAtlas.h index edd84f54c..41b53bf41 100644 --- a/Source/ThirdParty/UVAtlas/UVAtlas.h +++ b/Source/ThirdParty/UVAtlas/UVAtlas.h @@ -1,7 +1,7 @@ //------------------------------------------------------------------------------------- // UVAtlas // -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
// // http://go.microsoft.com/fwlink/?LinkID=512686 @@ -9,21 +9,41 @@ #pragma once -#include - -#if defined(_XBOX_ONE) && defined(_TITLE) -#include +#ifdef _WIN32 +#ifdef _GAMING_XBOX_SCARLETT +#pragma warning(push) +#pragma warning(disable: 5204 5249) +#include +#pragma warning(pop) +#elif defined(_GAMING_XBOX) +#pragma warning(push) +#pragma warning(disable: 5204) +#include +#pragma warning(pop) +#elif defined(_XBOX_ONE) && defined(_TITLE) +#error This library no longer supports legacy Xbox One XDK +#else +#include +#ifdef USING_DIRECTX_HEADERS +#include #else -#include #include #endif +#endif +#else // !WIN32 +#include +#include +#endif -#include - +#include +#include #include #include -#define UVATLAS_VERSION 160 +#include + +#define UVATLAS_VERSION 187 + namespace DirectX { @@ -37,28 +57,28 @@ namespace DirectX // UVATLAS_IMT_WRAP_U means the texture wraps in the U direction // UVATLAS_IMT_WRAP_V means the texture wraps in the V direction // UVATLAS_IMT_WRAP_UV means the texture wraps in both directions - enum UVATLAS_IMT + enum UVATLAS_IMT : unsigned int { UVATLAS_IMT_DEFAULT = 0x00, UVATLAS_IMT_WRAP_U = 0x01, UVATLAS_IMT_WRAP_V = 0x02, UVATLAS_IMT_WRAP_UV = 0x03, - UVATLAS_IMT_VALIDBITS = 0x03, }; // These options are only valid for UVAtlasCreate and UVAtlasPartition // UVATLAS_DEFAULT - Meshes with more than 25k faces go through fast, meshes with fewer than 25k faces go through quality // UVATLAS_GEODESIC_FAST - Uses approximations to improve charting speed at the cost of added stretch or more charts. // UVATLAS_GEODESIC_QUALITY - Provides better quality charts, but requires more time and memory than fast. 
- enum UVATLAS + enum UVATLAS : unsigned int { UVATLAS_DEFAULT = 0x00, UVATLAS_GEODESIC_FAST = 0x01, UVATLAS_GEODESIC_QUALITY = 0x02, - UVATLAS_PARTITIONVALIDBITS = 0x03, + UVATLAS_LIMIT_MERGE_STRETCH = 0x04, + UVATLAS_LIMIT_FACE_STRETCH = 0x08, }; - static const float UVATLAS_DEFAULT_CALLBACK_FREQUENCY = 0.0001f; + constexpr float UVATLAS_DEFAULT_CALLBACK_FREQUENCY = 0.0001f; //============================================================================ // @@ -94,7 +114,7 @@ namespace DirectX // integrated metric tensor for that face. This lets you control // the way this triangle may be stretched in the atlas. The IMT // passed in will be 3 floats (a,b,c) and specify a symmetric - // matrix (a b) that, given a vector (s,t), specifies the + // matrix (a b) that, given a vector (s,t), specifies the // (b c) // distance between a vector v1 and a vector v2 = v1 + (s,t) as // sqrt((s, t) * M * (s, t)^T). @@ -129,8 +149,8 @@ namespace DirectX HRESULT __cdecl UVAtlasCreate( _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, - _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint16_t))) - _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint32_t))) const void* indices, + _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint16_t))) + _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint32_t))) const void* indices, _In_ DXGI_FORMAT indexFormat, _In_ size_t nFaces, _In_ size_t maxChartNumber, @@ -138,18 +158,18 @@ namespace DirectX _In_ size_t width, _In_ size_t height, _In_ float gutter, - _In_reads_(nFaces*3) const uint32_t *adjacency, - _In_reads_opt_(nFaces*3) const uint32_t *falseEdgeAdjacency, - _In_reads_opt_(nFaces*3) const float *pIMTArray, - _In_opt_ std::function statusCallBack, + _In_reads_(nFaces * 3) const uint32_t* adjacency, + _In_reads_opt_(nFaces * 3) const uint32_t* falseEdgeAdjacency, + _In_reads_opt_(nFaces * 3) const float* 
pIMTArray, + _In_ std::function statusCallBack, _In_ float callbackFrequency, - _In_ DWORD options, + _In_ UVATLAS options, _Inout_ std::vector& vMeshOutVertexBuffer, - _Inout_ std::vector& vMeshOutIndexBuffer, - _Inout_opt_ std::vector* pvFacePartitioning = nullptr, - _Inout_opt_ std::vector* pvVertexRemapArray = nullptr, - _Out_opt_ float *maxStretchOut = nullptr, - _Out_opt_ size_t *numChartsOut = nullptr); + _Inout_ std::vector& vMeshOutIndexBuffer, + _Inout_opt_ std::vector* pvFacePartitioning = nullptr, + _Inout_opt_ std::vector* pvVertexRemapArray = nullptr, + _Out_opt_ float* maxStretchOut = nullptr, + _Out_opt_ size_t* numChartsOut = nullptr); // This has the same exact arguments as Create, except that it does not perform the // final packing step. This method allows one to get a partitioning out, and possibly @@ -178,25 +198,25 @@ namespace DirectX HRESULT __cdecl UVAtlasPartition( _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, - _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint16_t))) - _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint32_t))) const void* indices, + _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint16_t))) + _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint32_t))) const void* indices, _In_ DXGI_FORMAT indexFormat, _In_ size_t nFaces, _In_ size_t maxChartNumber, _In_ float maxStretch, - _In_reads_(nFaces*3) const uint32_t *adjacency, - _In_reads_opt_(nFaces*3) const uint32_t *falseEdgeAdjacency, - _In_reads_opt_(nFaces*3) const float *pIMTArray, - _In_opt_ std::function statusCallBack, + _In_reads_(nFaces * 3) const uint32_t* adjacency, + _In_reads_opt_(nFaces * 3) const uint32_t* falseEdgeAdjacency, + _In_reads_opt_(nFaces * 3) const float* pIMTArray, + _In_ std::function statusCallBack, _In_ float callbackFrequency, - _In_ DWORD options, + _In_ UVATLAS options, _Inout_ std::vector& 
vMeshOutVertexBuffer, _Inout_ std::vector& vMeshOutIndexBuffer, _Inout_opt_ std::vector* pvFacePartitioning, _Inout_opt_ std::vector* pvVertexRemapArray, _Inout_ std::vector& vPartitionResultAdjacency, - _Out_opt_ float *maxStretchOut = nullptr, - _Out_opt_ size_t *numChartsOut = nullptr); + _Out_opt_ float* maxStretchOut = nullptr, + _Out_opt_ size_t* numChartsOut = nullptr); // This takes the face partitioning result from Partition and packs it into an // atlas of the given size. pPartitionResultAdjacency should be derived from @@ -209,7 +229,7 @@ namespace DirectX _In_ size_t height, _In_ float gutter, _In_ const std::vector& vPartitionResultAdjacency, - _In_opt_ std::function statusCallBack, + _In_ std::function statusCallBack, _In_ float callbackFrequency); @@ -244,14 +264,14 @@ namespace DirectX HRESULT __cdecl UVAtlasComputeIMTFromPerVertexSignal( _In_reads_(nVerts) const XMFLOAT3* positions, _In_ size_t nVerts, - _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint16_t))) - _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint32_t))) const void* indices, + _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint16_t))) + _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint32_t))) const void* indices, _In_ DXGI_FORMAT indexFormat, _In_ size_t nFaces, - _In_reads_(signalStride*nVerts) const float *pVertexSignal, + _In_reads_(signalStride* nVerts) const float* pVertexSignal, _In_ size_t signalDimension, _In_ size_t signalStride, - _In_opt_ std::function statusCallBack, + _In_ std::function statusCallBack, _Out_writes_(nFaces * 3) float* pIMTArray); // This function is used to calculate the IMT from data that varies over the @@ -275,16 +295,16 @@ namespace DirectX _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, - _When_(indexFormat == DXGI_FORMAT_R16_UINT, 
_In_reads_bytes_(nFaces*sizeof(uint16_t))) - _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint32_t))) const void* indices, + _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint16_t))) + _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint32_t))) const void* indices, _In_ DXGI_FORMAT indexFormat, _In_ size_t nFaces, _In_ size_t signalDimension, _In_ float maxUVDistance, - _In_ std::function - signalCallback, - _In_opt_ void *userData, - _In_opt_ std::function statusCallBack, + _In_ std::function + signalCallback, + _In_opt_ void* userData, + _In_ std::function statusCallBack, _Out_writes_(nFaces * 3) float* pIMTArray); // This function is used to calculate the IMT from texture data. Given a texture @@ -300,15 +320,15 @@ namespace DirectX _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, - _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint16_t))) - _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint32_t))) const void* indices, + _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint16_t))) + _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint32_t))) const void* indices, _In_ DXGI_FORMAT indexFormat, _In_ size_t nFaces, - _In_reads_(width*height*4) const float* pTexture, + _In_reads_(width* height * 4) const float* pTexture, _In_ size_t width, _In_ size_t height, - _In_ DWORD options, - _In_opt_ std::function statusCallBack, + _In_ UVATLAS_IMT options, + _In_ std::function statusCallBack, _Out_writes_(nFaces * 3) float* pIMTArray); // This function is very similar to UVAtlasComputeIMTFromTexture, but it can @@ -325,17 +345,17 @@ namespace DirectX _In_reads_(nVerts) const XMFLOAT3* positions, _In_reads_(nVerts) const XMFLOAT2* texcoords, _In_ size_t nVerts, - _When_(indexFormat == 
DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint16_t))) - _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces*sizeof(uint32_t))) const void* indices, + _When_(indexFormat == DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint16_t))) + _When_(indexFormat != DXGI_FORMAT_R16_UINT, _In_reads_bytes_(nFaces * 3 * sizeof(uint32_t))) const void* indices, _In_ DXGI_FORMAT indexFormat, _In_ size_t nFaces, - _In_reads_(width*height*nComponents) const float *pTexelSignal, + _In_reads_(width* height* nComponents) const float* pTexelSignal, _In_ size_t width, _In_ size_t height, _In_ size_t signalDimension, _In_ size_t nComponents, - _In_ DWORD options, - _In_opt_ std::function statusCallBack, + _In_ UVATLAS_IMT options, + _In_ std::function statusCallBack, _Out_writes_(nFaces * 3) float* pIMTArray); // This function is for applying the a vertex remap array from UVAtlasCreate/UVAtlasPartition to a vertex buffer @@ -344,10 +364,22 @@ namespace DirectX // vbout - This is the output vertex buffer and is nNewVerts*stride in size // nNewVerts - This should be >= nVerts HRESULT __cdecl UVAtlasApplyRemap( - _In_reads_bytes_(nVerts*stride) const void* vbin, + _In_reads_bytes_(nVerts* stride) const void* vbin, _In_ size_t stride, _In_ size_t nVerts, _In_ size_t nNewVerts, _In_reads_(nNewVerts) const uint32_t* vertexRemap, - _Out_writes_bytes_(nNewVerts*stride) void* vbout ); -} \ No newline at end of file + _Out_writes_bytes_(nNewVerts* stride) void* vbout) noexcept; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" +#endif + + DEFINE_ENUM_FLAG_OPERATORS(UVATLAS_IMT); + DEFINE_ENUM_FLAG_OPERATORS(UVATLAS); + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +} diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs index 9c589cfe1..f7c19c437 100644 --- 
a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs @@ -1,4 +1,4 @@ -// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. using System.IO; using Flax.Build; @@ -32,14 +32,14 @@ namespace Flax.Deps.Dependencies public override void Build(BuildOptions options) { var root = options.IntermediateFolder; - var solutionPath = Path.Combine(root, "DirectXMesh_Desktop_2015.sln"); + var solutionPath = Path.Combine(root, "DirectXMesh_Desktop_2022_Win10.sln"); var configuration = "Release"; var outputFileNames = new[] { "DirectXMesh.lib", "DirectXMesh.pdb", }; - var binFolder = Path.Combine(root, "DirectXMesh", "Bin", "Desktop_2015"); + var binFolder = Path.Combine(root, "DirectXMesh", "Bin", "Desktop_2022_Win10"); // Get the source CloneGitRepoFast(root, "https://github.com/Microsoft/DirectXMesh.git"); @@ -50,14 +50,15 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: { - // Build for Win64 - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, TargetPlatform.Windows, TargetArchitecture.x64); - foreach (var file in outputFileNames) + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) { - Utilities.FileCopy(Path.Combine(binFolder, "x64", "Release", file), Path.Combine(depsFolder, file)); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + var depsFolder = GetThirdPartyFolder(options, TargetPlatform.Windows, architecture); + foreach (var file in outputFileNames) + { + Utilities.FileCopy(Path.Combine(binFolder, architecture.ToString(), "Release", file), Path.Combine(depsFolder, file)); + } } - break; } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs index 3cc80f888..384cb25e8 100644 --- 
a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs @@ -52,12 +52,15 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: { - var solutionPath = Path.Combine(root, "DirectXTex_Desktop_2022.sln"); - var binFolder = Path.Combine(root, "DirectXTex", "Bin", "Desktop_2022"); - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in outputFileNames) - Utilities.FileCopy(Path.Combine(binFolder, "x64", configuration, file), Path.Combine(depsFolder, file)); + var solutionPath = Path.Combine(root, "DirectXTex_Desktop_2022_Win10.sln"); + var binFolder = Path.Combine(root, "DirectXTex", "Bin", "Desktop_2022_Win10"); + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + { + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in outputFileNames) + Utilities.FileCopy(Path.Combine(binFolder, architecture.ToString(), configuration, file), Path.Combine(depsFolder, file)); + } break; } case TargetPlatform.UWP: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs b/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs index 4d288e765..1f8f046f0 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs @@ -1,5 +1,6 @@ -// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+using System.Collections.Generic; using System.IO; using Flax.Build; @@ -32,14 +33,14 @@ namespace Flax.Deps.Dependencies public override void Build(BuildOptions options) { var root = options.IntermediateFolder; - var solutionPath = Path.Combine(root, "UVAtlas", "UVAtlas_2015.sln"); + var solutionPath = Path.Combine(root, "UVAtlas_2022_Win10.sln"); var configuration = "Release"; var outputFileNames = new[] { "UVAtlas.lib", "UVAtlas.pdb", }; - var binFolder = Path.Combine(root, "UVAtlas", "Bin", "Desktop_2015"); + var binFolder = Path.Combine(root, "UVAtlas", "Bin", "Desktop_2022_Win10"); // Get the source CloneGitRepoFast(root, "https://github.com/Microsoft/UVAtlas.git"); @@ -51,13 +52,15 @@ namespace Flax.Deps.Dependencies case TargetPlatform.Windows: { // Build for Win64 - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, TargetPlatform.Windows, TargetArchitecture.x64); - foreach (var file in outputFileNames) + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) { - Utilities.FileCopy(Path.Combine(binFolder, "x64", "Release", file), Path.Combine(depsFolder, file)); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString(), new Dictionary() { { "RestorePackagesConfig", "true" } }); + var depsFolder = GetThirdPartyFolder(options, TargetPlatform.Windows, architecture); + foreach (var file in outputFileNames) + { + Utilities.FileCopy(Path.Combine(binFolder, architecture.ToString(), "Release", file), Path.Combine(depsFolder, file)); + } } - break; } } From a716025094758de547404b0618f360a00f59cbaa Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Fri, 10 May 2024 22:12:14 +0300 Subject: [PATCH 117/292] Patch pix3.h for Windows on ARM --- .../DirectX/DX12/GPUContextDX12.cpp | 17 ++++++++++++++--- Source/ThirdParty/WinPixEventRuntime/pix3.h | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git 
a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 970e55ce5..81fcd1d2c 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -2,6 +2,20 @@ #if GRAPHICS_API_DIRECTX12 +#include "Engine/Graphics/Config.h" +#if USE_PIX && GPU_ALLOW_PROFILE_EVENTS +// Include these header files before pix3 +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#define NOGDI +#define NODRAWTEXT +//#define NOCTLMGR +#define NOFLATSBAPIS +#include +#include +#include +#endif + #include "GPUContextDX12.h" #include "Engine/Core/Log.h" #include "Engine/Core/Math/Viewport.h" @@ -22,9 +36,6 @@ #include "Engine/Profiler/RenderStats.h" #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Threading/Threading.h" -#if USE_PIX && GPU_ALLOW_PROFILE_EVENTS -#include -#endif #define DX12_ENABLE_RESOURCE_BARRIERS_BATCHING 1 #define DX12_ENABLE_RESOURCE_BARRIERS_DEBUGGING 0 diff --git a/Source/ThirdParty/WinPixEventRuntime/pix3.h b/Source/ThirdParty/WinPixEventRuntime/pix3.h index 9d3ba301b..74f1338b7 100644 --- a/Source/ThirdParty/WinPixEventRuntime/pix3.h +++ b/Source/ThirdParty/WinPixEventRuntime/pix3.h @@ -18,7 +18,7 @@ #endif #if !defined(USE_PIX_SUPPORTED_ARCHITECTURE) -#if defined(_M_X64) || defined(USE_PIX_ON_ALL_ARCHITECTURES) || defined(_M_ARM64) +#if defined(_M_X64) || defined(USE_PIX_ON_ALL_ARCHITECTURES) || defined(_M_ARM64) || defined(_M_ARM64EC) #define USE_PIX_SUPPORTED_ARCHITECTURE #endif #endif From 9777e71ee0534513a0629db82eb8b2ffd10d154a Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Fri, 10 May 2024 22:16:43 +0300 Subject: [PATCH 118/292] Update ogg and vorbis to use CMake for compilation on Windows --- Source/ThirdParty/ogg/config_types.h | 26 -- Source/ThirdParty/ogg/ogg.h | 1 + Source/ThirdParty/ogg/os_types.h | 14 +- Source/ThirdParty/vorbis/COPYING | 2 +- Source/ThirdParty/vorbis/codec.h | 2 +- 
Source/ThirdParty/vorbis/vorbisenc.h | 2 +- Source/ThirdParty/vorbis/vorbisfile.h | 2 +- .../Tools/Flax.Build/Deps/Dependencies/ogg.cs | 268 ------------------ .../Flax.Build/Deps/Dependencies/vorbis.cs | 204 +++++++++---- 9 files changed, 163 insertions(+), 358 deletions(-) delete mode 100644 Source/ThirdParty/ogg/config_types.h delete mode 100644 Source/Tools/Flax.Build/Deps/Dependencies/ogg.cs diff --git a/Source/ThirdParty/ogg/config_types.h b/Source/ThirdParty/ogg/config_types.h deleted file mode 100644 index f586c26a0..000000000 --- a/Source/ThirdParty/ogg/config_types.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __CONFIG_TYPES_H__ -#define __CONFIG_TYPES_H__ - -/* these are filled in by configure */ -#define INCLUDE_INTTYPES_H 1 -#define INCLUDE_STDINT_H 1 -#define INCLUDE_SYS_TYPES_H 1 - -#if INCLUDE_INTTYPES_H -# include -#endif -#if INCLUDE_STDINT_H -# include -#endif -#if INCLUDE_SYS_TYPES_H -# include -#endif - -typedef int16_t ogg_int16_t; -typedef uint16_t ogg_uint16_t; -typedef int32_t ogg_int32_t; -typedef uint32_t ogg_uint32_t; -typedef int64_t ogg_int64_t; -typedef uint64_t ogg_uint64_t; - -#endif diff --git a/Source/ThirdParty/ogg/ogg.h b/Source/ThirdParty/ogg/ogg.h index c4325aa76..7609fc24d 100644 --- a/Source/ThirdParty/ogg/ogg.h +++ b/Source/ThirdParty/ogg/ogg.h @@ -11,6 +11,7 @@ ******************************************************************** function: toplevel libogg include + last mod: $Id$ ********************************************************************/ #ifndef _OGG_H diff --git a/Source/ThirdParty/ogg/os_types.h b/Source/ThirdParty/ogg/os_types.h index 4165bcecf..b8f56308b 100644 --- a/Source/ThirdParty/ogg/os_types.h +++ b/Source/ThirdParty/ogg/os_types.h @@ -10,7 +10,8 @@ * * ******************************************************************** - function: Define a consistent set of types on each platform. + function: #ifdef jail to whip a few platforms into the UNIX ideal. 
+ last mod: $Id$ ********************************************************************/ #ifndef _OS_TYPES_H @@ -43,7 +44,6 @@ typedef unsigned long long ogg_uint64_t; # elif defined(__MWERKS__) typedef long long ogg_int64_t; - typedef unsigned long long ogg_uint64_t; typedef int ogg_int32_t; typedef unsigned int ogg_uint32_t; typedef short ogg_int16_t; @@ -62,7 +62,6 @@ typedef __int64 ogg_int64_t; typedef __int32 ogg_int32_t; typedef unsigned __int32 ogg_uint32_t; - typedef unsigned __int64 ogg_uint64_t; typedef __int16 ogg_int16_t; typedef unsigned __int16 ogg_uint16_t; # endif @@ -76,7 +75,6 @@ typedef int32_t ogg_int32_t; typedef uint32_t ogg_uint32_t; typedef int64_t ogg_int64_t; - typedef uint64_t ogg_uint64_t; #elif defined(__HAIKU__) @@ -87,7 +85,6 @@ typedef int ogg_int32_t; typedef unsigned int ogg_uint32_t; typedef long long ogg_int64_t; - typedef unsigned long long ogg_uint64_t; #elif defined(__BEOS__) @@ -98,7 +95,6 @@ typedef int32_t ogg_int32_t; typedef uint32_t ogg_uint32_t; typedef int64_t ogg_int64_t; - typedef uint64_t ogg_uint64_t; #elif defined (__EMX__) @@ -108,8 +104,6 @@ typedef int ogg_int32_t; typedef unsigned int ogg_uint32_t; typedef long long ogg_int64_t; - typedef unsigned long long ogg_uint64_t; - #elif defined (DJGPP) @@ -118,13 +112,11 @@ typedef int ogg_int32_t; typedef unsigned int ogg_uint32_t; typedef long long ogg_int64_t; - typedef unsigned long long ogg_uint64_t; #elif defined(R5900) /* PS2 EE */ typedef long ogg_int64_t; - typedef unsigned long ogg_uint64_t; typedef int ogg_int32_t; typedef unsigned ogg_uint32_t; typedef short ogg_int16_t; @@ -137,7 +129,6 @@ typedef signed int ogg_int32_t; typedef unsigned int ogg_uint32_t; typedef long long int ogg_int64_t; - typedef unsigned long long int ogg_uint64_t; #elif defined(__TMS320C6X__) @@ -147,7 +138,6 @@ typedef signed int ogg_int32_t; typedef unsigned int ogg_uint32_t; typedef long long int ogg_int64_t; - typedef unsigned long long int ogg_uint64_t; #else diff --git 
a/Source/ThirdParty/vorbis/COPYING b/Source/ThirdParty/vorbis/COPYING index 153b926a1..fb456a87b 100644 --- a/Source/ThirdParty/vorbis/COPYING +++ b/Source/ThirdParty/vorbis/COPYING @@ -1,4 +1,4 @@ -Copyright (c) 2002-2018 Xiph.org Foundation +Copyright (c) 2002-2020 Xiph.org Foundation Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions diff --git a/Source/ThirdParty/vorbis/codec.h b/Source/ThirdParty/vorbis/codec.h index 42aa29138..f8a912bc2 100644 --- a/Source/ThirdParty/vorbis/codec.h +++ b/Source/ThirdParty/vorbis/codec.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * ******************************************************************** diff --git a/Source/ThirdParty/vorbis/vorbisenc.h b/Source/ThirdParty/vorbis/vorbisenc.h index 55f3b4a66..085b15e66 100644 --- a/Source/ThirdParty/vorbis/vorbisenc.h +++ b/Source/ThirdParty/vorbis/vorbisenc.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/Source/ThirdParty/vorbis/vorbisfile.h b/Source/ThirdParty/vorbis/vorbisfile.h index 56626119b..3d65393f5 100644 --- a/Source/ThirdParty/vorbis/vorbisfile.h +++ b/Source/ThirdParty/vorbis/vorbisfile.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/ogg.cs b/Source/Tools/Flax.Build/Deps/Dependencies/ogg.cs deleted file mode 100644 index 72009cabb..000000000 --- a/Source/Tools/Flax.Build/Deps/Dependencies/ogg.cs +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. - -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Flax.Build; -using Flax.Build.Platforms; - -namespace Flax.Deps.Dependencies -{ - /// - /// Ogg project codecs use the Ogg bitstream format to arrange the raw, compressed bitstream into a more robust, useful form. For example, the Ogg bitstream makes seeking, time stamping and error recovery possible, as well as mixing several sepearate, concurrent media streams into a single physical bitstream. 
- /// - /// - class ogg : Dependency - { - /// - public override TargetPlatform[] Platforms - { - get - { - switch (BuildPlatform) - { - case TargetPlatform.Windows: - return new[] - { - TargetPlatform.Windows, - TargetPlatform.UWP, - TargetPlatform.XboxOne, - TargetPlatform.PS4, - TargetPlatform.PS5, - TargetPlatform.XboxScarlett, - TargetPlatform.Android, - TargetPlatform.Switch, - }; - case TargetPlatform.Linux: - return new[] - { - TargetPlatform.Linux, - }; - case TargetPlatform.Mac: - return new[] - { - TargetPlatform.Mac, - TargetPlatform.iOS, - }; - default: return new TargetPlatform[0]; - } - } - } - - private void PatchWindowsTargetPlatformVersion(string vcxprojPath, string vcxprojContents, string windowsTargetPlatformVersion, string platformToolset) - { - // Fix the MSVC project settings for Windows - var contents = vcxprojContents.Replace("v140", string.Format("{0}", platformToolset)); - contents = contents.Replace("", string.Format("{0}", windowsTargetPlatformVersion)); - File.WriteAllText(vcxprojPath, contents); - } - - /// - public override void Build(BuildOptions options) - { - var root = options.IntermediateFolder; - var configuration = "Release"; - var filesToKeep = new[] - { - "ogg.Build.cs", - }; - - // Get the source - CloneGitRepo(root, "https://github.com/xiph/ogg"); - GitCheckout(root, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); - - var binariesToCopyMsvc = new[] - { - "libogg_static.lib", - }; - var vsSolutionPath = Path.Combine(root, "win32", "VS2015", "libogg_static.sln"); - var vcxprojPath = Path.Combine(root, "win32", "VS2015", "libogg_static.vcxproj"); - var vcxprojContents = File.ReadAllText(vcxprojPath); - var libraryFileName = "libogg.a"; - vcxprojContents = vcxprojContents.Replace("MultiThreaded", "MultiThreadedDLL"); - vcxprojContents = vcxprojContents.Replace("MultiThreadedDebug", "MultiThreadedDebugDLL"); - vcxprojContents = vcxprojContents.Replace("true", "false"); - var buildDir = Path.Combine(root, "build"); - - 
foreach (var platform in options.Platforms) - { - switch (platform) - { - case TargetPlatform.Windows: - { - // Fix the MSVC project settings for Windows - PatchWindowsTargetPlatformVersion(vcxprojPath, vcxprojContents, "8.1", "140"); - - // Build for Win64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "win32", "VS2015", "x64", configuration, file), Path.Combine(depsFolder, file)); - - break; - } - case TargetPlatform.UWP: - { - // Fix the MSVC project settings for UWP - PatchWindowsTargetPlatformVersion(vcxprojPath, vcxprojContents, "10.0.17763.0", "v141"); - - // Build for UWP x64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "win32", "VS2015", "x64", configuration, file), Path.Combine(depsFolder, file)); - - break; - } - case TargetPlatform.Linux: - { - var envVars = new Dictionary - { - { "CC", "clang-7" }, - { "CC_FOR_BUILD", "clang-7" } - }; - - Utilities.Run(Path.Combine(root, "autogen.sh"), null, null, root, Utilities.RunOptions.Default, envVars); - - // Build for Linux - var toolchain = UnixToolchain.GetToolchainName(platform, TargetArchitecture.x64); - Utilities.Run(Path.Combine(root, "configure"), string.Format("--host={0}", toolchain), null, root, Utilities.RunOptions.Default, envVars); - SetupDirectory(buildDir, true); - Utilities.Run("cmake", "-G \"Unix Makefiles\" -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=Release ..", null, buildDir, Utilities.RunOptions.ConsoleLogOutput, envVars); - Utilities.Run("cmake", "--build .", null, buildDir, Utilities.RunOptions.ConsoleLogOutput, envVars); - var depsFolder = GetThirdPartyFolder(options, platform, 
TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - - break; - } - case TargetPlatform.PS4: - { - // Get the build data files - Utilities.DirectoryCopy( - Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg"), - Path.Combine(root, "PS4"), true, true); - - // Build for PS4 - var solutionPath = Path.Combine(root, "PS4", "libogg_static.sln"); - Deploy.VCEnvironment.BuildSolution(solutionPath, "Release", "ORBIS"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(root, "PS4", "lib", libraryFileName), Path.Combine(depsFolder, libraryFileName)); - - break; - } - case TargetPlatform.PS5: - { - // Get the build data files - Utilities.DirectoryCopy( - Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg"), - Path.Combine(root, "PS5"), true, true); - - // Build for PS5 - var solutionPath = Path.Combine(root, "PS5", "libogg_static.sln"); - Deploy.VCEnvironment.BuildSolution(solutionPath, "Release", "PROSPERO"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(root, "PS5", "lib", libraryFileName), Path.Combine(depsFolder, libraryFileName)); - - break; - } - case TargetPlatform.XboxOne: - { - // Fix the MSVC project settings for Xbox Scarlett - PatchWindowsTargetPlatformVersion(vcxprojPath, vcxprojContents, "10.0.19041.0", "v142"); - - // Build for Xbox Scarlett x64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "win32", "VS2015", "x64", configuration, file), Path.Combine(depsFolder, file)); - - break; - } - case TargetPlatform.XboxScarlett: - { - // Fix the MSVC project settings for Xbox Scarlett - PatchWindowsTargetPlatformVersion(vcxprojPath, 
vcxprojContents, "10.0.19041.0", "v142"); - - // Build for Xbox Scarlett x64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "win32", "VS2015", "x64", configuration, file), Path.Combine(depsFolder, file)); - - break; - } - case TargetPlatform.Android: - { - // Build for Android - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release"); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - break; - } - case TargetPlatform.Switch: - { - // Get the build data files - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg"), root, true, true); - - // Build for Switch - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release"); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - break; - } - case TargetPlatform.Mac: - { - // Build for Mac - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) - { - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, architecture, ".. -DCMAKE_BUILD_TYPE=Release"); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, architecture); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - } - break; - } - case TargetPlatform.iOS: - { - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. 
-DCMAKE_BUILD_TYPE=Release"); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - break; - } - } - } - - // Backup files - var srcIncludePath = Path.Combine(root, "include", "ogg"); - var dstIncludePath = Path.Combine(options.ThirdPartyFolder, "ogg"); - foreach (var filename in filesToKeep) - { - var src = Path.Combine(dstIncludePath, filename); - var dst = Path.Combine(options.IntermediateFolder, filename + ".tmp"); - Utilities.FileCopy(src, dst); - } - - // Setup headers directory - SetupDirectory(dstIncludePath, true); - - // Deploy header files and restore files - Directory.GetFiles(srcIncludePath, "Makefile*").ToList().ForEach(File.Delete); - Directory.GetFiles(srcIncludePath, "*.in").ToList().ForEach(File.Delete); - Utilities.DirectoryCopy(srcIncludePath, dstIncludePath, true, true); - File.Copy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING")); - foreach (var filename in filesToKeep) - { - var src = Path.Combine(options.IntermediateFolder, filename + ".tmp"); - var dst = Path.Combine(dstIncludePath, filename); - Utilities.FileCopy(src, dst); - } - } - } -} diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs index b5298eb9f..dbf5d6bc6 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs @@ -69,11 +69,24 @@ namespace Flax.Deps.Dependencies private List vcxprojContentsWindows; private string[] vcxprojPathsWindows; - private Binary[] binariesToCopyWindows = + private Binary[] vorbisBinariesToCopyWindows = { new Binary("libvorbis_static.lib", "libvorbis"), new Binary("libvorbisfile_static.lib", "libvorbisfile"), }; + private (string, string)[] vorbisBinariesToCopyWindowsCmake = + { + ("vorbis.lib", "libvorbis_static.lib"), + ("vorbisfile.lib", 
"libvorbisfile_static.lib"), + }; + private Binary[] oggBinariesToCopyWindows = + { + new Binary("libogg_static.lib", "ogg"), + }; + private (string, string)[] oggBinariesToCopyWindowsCmake = + { + ("ogg.lib", "libogg_static.lib"), + }; private void PatchWindowsTargetPlatformVersion(string windowsTargetPlatformVersion, string platformToolset) { @@ -92,28 +105,24 @@ namespace Flax.Deps.Dependencies return; hasSourcesReady = true; - - var packagePath = Path.Combine(root, "package.zip"); configurationMsvc = "Release"; - // Get the additional source (ogg dependency) - Downloader.DownloadFileFromUrlToPath("http://downloads.xiph.org/releases/ogg/libogg-1.3.3.zip", packagePath); - using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) - { - archive.ExtractToDirectory(root); - Directory.Move(Path.Combine(root, archive.Entries.First().FullName), Path.Combine(root, "libogg")); - } + string oggRoot = Path.Combine(root, "libogg"); + string vorbisRoot = Path.Combine(root, "libvorbis"); - // Get the source - File.Delete(packagePath); - Downloader.DownloadFileFromUrlToPath("http://downloads.xiph.org/releases/vorbis/libvorbis-1.3.6.zip", packagePath); - using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) - { - archive.ExtractToDirectory(root); - rootMsvcLib = Path.Combine(root, archive.Entries.First().FullName); - } + SetupDirectory(oggRoot, false); + CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); + GitResetLocalChanges(oggRoot); // Reset patches + GitCheckout(oggRoot, "master", "db5c7a49ce7ebda47b15b78471e78fb7f2483e22"); - // Patch Windows projects + SetupDirectory(vorbisRoot, false); + CloneGitRepo(vorbisRoot, "https://github.com/xiph/vorbis.git"); + GitResetLocalChanges(vorbisRoot); // Reset patches + GitCheckout(vorbisRoot, "master", "84c023699cdf023a32fa4ded32019f194afcdad0"); + + rootMsvcLib = vorbisRoot; + + // Patch Windows projects which use MSBuild vcxprojPathsWindows = new[] { Path.Combine(rootMsvcLib, "win32", 
"VS2010", "libvorbis", "libvorbis_static.vcxproj"), @@ -127,6 +136,36 @@ namespace Flax.Deps.Dependencies contents = contents.Replace("ProgramDatabase", ""); vcxprojContentsWindows[i] = contents.Replace("true", "false"); } + + // TODO: FIXME for UWP/XBoxOne (use CMake for these too?) +#if false + var packagePath = Path.Combine(root, "package.zip"); + configurationMsvc = "Release"; + + // Get the additional source (ogg dependency) + if (!Directory.Exists(Path.Combine(root, "libogg"))) + { + File.Delete(packagePath); + Downloader.DownloadFileFromUrlToPath("http://downloads.xiph.org/releases/ogg/libogg-1.3.3.zip", packagePath); + using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) + { + archive.ExtractToDirectory(root); + Directory.Move(Path.Combine(root, archive.Entries.First().FullName), Path.Combine(root, "libogg")); + } + } + + // Get the source + if (!Directory.Exists(Path.Combine(root, "libvorbis"))) + { + File.Delete(packagePath); + Downloader.DownloadFileFromUrlToPath("http://downloads.xiph.org/releases/vorbis/libvorbis-1.3.6.zip", packagePath); + using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) + { + archive.ExtractToDirectory(root); + Directory.Move(Path.Combine(root, archive.Entries.First().FullName), Path.Combine(root, "libvorbis")); + } + } +#endif } private void BuildMsbuild(BuildOptions options, TargetPlatform platform, TargetArchitecture architecture) @@ -135,14 +174,14 @@ namespace Flax.Deps.Dependencies string buildPlatform, buildDir; string[] vcxprojPaths; - Binary[] binariesToCopy; + List binariesToCopy = new List(); switch (platform) { case TargetPlatform.Windows: + { buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); - binariesToCopy = binariesToCopyWindows; vcxprojPaths = vcxprojPathsWindows; - PatchWindowsTargetPlatformVersion("8.1", "v140"); + PatchWindowsTargetPlatformVersion("10.0", "v143"); switch (architecture) { case TargetArchitecture.x86: @@ -151,13 +190,17 @@ namespace 
Flax.Deps.Dependencies case TargetArchitecture.x64: buildPlatform = "x64"; break; + case TargetArchitecture.ARM64: + buildPlatform = "ARM64"; + break; default: throw new InvalidArchitectureException(architecture); } - + binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; + } case TargetPlatform.UWP: + { buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); - binariesToCopy = binariesToCopyWindows; vcxprojPaths = vcxprojPathsWindows; PatchWindowsTargetPlatformVersion("10.0.17763.0", "v141"); switch (architecture) @@ -173,11 +216,13 @@ namespace Flax.Deps.Dependencies break; default: throw new InvalidArchitectureException(architecture); } - + binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; + } case TargetPlatform.PS4: + { buildDir = Path.Combine(rootMsvcLib, "PS4"); - binariesToCopy = new[] + var binariesToCopyVorbis = new[] { new Binary("libvorbis.a", "libvorbis"), }; @@ -186,16 +231,17 @@ namespace Flax.Deps.Dependencies Path.Combine(buildDir, "libvorbis", "libvorbis_static.vcxproj"), }; buildPlatform = "ORBIS"; - Utilities.DirectoryCopy( - Path.Combine(GetBinariesFolder(options, platform), "Data", "vorbis"), + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "vorbis"), buildDir, true, true); - Utilities.FileCopy( - Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg", "ogg", "config_types.h"), - Path.Combine(root, "libogg", "include", "ogg", "config_types.h")); + Utilities.FileCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg", "ogg", "config_types.h"), + Path.Combine(root, "..", "ogg", "include", "ogg", "config_types.h")); + binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, 
configurationMsvc)))); break; + } case TargetPlatform.PS5: + { buildDir = Path.Combine(rootMsvcLib, "PS5"); - binariesToCopy = new[] + var binariesToCopyVorbis = new[] { new Binary("libvorbis.a", "libvorbis"), }; @@ -209,21 +255,23 @@ namespace Flax.Deps.Dependencies buildDir, true, true); Utilities.FileCopy( Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg", "ogg", "config_types.h"), - Path.Combine(root, "libogg", "include", "ogg", "config_types.h")); + Path.Combine(root, "..", "ogg", "include", "ogg", "config_types.h")); + binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; + } case TargetPlatform.XboxOne: buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); - binariesToCopy = binariesToCopyWindows; vcxprojPaths = vcxprojPathsWindows; buildPlatform = "x64"; PatchWindowsTargetPlatformVersion("10.0.19041.0", "v142"); + binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; case TargetPlatform.XboxScarlett: buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); - binariesToCopy = binariesToCopyWindows; vcxprojPaths = vcxprojPathsWindows; buildPlatform = "x64"; PatchWindowsTargetPlatformVersion("10.0.19041.0", "v142"); + binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; default: throw new InvalidPlatformException(platform); } @@ -235,7 +283,60 @@ namespace Flax.Deps.Dependencies // Copy binaries var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var filename in binariesToCopy) - Utilities.FileCopy(Path.Combine(buildDir, filename.SrcFolder, buildPlatform, configurationMsvc, filename.Filename), Path.Combine(depsFolder, filename.Filename)); + 
Utilities.FileCopy(Path.Combine(filename.SrcFolder, filename.Filename), Path.Combine(depsFolder, filename.Filename)); + } + + private void BuildCmake(BuildOptions options, TargetPlatform platform, TargetArchitecture architecture) + { + GetSources(); + + string oggRoot = Path.Combine(root, "libogg"); + string vorbisRoot = Path.Combine(root, "libvorbis"); + + var oggBuildDir = Path.Combine(oggRoot, "build-" + architecture.ToString()); + var vorbisBuildDir = Path.Combine(vorbisRoot, "build-" + architecture.ToString()); + + string ext; + switch (platform) + { + case TargetPlatform.Windows: + case TargetPlatform.UWP: + case TargetPlatform.XboxOne: + ext = ".lib"; + break; + case TargetPlatform.Linux: + ext = ".a"; + break; + default: + throw new InvalidPlatformException(platform); + } + + List<(string, string)> binariesToCopy = new List<(string, string)>(); + // Build ogg + { + var solutionPath = Path.Combine(oggBuildDir, "ogg.sln"); + + RunCmake(oggRoot, platform, architecture, $"-B\"{oggBuildDir}\" -DBUILD_SHARED_LIBS=OFF"); + Deploy.VCEnvironment.BuildSolution(solutionPath, configurationMsvc, architecture.ToString()); + foreach (var file in oggBinariesToCopyWindowsCmake) + binariesToCopy.Add((Path.Combine(oggBuildDir, configurationMsvc, file.Item1), file.Item2)); + } + + // Build vorbis + { + var oggLibraryPath = Path.Combine(oggBuildDir, configurationMsvc, "ogg" + ext); + var solutionPath = Path.Combine(vorbisBuildDir, "vorbis.sln"); + + RunCmake(vorbisRoot, platform, architecture, $"-B\"{vorbisBuildDir}\" -DOGG_INCLUDE_DIR=\"{Path.Combine(oggRoot, "include")}\" -DOGG_LIBRARY=\"{oggLibraryPath}\" -DBUILD_SHARED_LIBS=OFF"); + Deploy.VCEnvironment.BuildSolution(solutionPath, configurationMsvc, architecture.ToString()); + foreach (var file in vorbisBinariesToCopyWindowsCmake) + binariesToCopy.Add((Path.Combine(vorbisBuildDir, "lib", configurationMsvc, file.Item1), file.Item2)); + } + + // Copy binaries + var depsFolder = GetThirdPartyFolder(options, platform, 
architecture); + foreach (var file in binariesToCopy) + Utilities.FileCopy(file.Item1, Path.Combine(depsFolder, file.Item2)); } /// @@ -259,7 +360,8 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: { - BuildMsbuild(options, TargetPlatform.Windows, TargetArchitecture.x64); + BuildCmake(options, TargetPlatform.Windows, TargetArchitecture.x64); + BuildCmake(options, TargetPlatform.Windows, TargetArchitecture.ARM64); break; } case TargetPlatform.UWP: @@ -428,18 +530,24 @@ namespace Flax.Deps.Dependencies Utilities.FileCopy(src, dst); } - // Setup headers directory - SetupDirectory(dstIncludePath, true); - - // Deploy header files and restore files - Directory.GetFiles(srcIncludePath, "Makefile*").ToList().ForEach(File.Delete); - Utilities.DirectoryCopy(srcIncludePath, dstIncludePath, true, true); - Utilities.FileCopy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING")); - foreach (var filename in filesToKeep) + try { - var src = Path.Combine(options.IntermediateFolder, filename + ".tmp"); - var dst = Path.Combine(dstIncludePath, filename); - Utilities.FileCopy(src, dst); + // Setup headers directory + SetupDirectory(dstIncludePath, true); + + // Deploy header files and restore files + Directory.GetFiles(srcIncludePath, "Makefile*").ToList().ForEach(File.Delete); + Utilities.DirectoryCopy(srcIncludePath, dstIncludePath, true, true); + Utilities.FileCopy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING")); + } + finally + { + foreach (var filename in filesToKeep) + { + var src = Path.Combine(options.IntermediateFolder, filename + ".tmp"); + var dst = Path.Combine(dstIncludePath, filename); + Utilities.FileCopy(src, dst); + } } } } From 0765d88ff2124f0da59ddb6fb54cf645418b8f83 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 18:10:18 +0300 Subject: [PATCH 119/292] Update OpenAL to use CMake for compilation on Windows --- .../Flax.Build/Deps/Dependencies/OpenAL.cs | 37 ++++++++++++++++--- 1 file 
changed, 32 insertions(+), 5 deletions(-) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs index f128c0abc..eeb87586e 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs @@ -58,13 +58,40 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: { + var binariesToCopy = new[] + { + "OpenAL32.lib", + "OpenAL32.dll", + }; + + string configuration = "Release"; + + // Get the source + CloneGitRepo(root, "https://github.com/kcat/openal-soft.git"); + GitCheckout(root, "master", "d3875f333fb6abe2f39d82caca329414871ae53b"); // 1.23.1 + + // Build for Win64 and ARM64 + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + { + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + var solutionPath = Path.Combine(buildDir, "OpenAL.sln"); + + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DBUILD_SHARED_LIBS=OFF"); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in binariesToCopy) + Utilities.FileCopy(Path.Combine(buildDir, configuration, file), Path.Combine(depsFolder, Path.GetFileName(file))); + } + +#if false // Get the binaries var packagePath = Path.Combine(root, "package.zip"); - File.Delete(packagePath); - Downloader.DownloadFileFromUrlToPath("https://openal-soft.org/openal-binaries/openal-soft-" + version + "-bin.zip", packagePath); + if (!File.Exists(packagePath)) + Downloader.DownloadFileFromUrlToPath("https://openal-soft.org/openal-binaries/openal-soft-" + version + "-bin.zip", packagePath); using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) { - archive.ExtractToDirectory(root); + if (!Directory.Exists(root)) + archive.ExtractToDirectory(root); root = Path.Combine(root, 
archive.Entries.First().FullName); } @@ -74,7 +101,7 @@ namespace Flax.Deps.Dependencies Utilities.FileCopy(Path.Combine(root, "libs", "Win64", "OpenAL32.lib"), Path.Combine(depsFolder, "OpenAL32.lib")); // Deploy license - Utilities.FileCopy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING")); + Utilities.FileCopy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING"), true); // Deploy header files var files = Directory.GetFiles(Path.Combine(root, "include", "AL")); @@ -82,7 +109,7 @@ namespace Flax.Deps.Dependencies { Utilities.FileCopy(file, Path.Combine(dstIncludePath, Path.GetFileName(file))); } - +#endif break; } case TargetPlatform.Linux: From 526ccd52fd78513ca2c52f8ddd372d05c524766d Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 18:12:59 +0300 Subject: [PATCH 120/292] Build astc for Windows on ARM --- Source/Tools/Flax.Build/Deps/Dependencies/astc.cs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs b/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs index 01e369d62..dcf24c3f3 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs @@ -1,5 +1,6 @@ // Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+using System.Collections.Generic; using System.IO; using Flax.Build; @@ -37,7 +38,6 @@ namespace Flax.Deps.Dependencies public override void Build(BuildOptions options) { var root = options.IntermediateFolder; - var buildDir = Path.Combine(root, "build"); // Get the source var commit = "aeece2f609db959d1c5e43e4f00bd177ea130575"; // 4.6.1 @@ -48,10 +48,12 @@ namespace Flax.Deps.Dependencies switch (platform) { case TargetPlatform.Windows: - foreach (var architecture in new []{ TargetArchitecture.x64 }) + + foreach (var architecture in new []{ TargetArchitecture.x64, TargetArchitecture.ARM64 }) { - var isa = "-DASTCENC_ISA_SSE2=ON"; - var lib = "astcenc-sse2-static.lib"; + string buildDir = Path.Combine(root, "build-" + architecture.ToString()); + var isa = architecture == TargetArchitecture.ARM64 ? "-DASTCENC_ISA_NEON=ON" : "-DASTCENC_ISA_SSE2=ON"; + var lib = architecture == TargetArchitecture.ARM64 ? "astcenc-neon-static.lib" : "astcenc-sse2-static.lib"; SetupDirectory(buildDir, true); RunCmake(buildDir, platform, architecture, ".. -DCMAKE_BUILD_TYPE=Release -DASTCENC_CLI=OFF -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreadedDLL " + isa); BuildCmake(buildDir); @@ -62,6 +64,7 @@ namespace Flax.Deps.Dependencies case TargetPlatform.Mac: foreach (var architecture in new []{ TargetArchitecture.x64, TargetArchitecture.ARM64 }) { + string buildDir = Path.Combine(root, "build-" + architecture.ToString()); var isa = architecture == TargetArchitecture.ARM64 ? "-DASTCENC_ISA_NEON=ON" : "-DASTCENC_ISA_SSE2=ON"; var lib = architecture == TargetArchitecture.ARM64 ? 
"libastcenc-neon-static.a" : "libastcenc-sse2-static.a"; SetupDirectory(buildDir, true); From 34d294263f6fa01a0005e020db1d6271a5dcbc5f Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 18:15:36 +0300 Subject: [PATCH 121/292] Add dependencies to copy dbghelp and dxcompiler files from SDK --- .../Dependencies/DirectXShaderCompiler.cs | 69 +++++++++++++++++++ .../Flax.Build/Deps/Dependencies/dbghelp.cs | 60 ++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs create mode 100644 Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs new file mode 100644 index 000000000..df7d49a81 --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs @@ -0,0 +1,69 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +using System; +using System.IO; +using System.Linq; +using Flax.Build; +using Flax.Build.Platforms; + +namespace Flax.Deps.Dependencies +{ + /// + /// DirectX Shader Compiler and tools. 
https://github.com/microsoft/DirectXShaderCompiler + /// + /// + class DirectXShaderCompiler : Dependency + { + /// + public override TargetPlatform[] Platforms + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetPlatform.Windows, + }; + default: return new TargetPlatform[0]; + } + } + } + + /// + public override void Build(BuildOptions options) + { + foreach (var platform in options.Platforms) + { + switch (platform) + { + case TargetPlatform.Windows: + { + var sdk = WindowsPlatformBase.GetSDKs().Last(); + var sdkLibLocation = Path.Combine(sdk.Value, "Lib", WindowsPlatformBase.GetSDKVersion(sdk.Key).ToString(), "um"); + string binLocation = Path.Combine(sdk.Value, "bin", WindowsPlatformBase.GetSDKVersion(sdk.Key).ToString()); + + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + { + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + + string dxilLocation = @$"{binLocation}\{architecture}\dxil.dll"; + string dxcompilerLocation = @$"{binLocation}\{architecture}\dxcompiler.dll"; + string d3dcompilerLocation = @$"{binLocation}\{architecture}\d3dcompiler_47.dll"; + Utilities.FileCopy(dxilLocation, Path.Combine(depsFolder, Path.GetFileName(dxilLocation))); + Utilities.FileCopy(dxcompilerLocation, Path.Combine(depsFolder, Path.GetFileName(dxcompilerLocation))); + Utilities.FileCopy(d3dcompilerLocation, Path.Combine(depsFolder, Path.GetFileName(d3dcompilerLocation))); + + string dxcompilerLibLocation = @$"{sdkLibLocation}\{architecture}\dxcompiler.lib"; + string d3dcompilerLibLocation = @$"{sdkLibLocation}\{architecture}\d3dcompiler.lib"; + Utilities.FileCopy(dxcompilerLibLocation, Path.Combine(depsFolder, Path.GetFileName(dxcompilerLibLocation))); + Utilities.FileCopy(d3dcompilerLibLocation, Path.Combine(depsFolder, "d3dcompiler_47.lib")); + } + break; + } + } + } + } + } +} diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs 
b/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs new file mode 100644 index 000000000..e9096be08 --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs @@ -0,0 +1,60 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +using System; +using System.IO; +using System.Linq; +using Flax.Build; +using Flax.Build.Platforms; + +namespace Flax.Deps.Dependencies +{ + /// + /// Windows Debug Help Library. + /// + /// + class dbghelp : Dependency + { + /// + public override TargetPlatform[] Platforms + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetPlatform.Windows, + }; + default: return new TargetPlatform[0]; + } + } + } + + /// + public override void Build(BuildOptions options) + { + foreach (var platform in options.Platforms) + { + switch (platform) + { + case TargetPlatform.Windows: + { + var sdk = WindowsPlatformBase.GetSDKs().Last(); + + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + { + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + + string libLocation = @$"{sdk.Value}Debuggers\lib\{architecture}\dbghelp.lib"; + string dllLocation = @$"{sdk.Value}Debuggers\{architecture}\dbghelp.dll"; + Utilities.FileCopy(libLocation, Path.Combine(depsFolder, Path.GetFileName(libLocation))); + Utilities.FileCopy(dllLocation, Path.Combine(depsFolder, Path.GetFileName(dllLocation))); + } + break; + } + } + } + } + } +} From 2226ff32dc07c906c063bf4392a875c78a7d153d Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 18:47:05 +0300 Subject: [PATCH 122/292] Compile assimp for Windows on ARM --- Source/ThirdParty/assimp/.editorconfig | 8 + Source/ThirdParty/assimp/config.h.in | 992 ++++++++++++++++++ .../Flax.Build/Deps/Dependencies/Assimp.cs | 31 +- 3 files changed, 1020 insertions(+), 11 deletions(-) create mode 100644 Source/ThirdParty/assimp/.editorconfig create mode 100644 Source/ThirdParty/assimp/config.h.in 
diff --git a/Source/ThirdParty/assimp/.editorconfig b/Source/ThirdParty/assimp/.editorconfig new file mode 100644 index 000000000..9ea66423a --- /dev/null +++ b/Source/ThirdParty/assimp/.editorconfig @@ -0,0 +1,8 @@ +# See for details + +[*.{h,hpp,inl}] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space diff --git a/Source/ThirdParty/assimp/config.h.in b/Source/ThirdParty/assimp/config.h.in new file mode 100644 index 000000000..a37ff0b8c --- /dev/null +++ b/Source/ThirdParty/assimp/config.h.in @@ -0,0 +1,992 @@ +/* +--------------------------------------------------------------------------- +Open Asset Import Library (assimp) +--------------------------------------------------------------------------- + +Copyright (c) 2006-2018, assimp team + + +All rights reserved. + +Redistribution and use of this software in source and binary forms, +with or without modification, are permitted provided that the following +conditions are met: + +* Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +* Neither the name of the assimp team, nor the names of its + contributors may be used to endorse or promote products + derived from this software without specific prior + written permission of the assimp team. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--------------------------------------------------------------------------- +*/ + +/** @file config.h + * @brief Defines constants for configurable properties for the library + * + * Typically these properties are set via + * #Assimp::Importer::SetPropertyFloat, + * #Assimp::Importer::SetPropertyInteger or + * #Assimp::Importer::SetPropertyString, + * depending on the data type of a property. All properties have a + * default value. See the doc for the mentioned methods for more details. + * + *

+ * The corresponding functions for use with the plain-c API are: + * #aiSetImportPropertyInteger, + * #aiSetImportPropertyFloat, + * #aiSetImportPropertyString + */ +#pragma once +#ifndef AI_CONFIG_H_INC +#define AI_CONFIG_H_INC + + +// ########################################################################### +// LIBRARY SETTINGS +// General, global settings +// ########################################################################### + +// --------------------------------------------------------------------------- +/** @brief Enables time measurements. + * + * If enabled, measures the time needed for each part of the loading + * process (i.e. IO time, importing, postprocessing, ..) and dumps + * these timings to the DefaultLogger. See the @link perf Performance + * Page@endlink for more information on this topic. + * + * Property type: bool. Default value: false. + */ +#define AI_CONFIG_GLOB_MEASURE_TIME \ + "GLOB_MEASURE_TIME" + + +// --------------------------------------------------------------------------- +/** @brief Global setting to disable generation of skeleton dummy meshes + * + * Skeleton dummy meshes are generated as a visualization aid in cases which + * the input data contains no geometry, but only animation data. + * Property data type: bool. Default value: false + */ +// --------------------------------------------------------------------------- +#define AI_CONFIG_IMPORT_NO_SKELETON_MESHES \ + "IMPORT_NO_SKELETON_MESHES" + + + +# if 0 // not implemented yet +// --------------------------------------------------------------------------- +/** @brief Set Assimp's multithreading policy. + * + * This setting is ignored if Assimp was built without boost.thread + * support (ASSIMP_BUILD_NO_THREADING, which is implied by ASSIMP_BUILD_BOOST_WORKAROUND). + * Possible values are: -1 to let Assimp decide what to do, 0 to disable + * multithreading entirely and any number larger than 0 to force a specific + * number of threads. 
Assimp is always free to ignore this settings, which is + * merely a hint. Usually, the default value (-1) will be fine. However, if + * Assimp is used concurrently from multiple user threads, it might be useful + * to limit each Importer instance to a specific number of cores. + * + * For more information, see the @link threading Threading page@endlink. + * Property type: int, default value: -1. + */ +#define AI_CONFIG_GLOB_MULTITHREADING \ + "GLOB_MULTITHREADING" +#endif + +// ########################################################################### +// POST PROCESSING SETTINGS +// Various stuff to fine-tune the behavior of a specific post processing step. +// ########################################################################### + + +// --------------------------------------------------------------------------- +/** @brief Maximum bone count per mesh for the SplitbyBoneCount step. + * + * Meshes are split until the maximum number of bones is reached. The default + * value is AI_SBBC_DEFAULT_MAX_BONES, which may be altered at + * compile-time. + * Property data type: integer. + */ +// --------------------------------------------------------------------------- +#define AI_CONFIG_PP_SBBC_MAX_BONES \ + "PP_SBBC_MAX_BONES" + + +// default limit for bone count +#if (!defined AI_SBBC_DEFAULT_MAX_BONES) +# define AI_SBBC_DEFAULT_MAX_BONES 60 +#endif + + +// --------------------------------------------------------------------------- +/** @brief Specifies the maximum angle that may be between two vertex tangents + * that their tangents and bi-tangents are smoothed. + * + * This applies to the CalcTangentSpace-Step. The angle is specified + * in degrees. The maximum value is 175. + * Property type: float. Default value: 45 degrees + */ +#define AI_CONFIG_PP_CT_MAX_SMOOTHING_ANGLE \ + "PP_CT_MAX_SMOOTHING_ANGLE" + +// --------------------------------------------------------------------------- +/** @brief Source UV channel for tangent space computation. 
+ * + * The specified channel must exist or an error will be raised. + * Property type: integer. Default value: 0 + */ +// --------------------------------------------------------------------------- +#define AI_CONFIG_PP_CT_TEXTURE_CHANNEL_INDEX \ + "PP_CT_TEXTURE_CHANNEL_INDEX" + +// --------------------------------------------------------------------------- +/** @brief Specifies the maximum angle that may be between two face normals + * at the same vertex position that their are smoothed together. + * + * Sometimes referred to as 'crease angle'. + * This applies to the GenSmoothNormals-Step. The angle is specified + * in degrees, so 180 is PI. The default value is 175 degrees (all vertex + * normals are smoothed). The maximum value is 175, too. Property type: float. + * Warning: setting this option may cause a severe loss of performance. The + * performance is unaffected if the #AI_CONFIG_FAVOUR_SPEED flag is set but + * the output quality may be reduced. + */ +#define AI_CONFIG_PP_GSN_MAX_SMOOTHING_ANGLE \ + "PP_GSN_MAX_SMOOTHING_ANGLE" + + +// --------------------------------------------------------------------------- +/** @brief Sets the colormap (= palette) to be used to decode embedded + * textures in MDL (Quake or 3DGS) files. + * + * This must be a valid path to a file. The file is 768 (256*3) bytes + * large and contains RGB triplets for each of the 256 palette entries. + * The default value is colormap.lmp. If the file is not found, + * a default palette (from Quake 1) is used. + * Property type: string. + */ +#define AI_CONFIG_IMPORT_MDL_COLORMAP \ + "IMPORT_MDL_COLORMAP" + +// --------------------------------------------------------------------------- +/** @brief Configures the #aiProcess_RemoveRedundantMaterials step to + * keep materials matching a name in a given list. + * + * This is a list of 1 to n strings, ' ' serves as delimiter character. + * Identifiers containing whitespaces must be enclosed in *single* + * quotation marks. 
For example: + * "keep-me and_me_to anotherMaterialToBeKept \'name with whitespace\'". + * If a material matches one of these names, it will not be modified or + * removed by the postprocessing step nor will other materials be replaced + * by a reference to it.
+ * This option might be useful if you are using some magic material names + * to pass additional semantics through the content pipeline. This ensures + * they won't be optimized away, but a general optimization is still + * performed for materials not contained in the list. + * Property type: String. Default value: n/a + * @note Linefeeds, tabs or carriage returns are treated as whitespace. + * Material names are case sensitive. + */ +#define AI_CONFIG_PP_RRM_EXCLUDE_LIST \ + "PP_RRM_EXCLUDE_LIST" + +// --------------------------------------------------------------------------- +/** @brief Configures the #aiProcess_PreTransformVertices step to + * keep the scene hierarchy. Meshes are moved to worldspace, but + * no optimization is performed (read: meshes with equal materials are not + * joined. The total number of meshes won't change). + * + * This option could be of use for you if the scene hierarchy contains + * important additional information which you intend to parse. + * For rendering, you can still render all meshes in the scene without + * any transformations. + * Property type: bool. Default value: false. + */ +#define AI_CONFIG_PP_PTV_KEEP_HIERARCHY \ + "PP_PTV_KEEP_HIERARCHY" + +// --------------------------------------------------------------------------- +/** @brief Configures the #aiProcess_PreTransformVertices step to normalize + * all vertex components into the [-1,1] range. That is, a bounding box + * for the whole scene is computed, the maximum component is taken and all + * meshes are scaled appropriately (uniformly of course!). + * This might be useful if you don't know the spatial dimension of the input + * data*/ +#define AI_CONFIG_PP_PTV_NORMALIZE \ + "PP_PTV_NORMALIZE" + +// --------------------------------------------------------------------------- +/** @brief Configures the #aiProcess_PreTransformVertices step to use + * a users defined matrix as the scene root node transformation before + * transforming vertices. + * Property type: bool. 
Default value: false. + */ +#define AI_CONFIG_PP_PTV_ADD_ROOT_TRANSFORMATION \ + "PP_PTV_ADD_ROOT_TRANSFORMATION" + +// --------------------------------------------------------------------------- +/** @brief Configures the #aiProcess_PreTransformVertices step to use + * a user-defined matrix as the scene root node transformation before + * transforming vertices. This property corresponds to the 'a1' component + * of the transformation matrix. + * Property type: aiMatrix4x4. + */ +#define AI_CONFIG_PP_PTV_ROOT_TRANSFORMATION \ + "PP_PTV_ROOT_TRANSFORMATION" + +// --------------------------------------------------------------------------- +/** @brief Configures the #aiProcess_FindDegenerates step to + * remove degenerated primitives from the import - immediately. + * + * The default behaviour converts degenerated triangles to lines and + * degenerated lines to points. See the documentation to the + * #aiProcess_FindDegenerates step for a detailed example of the various ways + * to get rid of these lines and points if you don't want them. + * Property type: bool. Default value: false. + */ +#define AI_CONFIG_PP_FD_REMOVE \ + "PP_FD_REMOVE" + +// --------------------------------------------------------------------------- +/** + * @brief Configures the #aiProcess_FindDegenerates to check the area of a + * triangle to be greater than e-6. If this is not the case the triangle will + * be removed if #AI_CONFIG_PP_FD_REMOVE is set to true. + */ +#define AI_CONFIG_PP_FD_CHECKAREA \ + "PP_FD_CHECKAREA" + +// --------------------------------------------------------------------------- +/** @brief Configures the #aiProcess_OptimizeGraph step to preserve nodes + * matching a name in a given list. + * + * This is a list of 1 to n strings, ' ' serves as delimiter character. + * Identifiers containing whitespaces must be enclosed in *single* + * quotation marks. For example: + * "keep-me and_me_to anotherNodeToBeKept \'name with whitespace\'".
+ * If a node matches one of these names, it will not be modified or + * removed by the postprocessing step.
+ * This option might be useful if you are using some magic node names + * to pass additional semantics through the content pipeline. This ensures + * they won't be optimized away, but a general optimization is still + * performed for nodes not contained in the list. + * Property type: String. Default value: n/a + * @note Linefeeds, tabs or carriage returns are treated as whitespace. + * Node names are case sensitive. + */ +#define AI_CONFIG_PP_OG_EXCLUDE_LIST \ + "PP_OG_EXCLUDE_LIST" + +// --------------------------------------------------------------------------- +/** @brief Set the maximum number of triangles in a mesh. + * + * This is used by the "SplitLargeMeshes" PostProcess-Step to determine + * whether a mesh must be split or not. + * @note The default value is AI_SLM_DEFAULT_MAX_TRIANGLES + * Property type: integer. + */ +#define AI_CONFIG_PP_SLM_TRIANGLE_LIMIT \ + "PP_SLM_TRIANGLE_LIMIT" + +// default value for AI_CONFIG_PP_SLM_TRIANGLE_LIMIT +#if (!defined AI_SLM_DEFAULT_MAX_TRIANGLES) +# define AI_SLM_DEFAULT_MAX_TRIANGLES 1000000 +#endif + +// --------------------------------------------------------------------------- +/** @brief Set the maximum number of vertices in a mesh. + * + * This is used by the "SplitLargeMeshes" PostProcess-Step to determine + * whether a mesh must be split or not. + * @note The default value is AI_SLM_DEFAULT_MAX_VERTICES + * Property type: integer. + */ +#define AI_CONFIG_PP_SLM_VERTEX_LIMIT \ + "PP_SLM_VERTEX_LIMIT" + +// default value for AI_CONFIG_PP_SLM_VERTEX_LIMIT +#if (!defined AI_SLM_DEFAULT_MAX_VERTICES) +# define AI_SLM_DEFAULT_MAX_VERTICES 1000000 +#endif + +// --------------------------------------------------------------------------- +/** @brief Set the maximum number of bones affecting a single vertex + * + * This is used by the #aiProcess_LimitBoneWeights PostProcess-Step. 
+ * @note The default value is AI_LMW_MAX_WEIGHTS + * Property type: integer.*/ +#define AI_CONFIG_PP_LBW_MAX_WEIGHTS \ + "PP_LBW_MAX_WEIGHTS" + +// default value for AI_CONFIG_PP_LBW_MAX_WEIGHTS +#if (!defined AI_LMW_MAX_WEIGHTS) +# define AI_LMW_MAX_WEIGHTS 0x4 +#endif // !! AI_LMW_MAX_WEIGHTS + +// --------------------------------------------------------------------------- +/** @brief Lower the deboning threshold in order to remove more bones. + * + * This is used by the #aiProcess_Debone PostProcess-Step. + * @note The default value is AI_DEBONE_THRESHOLD + * Property type: float.*/ +#define AI_CONFIG_PP_DB_THRESHOLD \ + "PP_DB_THRESHOLD" + +// default value for AI_CONFIG_PP_DB_THRESHOLD +#if (!defined AI_DEBONE_THRESHOLD) +# define AI_DEBONE_THRESHOLD 1.0f +#endif // !! AI_DEBONE_THRESHOLD + +// --------------------------------------------------------------------------- +/** @brief Require all bones qualify for deboning before removing any + * + * This is used by the #aiProcess_Debone PostProcess-Step. + * @note The default value is 0 + * Property type: bool.*/ +#define AI_CONFIG_PP_DB_ALL_OR_NONE \ + "PP_DB_ALL_OR_NONE" + +/** @brief Default value for the #AI_CONFIG_PP_ICL_PTCACHE_SIZE property + */ +#ifndef PP_ICL_PTCACHE_SIZE +# define PP_ICL_PTCACHE_SIZE 12 +#endif + +// --------------------------------------------------------------------------- +/** @brief Set the size of the post-transform vertex cache to optimize the + * vertices for. This configures the #aiProcess_ImproveCacheLocality step. + * + * The size is given in vertices. Of course you can't know how the vertex + * format will exactly look like after the import returns, but you can still + * guess what your meshes will probably have. + * @note The default value is #PP_ICL_PTCACHE_SIZE. That results in slight + * performance improvements for most nVidia/AMD cards since 2002. + * Property type: integer.
+ */ +#define AI_CONFIG_PP_ICL_PTCACHE_SIZE "PP_ICL_PTCACHE_SIZE" + +// --------------------------------------------------------------------------- +/** @brief Enumerates components of the aiScene and aiMesh data structures + * that can be excluded from the import using the #aiProcess_RemoveComponent step. + * + * See the documentation to #aiProcess_RemoveComponent for more details. + */ +enum aiComponent +{ + /** Normal vectors */ +#ifdef SWIG + aiComponent_NORMALS = 0x2, +#else + aiComponent_NORMALS = 0x2u, +#endif + + /** Tangents and bitangents go always together ... */ +#ifdef SWIG + aiComponent_TANGENTS_AND_BITANGENTS = 0x4, +#else + aiComponent_TANGENTS_AND_BITANGENTS = 0x4u, +#endif + + /** ALL color sets + * Use aiComponent_COLORn(N) to specify the N'th set */ + aiComponent_COLORS = 0x8, + + /** ALL texture UV sets + * aiComponent_TEXCOORDn(N) to specify the N'th set */ + aiComponent_TEXCOORDS = 0x10, + + /** Removes all bone weights from all meshes. + * The scenegraph nodes corresponding to the bones are NOT removed. + * use the #aiProcess_OptimizeGraph step to do this */ + aiComponent_BONEWEIGHTS = 0x20, + + /** Removes all node animations (aiScene::mAnimations). + * The corresponding scenegraph nodes are NOT removed. + * use the #aiProcess_OptimizeGraph step to do this */ + aiComponent_ANIMATIONS = 0x40, + + /** Removes all embedded textures (aiScene::mTextures) */ + aiComponent_TEXTURES = 0x80, + + /** Removes all light sources (aiScene::mLights). + * The corresponding scenegraph nodes are NOT removed. + * use the #aiProcess_OptimizeGraph step to do this */ + aiComponent_LIGHTS = 0x100, + + /** Removes all cameras (aiScene::mCameras). + * The corresponding scenegraph nodes are NOT removed. + * use the #aiProcess_OptimizeGraph step to do this */ + aiComponent_CAMERAS = 0x200, + + /** Removes all meshes (aiScene::mMeshes). */ + aiComponent_MESHES = 0x400, + + /** Removes all materials. 
One default material will + * be generated, so aiScene::mNumMaterials will be 1. */ + aiComponent_MATERIALS = 0x800, + + + /** This value is not used. It is just there to force the + * compiler to map this enum to a 32 Bit integer. */ +#ifndef SWIG + _aiComponent_Force32Bit = 0x9fffffff +#endif +}; + +// Remove a specific color channel 'n' +#define aiComponent_COLORSn(n) (1u << (n+20u)) + +// Remove a specific UV channel 'n' +#define aiComponent_TEXCOORDSn(n) (1u << (n+25u)) + +// --------------------------------------------------------------------------- +/** @brief Input parameter to the #aiProcess_RemoveComponent step: + * Specifies the parts of the data structure to be removed. + * + * See the documentation to this step for further details. The property + * is expected to be an integer, a bitwise combination of the + * #aiComponent flags defined above in this header. The default + * value is 0. Important: if no valid mesh is remaining after the + * step has been executed (e.g you thought it was funny to specify ALL + * of the flags defined above) the import FAILS. Mainly because there is + * no data to work on anymore ... + */ +#define AI_CONFIG_PP_RVC_FLAGS \ + "PP_RVC_FLAGS" + +// --------------------------------------------------------------------------- +/** @brief Input parameter to the #aiProcess_SortByPType step: + * Specifies which primitive types are removed by the step. + * + * This is a bitwise combination of the aiPrimitiveType flags. + * Specifying all of them is illegal, of course. A typical use would + * be to exclude all line and point meshes from the import. This + * is an integer property, its default value is 0. + */ +#define AI_CONFIG_PP_SBP_REMOVE \ + "PP_SBP_REMOVE" + +// --------------------------------------------------------------------------- +/** @brief Input parameter to the #aiProcess_FindInvalidData step: + * Specifies the floating-point accuracy for animation values. 
The step + * checks for animation tracks where all frame values are absolutely equal + * and removes them. This tweakable controls the epsilon for floating-point + * comparisons - two keys are considered equal if the condition + * abs(n0-n1)<=epsilon holds true for all vector or quaternion + * components. The default value is 0.f - comparisons are exact then. + */ +#define AI_CONFIG_PP_FID_ANIM_ACCURACY \ + "PP_FID_ANIM_ACCURACY" + +// --------------------------------------------------------------------------- +/** @brief Input parameter to the #aiProcess_FindInvalidData step: + * Set to true to ignore texture coordinates. This may be useful if you have + * to assign different kinds of textures like one for the summer or one for the winter. + */ +#define AI_CONFIG_PP_FID_IGNORE_TEXTURECOORDS \ + "PP_FID_IGNORE_TEXTURECOORDS" + +// TransformUVCoords evaluates UV scalings +#define AI_UVTRAFO_SCALING 0x1 + +// TransformUVCoords evaluates UV rotations +#define AI_UVTRAFO_ROTATION 0x2 + +// TransformUVCoords evaluates UV translation +#define AI_UVTRAFO_TRANSLATION 0x4 + +// Everything baked together -> default value +#define AI_UVTRAFO_ALL (AI_UVTRAFO_SCALING | AI_UVTRAFO_ROTATION | AI_UVTRAFO_TRANSLATION) + +// --------------------------------------------------------------------------- +/** @brief Input parameter to the #aiProcess_TransformUVCoords step: + * Specifies which UV transformations are evaluated. + * + * This is a bitwise combination of the AI_UVTRAFO_XXX flags (integer + * property, of course). By default all transformations are enabled + * (AI_UVTRAFO_ALL). + */ +#define AI_CONFIG_PP_TUV_EVALUATE \ + "PP_TUV_EVALUATE" + +// --------------------------------------------------------------------------- +/** @brief A hint to assimp to favour speed against import quality. + * + * Enabling this option may result in faster loading, but it needn't. + * It represents just a hint to loaders and post-processing steps to use + * faster code paths, if possible.
+ * This property is expected to be an integer, != 0 stands for true. + * The default value is 0. + */ +#define AI_CONFIG_FAVOUR_SPEED \ + "FAVOUR_SPEED" + + +// ########################################################################### +// IMPORTER SETTINGS +// Various stuff to fine-tune the behaviour of specific importer plugins. +// ########################################################################### + + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will merge all geometry layers present + * in the source file or take only the first. + * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_READ_ALL_GEOMETRY_LAYERS \ + "IMPORT_FBX_READ_ALL_GEOMETRY_LAYERS" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will read all materials present in the + * source file or take only the referenced materials. + * + * This is void unless IMPORT_FBX_READ_MATERIALS=1. + * + * The default value is false (0) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_READ_ALL_MATERIALS \ + "IMPORT_FBX_READ_ALL_MATERIALS" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will read materials. + * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_READ_MATERIALS \ + "IMPORT_FBX_READ_MATERIALS" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will read embedded textures. + * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_READ_TEXTURES \ + "IMPORT_FBX_READ_TEXTURES" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will read cameras. 
+ * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_READ_CAMERAS \ + "IMPORT_FBX_READ_CAMERAS" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will read light sources. + * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_READ_LIGHTS \ + "IMPORT_FBX_READ_LIGHTS" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will read animations. + * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_READ_ANIMATIONS \ + "IMPORT_FBX_READ_ANIMATIONS" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will act in strict mode in which only + * FBX 2013 is supported and any other sub formats are rejected. FBX 2013 + * is the primary target for the importer, so this format is best + * supported and well-tested. + * + * The default value is false (0) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_STRICT_MODE \ + "IMPORT_FBX_STRICT_MODE" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will preserve pivot points for + * transformations (as extra nodes). If set to false, pivots and offsets + * will be evaluated whenever possible. + * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_PRESERVE_PIVOTS \ + "IMPORT_FBX_PRESERVE_PIVOTS" + +// --------------------------------------------------------------------------- +/** @brief Specifies whether the importer will drop empty animation curves or + * animation curves which match the bind pose transformation over their + * entire defined range. 
+ * + * The default value is true (1) + * Property type: bool + */ +#define AI_CONFIG_IMPORT_FBX_OPTIMIZE_EMPTY_ANIMATION_CURVES \ + "IMPORT_FBX_OPTIMIZE_EMPTY_ANIMATION_CURVES" + +// --------------------------------------------------------------------------- +/** @brief Set whether the fbx importer will use the legacy embedded texture naming. +* +* The default value is false (0) +* Property type: bool +*/ +#define AI_CONFIG_IMPORT_FBX_EMBEDDED_TEXTURES_LEGACY_NAMING \ + "AI_CONFIG_IMPORT_FBX_EMBEDDED_TEXTURES_LEGACY_NAMING" + +// --------------------------------------------------------------------------- +/** @brief Set the vertex animation keyframe to be imported + * + * ASSIMP does not support vertex keyframes (only bone animation is supported). + * The library reads only one frame of models with vertex animations. + * By default this is the first frame. + * \note The default value is 0. This option applies to all importers. + * However, it is also possible to override the global setting + * for a specific loader. You can use the AI_CONFIG_IMPORT_XXX_KEYFRAME + * options (where XXX is a placeholder for the file format for which you + * want to override the global setting). + * Property type: integer. + */ +#define AI_CONFIG_IMPORT_GLOBAL_KEYFRAME "IMPORT_GLOBAL_KEYFRAME" + +#define AI_CONFIG_IMPORT_MD3_KEYFRAME "IMPORT_MD3_KEYFRAME" +#define AI_CONFIG_IMPORT_MD2_KEYFRAME "IMPORT_MD2_KEYFRAME" +#define AI_CONFIG_IMPORT_MDL_KEYFRAME "IMPORT_MDL_KEYFRAME" +#define AI_CONFIG_IMPORT_MDC_KEYFRAME "IMPORT_MDC_KEYFRAME" +#define AI_CONFIG_IMPORT_SMD_KEYFRAME "IMPORT_SMD_KEYFRAME" +#define AI_CONFIG_IMPORT_UNREAL_KEYFRAME "IMPORT_UNREAL_KEYFRAME" + +// --------------------------------------------------------------------------- +/** Smd load multiple animations + * + * Property type: bool. Default value: true. 
+ */ +#define AI_CONFIG_IMPORT_SMD_LOAD_ANIMATION_LIST "IMPORT_SMD_LOAD_ANIMATION_LIST" + +// --------------------------------------------------------------------------- +/** @brief Configures the AC loader to collect all surfaces which have the + * "Backface cull" flag set in separate meshes. + * + * Property type: bool. Default value: true. + */ +#define AI_CONFIG_IMPORT_AC_SEPARATE_BFCULL \ + "IMPORT_AC_SEPARATE_BFCULL" + +// --------------------------------------------------------------------------- +/** @brief Configures whether the AC loader evaluates subdivision surfaces ( + * indicated by the presence of the 'subdiv' attribute in the file). By + * default, Assimp performs the subdivision using the standard + * Catmull-Clark algorithm + * + * * Property type: bool. Default value: true. + */ +#define AI_CONFIG_IMPORT_AC_EVAL_SUBDIVISION \ + "IMPORT_AC_EVAL_SUBDIVISION" + +// --------------------------------------------------------------------------- +/** @brief Configures the UNREAL 3D loader to separate faces with different + * surface flags (e.g. two-sided vs. single-sided). + * + * * Property type: bool. Default value: true. + */ +#define AI_CONFIG_IMPORT_UNREAL_HANDLE_FLAGS \ + "UNREAL_HANDLE_FLAGS" + +// --------------------------------------------------------------------------- +/** @brief Configures the terragen import plugin to compute uv's for + * terrains, if not given. Furthermore a default texture is assigned. + * + * UV coordinates for terrains are so simple to compute that you'll usually + * want to compute them on your own, if you need them. This option is intended + * for model viewers which want to offer an easy way to apply textures to + * terrains. + * * Property type: bool. Default value: false. 
+ */ +#define AI_CONFIG_IMPORT_TER_MAKE_UVS \ + "IMPORT_TER_MAKE_UVS" + +// --------------------------------------------------------------------------- +/** @brief Configures the ASE loader to always reconstruct normal vectors + * based on the smoothing groups loaded from the file. + * + * Some ASE files carry invalid normals, others don't. + * * Property type: bool. Default value: true. + */ +#define AI_CONFIG_IMPORT_ASE_RECONSTRUCT_NORMALS \ + "IMPORT_ASE_RECONSTRUCT_NORMALS" + +// --------------------------------------------------------------------------- +/** @brief Configures the MD3 loader to detect and process multi-part + * Quake player models. + * + * These models usually consist of 3 files, lower.md3, upper.md3 and + * head.md3. If this property is set to true, Assimp will try to load and + * combine all three files if one of them is loaded. + * Property type: bool. Default value: true. + */ +#define AI_CONFIG_IMPORT_MD3_HANDLE_MULTIPART \ + "IMPORT_MD3_HANDLE_MULTIPART" + +// --------------------------------------------------------------------------- +/** @brief Tells the MD3 loader which skin files to load. + * + * When loading MD3 files, Assimp checks whether a file + * [md3_file_name]_[skin_name].skin exists. These files are used by + * Quake III to be able to assign different skins (e.g. red and blue team) + * to models. 'default', 'red', 'blue' are typical skin names. + * Property type: String. Default value: "default". + */ +#define AI_CONFIG_IMPORT_MD3_SKIN_NAME \ + "IMPORT_MD3_SKIN_NAME" + +// --------------------------------------------------------------------------- +/** @brief Specify the Quake 3 shader file to be used for a particular + * MD3 file. This can also be a search path. + * + * By default Assimp's behaviour is as follows: If a MD3 file + * any_path/models/any_q3_subdir/model_name/file_name.md3 is + * loaded, the library tries to locate the corresponding shader file in + * any_path/scripts/model_name.shader.
This property overrides this + * behaviour. It can either specify a full path to the shader to be loaded + * or alternatively the path (relative or absolute) to the directory where + * the shaders for all MD3s to be loaded reside. Assimp attempts to open + * IMPORT_MD3_SHADER_SRC/model_name.shader first, IMPORT_MD3_SHADER_SRC/file_name.shader + * is the fallback file. Note that IMPORT_MD3_SHADER_SRC should have a terminal (back)slash. + * Property type: String. Default value: n/a. + */ +#define AI_CONFIG_IMPORT_MD3_SHADER_SRC \ + "IMPORT_MD3_SHADER_SRC" + +// --------------------------------------------------------------------------- +/** @brief Configures the LWO loader to load just one layer from the model. + * + * LWO files consist of layers and in some cases it could be useful to load + * only one of them. This property can be either a string - which specifies + * the name of the layer - or an integer - the index of the layer. If the + * property is not set the whole LWO model is loaded. Loading fails if the + * requested layer is not available. The layer index is zero-based and the + * layer name may not be empty.
+ * Property type: Integer. Default value: all layers are loaded. + */ +#define AI_CONFIG_IMPORT_LWO_ONE_LAYER_ONLY \ + "IMPORT_LWO_ONE_LAYER_ONLY" + +// --------------------------------------------------------------------------- +/** @brief Configures the MD5 loader to not load the MD5ANIM file for + * a MD5MESH file automatically. + * + * The default strategy is to look for a file with the same name but the + * MD5ANIM extension in the same directory. If it is found, it is loaded + * and combined with the MD5MESH file. This configuration option can be + * used to disable this behaviour. + * + * * Property type: bool. Default value: false. + */ +#define AI_CONFIG_IMPORT_MD5_NO_ANIM_AUTOLOAD \ + "IMPORT_MD5_NO_ANIM_AUTOLOAD" + +// --------------------------------------------------------------------------- +/** @brief Defines the beginning of the time range for which the LWS loader + * evaluates animations and computes aiNodeAnim's. + * + * Assimp provides full conversion of LightWave's envelope system, including + * pre and post conditions. The loader computes linearly subsampled animation + * channels with the frame rate given in the LWS file. This property defines + * the start time. Note: animation channels are only generated if a node + * has at least one envelope with more than one key assigned. This property + * is given in frames, '0' is the first frame. By default, if this property + * is not set, the importer takes the animation start from the input LWS + * file ('FirstFrame' line)
+ * Property type: Integer. Default value: taken from file. + * + * @see AI_CONFIG_IMPORT_LWS_ANIM_END - end of the imported time range + */ +#define AI_CONFIG_IMPORT_LWS_ANIM_START \ + "IMPORT_LWS_ANIM_START" +#define AI_CONFIG_IMPORT_LWS_ANIM_END \ + "IMPORT_LWS_ANIM_END" + +// --------------------------------------------------------------------------- +/** @brief Defines the output frame rate of the IRR loader. + * + * IRR animations are difficult to convert for Assimp and there will + * always be a loss of quality. This setting defines how many keys per second + * are returned by the converter.
+ * Property type: integer. Default value: 100 + */ +#define AI_CONFIG_IMPORT_IRR_ANIM_FPS \ + "IMPORT_IRR_ANIM_FPS" + +// --------------------------------------------------------------------------- +/** @brief Ogre Importer will try to find referenced materials from this file. + * + * Ogre meshes reference with material names, this does not tell Assimp the file + * where it is located in. Assimp will try to find the source file in the following + * order: .material, .material and + * lastly the material name defined by this config property. + *
+ * Property type: String. Default value: Scene.material. + */ +#define AI_CONFIG_IMPORT_OGRE_MATERIAL_FILE \ + "IMPORT_OGRE_MATERIAL_FILE" + +// --------------------------------------------------------------------------- +/** @brief Ogre Importer detect the texture usage from its filename. + * + * Ogre material texture units do not define texture type, the textures usage + * depends on the used shader or Ogre's fixed pipeline. If this config property + * is true Assimp will try to detect the type from the textures filename postfix: + * _n, _nrm, _nrml, _normal, _normals and _normalmap for normal map, _s, _spec, + * _specular and _specularmap for specular map, _l, _light, _lightmap, _occ + * and _occlusion for light map, _disp and _displacement for displacement map. + * The matching is case insensitive. Post fix is taken between the last + * underscore and the last period. + * Default behavior is to detect type from lower cased texture unit name by + * matching against: normalmap, specularmap, lightmap and displacementmap. + * For both cases if no match is found aiTextureType_DIFFUSE is used. + *
+ * Property type: Bool. Default value: false. + */ +#define AI_CONFIG_IMPORT_OGRE_TEXTURETYPE_FROM_FILENAME \ + "IMPORT_OGRE_TEXTURETYPE_FROM_FILENAME" + + /** @brief Specifies whether the Android JNI asset extraction is supported. + * + * Turn on this option if you want to manage assets in native + * Android application without having to keep the internal directory and asset + * manager pointer. + */ + #define AI_CONFIG_ANDROID_JNI_ASSIMP_MANAGER_SUPPORT "AI_CONFIG_ANDROID_JNI_ASSIMP_MANAGER_SUPPORT" + +// --------------------------------------------------------------------------- +/** @brief Specifies whether the IFC loader skips over IfcSpace elements. + * + * IfcSpace elements (and their geometric representations) are used to + * represent, well, free space in a building storey.
+ * Property type: Bool. Default value: true. + */ +#define AI_CONFIG_IMPORT_IFC_SKIP_SPACE_REPRESENTATIONS "IMPORT_IFC_SKIP_SPACE_REPRESENTATIONS" + +// --------------------------------------------------------------------------- +/** @brief Specifies whether the IFC loader will use its own, custom triangulation + * algorithm to triangulate wall and floor meshes. + * + * If this property is set to false, walls will be either triangulated by + * #aiProcess_Triangulate or will be passed through as huge polygons with + * faked holes (i.e. holes that are connected with the outer boundary using + * a dummy edge). It is highly recommended to set this property to true + * if you want triangulated data because #aiProcess_Triangulate is known to + * have problems with the kind of polygons that the IFC loader spits out for + * complicated meshes. + * Property type: Bool. Default value: true. + */ +#define AI_CONFIG_IMPORT_IFC_CUSTOM_TRIANGULATION "IMPORT_IFC_CUSTOM_TRIANGULATION" + +// --------------------------------------------------------------------------- +/** @brief Set the tessellation conic angle for IFC smoothing curves. + * + * This is used by the IFC importer to determine the tessellation parameter + * for smoothing curves. + * @note The default value is AI_IMPORT_IFC_DEFAULT_SMOOTHING_ANGLE and the + * accepted values are in range [5.0, 120.0]. + * Property type: Float. + */ +#define AI_CONFIG_IMPORT_IFC_SMOOTHING_ANGLE "IMPORT_IFC_SMOOTHING_ANGLE" + +// default value for AI_CONFIG_IMPORT_IFC_SMOOTHING_ANGLE +#if (!defined AI_IMPORT_IFC_DEFAULT_SMOOTHING_ANGLE) +# define AI_IMPORT_IFC_DEFAULT_SMOOTHING_ANGLE 10.0f +#endif + +// --------------------------------------------------------------------------- +/** @brief Set the tessellation for IFC cylindrical shapes. + * + * This is used by the IFC importer to determine the tessellation parameter + * for cylindrical shapes, i.e. the number of segments used to approximate a circle. 
+ * @note The default value is AI_IMPORT_IFC_DEFAULT_CYLINDRICAL_TESSELLATION and the + * accepted values are in range [3, 180]. + * Property type: Integer. + */ +#define AI_CONFIG_IMPORT_IFC_CYLINDRICAL_TESSELLATION "IMPORT_IFC_CYLINDRICAL_TESSELLATION" + +// default value for AI_CONFIG_IMPORT_IFC_CYLINDRICAL_TESSELLATION +#if (!defined AI_IMPORT_IFC_DEFAULT_CYLINDRICAL_TESSELLATION) +# define AI_IMPORT_IFC_DEFAULT_CYLINDRICAL_TESSELLATION 32 +#endif + +// --------------------------------------------------------------------------- +/** @brief Specifies whether the Collada loader will ignore the provided up direction. + * + * If this property is set to true, the up direction provided in the file header will + * be ignored and the file will be loaded as is. + * Property type: Bool. Default value: false. + */ +#define AI_CONFIG_IMPORT_COLLADA_IGNORE_UP_DIRECTION "IMPORT_COLLADA_IGNORE_UP_DIRECTION" + +// --------------------------------------------------------------------------- +/** @brief Specifies whether the Collada loader should use Collada names as node names. + * + * If this property is set to true, the Collada names will be used as the + * node name. The default is to use the id tag (resp. sid tag, if no id tag is present) + * instead. + * Property type: Bool. Default value: false. + */ +#define AI_CONFIG_IMPORT_COLLADA_USE_COLLADA_NAMES "IMPORT_COLLADA_USE_COLLADA_NAMES" + +// ---------- All the Export defines ------------ + +/** @brief Specifies the xfile use double for real values of float + * + * Property type: Bool. Default value: false. + */ + +#define AI_CONFIG_EXPORT_XFILE_64BIT "EXPORT_XFILE_64BIT" + +/** + * + */ +#define AI_CONFIG_EXPORT_POINT_CLOUDS "EXPORT_POINT_CLOUDS" + +/** + * @brief Specifies a global key factor for scale, float value + */ +#define AI_CONFIG_GLOBAL_SCALE_FACTOR_KEY "GLOBAL_SCALE_FACTOR" + +#if (!defined AI_CONFIG_GLOBAL_SCALE_FACTOR_DEFAULT) +# define AI_CONFIG_GLOBAL_SCALE_FACTOR_DEFAULT 1.0f +#endif // !!
AI_DEBONE_THRESHOLD + +// ---------- All the Build/Compile-time defines ------------ + +/** @brief Specifies if double precision is supported inside assimp + * + * Property type: Bool. Default value: undefined. + */ + +#cmakedefine ASSIMP_DOUBLE_PRECISION 1 + +#endif // !! AI_CONFIG_H_INC diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs index aef6b94f6..ea986b41b 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs @@ -44,6 +44,7 @@ namespace Flax.Deps.Dependencies { var root = options.IntermediateFolder; var moduleFilename = "assimp.Build.cs"; + string configHeaderFilePath = null; var configs = new string[] { "-DASSIMP_NO_EXPORT=ON", @@ -86,8 +87,8 @@ namespace Flax.Deps.Dependencies var globalConfig = string.Join(" ", configs); // Get the source - CloneGitRepo(root, "https://github.com/FlaxEngine/assimp.git"); - GitCheckout(root, "master", "5c900d689a5db5637b98f665fc1e9e9c9ed416b9"); + CloneGitRepoFast(root, "https://github.com/FlaxEngine/assimp.git"); + GitCheckout(root, "master", "684f65176338b1bd391404ea43a89c5e028b8468"); foreach (var platform in options.Platforms) { @@ -95,22 +96,27 @@ namespace Flax.Deps.Dependencies { case TargetPlatform.Windows: { - var solutionPath = Path.Combine(root, "Assimp.sln"); var configuration = "Release"; var binariesWin = new[] { - Path.Combine(root, "bin", configuration, "assimp-vc140-md.dll"), - Path.Combine(root, "lib", configuration, "assimp-vc140-md.lib"), + Path.Combine("code", configuration, "assimp-vc140-md.dll"), + Path.Combine("code", configuration, "assimp-vc140-md.lib"), }; - // Build for Win64 + // Build for Windows File.Delete(Path.Combine(root, "CMakeCache.txt")); - RunCmake(root, platform, TargetArchitecture.x64); - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - 
foreach (var file in binariesWin) + + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) { - Utilities.FileCopy(file, Path.Combine(depsFolder, Path.GetFileName(file))); + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + var solutionPath = Path.Combine(buildDir, "Assimp.sln"); + SetupDirectory(buildDir, true); + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DLIBRARY_SUFFIX=-vc140-md"); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + configHeaderFilePath = Path.Combine(buildDir, "include", "assimp", "config.h"); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in binariesWin) + Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, Path.GetFileName(file))); } break; @@ -120,6 +126,7 @@ namespace Flax.Deps.Dependencies // Build for Linux RunCmake(root, platform, TargetArchitecture.x64, " -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF " + globalConfig); Utilities.Run("make", null, null, root, Utilities.RunOptions.ThrowExceptionOnError); + configHeaderFilePath = Path.Combine(root, "include", "assimp", "config.h"); var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); Utilities.FileCopy(Path.Combine(root, "lib", "libassimp.a"), Path.Combine(depsFolder, "libassimp.a")); Utilities.FileCopy(Path.Combine(root, "lib", "libIrrXML.a"), Path.Combine(depsFolder, "libIrrXML.a")); @@ -132,6 +139,7 @@ namespace Flax.Deps.Dependencies { RunCmake(root, platform, architecture, " -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF " + globalConfig); Utilities.Run("make", null, null, root, Utilities.RunOptions.ThrowExceptionOnError); + configHeaderFilePath = Path.Combine(root, "include", "assimp", "config.h"); var depsFolder = GetThirdPartyFolder(options, platform, architecture); Utilities.FileCopy(Path.Combine(root, "lib", "libassimp.a"), Path.Combine(depsFolder, 
"libassimp.a")); Utilities.FileCopy(Path.Combine(root, "lib", "libIrrXML.a"), Path.Combine(depsFolder, "libIrrXML.a")); @@ -151,6 +159,7 @@ namespace Flax.Deps.Dependencies Utilities.FileCopy(moduleFileBackup, moduleFile); Utilities.DirectoryCopy(srcIncludePath, dstIncludePath, true, true); Utilities.FileCopy(Path.Combine(root, "LICENSE"), Path.Combine(dstIncludePath, "LICENSE")); + Utilities.FileCopy(configHeaderFilePath, Path.Combine(dstIncludePath, "config.h")); } } } From ca40e6140a2cf10b72de00cd386d480060a018bb Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 18:58:25 +0300 Subject: [PATCH 123/292] Compile glslang for Windows on ARM --- Source/ThirdParty/glslang/Include/revision.h | 3 -- .../Flax.Build/Deps/Dependencies/glslang.cs | 29 ++++++++++++------- 2 files changed, 18 insertions(+), 14 deletions(-) delete mode 100644 Source/ThirdParty/glslang/Include/revision.h diff --git a/Source/ThirdParty/glslang/Include/revision.h b/Source/ThirdParty/glslang/Include/revision.h deleted file mode 100644 index a0e4b2066..000000000 --- a/Source/ThirdParty/glslang/Include/revision.h +++ /dev/null @@ -1,3 +0,0 @@ -// This header is generated by the make-revision script. 
- -#define GLSLANG_PATCH_LEVEL 3559 diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs b/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs index 086124867..813e0f016 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs @@ -21,7 +21,7 @@ namespace Flax.Deps.Dependencies case TargetPlatform.Windows: return new[] { - TargetPlatform.Linux, + TargetPlatform.Windows, }; case TargetPlatform.Linux: return new[] @@ -43,8 +43,6 @@ namespace Flax.Deps.Dependencies { var root = options.IntermediateFolder; var installDir = Path.Combine(root, "install"); - var buildDir = root; - var solutionPath = Path.Combine(buildDir, "glslang.sln"); var configuration = "Release"; var cmakeArgs = string.Format("-DCMAKE_INSTALL_PREFIX=\"{0}\" -DCMAKE_BUILD_TYPE={1} -DENABLE_RTTI=ON -DENABLE_CTEST=OFF -DENABLE_HLSL=ON -DENABLE_SPVREMAPPER=ON -DENABLE_GLSLANG_BINARIES=OFF", installDir, configuration); var libsRoot = Path.Combine(installDir, "lib"); @@ -53,6 +51,7 @@ namespace Flax.Deps.Dependencies CloneGitRepoFast(root, "https://github.com/FlaxEngine/glslang.git"); // Setup the external sources + // Requires distutils (pip install setuptools) Utilities.Run("python", "update_glslang_sources.py", null, root, Utilities.RunOptions.ConsoleLogOutput); foreach (var platform in options.Platforms) @@ -74,15 +73,21 @@ namespace Flax.Deps.Dependencies Path.Combine(libsRoot, "glslang.lib"), }; - // Build for Win64 - File.Delete(Path.Combine(buildDir, "CMakeCache.txt")); - RunCmake(buildDir, platform, TargetArchitecture.x64, cmakeArgs); - Utilities.Run("cmake", string.Format("--build . 
--config {0} --target install", configuration), null, buildDir, Utilities.RunOptions.ConsoleLogOutput); - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in outputFiles) + // Build for Windows + foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) { - Utilities.FileCopy(file, Path.Combine(depsFolder, Path.GetFileName(file))); + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + var solutionPath = Path.Combine(buildDir, "glslang.sln"); + + SetupDirectory(buildDir, false); + RunCmake(root, platform, architecture, cmakeArgs + $" -B\"{buildDir}\""); + Utilities.Run("cmake", string.Format("--build . --config {0} --target install", configuration), null, buildDir, Utilities.RunOptions.ConsoleLogOutput); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in outputFiles) + { + Utilities.FileCopy(file, Path.Combine(depsFolder, Path.GetFileName(file))); + } } break; } @@ -100,6 +105,7 @@ namespace Flax.Deps.Dependencies Path.Combine(libsRoot, "libSPIRV.a"), Path.Combine(libsRoot, "libglslang.a"), }; + var buildDir = root; // Build for Linux RunCmake(root, platform, TargetArchitecture.x64, cmakeArgs); @@ -128,6 +134,7 @@ namespace Flax.Deps.Dependencies Path.Combine(libsRoot, "libSPIRV.a"), Path.Combine(libsRoot, "libglslang.a"), }; + var buildDir = root; // Build for Mac foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) From 2f4673ce1bea5002537cb3645f4f0f30bb79166d Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 19:01:45 +0300 Subject: [PATCH 124/292] Patch tracy for Windows on ARM --- Source/ThirdParty/tracy/client/TracyProfiler.hpp | 2 ++ Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp | 4 +++- 2 files 
changed, 5 insertions(+), 1 deletion(-) diff --git a/Source/ThirdParty/tracy/client/TracyProfiler.hpp b/Source/ThirdParty/tracy/client/TracyProfiler.hpp index 8892fb14f..b303a4503 100644 --- a/Source/ThirdParty/tracy/client/TracyProfiler.hpp +++ b/Source/ThirdParty/tracy/client/TracyProfiler.hpp @@ -177,6 +177,8 @@ public: # elif defined _WIN32 # ifdef TRACY_TIMER_QPC return GetTimeQpc(); +# elif defined(_M_ARM64) + if( HardwareSupportsInvariantTSC() ) return int64_t( _ReadStatusReg(ARM64_PMCCNTR_EL0) ); # else if( HardwareSupportsInvariantTSC() ) return int64_t( __rdtsc() ); # endif diff --git a/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp b/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp index 711505d21..e94957552 100644 --- a/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp +++ b/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp @@ -781,7 +781,9 @@ rpmalloc_set_main_thread(void) { static void _rpmalloc_spin(void) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && defined(_M_ARM64) + __isb(_ARM64_BARRIER_SY); +#elif defined(_MSC_VER) _mm_pause(); #elif defined(__x86_64__) || defined(__i386__) __asm__ volatile("pause" ::: "memory"); From c963dbc48d1b4fe87f64782d6325b98334d4ab27 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 19:02:09 +0300 Subject: [PATCH 125/292] Patch rapidjson for Windows on ARM --- Source/ThirdParty/rapidjson/internal/biginteger.h | 6 +++++- Source/ThirdParty/rapidjson/internal/diyfp.h | 6 +++++- Source/ThirdParty/rapidjson/rapidjson.h | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Source/ThirdParty/rapidjson/internal/biginteger.h b/Source/ThirdParty/rapidjson/internal/biginteger.h index acc26fa27..a9d3586c3 100644 --- a/Source/ThirdParty/rapidjson/internal/biginteger.h +++ b/Source/ThirdParty/rapidjson/internal/biginteger.h @@ -17,14 +17,18 @@ #include "../rapidjson.h" -#if defined(_MSC_VER) && defined(_M_AMD64) +#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) #if _MSC_VER <= 
1900 #include #else #include #endif +#if defined(_M_ARM64) +#pragma intrinsic(__umulh) +#else #pragma intrinsic(_umul128) #endif +#endif RAPIDJSON_NAMESPACE_BEGIN namespace internal { diff --git a/Source/ThirdParty/rapidjson/internal/diyfp.h b/Source/ThirdParty/rapidjson/internal/diyfp.h index 1530c22e0..eec14b4ac 100644 --- a/Source/ThirdParty/rapidjson/internal/diyfp.h +++ b/Source/ThirdParty/rapidjson/internal/diyfp.h @@ -21,15 +21,19 @@ #include "../rapidjson.h" -#if defined(_MSC_VER) && defined(_M_AMD64) +#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) #if _MSC_VER <= 1900 #include #else #include #endif #pragma intrinsic(_BitScanReverse64) +#if defined(_M_ARM64) +#pragma intrinsic(__umulh) +#else #pragma intrinsic(_umul128) #endif +#endif RAPIDJSON_NAMESPACE_BEGIN namespace internal { diff --git a/Source/ThirdParty/rapidjson/rapidjson.h b/Source/ThirdParty/rapidjson/rapidjson.h index ddc22d7ac..8a49e6067 100644 --- a/Source/ThirdParty/rapidjson/rapidjson.h +++ b/Source/ThirdParty/rapidjson/rapidjson.h @@ -236,7 +236,7 @@ # define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN # elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__) # define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN -# elif defined(_MSC_VER) && defined(_M_ARM) +# elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) # define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN # elif defined(RAPIDJSON_DOXYGEN_RUNNING) # define RAPIDJSON_ENDIAN From 72043c0260e9adf6f9a1e27557f34818a6270fc8 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 11 May 2024 20:14:28 +0300 Subject: [PATCH 126/292] Update minimp3 --- Source/ThirdParty/minimp3/minimp3.h | 521 +++++++++++++++++----------- 1 file changed, 325 insertions(+), 196 deletions(-) diff --git 
a/Source/ThirdParty/minimp3/minimp3.h b/Source/ThirdParty/minimp3/minimp3.h index eb1dd01ef..3220ae1a8 100644 --- a/Source/ThirdParty/minimp3/minimp3.h +++ b/Source/ThirdParty/minimp3/minimp3.h @@ -6,48 +6,50 @@ This software is distributed without any warranty. See . */ +#include #define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2) typedef struct { - int frame_bytes; - int channels; - int hz; - int layer; - int bitrate_kbps; + int frame_bytes, frame_offset, channels, hz, layer, bitrate_kbps; } mp3dec_frame_info_t; typedef struct { - float mdct_overlap[2][9*32]; - float qmf_state[15*2*32]; - int reserv; - int free_format_bytes; - unsigned char header[4]; - unsigned char reserv_buf[511]; + float mdct_overlap[2][9*32], qmf_state[15*2*32]; + int reserv, free_format_bytes; + unsigned char header[4], reserv_buf[511]; } mp3dec_t; #ifdef __cplusplus extern "C" { -#endif +#endif /* __cplusplus */ void mp3dec_init(mp3dec_t *dec); -int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info); +#ifndef MINIMP3_FLOAT_OUTPUT +typedef int16_t mp3d_sample_t; +#else /* MINIMP3_FLOAT_OUTPUT */ +typedef float mp3d_sample_t; +void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples); +#endif /* MINIMP3_FLOAT_OUTPUT */ +int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info); #ifdef __cplusplus } -#endif -#endif /*MINIMP3_H*/ +#endif /* __cplusplus */ -#ifdef MINIMP3_IMPLEMENTATION +#endif /* MINIMP3_H */ +#if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD) +#define _MINIMP3_IMPLEMENTATION_GUARD #include #include -#include #define MAX_FREE_FORMAT_FRAME_SIZE 2304 /* more than ISO spec's */ +#ifndef MAX_FRAME_SYNC_MATCHES #define MAX_FRAME_SYNC_MATCHES 10 +#endif /* MAX_FRAME_SYNC_MATCHES */ #define MAX_L3_FRAME_PAYLOAD_BYTES MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */ @@ -71,12 +73,12 @@ int 
mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, #define HDR_GET_LAYER(h) (((h[1]) >> 1) & 3) #define HDR_GET_BITRATE(h) ((h[2]) >> 4) #define HDR_GET_SAMPLE_RATE(h) (((h[2]) >> 2) & 3) -#define HDR_GET_MY_SAMPLE_RATE(h) (HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1)) * 3) +#define HDR_GET_MY_SAMPLE_RATE(h) (HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3) #define HDR_IS_FRAME_576(h) ((h[1] & 14) == 2) #define HDR_IS_LAYER_1(h) ((h[1] & 6) == 6) #define BITS_DEQUANTIZER_OUT -1 -#define MAX_SCF (255 + BITS_DEQUANTIZER_OUT * 4 - 210) +#define MAX_SCF (255 + BITS_DEQUANTIZER_OUT*4 - 210) #define MAX_SCFI ((MAX_SCF + 3) & ~3) #define MINIMP3_MIN(a, b) ((a) > (b) ? (b) : (a)) @@ -84,15 +86,15 @@ int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, #if !defined(MINIMP3_NO_SIMD) -#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__)) +#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64)) /* x64 always have SSE2, arm64 always have neon, no need for generic code */ #define MINIMP3_ONLY_SIMD -#endif +#endif /* SIMD checks... 
*/ -#if defined(_MSC_VER) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__)) +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__)) #if defined(_MSC_VER) #include -#endif +#endif /* defined(_MSC_VER) */ #include #define HAVE_SSE 1 #define HAVE_SIMD 1 @@ -109,7 +111,7 @@ int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, typedef __m128 f4; #if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) #define minimp3_cpuid __cpuid -#else +#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */ static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType) { #if defined(__PIC__) @@ -119,51 +121,49 @@ static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[ "cpuid\n" "xchgl %%ebx, %1\n" "pop %%rbx\n" -#else +#else /* defined(__x86_64__) */ "xchgl %%ebx, %1\n" "cpuid\n" "xchgl %%ebx, %1\n" -#endif +#endif /* defined(__x86_64__) */ : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType)); -#else +#else /* defined(__PIC__) */ __asm__ __volatile__( "cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType)); -#endif +#endif /* defined(__PIC__)*/ } -#endif -static int have_simd() +#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */ +static int have_simd(void) { #ifdef MINIMP3_ONLY_SIMD return 1; -#else +#else /* MINIMP3_ONLY_SIMD */ static int g_have_simd; int CPUInfo[4]; #ifdef MINIMP3_TEST static int g_counter; if (g_counter++ > 100) - goto test_nosimd; -#endif + return 0; +#endif /* MINIMP3_TEST */ if (g_have_simd) - return g_have_simd - 1; + goto end; minimp3_cpuid(CPUInfo, 0); + g_have_simd = 1; if (CPUInfo[0] > 0) { minimp3_cpuid(CPUInfo, 1); g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */ - return g_have_simd - 1; } -#ifdef MINIMP3_TEST -test_nosimd: -#endif - g_have_simd = 1; - return 0; 
-#endif +end: + return g_have_simd - 1; +#endif /* MINIMP3_ONLY_SIMD */ } -#elif defined(__ARM_NEON) || defined(__aarch64__) +#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64) #include +#define HAVE_SSE 0 #define HAVE_SIMD 1 #define VSTORE vst1q_f32 #define VLD vld1q_f32 @@ -180,60 +180,53 @@ static int have_simd() { /* TODO: detect neon for !MINIMP3_ONLY_SIMD */ return 1; } -#else +#else /* SIMD checks... */ +#define HAVE_SSE 0 #define HAVE_SIMD 0 #ifdef MINIMP3_ONLY_SIMD #error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled -#endif -#endif - -#else - +#endif /* MINIMP3_ONLY_SIMD */ +#endif /* SIMD checks... */ +#else /* !defined(MINIMP3_NO_SIMD) */ #define HAVE_SIMD 0 +#endif /* !defined(MINIMP3_NO_SIMD) */ +#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64) +#define HAVE_ARMV6 1 +static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a) +{ + int32_t x = 0; + __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); + return x; +} +#else +#define HAVE_ARMV6 0 #endif typedef struct { const uint8_t *buf; - int pos; - int limit; + int pos, limit; } bs_t; typedef struct { - uint8_t total_bands; - uint8_t stereo_bands; - uint8_t bitalloc[64]; - uint8_t scfcod[64]; float scf[3*64]; + uint8_t total_bands, stereo_bands, bitalloc[64], scfcod[64]; } L12_scale_info; typedef struct { - uint8_t tab_offset; - uint8_t code_tab_width; - uint8_t band_count; + uint8_t tab_offset, code_tab_width, band_count; } L12_subband_alloc_t; typedef struct { const uint8_t *sfbtab; - uint16_t part_23_length; - uint16_t big_values; - uint16_t scalefac_compress; - uint8_t global_gain; - uint8_t block_type; - uint8_t mixed_block_flag; - uint8_t n_long_sfb; - uint8_t n_short_sfb; - uint8_t table_select[3]; - uint8_t region_count[3]; - uint8_t subblock_gain[3]; - uint8_t preflag; - uint8_t scalefac_scale; - uint8_t count1_table; - uint8_t scfsi; + uint16_t part_23_length, big_values, scalefac_compress; + uint8_t 
global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb; + uint8_t table_select[3], region_count[3], subblock_gain[3]; + uint8_t preflag, scalefac_scale, count1_table, scfsi; } L3_gr_info_t; typedef struct @@ -241,10 +234,8 @@ typedef struct bs_t bs; uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES]; L3_gr_info_t gr_info[4]; - float grbuf[2][576]; - float scf[40]; + float grbuf[2][576], scf[40], syn[18 + 15][2*32]; uint8_t ist_pos[2][39]; - float syn[18 + 15][2*32]; } mp3dec_scratch_t; static void bs_init(bs_t *bs, const uint8_t *data, int bytes) @@ -309,7 +300,7 @@ static unsigned hdr_frame_samples(const uint8_t *h) static int hdr_frame_bytes(const uint8_t *h, int free_format_size) { - int frame_bytes = hdr_frame_samples(h) * hdr_bitrate_kbps(h) * 125 / hdr_sample_rate_hz(h); + int frame_bytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h); if (HDR_IS_LAYER_1(h)) { frame_bytes &= ~3; /* slot align */ @@ -385,7 +376,7 @@ static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int b if (mask & m) { int b = get_bits(bs, 6); - s = g_deq_L12[ba*3 - 6 + b % 3] * (1 << 21 >> b/3); + s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3); } *scf++ = s; } @@ -403,7 +394,7 @@ static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sc 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14, 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16 }; - const L12_subband_alloc_t * subband_alloc = L12_subband_alloc_table(hdr, sci); + const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci); int i, k = 0, ba_bits = 0; const uint8_t *ba_code_tab = g_bitalloc_code_tab; @@ -427,12 +418,12 @@ static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sc sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0; } - for (i = 0; i < 2 * sci->total_bands; i++) + for (i = 0; i < 2*sci->total_bands; i++) { sci->scfcod[i] = sci->bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 
2 : get_bits(bs, 2) : 6; } - L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands * 2, sci->scf); + L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf); for (i = sci->stereo_bands; i < sci->total_bands; i++) { @@ -488,12 +479,11 @@ static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst) } } } -#endif +#endif /* MINIMP3_ONLY_MP3 */ static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr) { - static const uint8_t g_scf_long[9][23] = { - { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + static const uint8_t g_scf_long[8][23] = { { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 }, { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, @@ -503,8 +493,7 @@ static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr) { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 }, { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 } }; - static const uint8_t g_scf_short[9][40] = { - { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + static const uint8_t g_scf_short[8][40] = { { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, @@ -514,8 +503,7 @@ static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr) { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } }; - static const uint8_t g_scf_mixed[9][40] = { - { 
6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + static const uint8_t g_scf_mixed[8][40] = { { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, @@ -528,7 +516,7 @@ static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr) unsigned tables, scfsi = 0; int main_data_begin, part_23_sum = 0; - int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); + int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0); int gr_count = HDR_IS_MONO(hdr) ? 1 : 2; if (HDR_TEST_MPEG1(hdr)) @@ -725,17 +713,19 @@ static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *b } } +static const float g_pow43[129 + 16] = { + 0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f, + 
0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f +}; + static float L3_pow_43(int x) { - static const float g_pow43[129] = { - 
0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f - }; float frac; int sign, mult = 256; if (x < 129) { - return g_pow43[x]; + return g_pow43[16 + x]; } if (x < 1024) @@ -746,37 +736,36 @@ static float L3_pow_43(int x) sign = 2*x & 64; frac = (float)((x & 63) - sign) / ((x & ~63) + sign); - return g_pow43[(x + sign) >> 6] * (1.f + frac * ((4.f/3) + frac * (2.f/9))) * mult; + return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult; } static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, 
int layer3gr_limit) { - static const float g_pow43_signed[32] = { 0,0,1,-1,2.519842f,-2.519842f,4.326749f,-4.326749f,6.349604f,-6.349604f,8.549880f,-8.549880f,10.902724f,-10.902724f,13.390518f,-13.390518f,16.000000f,-16.000000f,18.720754f,-18.720754f,21.544347f,-21.544347f,24.463781f,-24.463781f,27.473142f,-27.473142f,30.567351f,-30.567351f,33.741992f,-33.741992f,36.993181f,-36.993181f }; - static const int16_t tab0[32] = { 0, }; - static const int16_t tab1[] = { 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256 }; - static const int16_t tab2[] = { -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288 }; - static const int16_t tab3[] = { -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288 }; - static const int16_t tab5[] = { -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258 }; - static const int16_t tab6[] = { -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259 }; - static const int16_t tab7[] = { -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258 }; - static const int16_t tab8[] = { 
-252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258 }; - static const int16_t tab9[] = { -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259 }; - static const int16_t tab10[] = { -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258 }; - static const int16_t tab11[] = { -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290 }; - static const int16_t tab12[] = { 
-252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259 }; - static const int16_t tab13[] = { -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792
,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258 }; - static const int16_t tab15[] = { -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259 }; - static const int16_t tab16[] = { 
-251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258 }; - static const int16_t tab24[] = { 
-253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 }; - static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205}; + static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256, + 
-255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288, + -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288, + -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258, + -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259, + -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258, + -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258, + -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259, + 
-251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258, + -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290, + -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259, + 
-250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258, + 
-250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259, + 
-251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258, + 
-253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 }; + static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 }; static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 }; - static const int16_t * const tabindex[2*16] = { tab0,tab1,tab2,tab3,tab0,tab5,tab6,tab7,tab8,tab9,tab10,tab11,tab12,tab13,tab0,tab15,tab16,tab16,tab16,tab16,tab16,tab16,tab16,tab16,tab24,tab24,tab24,tab24,tab24,tab24,tab24,tab24 }; + static const int16_t 
tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 }; static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 }; -#define PEEK_BITS(n) (bs_cache >> (32 - n)) -#define FLUSH_BITS(n) {bs_cache <<= (n); bs_sh += (n);} -#define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; } -#define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh) +#define PEEK_BITS(n) (bs_cache >> (32 - n)) +#define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); } +#define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; } +#define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh) float one = 0.0f; int ireg = 0, big_val_cnt = gr_info->big_values; @@ -790,31 +779,31 @@ static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const { int tab_num = gr_info->table_select[ireg]; int sfb_cnt = gr_info->region_count[ireg++]; - const short * codebook = tabindex[tab_num]; + const int16_t *codebook = tabs + tabindex[tab_num]; int linbits = g_linbits[tab_num]; - do + if (linbits) { - np = *sfb++ / 2; - pairs_to_decode = MINIMP3_MIN(big_val_cnt, np); - one = *scf++; do { - int j, w = 5; - int leaf = codebook[PEEK_BITS(w)]; - while (leaf < 0) + np = *sfb++ / 2; + pairs_to_decode = MINIMP3_MIN(big_val_cnt, np); + one = *scf++; + do { - FLUSH_BITS(w); - w = leaf & 7; - leaf = codebook[PEEK_BITS(w) - (leaf >> 3)]; - } - FLUSH_BITS(leaf >> 8); - - for (j = 0; j < 2; j++, dst++, leaf >>= 4) - { - int lsb = leaf & 0x0F; - if (lsb) + int j, w = 5; + int leaf = codebook[PEEK_BITS(w)]; + while (leaf < 0) { - if (lsb == 15 && linbits) + FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[PEEK_BITS(w) - (leaf >> 3)]; + } + FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + if (lsb == 15) { lsb += PEEK_BITS(linbits); 
FLUSH_BITS(linbits); @@ -822,14 +811,42 @@ static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1); } else { - *dst = g_pow43_signed[lsb*2 + (bs_cache >> 31)]*one; + *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; } - FLUSH_BITS(1); + FLUSH_BITS(lsb ? 1 : 0); } - } - CHECK_BITS; - } while (--pairs_to_decode); - } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0 ); + CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } else + { + do + { + np = *sfb++ / 2; + pairs_to_decode = MINIMP3_MIN(big_val_cnt, np); + one = *scf++; + do + { + int j, w = 5; + int leaf = codebook[PEEK_BITS(w)]; + while (leaf < 0) + { + FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[PEEK_BITS(w) - (leaf >> 3)]; + } + FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; + FLUSH_BITS(lsb ? 1 : 0); + } + CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } } for (np = 1 - big_val_cnt;; dst += 4) @@ -845,8 +862,8 @@ static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const { break; } -#define RELOAD_SCALEFACTOR if (!--np) {np = *sfb++/2; if (!np) break; one = *scf++;} -#define DEQ_COUNT1(s) if (leaf & (128 >> s)) {dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1)} +#define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; } +#define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? 
-one : one; FLUSH_BITS(1) } RELOAD_SCALEFACTOR; DEQ_COUNT1(0); DEQ_COUNT1(1); @@ -864,14 +881,24 @@ static void L3_midside_stereo(float *left, int n) int i = 0; float *right = left + 576; #if HAVE_SIMD - if (have_simd()) for (; i < n - 3; i += 4) + if (have_simd()) { - f4 vl = VLD(left + i); - f4 vr = VLD(right + i); - VSTORE(left + i, VADD(vl, vr)); - VSTORE(right + i, VSUB(vl, vr)); - } + for (; i < n - 3; i += 4) + { + f4 vl = VLD(left + i); + f4 vr = VLD(right + i); + VSTORE(left + i, VADD(vl, vr)); + VSTORE(right + i, VSUB(vl, vr)); + } +#ifdef __GNUC__ + /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc. + * For more info see: https://github.com/lieff/minimp3/issues/88 + */ + if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0) + return; #endif + } +#endif /* HAVE_SIMD */ for (; i < n; i++) { float a = left[i]; @@ -962,7 +989,7 @@ static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_ int prev = itop - max_blocks; ist_pos[itop] = max_band[i] >= prev ? 
default_pos : ist_pos[prev]; } - L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress&1); + L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1); } static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb) @@ -979,7 +1006,7 @@ static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb) *dst++ = src[2*len]; } } - memcpy(grbuf, scratch, (dst - scratch) * sizeof(float)); + memcpy(grbuf, scratch, (dst - scratch)*sizeof(float)); } static void L3_antialias(float *grbuf, int nbands) @@ -1004,7 +1031,7 @@ static void L3_antialias(float *grbuf, int nbands) vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0)); VSTORE(grbuf + 14 - i, VREV(vd)); } -#endif +#endif /* HAVE_SIMD */ #ifndef MINIMP3_ONLY_SIMD for(; i < 8; i++) { @@ -1013,7 +1040,7 @@ static void L3_antialias(float *grbuf, int nbands) grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i]; grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i]; } -#endif +#endif /* MINIMP3_ONLY_SIMD */ } } @@ -1102,7 +1129,7 @@ static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nb vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0)); VSTORE(grbuf + 14 - i, VREV(vsum)); } -#endif +#endif /* HAVE_SIMD */ for (; i < 9; i++) { float ovl = overlap[i]; @@ -1302,9 +1329,9 @@ static void mp3d_DCT_II(float *grbuf, int n) { #if HAVE_SSE #define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v) -#else +#else /* HAVE_SSE */ #define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v)) -#endif +#endif /* HAVE_SSE */ for (i = 0; i < 7; i++, y += 4*18) { f4 s = VADD(t[3][i], t[3][i + 1]); @@ -1334,10 +1361,10 @@ static void mp3d_DCT_II(float *grbuf, int n) VSAVE4(3, t[3][7]); } } else -#endif +#endif /* HAVE_SIMD */ #ifdef MINIMP3_ONLY_SIMD - {} -#else + {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */ +#else /* MINIMP3_ONLY_SIMD */ for (; k < n; k++) { float t[4][8], *x, *y = grbuf + k; @@ -1396,21 +1423,32 @@ 
static void mp3d_DCT_II(float *grbuf, int n) y[2*18] = t[1][7]; y[3*18] = t[3][7]; } -#endif +#endif /* MINIMP3_ONLY_SIMD */ } -static short mp3d_scale_pcm(float sample) +#ifndef MINIMP3_FLOAT_OUTPUT +static int16_t mp3d_scale_pcm(float sample) { - if (sample > 32767.0) return (short) 32767; - if (sample < -32768.0) return (short)-32768; - int s = (int)(sample + .5f); +#if HAVE_ARMV6 + int32_t s32 = (int32_t)(sample + .5f); + s32 -= (s32 < 0); + int16_t s = (int16_t)minimp3_clip_int16_arm(s32); +#else + if (sample >= 32766.5) return (int16_t) 32767; + if (sample <= -32767.5) return (int16_t)-32768; + int16_t s = (int16_t)(sample + .5f); s -= (s < 0); /* away from zero, to be compliant */ - if (s > 32767) return (short) 32767; - if (s < -32768) return (short)-32768; - return (short)s; +#endif + return s; } +#else /* MINIMP3_FLOAT_OUTPUT */ +static float mp3d_scale_pcm(float sample) +{ + return sample*(1.f/32768.f); +} +#endif /* MINIMP3_FLOAT_OUTPUT */ -static void mp3d_synth_pair(short *pcm, int nch, const float *z) +static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z) { float a; a = (z[14*64] - z[ 0]) * 29; @@ -1435,11 +1473,11 @@ static void mp3d_synth_pair(short *pcm, int nch, const float *z) pcm[16*nch] = mp3d_scale_pcm(a); } -static void mp3d_synth(float *xl, short *dstl, int nch, float *lins) +static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins) { int i; float *xr = xl + 576*(nch - 1); - short *dstr = dstl + (nch - 1); + mp3d_sample_t *dstr = dstl + (nch - 1); static const float g_win[] = { -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992, @@ -1458,8 +1496,8 @@ static void mp3d_synth(float *xl, short *dstl, int nch, float *lins) -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494, -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290 }; - float * zlin = lins + 15*64; - const float * w = g_win; + float *zlin = lins + 15*64; + const float *w = 
g_win; zlin[4*15] = xl[18*16]; zlin[4*15 + 1] = xr[18*16]; @@ -1480,10 +1518,10 @@ static void mp3d_synth(float *xl, short *dstl, int nch, float *lins) if (have_simd()) for (i = 14; i >= 0; i--) { #define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]); -#define V0(k) {VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0),VMUL(vy, w1)); } -#define V1(k) {VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0),VMUL(vy, w1))); } -#define V2(k) {VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1),VMUL(vz, w0))); } - f4 a,b; +#define V0(k) { VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0), VMUL(vy, w1)); } +#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); } +#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); } + f4 a, b; zlin[4*i] = xl[18*(31 - i)]; zlin[4*i + 1] = xr[18*(31 - i)]; zlin[4*i + 2] = xl[1 + 18*(31 - i)]; @@ -1496,6 +1534,7 @@ static void mp3d_synth(float *xl, short *dstl, int nch, float *lins) V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7) { +#ifndef MINIMP3_FLOAT_OUTPUT #if HAVE_SSE static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f }; static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f }; @@ -1509,7 +1548,7 @@ static void mp3d_synth(float *xl, short *dstl, int nch, float *lins) dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7); dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2); dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6); -#else +#else /* HAVE_SSE */ int16x4_t pcma, pcmb; a = VADD(a, VSET(0.5f)); b = VADD(b, VSET(0.5f)); @@ -1523,19 +1562,45 @@ static void mp3d_synth(float *xl, short *dstl, int nch, float *lins) vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3); vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2); 
vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2); -#endif +#endif /* HAVE_SSE */ + +#else /* MINIMP3_FLOAT_OUTPUT */ + + static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f }; + a = VMUL(a, g_scale); + b = VMUL(b, g_scale); +#if HAVE_SSE + _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2))); +#else /* HAVE_SSE */ + vst1q_lane_f32(dstr + (15 - i)*nch, a, 1); + vst1q_lane_f32(dstr + (17 + i)*nch, b, 1); + vst1q_lane_f32(dstl + (15 - i)*nch, a, 0); + vst1q_lane_f32(dstl + (17 + i)*nch, b, 0); + vst1q_lane_f32(dstr + (47 - i)*nch, a, 3); + vst1q_lane_f32(dstr + (49 + i)*nch, b, 3); + vst1q_lane_f32(dstl + (47 - i)*nch, a, 2); + vst1q_lane_f32(dstl + (49 + i)*nch, b, 2); +#endif /* HAVE_SSE */ +#endif /* MINIMP3_FLOAT_OUTPUT */ } } else -#endif +#endif /* HAVE_SIMD */ #ifdef MINIMP3_ONLY_SIMD - {} -#else + {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */ +#else /* MINIMP3_ONLY_SIMD */ for (i = 14; i >= 0; i--) { -#define LOAD(k) float w0 = *w++; float w1 = *w++; float * vz = &zlin[4*i - k*64]; float * vy = &zlin[4*i - (15 - k)*64]; -#define S0(k) {int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j] * w1 + vy[j] * w0, a[j] = vz[j] * w0 - vy[j] * w1;} -#define S1(k) {int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j] * w1 + vy[j] * w0, a[j] += vz[j] * w0 - vy[j] * w1;} -#define S2(k) {int 
j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j] * w1 + vy[j] * w0, a[j] += vy[j] * w1 - vz[j] * w0;} +#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64]; +#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; } +#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; } +#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; } float a[4], b[4]; zlin[4*i] = xl[18*(31 - i)]; @@ -1558,10 +1623,10 @@ static void mp3d_synth(float *xl, short *dstl, int nch, float *lins) dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]); dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]); } -#endif +#endif /* MINIMP3_ONLY_SIMD */ } -static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, short *pcm, float *lins) +static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins) { int i; for (i = 0; i < nch; i++) @@ -1583,7 +1648,7 @@ static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int n qmf_state[i] = lins[nbands*64 + i]; } } else -#endif +#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */ { memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64); } @@ -1626,9 +1691,9 @@ static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_b *free_format_bytes = fb; } } - - if (frame_bytes && i + frame_and_padding <= mp3_bytes && - mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) + if ((frame_bytes && i + frame_and_padding <= mp3_bytes && + mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) || + (!i && frame_and_padding == mp3_bytes)) { *ptr_frame_bytes = frame_and_padding; return i; @@ -1637,7 +1702,7 @@ static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_b } } *ptr_frame_bytes = 0; - return i; + return mp3_bytes; } void 
mp3dec_init(mp3dec_t *dec) @@ -1645,7 +1710,7 @@ void mp3dec_init(mp3dec_t *dec) dec->header[0] = 0; } -int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info) +int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info) { int i = 0, igr, frame_size = 0, success = 1; const uint8_t *hdr; @@ -1674,11 +1739,17 @@ int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, short hdr = mp3 + i; memcpy(dec->header, hdr, HDR_SIZE); info->frame_bytes = i + frame_size; + info->frame_offset = i; info->channels = HDR_IS_MONO(hdr) ? 1 : 2; info->hz = hdr_sample_rate_hz(hdr); info->layer = 4 - HDR_GET_LAYER(hdr); info->bitrate_kbps = hdr_bitrate_kbps(hdr); + if (!pcm) + { + return hdr_frame_samples(hdr); + } + bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE); if (HDR_IS_CRC(hdr)) { @@ -1708,7 +1779,7 @@ int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, short { #ifdef MINIMP3_ONLY_MP3 return 0; -#else +#else /* MINIMP3_ONLY_MP3 */ L12_scale_info sci[1]; L12_read_scale_info(hdr, bs_frame, sci); @@ -1729,8 +1800,66 @@ int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, short return 0; } } -#endif +#endif /* MINIMP3_ONLY_MP3 */ } return success*hdr_frame_samples(dec->header); } -#endif /*MINIMP3_IMPLEMENTATION*/ + +#ifdef MINIMP3_FLOAT_OUTPUT +void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples) +{ + int i = 0; +#if HAVE_SIMD + int aligned_count = num_samples & ~7; + for(; i < aligned_count; i += 8) + { + static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f }; + f4 a = VMUL(VLD(&in[i ]), g_scale); + f4 b = VMUL(VLD(&in[i+4]), g_scale); +#if HAVE_SSE + static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f }; + static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f }; + __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, 
g_max), g_min)), + _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min))); + out[i ] = _mm_extract_epi16(pcm8, 0); + out[i+1] = _mm_extract_epi16(pcm8, 1); + out[i+2] = _mm_extract_epi16(pcm8, 2); + out[i+3] = _mm_extract_epi16(pcm8, 3); + out[i+4] = _mm_extract_epi16(pcm8, 4); + out[i+5] = _mm_extract_epi16(pcm8, 5); + out[i+6] = _mm_extract_epi16(pcm8, 6); + out[i+7] = _mm_extract_epi16(pcm8, 7); +#else /* HAVE_SSE */ + int16x4_t pcma, pcmb; + a = VADD(a, VSET(0.5f)); + b = VADD(b, VSET(0.5f)); + pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0))))); + pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0))))); + vst1_lane_s16(out+i , pcma, 0); + vst1_lane_s16(out+i+1, pcma, 1); + vst1_lane_s16(out+i+2, pcma, 2); + vst1_lane_s16(out+i+3, pcma, 3); + vst1_lane_s16(out+i+4, pcmb, 0); + vst1_lane_s16(out+i+5, pcmb, 1); + vst1_lane_s16(out+i+6, pcmb, 2); + vst1_lane_s16(out+i+7, pcmb, 3); +#endif /* HAVE_SSE */ + } +#endif /* HAVE_SIMD */ + for(; i < num_samples; i++) + { + float sample = in[i] * 32768.0f; + if (sample >= 32766.5) + out[i] = (int16_t) 32767; + else if (sample <= -32767.5) + out[i] = (int16_t)-32768; + else + { + int16_t s = (int16_t)(sample + .5f); + s -= (s < 0); /* away from zero, to be compliant */ + out[i] = s; + } + } +} +#endif /* MINIMP3_FLOAT_OUTPUT */ +#endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */ From c332d6f89acfe0289b0ae00ba4294124848e7259 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sun, 12 May 2024 15:00:02 +0300 Subject: [PATCH 127/292] Add cooking support for Windows on ARM --- Source/Editor/Cooker/CookingData.h | 6 ++++++ Source/Editor/Cooker/GameCooker.cpp | 16 +++++++++++++++- Source/Editor/Cooker/Steps/CookAssetsStep.cpp | 1 + Source/Editor/Cooker/Steps/DeployDataStep.cpp | 1 + Source/Editor/Windows/GameCookerWindow.cs | 13 +++---------- 5 files changed, 26 insertions(+), 11 deletions(-) diff --git 
a/Source/Editor/Cooker/CookingData.h b/Source/Editor/Cooker/CookingData.h index ef12d204e..06e48cc95 100644 --- a/Source/Editor/Cooker/CookingData.h +++ b/Source/Editor/Cooker/CookingData.h @@ -134,6 +134,12 @@ API_ENUM() enum class BuildPlatform ///
API_ENUM(Attributes="EditorDisplay(null, \"iOS ARM64\")") iOSARM64 = 14, + + /// + /// Windows (ARM64) + /// + API_ENUM(Attributes = "EditorDisplay(null, \"Windows ARM64\")") + WindowsARM64 = 15, }; /// diff --git a/Source/Editor/Cooker/GameCooker.cpp b/Source/Editor/Cooker/GameCooker.cpp index 118dd397f..17392adee 100644 --- a/Source/Editor/Cooker/GameCooker.cpp +++ b/Source/Editor/Cooker/GameCooker.cpp @@ -148,6 +148,8 @@ const Char* ToString(const BuildPlatform platform) return TEXT("Mac ARM64"); case BuildPlatform::iOSARM64: return TEXT("iOS ARM64"); + case BuildPlatform::WindowsARM64: + return TEXT("Windows ARM64"); default: return TEXT(""); } @@ -300,6 +302,10 @@ void CookingData::GetBuildPlatformName(const Char*& platform, const Char*& archi platform = TEXT("iOS"); architecture = TEXT("ARM64"); break; + case BuildPlatform::WindowsARM64: + platform = TEXT("Windows"); + architecture = TEXT("ARM64"); + break; default: LOG(Fatal, "Unknown or unsupported build platform."); } @@ -386,6 +392,9 @@ PlatformTools* GameCooker::GetTools(BuildPlatform platform) case BuildPlatform::Windows64: result = New(ArchitectureType::x64); break; + case BuildPlatform::WindowsARM64: + result = New(ArchitectureType::ARM64); + break; #endif #if PLATFORM_TOOLS_UWP case BuildPlatform::UWPx86: @@ -547,7 +556,12 @@ void GameCooker::GetCurrentPlatform(PlatformType& platform, BuildPlatform& build switch (PLATFORM_TYPE) { case PlatformType::Windows: - buildPlatform = PLATFORM_64BITS ? 
BuildPlatform::Windows64 : BuildPlatform::Windows32; + if (PLATFORM_ARCH == ArchitectureType::x64) + buildPlatform = BuildPlatform::Windows64; + else if (PLATFORM_ARCH == ArchitectureType::ARM64) + buildPlatform = BuildPlatform::WindowsARM64; + else + buildPlatform = BuildPlatform::Windows32; break; case PlatformType::XboxOne: buildPlatform = BuildPlatform::XboxOne; diff --git a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp index 52050708e..dddfc3ff9 100644 --- a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp +++ b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp @@ -447,6 +447,7 @@ bool ProcessShaderBase(CookAssetsStep::AssetCookData& data, ShaderAssetBase* ass #if PLATFORM_TOOLS_WINDOWS case BuildPlatform::Windows32: case BuildPlatform::Windows64: + case BuildPlatform::WindowsARM64: { const char* platformDefineName = "PLATFORM_WINDOWS"; const auto settings = WindowsPlatformSettings::Get(); diff --git a/Source/Editor/Cooker/Steps/DeployDataStep.cpp b/Source/Editor/Cooker/Steps/DeployDataStep.cpp index 0b4f5d939..76fb3f1b2 100644 --- a/Source/Editor/Cooker/Steps/DeployDataStep.cpp +++ b/Source/Editor/Cooker/Steps/DeployDataStep.cpp @@ -73,6 +73,7 @@ bool DeployDataStep::Perform(CookingData& data) { case BuildPlatform::Windows32: case BuildPlatform::Windows64: + case BuildPlatform::WindowsARM64: canUseSystemDotnet = PLATFORM_TYPE == PlatformType::Windows; break; case BuildPlatform::LinuxX64: diff --git a/Source/Editor/Windows/GameCookerWindow.cs b/Source/Editor/Windows/GameCookerWindow.cs index 55262910c..855faed63 100644 --- a/Source/Editor/Windows/GameCookerWindow.cs +++ b/Source/Editor/Windows/GameCookerWindow.cs @@ -767,13 +767,6 @@ namespace FlaxEditor.Windows Platform = BuildPlatform.Windows64, Mode = BuildConfiguration.Development, }, - new BuildTarget - { - Name = "Windows 32bit", - Output = "Output\\Win32", - Platform = BuildPlatform.Windows32, - Mode = BuildConfiguration.Development, - }, } }; _data = presets; @@ 
-793,9 +786,9 @@ namespace FlaxEditor.Windows Array.Copy(_data[_selectedPresetIndex].Targets, targets, count); targets[count] = new BuildTarget { - Name = "Xbox One", - Output = "Output\\XboxOne", - Platform = BuildPlatform.XboxOne, + Name = "Windows 64bit", + Output = "Output\\Win64", + Platform = BuildPlatform.Windows64, Mode = BuildConfiguration.Development, }; _data[_selectedPresetIndex].Targets = targets; From de10f2a6e5d3afcde173e9a753f080b64e95d1ab Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sun, 12 May 2024 15:00:51 +0300 Subject: [PATCH 128/292] Copy hostfxr from platform architecture specific dependencies folder --- Source/Editor/Cooker/Steps/DeployDataStep.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Cooker/Steps/DeployDataStep.cpp b/Source/Editor/Cooker/Steps/DeployDataStep.cpp index 76fb3f1b2..551002e0b 100644 --- a/Source/Editor/Cooker/Steps/DeployDataStep.cpp +++ b/Source/Editor/Cooker/Steps/DeployDataStep.cpp @@ -160,7 +160,20 @@ bool DeployDataStep::Perform(CookingData& data) } else { - failed |= EditorUtilities::CopyDirectoryIfNewer(dstDotnet / TEXT("host/fxr") / version, srcDotnet / TEXT("host/fxr") / version, true); + // TODO: hostfxr for target platform should be copied from nuget package location: microsoft.netcore.app.runtime.//runtimes//native/hostfxr.dll + String dstHostfxr = dstDotnet / TEXT("host/fxr") / version; + if (!FileSystem::DirectoryExists(dstHostfxr)) + FileSystem::CreateDirectory(dstHostfxr); + const Char *platformName, *archName; + data.GetBuildPlatformName(platformName, archName); + if (data.Platform == BuildPlatform::Windows64 || data.Platform == BuildPlatform::WindowsARM64 || data.Platform == BuildPlatform::Windows32) + failed |= FileSystem::CopyFile(dstHostfxr / TEXT("hostfxr.dll"), depsRoot / TEXT("ThirdParty") / archName / TEXT("hostfxr.dll")); + else if (data.Platform == BuildPlatform::LinuxX64) + failed |= FileSystem::CopyFile(dstHostfxr / TEXT("hostfxr.so"), 
depsRoot / TEXT("ThirdParty") / archName / TEXT("hostfxr.so")); + else if (data.Platform == BuildPlatform::MacOSx64 || data.Platform == BuildPlatform::MacOSARM64) + failed |= FileSystem::CopyFile(dstHostfxr / TEXT("hostfxr.dylib"), depsRoot / TEXT("ThirdParty") / archName / TEXT("hostfxr.dylib")); + else + failed |= true; failed |= EditorUtilities::CopyDirectoryIfNewer(dstDotnet / TEXT("shared/Microsoft.NETCore.App") / version, srcDotnet / TEXT("shared/Microsoft.NETCore.App") / version, true); } if (failed) From ba397836a64a2c48a23ba679da6876e18e564537 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Wed, 15 May 2024 20:56:16 +0300 Subject: [PATCH 129/292] Include configuration specific source files in solution configurations Include only relevant generated source files for selected solution configuration. Fixes Intellisense issues when both ARM64 and Win64 configurations are present in project. --- .../Flax.Build.Tests/Flax.Build.Tests.csproj | 2 +- Source/Tools/Flax.Build/Flax.Build.csproj | 2 +- .../VisualStudio/CSSDKProjectGenerator.cs | 38 +++++++++++++------ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Source/Tools/Flax.Build.Tests/Flax.Build.Tests.csproj b/Source/Tools/Flax.Build.Tests/Flax.Build.Tests.csproj index 873f916c4..0b7036058 100644 --- a/Source/Tools/Flax.Build.Tests/Flax.Build.Tests.csproj +++ b/Source/Tools/Flax.Build.Tests/Flax.Build.Tests.csproj @@ -2,7 +2,7 @@ net8.0 - 11.0 + 12.0 disable annotations true diff --git a/Source/Tools/Flax.Build/Flax.Build.csproj b/Source/Tools/Flax.Build/Flax.Build.csproj index 0270e7f82..d19d931ce 100644 --- a/Source/Tools/Flax.Build/Flax.Build.csproj +++ b/Source/Tools/Flax.Build/Flax.Build.csproj @@ -2,7 +2,7 @@ Exe net8.0 - 11.0 + 12.0 disable annotations false diff --git a/Source/Tools/Flax.Build/Projects/VisualStudio/CSSDKProjectGenerator.cs b/Source/Tools/Flax.Build/Projects/VisualStudio/CSSDKProjectGenerator.cs index 692c5132f..64c333499 100644 --- 
a/Source/Tools/Flax.Build/Projects/VisualStudio/CSSDKProjectGenerator.cs +++ b/Source/Tools/Flax.Build/Projects/VisualStudio/CSSDKProjectGenerator.cs @@ -162,7 +162,6 @@ namespace Flax.Build.Projects.VisualStudio csProjectFileContent.AppendLine(""); // Files and folders - csProjectFileContent.AppendLine(" "); var files = new List(); @@ -214,24 +213,41 @@ namespace Flax.Build.Projects.VisualStudio else csProjectFileContent.AppendLine(string.Format(" <{0} Include=\"{1}\" />", fileType, projectPath)); } + csProjectFileContent.AppendLine(" "); if (project.GeneratedSourceFiles != null) { - foreach (var file in project.GeneratedSourceFiles) + foreach (var group in project.GeneratedSourceFiles.GroupBy(x => GetGroupingFromPath(x), y => y)) { - string fileType; - if (file.EndsWith(".cs", StringComparison.OrdinalIgnoreCase)) - fileType = "Compile"; - else - fileType = "None"; + (string targetName, string platform, string arch, string configuration) = group.Key; - var filePath = file.Replace('/', '\\'); - csProjectFileContent.AppendLine(string.Format(" <{0} Visible=\"false\" Include=\"{1}\" />", fileType, filePath)); + var targetConfiguration = project.Targets.First(x => x.Name == targetName).ConfigurationName; + csProjectFileContent.AppendLine($" "); + + foreach (var file in group) + { + string fileType; + if (file.EndsWith(".cs", StringComparison.OrdinalIgnoreCase)) + fileType = "Compile"; + else + fileType = "None"; + + var filePath = file.Replace('/', '\\'); + csProjectFileContent.AppendLine(string.Format(" <{0} Visible=\"false\" Include=\"{1}\" />", fileType, filePath)); + } + + csProjectFileContent.AppendLine(" "); + } + + (string target, string platform, string arch, string configuration) GetGroupingFromPath(string path) + { + ReadOnlySpan span = path.AsSpan(); + Span split = stackalloc Range[path.Count((c) => c == '/' || c == '\\')]; + var _ = MemoryExtensions.SplitAny(path, split, [ '/', '\\' ]); + return (span[split[^5]].ToString(), span[split[^4]].ToString(), 
span[split[^3]].ToString(), span[split[^2]].ToString()); } } - csProjectFileContent.AppendLine(" "); - // End csProjectFileContent.AppendLine(""); From b0904fef690695d16050a77b296ccdfa987c9797 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Wed, 15 May 2024 21:48:22 +0300 Subject: [PATCH 130/292] Skip setup of ARM64 configuration for Windows with no compiler support --- .../Flax.Build/Build/Builder.Projects.cs | 4 +- Source/Tools/Flax.Build/Build/Platform.cs | 10 +++ .../Platforms/Windows/WindowsPlatform.cs | 18 +++++ .../Platforms/Windows/WindowsPlatformBase.cs | 75 +++++++++---------- .../Platforms/Windows/WindowsToolchainBase.cs | 2 + 5 files changed, 66 insertions(+), 43 deletions(-) diff --git a/Source/Tools/Flax.Build/Build/Builder.Projects.cs b/Source/Tools/Flax.Build/Build/Builder.Projects.cs index c928861eb..819ee74c3 100644 --- a/Source/Tools/Flax.Build/Build/Builder.Projects.cs +++ b/Source/Tools/Flax.Build/Build/Builder.Projects.cs @@ -125,9 +125,7 @@ namespace Flax.Build continue; if (!platform.HasRequiredSDKsInstalled && (!projectInfo.IsCSharpOnlyProject || platform != Platform.BuildPlatform)) continue; - - // Prevent generating configuration data for Windows x86 - if (architecture == TargetArchitecture.x86 && targetPlatform == TargetPlatform.Windows) + if (!platform.CanBuildArchitecture(architecture)) continue; string configurationText = targetName + '.' + platformName + '.' + configurationName; diff --git a/Source/Tools/Flax.Build/Build/Platform.cs b/Source/Tools/Flax.Build/Build/Platform.cs index 526dd2c54..21002f26a 100644 --- a/Source/Tools/Flax.Build/Build/Platform.cs +++ b/Source/Tools/Flax.Build/Build/Platform.cs @@ -188,6 +188,16 @@ namespace Flax.Build return false; } + /// + /// Determines whether this platform can compile or cross-compile for the specified architecture. + /// + /// The architecture. + /// true if this platform can build the specified architecture; otherwise, false. 
+ public virtual bool CanBuildArchitecture(TargetArchitecture targetArchitecture) + { + return IsPlatformSupported(Target, targetArchitecture); + } + /// /// Gets the path to the output file for the linker. /// diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs index 8fd361700..924d29f86 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatform.cs @@ -70,6 +70,24 @@ namespace Flax.Build.Platforms } } + /// + public override bool CanBuildArchitecture(TargetArchitecture targetArchitecture) + { + // Prevent generating configuration data for Windows x86 (deprecated) + if (targetArchitecture == TargetArchitecture.x86) + return false; + + // Check if we have a compiler for this architecture + var toolsets = GetToolsets(); + foreach (var toolset in toolsets) + { + if (GetVCToolPath(toolset.Key, BuildTargetArchitecture, targetArchitecture) != null) + return true; + } + + return false; + } + /// void IVisualStudioProjectCustomizer.WriteVisualStudioBegin(VisualStudioProject project, Platform platform, StringBuilder vcProjectFileContent, StringBuilder vcFiltersFileContent, StringBuilder vcUserFileContent) { diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs index d2b490704..1f58d7b45 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsPlatformBase.cs @@ -424,53 +424,48 @@ namespace Flax.Build.Platforms switch (toolset) { - case WindowsPlatformToolset.v140: + case WindowsPlatformToolset.v140: + { + if (hostArchitecture != TargetArchitecture.x86) { - if (hostArchitecture != TargetArchitecture.x86) - { - string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", "amd64", "cl.exe"); - if (File.Exists(nativeCompilerPath)) - { - return 
Path.GetDirectoryName(nativeCompilerPath); - } - - string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", "x86_amd64", "cl.exe"); - if (File.Exists(crossCompilerPath)) - { - return Path.GetDirectoryName(crossCompilerPath); - } - throw new Exception(string.Format("No {0} host compiler toolchain found in {1} or {2}", hostArchitecture.ToString(), nativeCompilerPath, crossCompilerPath)); - } - else - { - string compilerPath = Path.Combine(vcToolChainDir, "bin", "cl.exe"); - if (File.Exists(compilerPath)) - { - return Path.GetDirectoryName(compilerPath); - } - throw new Exception(string.Format("No {0} host compiler toolchain found in {1}", hostArchitecture.ToString())); - } - } - case WindowsPlatformToolset.v141: - case WindowsPlatformToolset.v142: - case WindowsPlatformToolset.v143: - case WindowsPlatformToolset.v144: - { - string hostFolder = hostArchitecture == TargetArchitecture.x86 ? "HostX86" : $"Host{hostArchitecture.ToString().ToLower()}"; - string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", hostFolder, architecture.ToString().ToLower(), "cl.exe"); + string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", "amd64", "cl.exe"); if (File.Exists(nativeCompilerPath)) - { return Path.GetDirectoryName(nativeCompilerPath); - } - string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", hostFolder, architecture.ToString().ToLower(), "cl.exe"); + string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", "x86_amd64", "cl.exe"); if (File.Exists(crossCompilerPath)) - { return Path.GetDirectoryName(crossCompilerPath); - } - throw new Exception(string.Format("No {0} host compiler toolchain found in {1} or {2}", hostArchitecture.ToString(), nativeCompilerPath, crossCompilerPath)); + + Log.Verbose(string.Format("No {0} host compiler toolchain found in {1} or {2}", hostArchitecture.ToString(), nativeCompilerPath, crossCompilerPath)); + return null; } - default: throw new ArgumentOutOfRangeException(nameof(toolset), toolset, null); + else + { 
+ string compilerPath = Path.Combine(vcToolChainDir, "bin", "cl.exe"); + if (File.Exists(compilerPath)) + return Path.GetDirectoryName(compilerPath); + Log.Verbose(string.Format("No {0} host compiler toolchain found in {1}", hostArchitecture.ToString())); + return null; + } + } + case WindowsPlatformToolset.v141: + case WindowsPlatformToolset.v142: + case WindowsPlatformToolset.v143: + case WindowsPlatformToolset.v144: + { + string hostFolder = hostArchitecture == TargetArchitecture.x86 ? "HostX86" : $"Host{hostArchitecture.ToString().ToLower()}"; + string nativeCompilerPath = Path.Combine(vcToolChainDir, "bin", hostFolder, architecture.ToString().ToLower(), "cl.exe"); + if (File.Exists(nativeCompilerPath)) + return Path.GetDirectoryName(nativeCompilerPath); + + string crossCompilerPath = Path.Combine(vcToolChainDir, "bin", hostFolder, architecture.ToString().ToLower(), "cl.exe"); + if (File.Exists(crossCompilerPath)) + return Path.GetDirectoryName(crossCompilerPath); + + Log.Verbose(string.Format("No {0} host compiler toolchain found in {1} or {2}", hostArchitecture.ToString(), nativeCompilerPath, crossCompilerPath)); + return null; + } + default: throw new ArgumentOutOfRangeException(nameof(toolset), toolset, null); } } diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs index 6df1c38eb..58a3c030f 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs @@ -136,6 +136,8 @@ namespace Flax.Build.Platforms // Get the tools paths var hostArchitecture = Platform.BuildTargetArchitecture; _vcToolPath = WindowsPlatformBase.GetVCToolPath(Toolset, hostArchitecture, Architecture); + if (string.IsNullOrEmpty(_vcToolPath)) + throw new Exception(string.Format("No {0} host compiler tools found for target architecture {1}", hostArchitecture, Architecture)); _compilerPath = 
Path.Combine(_vcToolPath, "cl.exe"); _linkerPath = Path.Combine(_vcToolPath, "link.exe"); _libToolPath = Path.Combine(_vcToolPath, "lib.exe"); From 6d5c7bcaf54e3515d9e7bbc99bde2863ea65fa37 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Wed, 15 May 2024 22:04:54 +0300 Subject: [PATCH 131/292] Hide Visual Studio solution architectures not supported by main project --- .../Projects/VisualStudio/VisualStudioProjectGenerator.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Tools/Flax.Build/Projects/VisualStudio/VisualStudioProjectGenerator.cs b/Source/Tools/Flax.Build/Projects/VisualStudio/VisualStudioProjectGenerator.cs index 5ca85c448..c524416a7 100644 --- a/Source/Tools/Flax.Build/Projects/VisualStudio/VisualStudioProjectGenerator.cs +++ b/Source/Tools/Flax.Build/Projects/VisualStudio/VisualStudioProjectGenerator.cs @@ -434,6 +434,7 @@ namespace Flax.Build.Projects.VisualStudio // Collect all unique configurations var configurations = new HashSet(); + var mainArchitectures = solution.MainProject.Targets.SelectMany(x => x.Architectures).Distinct().ToArray(); foreach (var project in projects) { if (project.Configurations == null || project.Configurations.Count == 0) @@ -445,6 +446,10 @@ namespace Flax.Build.Projects.VisualStudio foreach (var configuration in project.Configurations) { + // Skip architectures which are not included in the game project + if (!mainArchitectures.Contains(configuration.Architecture)) + continue; + configurations.Add(new SolutionConfiguration(configuration)); } } From 40cc967c30e7297f580e433d088d0b4421cf084b Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sun, 19 May 2024 23:36:33 +0300 Subject: [PATCH 132/292] Fix alignment issues in stack allocators --- Source/Engine/Core/Memory/Allocation.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Core/Memory/Allocation.h b/Source/Engine/Core/Memory/Allocation.h index 42b39a682..7d28c70cb 100644 --- a/Source/Engine/Core/Memory/Allocation.h +++ 
b/Source/Engine/Core/Memory/Allocation.h @@ -15,7 +15,7 @@ public: enum { HasSwap = false }; template - class Data + class alignas(sizeof(void*)) Data { private: byte _data[Capacity * sizeof(T)]; @@ -183,7 +183,7 @@ public: enum { HasSwap = false }; template - class Data + class alignas(sizeof(void*)) Data { private: typedef typename OtherAllocator::template Data OtherData; From 2492d0b38f819c13022a431c5aa41c2b9231486a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 00:51:11 +0200 Subject: [PATCH 133/292] Refactor `WindowsFileSystemWatcher` to properly handle file modifications --- .../Platform/Base/FileSystemWatcherBase.h | 9 +- .../Platform/Mac/MacFileSystemWatcher.cpp | 12 +- .../Windows/WindowsFileSystemWatcher.cpp | 192 +++++++++--------- .../Windows/WindowsFileSystemWatcher.h | 12 +- 4 files changed, 116 insertions(+), 109 deletions(-) diff --git a/Source/Engine/Platform/Base/FileSystemWatcherBase.h b/Source/Engine/Platform/Base/FileSystemWatcherBase.h index df57695bd..81e28335b 100644 --- a/Source/Engine/Platform/Base/FileSystemWatcherBase.h +++ b/Source/Engine/Platform/Base/FileSystemWatcherBase.h @@ -11,10 +11,11 @@ /// enum class FileSystemAction { - Unknown, - Create, - Delete, - Modify, + Unknown = 0, + Create = 1, + Delete = 2, + Modify = 4, + Rename = 8, }; /// diff --git a/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp b/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp index 1d4a64f78..ba3358d35 100644 --- a/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp +++ b/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp @@ -35,16 +35,18 @@ void DirectoryWatchCallback( ConstFSEventStreamRef StreamRef, void* FileWatcherP { action = FileSystemAction::Create; } - - if (renamed || modified) + if (renamed) { - action = FileSystemAction::Delete; + action = FileSystemAction::Rename; } - - if (removed) + if (rmodified) { action = FileSystemAction::Modify; } + if (removed) + { + action = FileSystemAction::Delete; + } const String 
resolvedPath = AppleUtils::ToString((CFStringRef)CFArrayGetValueAtIndex(EventPathArray,EventIndex)); diff --git a/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.cpp b/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.cpp index cb7eaeaf1..3194d8533 100644 --- a/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.cpp +++ b/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.cpp @@ -11,117 +11,45 @@ #include "Engine/Core/Collections/Array.h" #include "../Win32/IncludeWindowsHeaders.h" -BOOL RefreshWatch(WindowsFileSystemWatcher* watcher); - namespace FileSystemWatchers { CriticalSection Locker; Array> Watchers; Win32Thread* Thread = nullptr; + Windows::HANDLE IoHandle = INVALID_HANDLE_VALUE; bool ThreadActive; int32 Run() { + DWORD numBytes = 0; + LPOVERLAPPED overlapped; + ULONG_PTR compKey = 0; while (ThreadActive) { - SleepEx(INFINITE, true); + if (GetQueuedCompletionStatus(IoHandle, &numBytes, &compKey, &overlapped, INFINITE) && overlapped && numBytes != 0) + { + // Send further to the specific watcher + Locker.Lock(); + for (auto watcher : Watchers) + { + if ((OVERLAPPED*)&watcher->Overlapped == overlapped) + { + watcher->NotificationCompletion(); + break; + } + } + Locker.Unlock(); + } } return 0; } - - static void CALLBACK StopProc(ULONG_PTR arg) - { - ThreadActive = false; - } - - static void CALLBACK AddDirectoryProc(ULONG_PTR arg) - { - const auto watcher = (FileSystemWatcher*)arg; - RefreshWatch(watcher); - } }; -VOID CALLBACK NotificationCompletion(DWORD dwErrorCode, DWORD dwNumberOfBytesTransfered, LPOVERLAPPED lpOverlapped) -{ - auto watcher = (FileSystemWatcher*)lpOverlapped->hEvent; - if (dwErrorCode == ERROR_OPERATION_ABORTED || - dwNumberOfBytesTransfered <= 0 || - !watcher) - { - return; - } - - // Swap buffers - watcher->CurrentBuffer = (watcher->CurrentBuffer + 1) % 2; - - // Get the new read issued as fast as possible - if (!watcher->StopNow) - { - RefreshWatch(watcher); - } - - // Process notifications - auto notify = 
(FILE_NOTIFY_INFORMATION*)watcher->Buffer[(watcher->CurrentBuffer + 1) % 2]; - do - { - // Convert action type - auto action = FileSystemAction::Unknown; - switch (notify->Action) - { - case FILE_ACTION_RENAMED_NEW_NAME: - case FILE_ACTION_ADDED: - action = FileSystemAction::Create; - break; - case FILE_ACTION_RENAMED_OLD_NAME: - case FILE_ACTION_REMOVED: - action = FileSystemAction::Delete; - break; - case FILE_ACTION_MODIFIED: - action = FileSystemAction::Modify; - break; - default: - action = FileSystemAction::Unknown; - break; - } - if (action != FileSystemAction::Unknown) - { - // Build path - String path(notify->FileName, notify->FileNameLength / sizeof(WCHAR)); - path = watcher->Directory / path; - - // Send event - watcher->OnEvent(path, action); - } - - // Move to the next notify - notify = (FILE_NOTIFY_INFORMATION*)((byte*)notify + notify->NextEntryOffset); - } while (notify->NextEntryOffset != 0); -} - -// Refreshes the directory monitoring -BOOL RefreshWatch(WindowsFileSystemWatcher* watcher) -{ - DWORD dwBytesReturned = 0; - return ReadDirectoryChangesW( - watcher->DirectoryHandle, - watcher->Buffer[watcher->CurrentBuffer], - FileSystemWatcher::BufferSize, - watcher->WithSubDirs ? 
TRUE : FALSE, - FILE_NOTIFY_CHANGE_CREATION | FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_FILE_NAME, - &dwBytesReturned, - (OVERLAPPED*)&watcher->Overlapped, - NotificationCompletion - ); -} - WindowsFileSystemWatcher::WindowsFileSystemWatcher(const String& directory, bool withSubDirs) : FileSystemWatcherBase(directory, withSubDirs) - , StopNow(false) - , CurrentBuffer(0) { // Setup Platform::MemoryClear(&Overlapped, sizeof(Overlapped)); - ((OVERLAPPED&)Overlapped).hEvent = this; // Create directory handle for events handling DirectoryHandle = CreateFileW( @@ -144,25 +72,26 @@ WindowsFileSystemWatcher::WindowsFileSystemWatcher(const String& directory, bool FileSystemWatchers::Watchers.Add(this); if (!FileSystemWatchers::Thread) { + FileSystemWatchers::IoHandle = CreateIoCompletionPort(INVALID_HANDLE_VALUE, nullptr, 0, 1); FileSystemWatchers::ThreadActive = true; FileSystemWatchers::Thread = ThreadSpawner::Start(FileSystemWatchers::Run, TEXT("File System Watchers"), ThreadPriority::BelowNormal); } + CreateIoCompletionPort(DirectoryHandle, FileSystemWatchers::IoHandle, 0, 1); FileSystemWatchers::Locker.Unlock(); - // Issue the first read - QueueUserAPC(FileSystemWatchers::AddDirectoryProc, FileSystemWatchers::Thread->GetHandle(), (ULONG_PTR)this); + // Initialize filesystem events tracking + ReadDirectoryChanges(); } WindowsFileSystemWatcher::~WindowsFileSystemWatcher() { FileSystemWatchers::Locker.Lock(); FileSystemWatchers::Watchers.Remove(this); + StopNow = true; FileSystemWatchers::Locker.Unlock(); if (DirectoryHandle != INVALID_HANDLE_VALUE) { - StopNow = true; - #if WINVER >= 0x600 CancelIoEx(DirectoryHandle, (OVERLAPPED*)&Overlapped); #else @@ -180,12 +109,85 @@ WindowsFileSystemWatcher::~WindowsFileSystemWatcher() if (FileSystemWatchers::Watchers.IsEmpty() && FileSystemWatchers::Thread) { FileSystemWatchers::ThreadActive = false; - QueueUserAPC(FileSystemWatchers::StopProc, FileSystemWatchers::Thread->GetHandle(), 0); + FileSystemWatchers::Locker.Unlock(); + 
PostQueuedCompletionStatus(FileSystemWatchers::IoHandle, 0, 0, nullptr); FileSystemWatchers::Thread->Join(); + FileSystemWatchers::Locker.Lock(); Delete(FileSystemWatchers::Thread); FileSystemWatchers::Thread = nullptr; + CloseHandle(FileSystemWatchers::IoHandle); + FileSystemWatchers::IoHandle = INVALID_HANDLE_VALUE; } FileSystemWatchers::Locker.Unlock(); } +void WindowsFileSystemWatcher::ReadDirectoryChanges() +{ + BOOL result = ReadDirectoryChangesW( + DirectoryHandle, + Buffer, + BufferSize, + WithSubDirs ? TRUE : FALSE, + FILE_NOTIFY_CHANGE_CREATION | FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_FILE_NAME, + nullptr, + (OVERLAPPED*)&Overlapped, + nullptr + ); + if (!result) + { + LOG_WIN32_LAST_ERROR; + Sleep(1); + } +} + +void WindowsFileSystemWatcher::NotificationCompletion() +{ + ScopeLock lock(Locker); + + // Process notifications + auto notify = (FILE_NOTIFY_INFORMATION*)Buffer; + do + { + // Convert action type + FileSystemAction action; + switch (notify->Action) + { + case FILE_ACTION_RENAMED_NEW_NAME: + case FILE_ACTION_RENAMED_OLD_NAME: + action = FileSystemAction::Rename; + break; + case FILE_ACTION_ADDED: + action = FileSystemAction::Create; + break; + case FILE_ACTION_REMOVED: + action = FileSystemAction::Delete; + break; + case FILE_ACTION_MODIFIED: + action = FileSystemAction::Modify; + break; + default: + action = FileSystemAction::Unknown; + break; + } + if (action != FileSystemAction::Unknown) + { + // Build path + String path(notify->FileName, notify->FileNameLength / sizeof(WCHAR)); + path = Directory / path; + + // Send event + OnEvent(path, action); + } + + // Move to the next notify + notify = (FILE_NOTIFY_INFORMATION*)((byte*)notify + notify->NextEntryOffset); + } while (notify->NextEntryOffset != 0); + + // Get the new read issued as fast as possible + if (!StopNow) + { + ReadDirectoryChanges(); + } +} + #endif diff --git a/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.h 
b/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.h index 36cb2c8db..fbe0a1a33 100644 --- a/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.h +++ b/Source/Engine/Platform/Windows/WindowsFileSystemWatcher.h @@ -13,7 +13,6 @@ class FLAXENGINE_API WindowsFileSystemWatcher : public FileSystemWatcherBase { public: - /// /// Initializes a new instance of the class. /// @@ -27,13 +26,16 @@ public: ~WindowsFileSystemWatcher(); public: - Windows::OVERLAPPED Overlapped; Windows::HANDLE DirectoryHandle; - bool StopNow; - int32 CurrentBuffer; + Win32Thread* Thread = nullptr; + Win32CriticalSection Locker; + bool StopNow = false; static const int32 BufferSize = 32 * 1024; - byte Buffer[2][BufferSize]; + alignas(Windows::DWORD) byte Buffer[BufferSize]; + + void ReadDirectoryChanges(); + void NotificationCompletion(); }; #endif From 1e86eff2d33d2961d19d2dc6978b18d37a75c549 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 09:52:10 +0200 Subject: [PATCH 134/292] Improve GPU constant buffers structures definitions to properly align on new Clang --- Source/Engine/Core/Compiler.h | 2 +- Source/Engine/Debug/DebugDraw.cpp | 2 +- Source/Engine/Graphics/Config.h | 3 +++ Source/Engine/Graphics/RenderTools.h | 3 +-- Source/Engine/Level/Actors/ExponentialHeightFog.cpp | 2 +- Source/Engine/Level/Actors/Sky.cpp | 2 +- Source/Engine/Particles/Graph/GPU/GPUParticles.cpp | 2 +- Source/Engine/Particles/Particles.cpp | 2 +- Source/Engine/Render2D/Render2D.cpp | 4 ++-- Source/Engine/Renderer/AmbientOcclusionPass.h | 2 +- Source/Engine/Renderer/AntiAliasing/FXAA.cpp | 3 +-- Source/Engine/Renderer/AntiAliasing/SMAA.h | 3 +-- Source/Engine/Renderer/AntiAliasing/TAA.cpp | 3 +-- Source/Engine/Renderer/AtmospherePreCompute.cpp | 3 +-- Source/Engine/Renderer/ColorGradingPass.cpp | 2 +- Source/Engine/Renderer/Config.h | 13 +++++-------- Source/Engine/Renderer/DepthOfFieldPass.cpp | 2 +- .../Engine/Renderer/Editor/LightmapUVsDensity.cpp | 2 +- 
Source/Engine/Renderer/Editor/VertexColors.cpp | 2 +- Source/Engine/Renderer/EyeAdaptationPass.cpp | 2 +- Source/Engine/Renderer/GBufferPass.cpp | 2 +- .../GI/DynamicDiffuseGlobalIllumination.cpp | 6 ++---- .../Renderer/GI/DynamicDiffuseGlobalIllumination.h | 3 +-- .../Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 3 +-- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h | 3 +-- .../Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 9 +++------ .../Engine/Renderer/GlobalSignDistanceFieldPass.h | 3 +-- Source/Engine/Renderer/HistogramPass.cpp | 2 +- Source/Engine/Renderer/LightPass.cpp | 4 ++-- Source/Engine/Renderer/MotionBlurPass.cpp | 2 +- Source/Engine/Renderer/PostProcessingPass.h | 4 ++-- Source/Engine/Renderer/ProbesRenderer.cpp | 3 +-- Source/Engine/Renderer/ReflectionsPass.cpp | 2 +- .../Engine/Renderer/ScreenSpaceReflectionsPass.cpp | 11 ++++------- Source/Engine/Renderer/ShadowsPass.cpp | 2 +- Source/Engine/Renderer/Utils/BitonicSort.cpp | 2 +- Source/Engine/Renderer/Utils/MultiScaler.cpp | 2 +- Source/Engine/Renderer/VolumetricFogPass.h | 6 +++--- Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp | 2 +- Source/Engine/Tools/ModelTool/ModelTool.cpp | 3 +-- 40 files changed, 57 insertions(+), 76 deletions(-) diff --git a/Source/Engine/Core/Compiler.h b/Source/Engine/Core/Compiler.h index 47edbc7d9..9c736aa14 100644 --- a/Source/Engine/Core/Compiler.h +++ b/Source/Engine/Core/Compiler.h @@ -98,7 +98,7 @@ #endif -#define PACK_STRUCT(__Declaration__) PACK_BEGIN() __Declaration__ PACK_END() +#define PACK_STRUCT(_declaration) PACK_BEGIN() _declaration PACK_END() // C++ 17 #if __cplusplus >= 201703L diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp index a3a9c21a8..5723fc265 100644 --- a/Source/Engine/Debug/DebugDraw.cpp +++ b/Source/Engine/Debug/DebugDraw.cpp @@ -125,7 +125,7 @@ PACK_STRUCT(struct Vertex { Color32 Color; }); -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { Matrix ViewProjection; 
Float2 Padding; float ClipPosZBias; diff --git a/Source/Engine/Graphics/Config.h b/Source/Engine/Graphics/Config.h index c37594bdb..2a9167045 100644 --- a/Source/Engine/Graphics/Config.h +++ b/Source/Engine/Graphics/Config.h @@ -85,3 +85,6 @@ #undef GPU_ENABLE_ASSERTION #define GPU_ENABLE_ASSERTION 0 #endif + +// Helper macro for defining shader structures wrappers in C++ that match HLSL constant buffers +#define GPU_CB_STRUCT(_declaration) ALIGN_BEGIN(GPU_SHADER_DATA_ALIGNMENT) PACK_BEGIN() struct _declaration PACK_END() ALIGN_END(GPU_SHADER_DATA_ALIGNMENT) diff --git a/Source/Engine/Graphics/RenderTools.h b/Source/Engine/Graphics/RenderTools.h index 8a19718f1..8d2873c29 100644 --- a/Source/Engine/Graphics/RenderTools.h +++ b/Source/Engine/Graphics/RenderTools.h @@ -12,8 +12,7 @@ class SkinnedModel; struct RenderContext; struct FloatR10G10B10A2; -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) QuadShaderData - { +GPU_CB_STRUCT(QuadShaderData { Float4 Color; }); diff --git a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp index 10be7bdf0..7c3cfa846 100644 --- a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp +++ b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp @@ -179,7 +179,7 @@ void ExponentialHeightFog::GetExponentialHeightFogData(const RenderView& view, S result.VolumetricFogMaxDistance = VolumetricFogDistance; } -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { ShaderGBufferData GBuffer; ShaderExponentialHeightFogData ExponentialHeightFog; }); diff --git a/Source/Engine/Level/Actors/Sky.cpp b/Source/Engine/Level/Actors/Sky.cpp index f0a7c354d..2e1d46161 100644 --- a/Source/Engine/Level/Actors/Sky.cpp +++ b/Source/Engine/Level/Actors/Sky.cpp @@ -17,7 +17,7 @@ #include "Engine/Serialization/Serialization.h" #include "Engine/Level/Scene/SceneRendering.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { Matrix WVP; Float3 
ViewOffset; float Padding; diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp index e4de0ad89..2a728170b 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp @@ -13,7 +13,7 @@ #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Graphics/Shaders/GPUConstantBuffer.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GPUParticlesData { +GPU_CB_STRUCT(GPUParticlesData { Matrix ViewProjectionMatrix; Matrix InvViewProjectionMatrix; Matrix InvViewMatrix; diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 0e468b66f..c7d9e01d8 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -569,7 +569,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa #if COMPILE_WITH_GPU_PARTICLES -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GPUParticlesSortingData { +GPU_CB_STRUCT(GPUParticlesSortingData { Float3 ViewPosition; uint32 ParticleCounterOffset; uint32 ParticleStride; diff --git a/Source/Engine/Render2D/Render2D.cpp b/Source/Engine/Render2D/Render2D.cpp index 41a136314..788d46088 100644 --- a/Source/Engine/Render2D/Render2D.cpp +++ b/Source/Engine/Render2D/Render2D.cpp @@ -52,11 +52,11 @@ // True if enable downscaling when rendering blur const bool DownsampleForBlur = false; -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { Matrix ViewProjection; }); -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) BlurData { +GPU_CB_STRUCT(BlurData { Float2 InvBufferSize; uint32 SampleCount; float Dummy0; diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.h b/Source/Engine/Renderer/AmbientOcclusionPass.h index 69fcd211c..fe86fa7e7 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.h +++ b/Source/Engine/Renderer/AmbientOcclusionPass.h @@ -20,7 +20,7 @@ class 
AmbientOcclusionPass : public RendererPass private: // Packed shader constant buffer structure (this MUST match shader code) - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ASSAOConstants { + GPU_CB_STRUCT(ASSAOConstants { ShaderGBufferData GBuffer; Float2 ViewportPixelSize; diff --git a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp index 032230d64..57da136c1 100644 --- a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp @@ -7,8 +7,7 @@ #include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data - { +GPU_CB_STRUCT(Data { Float4 ScreenSize; }); diff --git a/Source/Engine/Renderer/AntiAliasing/SMAA.h b/Source/Engine/Renderer/AntiAliasing/SMAA.h index 7a6690f5d..d1170a403 100644 --- a/Source/Engine/Renderer/AntiAliasing/SMAA.h +++ b/Source/Engine/Renderer/AntiAliasing/SMAA.h @@ -15,8 +15,7 @@ class SMAA : public RendererPass { private: - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data - { + GPU_CB_STRUCT(Data { Float4 RtSize; }); diff --git a/Source/Engine/Renderer/AntiAliasing/TAA.cpp b/Source/Engine/Renderer/AntiAliasing/TAA.cpp index 168e15ed9..316001d99 100644 --- a/Source/Engine/Renderer/AntiAliasing/TAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/TAA.cpp @@ -11,8 +11,7 @@ #include "Engine/Renderer/GBufferPass.h" #include "Engine/Engine/Engine.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data - { +GPU_CB_STRUCT(Data { Float2 ScreenSizeInv; Float2 JitterInv; float Sharpness; diff --git a/Source/Engine/Renderer/AtmospherePreCompute.cpp b/Source/Engine/Renderer/AtmospherePreCompute.cpp index 6d17e7613..789526349 100644 --- a/Source/Engine/Renderer/AtmospherePreCompute.cpp +++ b/Source/Engine/Renderer/AtmospherePreCompute.cpp @@ -61,8 +61,7 @@ protected: bool Run() override; }; -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data - { +GPU_CB_STRUCT(Data { 
float First; float AtmosphereR; int AtmosphereLayer; diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp b/Source/Engine/Renderer/ColorGradingPass.cpp index 9edc83969..82a1056c8 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -8,7 +8,7 @@ #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderTask.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { Float4 ColorSaturationShadows; Float4 ColorContrastShadows; Float4 ColorGammaShadows; diff --git a/Source/Engine/Renderer/Config.h b/Source/Engine/Renderer/Config.h index a861eb127..2685757ed 100644 --- a/Source/Engine/Renderer/Config.h +++ b/Source/Engine/Renderer/Config.h @@ -11,8 +11,7 @@ /// /// Structure that contains information about GBuffer for shaders. /// -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderGBufferData - { +GPU_CB_STRUCT(ShaderGBufferData { Float4 ViewInfo; Float4 ScreenSize; Float3 ViewPos; @@ -24,8 +23,7 @@ PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderGBufferData /// /// Structure that contains information about exponential height fog for shaders. /// -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderExponentialHeightFogData - { +GPU_CB_STRUCT(ShaderExponentialHeightFogData { Float3 FogInscatteringColor; float FogMinOpacity; @@ -49,8 +47,7 @@ PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderExponentialHeightFog /// /// Structure that contains information about atmosphere fog for shaders. /// -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderAtmosphericFogData - { +GPU_CB_STRUCT(ShaderAtmosphericFogData { float AtmosphericFogDensityScale; float AtmosphericFogSunDiscScale; float AtmosphericFogDistanceScale; @@ -71,7 +68,7 @@ PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderAtmosphericFogData /// /// Structure that contains information about light for shaders. 
/// -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderLightData { +GPU_CB_STRUCT(ShaderLightData { Float2 SpotAngles; float SourceRadius; float SourceLength; @@ -90,7 +87,7 @@ PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderLightData { /// /// Packed env probe data /// -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderEnvProbeData { +GPU_CB_STRUCT(ShaderEnvProbeData { Float4 Data0; // x - Position.x, y - Position.y, z - Position.z, w - unused Float4 Data1; // x - Radius , y - 1 / Radius, z - Brightness, w - unused }); diff --git a/Source/Engine/Renderer/DepthOfFieldPass.cpp b/Source/Engine/Renderer/DepthOfFieldPass.cpp index 23c626e9c..a0731649b 100644 --- a/Source/Engine/Renderer/DepthOfFieldPass.cpp +++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp @@ -18,7 +18,7 @@ #define DOF_GRID_SIZE 450 #define DOF_DEPTH_BLUR_FORMAT PixelFormat::R16G16_Float -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { Float2 ProjectionAB; float BokehDepthCullThreshold; float BokehDepthCutoff; diff --git a/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp b/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp index e51117434..287fd006c 100644 --- a/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp +++ b/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp @@ -19,7 +19,7 @@ #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/Actors/StaticModel.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) LightmapUVsDensityMaterialShaderData { +GPU_CB_STRUCT(LightmapUVsDensityMaterialShaderData { Matrix ViewProjectionMatrix; Matrix WorldMatrix; Rectangle LightmapArea; diff --git a/Source/Engine/Renderer/Editor/VertexColors.cpp b/Source/Engine/Renderer/Editor/VertexColors.cpp index 26513f1c8..6da039561 100644 --- a/Source/Engine/Renderer/Editor/VertexColors.cpp +++ b/Source/Engine/Renderer/Editor/VertexColors.cpp @@ -12,7 +12,7 @@ #include "Engine/Graphics/RenderTask.h" #include "Engine/Renderer/DrawCall.h" 
-PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) VertexColorsMaterialShaderData { +GPU_CB_STRUCT(VertexColorsMaterialShaderData { Matrix ViewProjectionMatrix; Matrix WorldMatrix; }); diff --git a/Source/Engine/Renderer/EyeAdaptationPass.cpp b/Source/Engine/Renderer/EyeAdaptationPass.cpp index 79e0c30a8..01c8f1853 100644 --- a/Source/Engine/Renderer/EyeAdaptationPass.cpp +++ b/Source/Engine/Renderer/EyeAdaptationPass.cpp @@ -16,7 +16,7 @@ #include "Engine/Engine/Engine.h" #include "Engine/Engine/Time.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) EyeAdaptationData { +GPU_CB_STRUCT(EyeAdaptationData { float MinBrightness; float MaxBrightness; float SpeedUp; diff --git a/Source/Engine/Renderer/GBufferPass.cpp b/Source/Engine/Renderer/GBufferPass.cpp index 4addcb6c6..500e62f12 100644 --- a/Source/Engine/Renderer/GBufferPass.cpp +++ b/Source/Engine/Renderer/GBufferPass.cpp @@ -21,7 +21,7 @@ #include "Engine/Level/Actors/Decal.h" #include "Engine/Engine/Engine.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GBufferPassData{ +GPU_CB_STRUCT(GBufferPassData { ShaderGBufferData GBuffer; Float3 Dummy0; int32 ViewMode; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 10a40473e..e4e7c0dd2 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -41,8 +41,7 @@ #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data0 - { +GPU_CB_STRUCT(Data0 { DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; @@ -54,8 +53,7 @@ PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data0 Int4 ProbeScrollClears[4]; }); -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data1 
- { +GPU_CB_STRUCT(Data1 { // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD Float2 Padding1; uint32 CascadeIndex; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index ed2642153..3931b2777 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -13,8 +13,7 @@ class FLAXENGINE_API DynamicDiffuseGlobalIlluminationPass : public RendererPass< { public: // Constant buffer data for DDGI access on a GPU. - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ConstantsData - { + GPU_CB_STRUCT(ConstantsData { Float4 ProbesOriginAndSpacing[4]; Int4 ProbesScrollOffsets[4]; uint32 ProbesCounts[3]; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 858825145..ffd570fa7 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -42,8 +42,7 @@ #include "Engine/Debug/DebugDraw.h" #endif -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data0 - { +GPU_CB_STRUCT(Data0 { Float3 ViewWorldPos; float ViewNearPlane; float SkyboxIntensity; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h index 7961eb8b3..a498b3767 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h @@ -11,8 +11,7 @@ class FLAXENGINE_API GlobalSurfaceAtlasPass : public RendererPass { private: - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { + GPU_CB_STRUCT(Data { float BloomLimit; float BloomThreshold; float BloomMagnitude; @@ -56,7 +56,7 @@ private: Matrix LensFlareStarMat; }); - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) GaussianBlurData { + GPU_CB_STRUCT(GaussianBlurData { 
Float2 Size; float Dummy3; float Dummy4; diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index da4e56f31..ec11deeb4 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -69,8 +69,7 @@ public: } }; -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data - { +GPU_CB_STRUCT(Data { Float2 Dummy0; int32 CubeFace; float SourceMipIndex; diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index f37f711fb..ac4c2b703 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -13,7 +13,7 @@ #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Level/Actors/EnvironmentProbe.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { ShaderEnvProbeData PData; Matrix WVP; ShaderGBufferData GBuffer; diff --git a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp index 82873b81c..11eb3e464 100644 --- a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp +++ b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp @@ -17,15 +17,12 @@ #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/RenderTask.h" -#define RESOLVE_PASS_OUTPUT_FORMAT PixelFormat::R16G16B16A16_Float - // Shader input texture slots mapping #define TEXTURE0 4 #define TEXTURE1 5 #define TEXTURE2 6 -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data - { +GPU_CB_STRUCT(Data { ShaderGBufferData GBuffer; float MaxColorMiplevel; @@ -197,7 +194,7 @@ void ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPUTexture tempDesc = GPUTextureDescription::New2D(traceWidth, traceHeight, PixelFormat::R16G16B16A16_Float); auto traceBuffer = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(traceBuffer, "SSR.TraceBuffer"); - tempDesc = GPUTextureDescription::New2D(resolveWidth, resolveHeight, 
RESOLVE_PASS_OUTPUT_FORMAT); + tempDesc = GPUTextureDescription::New2D(resolveWidth, resolveHeight, PixelFormat::R16G16B16A16_Float); auto resolveBuffer = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(resolveBuffer, "SSR.ResolveBuffer"); @@ -253,7 +250,7 @@ void ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPUTexture // Wrong size temporal buffer if (buffers->TemporalSSR) RenderTargetPool::Release(buffers->TemporalSSR); - tempDesc = GPUTextureDescription::New2D(temporalWidth, temporalHeight, RESOLVE_PASS_OUTPUT_FORMAT); + tempDesc = GPUTextureDescription::New2D(temporalWidth, temporalHeight, PixelFormat::R16G16B16A16_Float); buffers->TemporalSSR = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(buffers->TemporalSSR, "SSR.TemporalSSR"); } @@ -378,7 +375,7 @@ void ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPUTexture GPUTexture* reflectionsBuffer = resolveBuffer; if (useTemporal) { - tempDesc = GPUTextureDescription::New2D(temporalWidth, temporalHeight, RESOLVE_PASS_OUTPUT_FORMAT); + tempDesc = GPUTextureDescription::New2D(temporalWidth, temporalHeight, PixelFormat::R16G16B16A16_Float); auto newTemporal = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(newTemporal, "SSR.TemporalSSR"); const auto oldTemporal = buffers->TemporalSSR; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index a9920ffb2..d779e0b83 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -28,7 +28,7 @@ #define NormalOffsetScaleTweak METERS_TO_UNITS(1) #define LocalLightNearPlane METERS_TO_UNITS(0.1f) -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data{ +GPU_CB_STRUCT(Data { ShaderGBufferData GBuffer; ShaderLightData Light; Matrix WVP; diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index 8255afdf1..805a5cabd 100644 --- 
a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -15,7 +15,7 @@ struct Item uint32 Value; }; -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { Item NullItem; uint32 CounterOffset; uint32 MaxIterations; diff --git a/Source/Engine/Renderer/Utils/MultiScaler.cpp b/Source/Engine/Renderer/Utils/MultiScaler.cpp index f6a7c6693..dfa94e74a 100644 --- a/Source/Engine/Renderer/Utils/MultiScaler.cpp +++ b/Source/Engine/Renderer/Utils/MultiScaler.cpp @@ -5,7 +5,7 @@ #include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" -PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { +GPU_CB_STRUCT(Data { Float2 TexelSize; Float2 Padding; }); diff --git a/Source/Engine/Renderer/VolumetricFogPass.h b/Source/Engine/Renderer/VolumetricFogPass.h index d39c7638f..0489bb88a 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.h +++ b/Source/Engine/Renderer/VolumetricFogPass.h @@ -28,14 +28,14 @@ public: private: - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) SkyLightData { + GPU_CB_STRUCT(SkyLightData { Float3 MultiplyColor; float VolumetricScatteringIntensity; Float3 AdditiveColor; float Dummy0; }); - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data { + GPU_CB_STRUCT(Data { ShaderGBufferData GBuffer; Float3 GlobalAlbedo; @@ -67,7 +67,7 @@ private: DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; }); - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) PerLight { + GPU_CB_STRUCT(PerLight { Float2 SliceToDepth; int32 MinZ; float LocalLightScatteringIntensity; diff --git a/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp b/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp index b846ab0a9..c6b0cd056 100644 --- a/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp +++ b/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp @@ -23,7 +23,7 @@ namespace ShadowsOfMordor { - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) ShaderData { + GPU_CB_STRUCT(ShaderData { 
Rectangle LightmapArea; Matrix WorldMatrix; Matrix ToTangentSpace; diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index d9b8d0bf3..b71267e4c 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -90,8 +90,7 @@ class GPUModelSDFTask : public GPUTask Float3 _xyzToLocalMul, _xyzToLocalAdd; const uint32 ThreadGroupSize = 64; - PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data - { + GPU_CB_STRUCT(Data { Int3 Resolution; uint32 ResolutionSize; float MaxDistance; From 975d5f98a316df63d8c46088a241d1267e1f0390 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 09:52:31 +0200 Subject: [PATCH 135/292] Fix `MIN_int32` compilation on the latest Clang --- Source/Engine/Core/Types/BaseTypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Core/Types/BaseTypes.h b/Source/Engine/Core/Types/BaseTypes.h index 5818512ae..6868ad035 100644 --- a/Source/Engine/Core/Types/BaseTypes.h +++ b/Source/Engine/Core/Types/BaseTypes.h @@ -46,7 +46,7 @@ typedef wchar_t Char; #define MIN_uint64 ((uint64)0x0000000000000000) #define MIN_int8 ((int8)-128) #define MIN_int16 ((int16)-32768) -#define MIN_int32 -((int32)2147483648) +#define MIN_int32 ((int32)-2147483648) #define MIN_int64 -((int64)9223372036854775808) #define MIN_float -(3.402823466e+38f) #define MIN_double -(1.7976931348623158e+308) From eab96f208639c3c594a6a565970ac48bae2064eb Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 11:01:23 +0200 Subject: [PATCH 136/292] Fix TAA jittering when rendering UI in world after TAA resolve --- Source/Engine/UI/UICanvas.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/Engine/UI/UICanvas.cs b/Source/Engine/UI/UICanvas.cs index 0ddac70e1..0d8d79eb3 100644 --- a/Source/Engine/UI/UICanvas.cs +++ b/Source/Engine/UI/UICanvas.cs @@ -72,6 +72,7 @@ namespace FlaxEngine 
bounds.Transformation.Translation -= renderContext.View.Origin; if (renderContext.View.Frustum.Contains(bounds.GetBoundingBox()) == ContainmentType.Disjoint) return; + var worldSpace = Canvas.RenderMode == CanvasRenderMode.WorldSpace || Canvas.RenderMode == CanvasRenderMode.WorldSpaceFaceCamera; Profiler.BeginEvent("UI Canvas"); Profiler.BeginEventGPU("UI Canvas"); @@ -79,14 +80,17 @@ namespace FlaxEngine // Calculate rendering matrix (world*view*projection) Canvas.GetWorldMatrix(renderContext.View.Origin, out Matrix worldMatrix); Matrix.Multiply(ref worldMatrix, ref renderContext.View.View, out Matrix viewMatrix); - Matrix.Multiply(ref viewMatrix, ref renderContext.View.Projection, out Matrix viewProjectionMatrix); + Matrix projectionMatrix = renderContext.View.Projection; + if (worldSpace && (Canvas.RenderLocation == PostProcessEffectLocation.Default || Canvas.RenderLocation == PostProcessEffectLocation.AfterAntiAliasingPass)) + projectionMatrix = renderContext.View.NonJitteredProjection; // Fix TAA jittering when rendering UI in world after TAA resolve + Matrix.Multiply(ref viewMatrix, ref projectionMatrix, out Matrix viewProjectionMatrix); // Pick a depth buffer GPUTexture depthBuffer = Canvas.IgnoreDepth ? 
null : renderContext.Buffers.DepthBuffer; // Render GUI in 3D var features = Render2D.Features; - if (Canvas.RenderMode == CanvasRenderMode.WorldSpace || Canvas.RenderMode == CanvasRenderMode.WorldSpaceFaceCamera) + if (worldSpace) Render2D.Features &= ~Render2D.RenderingFeatures.VertexSnapping; Render2D.CallDrawing(Canvas.GUI, context, input, depthBuffer, ref viewProjectionMatrix); Render2D.Features = features; From 79ee6a973e20cd906162ce2889afd98b360a895d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 12:06:25 +0200 Subject: [PATCH 137/292] Fix GlobalSDF for meshes intersections for negative distances inside the geometry --- Source/Shaders/GlobalSignDistanceField.shader | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Source/Shaders/GlobalSignDistanceField.shader b/Source/Shaders/GlobalSignDistanceField.shader index 9810c53f8..0709977db 100644 --- a/Source/Shaders/GlobalSignDistanceField.shader +++ b/Source/Shaders/GlobalSignDistanceField.shader @@ -60,6 +60,14 @@ float CombineDistanceToSDF(float sdf, float distanceToSDF) return sqrt(Square(max(sdf, 0)) + Square(distanceToSDF)); } +float CombineSDF(float oldSdf, float newSdf) +{ + // Use distance closer to 0 + if (oldSdf < 0 && newSdf < 0) + return max(oldSdf, newSdf); + return min(oldSdf, newSdf); +} + #if defined(_CS_RasterizeModel) || defined(_CS_RasterizeHeightfield) RWTexture3D GlobalSDFTex : register(u0); @@ -127,7 +135,7 @@ void CS_RasterizeModel(uint3 DispatchThreadId : SV_DispatchThreadID) { ObjectRasterizeData objectData = ObjectsBuffer[Objects[i / 4][i % 4]]; float objectDistance = DistanceToModelSDF(minDistance, objectData, ObjectsTextures[i], voxelWorldPos); - minDistance = min(minDistance, objectDistance); + minDistance = CombineSDF(minDistance, objectDistance); } GlobalSDFTex[voxelCoord] = clamp(minDistance / MaxDistance, -1, 1); } @@ -177,7 +185,7 @@ void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) float objectDistance = 
dot(heightfieldNormal, voxelWorldPos - heightfieldPosition); if (objectDistance < thickness) objectDistance = thickness - objectDistance; - minDistance = min(minDistance, objectDistance); + minDistance = CombineSDF(minDistance, objectDistance); } GlobalSDFTex[voxelCoord] = clamp(minDistance / MaxDistance, -1, 1); } From f103afb495d9a71192b7c4d67ed6d80c6d466392 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 21:36:32 +0200 Subject: [PATCH 138/292] Improve 975d5f98a316df63d8c46088a241d1267e1f0390 --- Source/Engine/Core/Types/BaseTypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Core/Types/BaseTypes.h b/Source/Engine/Core/Types/BaseTypes.h index 6868ad035..64db93be9 100644 --- a/Source/Engine/Core/Types/BaseTypes.h +++ b/Source/Engine/Core/Types/BaseTypes.h @@ -46,7 +46,7 @@ typedef wchar_t Char; #define MIN_uint64 ((uint64)0x0000000000000000) #define MIN_int8 ((int8)-128) #define MIN_int16 ((int16)-32768) -#define MIN_int32 ((int32)-2147483648) +#define MIN_int32 ((int32)-2147483648ll) #define MIN_int64 -((int64)9223372036854775808) #define MIN_float -(3.402823466e+38f) #define MIN_double -(1.7976931348623158e+308) From 59e0b43585890a886b0ec975b9ee4ba92613b80a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 22:05:14 +0200 Subject: [PATCH 139/292] Fix missing DirectX resources naming in Development builds for profiling --- Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h index 3973ec631..d818e3f2b 100644 --- a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h +++ b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h @@ -138,7 +138,7 @@ namespace RenderToolsDX #endif -#if GPU_ENABLE_DIAGNOSTICS && GPU_ENABLE_RESOURCE_NAMING +#if GPU_ENABLE_RESOURCE_NAMING // SetDebugObjectName - ANSI From 
bba14d265ad899272af10563fc3a901d243aac95 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 22:05:45 +0200 Subject: [PATCH 140/292] Add D3D11 shaders naming for profiling --- .../DirectX/DX11/GPUShaderProgramDX11.h | 20 +++---------------- .../DirectX/DX12/GPUShaderProgramDX12.h | 17 +--------------- 2 files changed, 4 insertions(+), 33 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h index 246a5e24b..519afcbfc 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h @@ -14,11 +14,9 @@ template class GPUShaderProgramDX11 : public BaseType { protected: - BufferType* _buffer; public: - /// /// Initializes a new instance of the class. /// @@ -28,6 +26,9 @@ public: : _buffer(buffer) { BaseType::Init(initializer); +#if GPU_ENABLE_RESOURCE_NAMING + SetDebugObjectName(buffer, initializer.Name.Get(), initializer.Name.Length()); +#endif } /// @@ -39,24 +40,20 @@ public: } public: - /// /// Gets DirectX 11 buffer handle. /// - /// The DirectX 11 buffer. FORCE_INLINE BufferType* GetBufferHandleDX11() const { return _buffer; } public: - // [BaseType] uint32 GetBufferSize() const override { return 0; } - void* GetBufferHandle() const override { return _buffer; @@ -69,12 +66,10 @@ public: class GPUShaderProgramVSDX11 : public GPUShaderProgramDX11 { private: - byte _inputLayoutSize; ID3D11InputLayout* _inputLayout; public: - /// /// Initializes a new instance of the class. 
/// @@ -98,24 +93,20 @@ public: } public: - /// /// Gets the DirectX 11 input layout handle /// - /// DirectX 11 input layout FORCE_INLINE ID3D11InputLayout* GetInputLayoutDX11() const { return _inputLayout; } public: - // [GPUShaderProgramDX11] void* GetInputLayout() const override { return (void*)_inputLayout; } - byte GetInputLayoutSize() const override { return _inputLayoutSize; @@ -129,7 +120,6 @@ public: class GPUShaderProgramHSDX11 : public GPUShaderProgramDX11 { public: - /// /// Initializes a new instance of the class. /// @@ -149,7 +139,6 @@ public: class GPUShaderProgramDSDX11 : public GPUShaderProgramDX11 { public: - /// /// Initializes a new instance of the class. /// @@ -169,7 +158,6 @@ public: class GPUShaderProgramGSDX11 : public GPUShaderProgramDX11 { public: - /// /// Initializes a new instance of the class. /// @@ -188,7 +176,6 @@ public: class GPUShaderProgramPSDX11 : public GPUShaderProgramDX11 { public: - /// /// Initializes a new instance of the class. /// @@ -206,7 +193,6 @@ public: class GPUShaderProgramCSDX11 : public GPUShaderProgramDX11 { public: - /// /// Initializes a new instance of the class. 
/// diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderProgramDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderProgramDX12.h index 843666307..12f2f2656 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderProgramDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderProgramDX12.h @@ -16,12 +16,9 @@ template class GPUShaderProgramDX12 : public BaseType { protected: - Array _data; public: - - GPUShaderProgramDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize) : Header(*header) { @@ -30,11 +27,9 @@ public: } public: - DxShaderHeader Header; public: - // [BaseType] void* GetBufferHandle() const override { @@ -52,12 +47,10 @@ public: class GPUShaderProgramVSDX12 : public GPUShaderProgramDX12 { private: - byte _inputLayoutSize; D3D12_INPUT_ELEMENT_DESC _inputLayout[VERTEX_SHADER_MAX_INPUT_ELEMENTS]; public: - GPUShaderProgramVSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize, D3D12_INPUT_ELEMENT_DESC* inputLayout, byte inputLayoutSize) : GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize) , _inputLayoutSize(inputLayoutSize) @@ -67,7 +60,6 @@ public: } public: - // [GPUShaderProgramDX12] void* GetInputLayout() const override { @@ -86,7 +78,6 @@ public: class GPUShaderProgramHSDX12 : public GPUShaderProgramDX12 { public: - GPUShaderProgramHSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize, int32 controlPointsCount) : GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize) { @@ -100,7 +91,6 @@ public: class GPUShaderProgramDSDX12 : public GPUShaderProgramDX12 { public: - GPUShaderProgramDSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize) : GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize) { @@ -115,7 +105,6 @@ public: class GPUShaderProgramGSDX12 : 
public GPUShaderProgramDX12 { public: - GPUShaderProgramGSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize) : GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize) { @@ -129,7 +118,6 @@ public: class GPUShaderProgramPSDX12 : public GPUShaderProgramDX12 { public: - GPUShaderProgramPSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize) : GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize) { @@ -142,27 +130,24 @@ public: class GPUShaderProgramCSDX12 : public GPUShaderProgramDX12 { private: - GPUDeviceDX12* _device; Array _data; ID3D12PipelineState* _state; public: - GPUShaderProgramCSDX12(GPUDeviceDX12* device, const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize) : GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize) , _device(device) , _state(nullptr) { } - + ~GPUShaderProgramCSDX12() { _device->AddResourceToLateRelease(_state); } public: - /// /// Gets DirectX 12 compute pipeline state object /// From a08954f7db0b9102d23a2191b86ad04190cf95db Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 22:38:46 +0200 Subject: [PATCH 141/292] Fix CSM shadows intensity due to sharpness trick --- Source/Shaders/ShadowsSampling.hlsl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index c56904e6b..e53cab08d 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -283,6 +283,11 @@ ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadow #endif result.SurfaceShadow = PostProcessShadow(shadow, result.SurfaceShadow); + + // Fix shadow intensity that got reduced by cascades sharpness stabilization (see above) + if (cascadeIndex != 0 && result.SurfaceShadow <= 0.1f) + result.SurfaceShadow += 0.01f; + return result; } From 
aad428210d14dbeb1ce5bf947b8c3a7515945040 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 23:02:20 +0200 Subject: [PATCH 142/292] Improve DDGI quality by not using lower-res cascade fallback --- Source/Shaders/GI/DDGI.hlsl | 44 ++++++++++++++--------------------- Source/Shaders/GI/DDGI.shader | 21 ++++++++++++++--- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index e304b4ffc..10ea761e4 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -136,43 +136,34 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T { // Select the highest cascade that contains the sample location uint cascadeIndex = 0; - float4 probesDatas[8]; + float probesSpacing = 0; + float3 probesOrigin = (float3)0, probesExtent = (float3)0; for (; cascadeIndex < data.CascadesCount; cascadeIndex++) { - float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; - float3 probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; - float3 probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); + probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; + probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; + probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); float fadeDistance = probesSpacing * 0.5f; float cascadeWeight = saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance); if (cascadeWeight > dither) // Use dither to make transition smoother { - // Load probes state for this cascade - uint activeCount = 0; - uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); - UNROLL - for (uint i = 0; i < 8; i++) - { - uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; - uint3 probeCoords = 
clamp(baseProbeCoords + probeCoordsOffset, uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); - uint probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords); - float4 probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); - probesDatas[i] = probeData; - uint probeState = DecodeDDGIProbeState(probeData); - if (probeState != DDGI_PROBE_STATE_INACTIVE) - activeCount++; - } - - // Ensure there are some valid probes in this cascade - if (activeCount >= 3) - break; + break; } } if (cascadeIndex == data.CascadesCount) return data.FallbackIrradiance; + uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); - float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; - float3 probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; - float3 probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); + // Load probes state for this cascade + float4 probesDatas[8]; + UNROLL + for (uint i = 0; i < 8; i++) + { + uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; + uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); + uint probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords); + probesDatas[i] = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); + } // Bias the world-space position to reduce artifacts float3 viewDir = normalize(data.ViewPos - worldPosition); @@ -180,7 +171,6 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T float3 biasedWorldPosition = worldPosition + surfaceBias; // Get the grid coordinates of the probe nearest the biased world position - uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); float3 baseProbeWorldPosition = 
GetDDGIProbeWorldPosition(data, cascadeIndex, baseProbeCoords); float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / probesSpacing); diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index a39758f24..24a483ba8 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -21,6 +21,7 @@ #define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 +#define DDGI_PROBE_RELOCATE_ITERATIVE 0 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position META_CB_BEGIN(0, Data0) DDGIData DDGI; @@ -67,8 +68,6 @@ uint GetProbeRaysCount(DDGIData data, uint probeState) #ifdef _CS_Classify -#define DDGI_PROBE_RELOCATE_ITERATIVE 0 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position - RWTexture2D RWProbesData : register(u0); RWByteAddressBuffer RWActiveProbes : register(u1); @@ -93,13 +92,29 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; + float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords); + + // Disable probes that are is in the range of higher-quality cascade + if (CascadeIndex > 0) + { + uint prevCascade = CascadeIndex - 1; + float prevProbesSpacing = DDGI.ProbesOriginAndSpacing[prevCascade].w; + float3 prevProbesOrigin = DDGI.ProbesScrollOffsets[prevCascade].xyz * prevProbesSpacing + DDGI.ProbesOriginAndSpacing[prevCascade].xyz; + float3 prevProbesExtent = (DDGI.ProbesCounts - 1) * (prevProbesSpacing * 0.5f); + float prevCascadeWeight = 
Min3(prevProbesExtent - abs(probeBasePosition - prevProbesOrigin)); + if (prevCascadeWeight > 0.1f) + { + // Disable probe + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE); + return; + } + } // Load probe state and position float4 probeData = RWProbesData[probeDataCoords]; uint probeState = DecodeDDGIProbeState(probeData); float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing float3 probeOffsetOld = probeOffset; - float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords); float3 probePosition = probeBasePosition; #if DDGI_PROBE_RELOCATE_ITERATIVE probePosition += probeOffset; From bd819b1f5f4def4035233f5214d15ab0d927fcda Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 2 Jun 2024 23:02:43 +0200 Subject: [PATCH 143/292] Rebake default Cube sdf with res scale `2` --- Content/Editor/Primitives/Cube.flax | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Content/Editor/Primitives/Cube.flax b/Content/Editor/Primitives/Cube.flax index 2aafb70ee..e383f23d6 100644 --- a/Content/Editor/Primitives/Cube.flax +++ b/Content/Editor/Primitives/Cube.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f54c6aa9d56964baf70273a37d88258b3ff21abd62876f74eb54de295221aef -size 5643 +oid sha256:4384d8a57a90063d62c5526384d31f6a699b686eec5a0a3ff62fa458f5725216 +size 23487 From 1cbcddf4101d13d6548d28309b9ccfe0c0751b83 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 3 Jun 2024 09:24:27 +0200 Subject: [PATCH 144/292] Add `IsDebugToolAttached` to GPUDevice for detecting gpu debugger tools attached and auto-enable GPU profiler in that case --- Source/Engine/Graphics/GPUDevice.cpp | 3 +- Source/Engine/Graphics/GPUDevice.h | 5 ++ Source/Engine/Graphics/Graphics.cpp | 8 +++ .../DirectX/DX11/GPUDeviceDX11.cpp | 51 ++++++++++++------- .../DirectX/DX12/GPUDeviceDX12.cpp | 25 +++++++-- .../Vulkan/GPUDeviceVulkan.Layers.cpp | 
14 +++++ .../GraphicsDevice/Vulkan/GPUDeviceVulkan.h | 11 ++-- 7 files changed, 90 insertions(+), 27 deletions(-) diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index bdacb9766..60298a513 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -321,10 +321,11 @@ GPUDevice::GPUDevice(RendererType type, ShaderProfile profile) , _res(New()) , _resources(1024) , TotalGraphicsMemory(0) + , IsDebugToolAttached(false) , QuadShader(nullptr) , CurrentTask(nullptr) { - ASSERT(_rendererType != RendererType::Unknown); + ASSERT_LOW_LAYER(_rendererType != RendererType::Unknown); } GPUDevice::~GPUDevice() diff --git a/Source/Engine/Graphics/GPUDevice.h b/Source/Engine/Graphics/GPUDevice.h index 081f22d03..f7ffe00a9 100644 --- a/Source/Engine/Graphics/GPUDevice.h +++ b/Source/Engine/Graphics/GPUDevice.h @@ -121,6 +121,11 @@ public: /// API_FIELD(ReadOnly) uint64 TotalGraphicsMemory; + /// + /// Indicates that debug tool is profiling device (eg. RenderDoc). + /// + API_FIELD(ReadOnly) bool IsDebugToolAttached; + /// /// The GPU limits. 
/// diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 954fc7c73..8483d1f6c 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -8,6 +8,7 @@ #include "Engine/Core/Config/GraphicsSettings.h" #include "Engine/Engine/CommandLine.h" #include "Engine/Engine/EngineService.h" +#include "Engine/Profiler/ProfilerGPU.h" #include "Engine/Render2D/Font.h" bool Graphics::UseVSync = false; @@ -186,6 +187,13 @@ bool GraphicsService::Init() ); // Initialize + if (device->IsDebugToolAttached) + { +#if COMPILE_WITH_PROFILER + // Auto-enable GPU profiler + ProfilerGPU::Enabled = true; +#endif + } if (device->LoadContent()) { return true; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index 2d90b50ae..3e6d97b28 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -286,22 +286,6 @@ bool GPUDeviceDX11::Init() } UpdateOutputs(adapter); - ComPtr factory5; - _factoryDXGI->QueryInterface(IID_PPV_ARGS(&factory5)); - if (factory5) - { - BOOL allowTearing; - if (SUCCEEDED(factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allowTearing, sizeof(allowTearing))) - && allowTearing -#if PLATFORM_WINDOWS - && GetModuleHandleA("renderdoc.dll") == nullptr // Disable tearing with RenderDoc (prevents crashing) -#endif - ) - { - _allowTearing = true; - } - } - // Get flags and device type base on current configuration uint32 flags = D3D11_CREATE_DEVICE_BGRA_SUPPORT; #if GPU_ENABLE_DIAGNOSTICS @@ -313,13 +297,44 @@ bool GPUDeviceDX11::Init() D3D_FEATURE_LEVEL createdFeatureLevel = static_cast(0); auto targetFeatureLevel = GetD3DFeatureLevel(); VALIDATE_DIRECTX_CALL(D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, flags, &targetFeatureLevel, 1, D3D11_SDK_VERSION, &_device, &createdFeatureLevel, &_imContext)); - - // Validate 
result ASSERT(_device); ASSERT(_imContext); ASSERT(createdFeatureLevel == targetFeatureLevel); _state = DeviceState::Created; +#if PLATFORM_WINDOWS + // Detect RenderDoc usage (UUID {A7AA6116-9C8D-4BBA-9083-B4D816B71B78}) + IUnknown* unknown = nullptr; + const GUID uuidRenderDoc = { 0xa7aa6116, 0x9c8d, 0x4bba, {0x90, 0x83, 0xb4, 0xd8, 0x16, 0xb7, 0x1b, 0x78}}; + HRESULT hr = _device->QueryInterface(uuidRenderDoc, (void**)&unknown); + if(SUCCEEDED(hr) && unknown) + { + IsDebugToolAttached = true; + unknown->Release(); + } + if (!IsDebugToolAttached && GetModuleHandleA("renderdoc.dll") != nullptr) + { + IsDebugToolAttached = true; + } +#endif + + // Check if can use screen tearing on a swapchain + ComPtr factory5; + _factoryDXGI->QueryInterface(IID_PPV_ARGS(&factory5)); + if (factory5) + { + BOOL allowTearing; + if (SUCCEEDED(factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allowTearing, sizeof(allowTearing))) + && allowTearing +#if PLATFORM_WINDOWS + && !IsDebugToolAttached // Disable tearing with RenderDoc (prevents crashing) +#endif + ) + { + _allowTearing = true; + } + } + // Init device limits { auto& limits = Limits; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index 05da7c839..9f024136a 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -304,6 +304,26 @@ bool GPUDeviceDX12::Init() } UpdateOutputs(adapter); + // Create DirectX device + VALIDATE_DIRECTX_CALL(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&_device))); + +#if PLATFORM_WINDOWS + // Detect RenderDoc usage (UUID {A7AA6116-9C8D-4BBA-9083-B4D816B71B78}) + IUnknown* unknown = nullptr; + const GUID uuidRenderDoc = { 0xa7aa6116, 0x9c8d, 0x4bba, { 0x90, 0x83, 0xb4, 0xd8, 0x16, 0xb7, 0x1b, 0x78 } }; + HRESULT hr = _device->QueryInterface(uuidRenderDoc, (void**)&unknown); + if (SUCCEEDED(hr) && 
unknown) + { + IsDebugToolAttached = true; + unknown->Release(); + } + if (!IsDebugToolAttached && GetModuleHandleA("renderdoc.dll") != nullptr) + { + IsDebugToolAttached = true; + } +#endif + + // Check if can use screen tearing on a swapchain ComPtr factory5; _factoryDXGI->QueryInterface(IID_PPV_ARGS(&factory5)); if (factory5) @@ -312,15 +332,12 @@ bool GPUDeviceDX12::Init() if (SUCCEEDED(factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allowTearing, sizeof(allowTearing))) && allowTearing #if PLATFORM_WINDOWS - && GetModuleHandleA("renderdoc.dll") == nullptr // Disable tearing with RenderDoc (prevents crashing) + && !IsDebugToolAttached // Disable tearing with RenderDoc (prevents crashing) #endif ) AllowTearing = true; } - // Create DirectX device - VALIDATE_DIRECTX_CALL(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&_device))); - // Debug Layer #if GPU_ENABLE_DIAGNOSTICS ComPtr infoQueue; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp index c8ee2ccd9..3a1a34e1c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp @@ -200,6 +200,16 @@ static bool ListContains(const Array& list, const char* name) return false; } +static bool ListContains(const Array& list, const char* name) +{ + for (const StringAnsi& element : list) + { + if (element == name) + return true; + } + return false; +} + void GPUDeviceVulkan::GetInstanceLayersAndExtensions(Array& outInstanceExtensions, Array& outInstanceLayers, bool& outDebugUtils) { VkResult result; @@ -473,6 +483,10 @@ void GPUDeviceVulkan::GetDeviceExtensionsAndLayers(VkPhysicalDevice gpu, Array& outInstanceExtensions, Array& outInstanceLayers, bool& outDebugUtils); - static void GetDeviceExtensionsAndLayers(VkPhysicalDevice gpu, Array& outDeviceExtensions, Array& outDeviceLayers); - - void 
ParseOptionalDeviceExtensions(const Array& deviceExtensions); static OptionalVulkanDeviceExtensions OptionalDeviceExtensions; +private: + static void GetInstanceLayersAndExtensions(Array& outInstanceExtensions, Array& outInstanceLayers, bool& outDebugUtils); + void GetDeviceExtensionsAndLayers(VkPhysicalDevice gpu, Array& outDeviceExtensions, Array& outDeviceLayers); + static void ParseOptionalDeviceExtensions(const Array& deviceExtensions); + public: /// /// The Vulkan instance. From 3c5d2f8b4716f63af8eed681f5ffe3209306aaca Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 3 Jun 2024 22:29:00 +0200 Subject: [PATCH 145/292] Skip ddgi probes data preload to reduce memory wait time --- Source/Shaders/GI/DDGI.hlsl | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 10ea761e4..6694f1195 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -154,17 +154,6 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T return data.FallbackIrradiance; uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); - // Load probes state for this cascade - float4 probesDatas[8]; - UNROLL - for (uint i = 0; i < 8; i++) - { - uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; - uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); - uint probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords); - probesDatas[i] = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); - } - // Bias the world-space position to reduce artifacts float3 viewDir = normalize(data.ViewPos - worldPosition); float3 surfaceBias = (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); @@ -183,7 +172,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T uint probeIndex 
= GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords); // Load probe position and state - float4 probeData = probesDatas[i]; + float4 probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); uint probeState = DecodeDDGIProbeState(probeData); if (probeState == DDGI_PROBE_STATE_INACTIVE) continue; From abdbd1ee64d67721dad2644d175f876f31d252a4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 4 Jun 2024 10:47:37 +0200 Subject: [PATCH 146/292] Add improved Global SDF tracing when going over different cascades --- Source/Shaders/GlobalSignDistanceField.hlsl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 93bdee22b..eee202784 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -223,26 +223,25 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T float4 cascadePosDistance = data.CascadePosDistance[cascade]; float voxelSize = data.CascadeVoxelSize[cascade]; float voxelExtent = voxelSize * 0.5f; - float3 worldPosition = trace.WorldPosition + trace.WorldDirection * (voxelSize * cascadeTraceStartBias); + float3 worldPosition = trace.WorldPosition + trace.WorldDirection * max(voxelSize * cascadeTraceStartBias, trace.MinDistance); // Hit the cascade bounds to find the intersection points float2 intersections = LineHitBox(worldPosition, traceEndPosition, cascadePosDistance.xyz - cascadePosDistance.www, cascadePosDistance.xyz + cascadePosDistance.www); intersections.xy *= traceMaxDistance; intersections.x = max(intersections.x, nextIntersectionStart); - float stepTime = intersections.x; if (intersections.x >= intersections.y) { // Skip the current cascade if the ray starts outside it - stepTime = intersections.y; + continue; } - else - { - // Skip the current cascade tracing on the next cascade + + // Skip the current cascade tracing on the 
next cascade (if we're tracing from inside SDF volume) + if (intersections.x <= 0.0f) nextIntersectionStart = intersections.y; - } // Walk over the cascade SDF uint step = 0; + float stepTime = intersections.x; LOOP for (; step < 250 && stepTime < intersections.y; step++) { From 0620310f4f6930d7bf2b8e1b3a94809725837af5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 4 Jun 2024 10:53:41 +0200 Subject: [PATCH 147/292] Add improved Global Surface Atlas lighting at far cascades and cascade borders --- .../Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp | 4 +--- Source/Shaders/GI/DDGI.shader | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index e4e7c0dd2..27bc6a33b 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -46,8 +46,7 @@ GPU_CB_STRUCT(Data0 { GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; ShaderGBufferData GBuffer; - float Padding0; - float ProbesDistanceLimit; + Float2 Padding0; float ResetBlend; float TemporalTime; Int4 ProbeScrollClears[4]; @@ -471,7 +470,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont data.DDGI = ddgiData.Result.Constants; data.GlobalSDF = bindingDataSDF.Constants; data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants; - data.ProbesDistanceLimit = 1.05f; // TODO: expose to be configurable? data.ResetBlend = clear ? 
1.0f : 0.0f; for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 24a483ba8..569ede92e 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -28,8 +28,7 @@ DDGIData DDGI; GlobalSDFData GlobalSDF; GlobalSurfaceAtlasData GlobalSurfaceAtlas; GBufferData GBuffer; -float Padding0; -float ProbesDistanceLimit; +float2 Padding0; float ResetBlend; float TemporalTime; int4 ProbeScrollClears[4]; @@ -101,6 +100,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float prevProbesSpacing = DDGI.ProbesOriginAndSpacing[prevCascade].w; float3 prevProbesOrigin = DDGI.ProbesScrollOffsets[prevCascade].xyz * prevProbesSpacing + DDGI.ProbesOriginAndSpacing[prevCascade].xyz; float3 prevProbesExtent = (DDGI.ProbesCounts - 1) * (prevProbesSpacing * 0.5f); + prevProbesExtent -= probesSpacing; // Apply safe margin to allow probes on cascade edges float prevCascadeWeight = Min3(prevProbesExtent - abs(probeBasePosition - prevProbesOrigin)); if (prevCascadeWeight > 0.1f) { From c26649a0a8ec05d0dbf47800a12fd1688cfc849d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 5 Jun 2024 13:43:34 +0200 Subject: [PATCH 148/292] Improve abdbd1ee64d67721dad2644d175f876f31d252a4 to better handle cascade transitions --- Source/Shaders/GlobalSignDistanceField.hlsl | 118 ++++++++++---------- 1 file changed, 60 insertions(+), 58 deletions(-) diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index eee202784..9225994e4 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -215,85 +215,87 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T hit.HitTime = -1.0f; float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1) float chunkMarginDistance = 
(float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) - float nextIntersectionStart = 0.0f; + float nextIntersectionStart = trace.MinDistance; float traceMaxDistance = min(trace.MaxDistance, data.CascadePosDistance[3].w * 2); float3 traceEndPosition = trace.WorldPosition + trace.WorldDirection * traceMaxDistance; + LOOP for (uint cascade = 0; cascade < data.CascadesCount && hit.HitTime < 0.0f; cascade++) { float4 cascadePosDistance = data.CascadePosDistance[cascade]; float voxelSize = data.CascadeVoxelSize[cascade]; float voxelExtent = voxelSize * 0.5f; - float3 worldPosition = trace.WorldPosition + trace.WorldDirection * max(voxelSize * cascadeTraceStartBias, trace.MinDistance); + float3 worldPosition = trace.WorldPosition; + + // Skip until cascade that contains the start location + if (any(abs(worldPosition - cascadePosDistance.xyz) > cascadePosDistance.w)) + continue; // Hit the cascade bounds to find the intersection points + float traceStartBias = voxelSize * cascadeTraceStartBias; float2 intersections = LineHitBox(worldPosition, traceEndPosition, cascadePosDistance.xyz - cascadePosDistance.www, cascadePosDistance.xyz + cascadePosDistance.www); intersections.xy *= traceMaxDistance; + intersections.x = max(intersections.x, traceStartBias); intersections.x = max(intersections.x, nextIntersectionStart); - if (intersections.x >= intersections.y) + if (intersections.x < intersections.y) { - // Skip the current cascade if the ray starts outside it - continue; - } + // Skip the current cascade tracing on the next cascade + nextIntersectionStart = max(nextIntersectionStart, intersections.y - voxelSize); - // Skip the current cascade tracing on the next cascade (if we're tracing from inside SDF volume) - if (intersections.x <= 0.0f) - nextIntersectionStart = intersections.y; - - // Walk over the cascade SDF - uint step = 0; - float stepTime = intersections.x; - LOOP - for (; step < 250 && stepTime < 
intersections.y; step++) - { - float3 stepPosition = worldPosition + trace.WorldDirection * stepTime; - - // Sample SDF - float cascadeMaxDistance; - float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeMaxDistance, cascadeUV, textureUV); - float stepDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (stepDistance < chunkSizeDistance) + // Walk over the cascade SDF + uint step = 0; + float stepTime = intersections.x; + LOOP + for (; step < 250 && stepTime < intersections.y && hit.HitTime < 0.0f; step++) { - float stepDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (stepDistanceTex < chunkMarginDistance * 2) + float3 stepPosition = worldPosition + trace.WorldDirection * stepTime; + + // Sample SDF + float cascadeMaxDistance; + float3 cascadeUV, textureUV; + GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeMaxDistance, cascadeUV, textureUV); + float stepDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); + if (stepDistance < chunkSizeDistance) { - stepDistance = stepDistanceTex; + float stepDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); + if (stepDistanceTex < chunkMarginDistance * 2) + { + stepDistance = stepDistanceTex; + } } - } - else - { - // Assume no SDF nearby so perform a jump - stepDistance = chunkSizeDistance; - } - stepDistance *= cascadeMaxDistance; - - // Detect surface hit - float minSurfaceThickness = voxelExtent * saturate(stepTime / voxelSize); - if (stepDistance < minSurfaceThickness) - { - // Surface hit - hit.HitTime = max(stepTime + stepDistance - minSurfaceThickness, 0.0f); - hit.HitCascade = cascade; - hit.HitSDF = stepDistance; - if (trace.NeedsHitNormal) + else { - // Calculate hit normal from SDF gradient - float texelOffset = 1.0f / data.Resolution; - float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; - float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x 
- texelOffset, textureUV.y, textureUV.z), 0).x; - float yp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x; - float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; - float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; - float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; - hit.HitNormal = normalize(float3(xp - xn, yp - yn, zp - zn)); + // Assume no SDF nearby so perform a jump + stepDistance = chunkSizeDistance; } - break; - } + stepDistance *= cascadeMaxDistance; - // Move forward - stepTime += max(stepDistance * trace.StepScale, voxelSize); + // Detect surface hit + float minSurfaceThickness = voxelExtent * saturate(stepTime / voxelSize); + if (stepDistance < minSurfaceThickness) + { + // Surface hit + hit.HitTime = max(stepTime + stepDistance - minSurfaceThickness, 0.0f); + hit.HitCascade = cascade; + hit.HitSDF = stepDistance; + if (trace.NeedsHitNormal) + { + // Calculate hit normal from SDF gradient + float texelOffset = 1.0f / data.Resolution; + float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; + float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x; + float yp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x; + float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; + float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; + float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; + hit.HitNormal = normalize(float3(xp - xn, yp - yn, zp - zn)); + } + } + + // Move forward + 
stepTime += max(stepDistance * trace.StepScale, voxelSize); + } + hit.StepsCount += step; } - hit.StepsCount += step; } return hit; } From 96303e57ecdbfa414f2525c0932ae32adcd197f5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 5 Jun 2024 13:43:50 +0200 Subject: [PATCH 149/292] Fix terrain SDF thickness to be fixed at 300 units --- Source/Shaders/GlobalSignDistanceField.shader | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Shaders/GlobalSignDistanceField.shader b/Source/Shaders/GlobalSignDistanceField.shader index 0709977db..5c8e4ee4f 100644 --- a/Source/Shaders/GlobalSignDistanceField.shader +++ b/Source/Shaders/GlobalSignDistanceField.shader @@ -155,7 +155,7 @@ void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) float3 voxelWorldPos = voxelCoord * CascadeCoordToPosMul + CascadeCoordToPosAdd; voxelCoord.x += CascadeIndex * CascadeResolution; float minDistance = MaxDistance * GlobalSDFTex[voxelCoord]; - float thickness = CascadeVoxelSize * -8; + float thickness = -300.0f; for (uint i = 0; i < ObjectsCount; i++) { ObjectRasterizeData objectData = ObjectsBuffer[Objects[i / 4][i % 4]]; From a232dac19634562e49345050c415ffd512aea6d7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 6 Jun 2024 12:10:44 +0200 Subject: [PATCH 150/292] Fix GlobalSDF sampling in materials to properly read far cascades --- Source/Engine/Graphics/Materials/MaterialParams.cpp | 1 + Source/Engine/Graphics/Materials/MaterialShader.h | 2 +- .../Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 1 + .../MaterialGenerator.Textures.cpp | 4 ++-- Source/Engine/Visject/ShaderGraph.cpp | 13 ------------- Source/Engine/Visject/ShaderGraphUtilities.cpp | 5 +++-- Source/Shaders/GlobalSignDistanceField.hlsl | 11 ++++++----- 7 files changed, 14 insertions(+), 23 deletions(-) diff --git a/Source/Engine/Graphics/Materials/MaterialParams.cpp b/Source/Engine/Graphics/Materials/MaterialParams.cpp index f54d90601..ee5b690f0 100644 --- 
a/Source/Engine/Graphics/Materials/MaterialParams.cpp +++ b/Source/Engine/Graphics/Materials/MaterialParams.cpp @@ -463,6 +463,7 @@ void MaterialParameter::Bind(BindMeta& meta) const if (GlobalSignDistanceFieldPass::Instance()->Get(meta.Buffers, bindingData)) Platform::MemoryClear(&bindingData, sizeof(bindingData)); meta.Context->BindSR(_registerIndex, bindingData.Texture ? bindingData.Texture->ViewVolume() : nullptr); + meta.Context->BindSR(_registerIndex + 1, bindingData.TextureMip ? bindingData.TextureMip->ViewVolume() : nullptr); *((GlobalSignDistanceFieldPass::ConstantsData*)(meta.Constants.Get() + _offset)) = bindingData.Constants; break; } diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 2d3c2836b..223289a46 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials shader version. /// -#define MATERIAL_GRAPH_VERSION 165 +#define MATERIAL_GRAPH_VERSION 166 class Material; class GPUShader; diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index d0bf0c35c..4b5ee7816 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -917,6 +917,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } result.Constants.Resolution = (float)resolution; result.Constants.CascadesCount = cascadesCount; + result.Constants.Padding = Float2::Zero; sdfData.Result = result; return false; } diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index 854f25398..d7b3bbb23 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -665,7 
+665,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) { auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {1})"), param.ShaderName, worldPosition.Value), node); + value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {0}_Mip, {1})"), param.ShaderName, worldPosition.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); break; } @@ -677,7 +677,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); auto distance = writeLocal(VariantType::Float, node); - auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {1}, {2})"), param.ShaderName, worldPosition.Value, distance.Value), node); + auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2})"), param.ShaderName, worldPosition.Value, distance.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); gradientBox->Cache = gradient; distanceBox->Cache = distance; diff --git a/Source/Engine/Visject/ShaderGraph.cpp b/Source/Engine/Visject/ShaderGraph.cpp index 235094fd4..886452af6 100644 --- a/Source/Engine/Visject/ShaderGraph.cpp +++ b/Source/Engine/Visject/ShaderGraph.cpp @@ -1206,11 +1206,8 @@ SerializedMaterialParam* ShaderGenerator::findParam(const String& shaderName) { SerializedMaterialParam& param = _parameters[i]; if (param.ShaderName == shaderName) - { return ¶m; - } } - return nullptr; } @@ -1235,9 +1232,7 @@ SerializedMaterialParam ShaderGenerator::findOrAddTexture(const 
Guid& id) { SerializedMaterialParam& param = _parameters[i]; if (!param.IsPublic && param.Type == MaterialParameterType::Texture && param.AsGuid == id) - { return param; - } } // Create @@ -1259,9 +1254,7 @@ SerializedMaterialParam ShaderGenerator::findOrAddNormalMap(const Guid& id) { SerializedMaterialParam& param = _parameters[i]; if (!param.IsPublic && param.Type == MaterialParameterType::NormalMap && param.AsGuid == id) - { return param; - } } // Create @@ -1283,9 +1276,7 @@ SerializedMaterialParam ShaderGenerator::findOrAddCubeTexture(const Guid& id) { SerializedMaterialParam& param = _parameters[i]; if (!param.IsPublic && param.Type == MaterialParameterType::CubeTexture && param.AsGuid == id) - { return param; - } } // Create @@ -1309,9 +1300,7 @@ SerializedMaterialParam ShaderGenerator::findOrAddSceneTexture(MaterialSceneText { SerializedMaterialParam& param = _parameters[i]; if (!param.IsPublic && param.Type == MaterialParameterType::SceneTexture && param.AsInteger == asInt) - { return param; - } } // Create @@ -1333,9 +1322,7 @@ SerializedMaterialParam& ShaderGenerator::findOrAddTextureGroupSampler(int32 ind { SerializedMaterialParam& param = _parameters[i]; if (!param.IsPublic && param.Type == MaterialParameterType::TextureGroupSampler && param.AsInteger == index) - { return param; - } } // Create diff --git a/Source/Engine/Visject/ShaderGraphUtilities.cpp b/Source/Engine/Visject/ShaderGraphUtilities.cpp index 4bedd11ab..89e25d9d6 100644 --- a/Source/Engine/Visject/ShaderGraphUtilities.cpp +++ b/Source/Engine/Visject/ShaderGraphUtilities.cpp @@ -170,8 +170,9 @@ const Char* ShaderGraphUtilities::GenerateShaderResources(TextWriterUnicode& wri format = TEXT("Texture3D {0} : register(t{1});"); break; case MaterialParameterType::GlobalSDF: - format = TEXT("Texture3D {0}_Tex : register(t{1});"); + format = TEXT("Texture3D {0}_Tex : register(t{1});\nTexture3D {0}_Mip : register(t{2});"); zeroOffset = false; + registers = 2; break; } if (format) @@ -179,7 +180,7 
@@ const Char* ShaderGraphUtilities::GenerateShaderResources(TextWriterUnicode& wri if (zeroOffset) param.Offset = 0; param.RegisterIndex = (byte)startRegister; - writer.WriteLine(format, param.ShaderName, startRegister); + writer.WriteLine(format, param.ShaderName, startRegister, startRegister + 1); startRegister += registers; if (param.RegisterIndex >= GPU_MAX_SR_BINDED) { diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 9225994e4..9d00887cf 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -7,6 +7,7 @@ #define GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN 4 #define GLOBAL_SDF_MIP_FLOODS 5 #define GLOBAL_SDF_WORLD_SIZE 60000.0f +#define GLOBAL_SDF_MIN_VALID 0.9f // Global SDF data for a constant buffer struct GlobalSDFData @@ -90,7 +91,7 @@ float SampleGlobalSDFCascade(const GlobalSDFData data, Texture3D tex, flo float3 cascadeUV, textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < 1.0f && !any(cascadeUV < 0) && !any(cascadeUV > 1)) + if (cascadeDistance < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) distance = cascadeDistance * cascadeMaxDistance; return distance; } @@ -107,7 +108,7 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, float3 wor float3 cascadeUV, textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < 0.9f && !any(cascadeUV < 0) && !any(cascadeUV > 1)) + if (cascadeDistance < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) { distance = cascadeDistance * cascadeMaxDistance; break; @@ -130,7 +131,7 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D< float3 cascadeUV, 
textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); float cascadeDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < chunkSizeDistance && !any(cascadeUV < 0) && !any(cascadeUV > 1)) + if (cascadeDistance < chunkSizeDistance && all(cascadeUV > 0) && all(cascadeUV < 1)) { float cascadeDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); if (cascadeDistanceTex < chunkMarginDistance * 2) @@ -155,7 +156,7 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, f float3 cascadeUV, textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < 0.9f && !any(cascadeUV < 0) && !any(cascadeUV > 1)) + if (cascadeDistance < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) { float texelOffset = 1.0f / data.Resolution; float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; @@ -187,7 +188,7 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, T float3 cascadeUV, textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); float cascadeDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < chunkSizeDistance && !any(cascadeUV < 0) && !any(cascadeUV > 1)) + if (cascadeDistance < chunkSizeDistance && all(cascadeUV > 0) && all(cascadeUV < 1)) { float cascadeDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); if (cascadeDistanceTex < chunkMarginDistance * 2) From bc01e6369e1594f1bb820befc2edfa5728290fce Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 6 Jun 2024 13:46:29 +0200 Subject: [PATCH 151/292] Fix GlobalSDF mip sampling artifacts on edges of distance limit --- Source/Shaders/GlobalSignDistanceField.hlsl | 15 +++++++-------- 1 file 
changed, 7 insertions(+), 8 deletions(-) diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 9d00887cf..4ecc7b81c 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -8,6 +8,7 @@ #define GLOBAL_SDF_MIP_FLOODS 5 #define GLOBAL_SDF_WORLD_SIZE 60000.0f #define GLOBAL_SDF_MIN_VALID 0.9f +#define GLOBAL_SDF_CHUNK_MARGIN_SCALE 4.0f // Global SDF data for a constant buffer struct GlobalSDFData @@ -124,7 +125,7 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D< if (distance <= 0.0f) return GLOBAL_SDF_WORLD_SIZE; float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1) - float chunkMarginDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) + float chunkMarginDistance = GLOBAL_SDF_CHUNK_MARGIN_SCALE * (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { float cascadeMaxDistance; @@ -134,7 +135,7 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D< if (cascadeDistance < chunkSizeDistance && all(cascadeUV > 0) && all(cascadeUV < 1)) { float cascadeDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistanceTex < chunkMarginDistance * 2) + if (cascadeDistanceTex < chunkMarginDistance) cascadeDistance = cascadeDistanceTex; distance = cascadeDistance * cascadeMaxDistance; break; @@ -181,7 +182,7 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, T if (data.CascadePosDistance[3].w <= 0.0f) return gradient; float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1) - float chunkMarginDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / 
data.Resolution; // Size of the chunk margin in SDF distance (0-1) + float chunkMarginDistance = GLOBAL_SDF_CHUNK_MARGIN_SCALE * (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { float cascadeMaxDistance; @@ -191,7 +192,7 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, T if (cascadeDistance < chunkSizeDistance && all(cascadeUV > 0) && all(cascadeUV < 1)) { float cascadeDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistanceTex < chunkMarginDistance * 2) + if (cascadeDistanceTex < chunkMarginDistance) cascadeDistance = cascadeDistanceTex; float texelOffset = 1.0f / data.Resolution; float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; @@ -215,7 +216,7 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T GlobalSDFHit hit = (GlobalSDFHit)0; hit.HitTime = -1.0f; float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1) - float chunkMarginDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) + float chunkMarginDistance = GLOBAL_SDF_CHUNK_MARGIN_SCALE * (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) float nextIntersectionStart = trace.MinDistance; float traceMaxDistance = min(trace.MaxDistance, data.CascadePosDistance[3].w * 2); float3 traceEndPosition = trace.WorldPosition + trace.WorldDirection * traceMaxDistance; @@ -258,10 +259,8 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T if (stepDistance < chunkSizeDistance) { float stepDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (stepDistanceTex < chunkMarginDistance * 2) - { + if (stepDistanceTex < 
chunkMarginDistance) stepDistance = stepDistanceTex; - } } else { From 66f93744779120e8957f7d0d4e579a4d5511ed97 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 8 Jun 2024 13:14:20 +0200 Subject: [PATCH 152/292] Add improved terrain rasterization into Global SDF #754 --- Source/Shaders/GlobalSignDistanceField.shader | 52 +++++++++++++------ Source/Shaders/TerrainCommon.hlsl | 52 +++++++++++++++++++ 2 files changed, 87 insertions(+), 17 deletions(-) create mode 100644 Source/Shaders/TerrainCommon.hlsl diff --git a/Source/Shaders/GlobalSignDistanceField.shader b/Source/Shaders/GlobalSignDistanceField.shader index 5c8e4ee4f..c8788e3be 100644 --- a/Source/Shaders/GlobalSignDistanceField.shader +++ b/Source/Shaders/GlobalSignDistanceField.shader @@ -3,6 +3,7 @@ #include "./Flax/Common.hlsl" #include "./Flax/Math.hlsl" #include "./Flax/GlobalSignDistanceField.hlsl" +#include "./Flax/TerrainCommon.hlsl" #define GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT 28 #define GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT 2 @@ -151,6 +152,9 @@ META_CS(true, FEATURE_LEVEL_SM5) [numthreads(GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE)] void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) { +#if defined(PLATFORM_PS4) || defined(PLATFORM_PS5) + // TODO: fix shader compilation error +#else uint3 voxelCoord = ChunkCoord + DispatchThreadId; float3 voxelWorldPos = voxelCoord * CascadeCoordToPosMul + CascadeCoordToPosAdd; voxelCoord.x += CascadeIndex * CascadeResolution; @@ -161,33 +165,47 @@ void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) ObjectRasterizeData objectData = ObjectsBuffer[Objects[i / 4][i % 4]]; // Convert voxel world-space position into heightfield local-space position and get heightfield UV - float3 volumePos = mul(float4(voxelWorldPos, 1), ToMatrix4x4(objectData.WorldToVolume)).xyz; + float4x4 worldToLocal = ToMatrix4x4(objectData.WorldToVolume); + float3 volumePos = 
mul(float4(voxelWorldPos, 1), worldToLocal).xyz; float3 volumeUV = volumePos * objectData.VolumeToUVWMul + objectData.VolumeToUVWAdd; float2 heightfieldUV = float2(volumeUV.x, volumeUV.z); - // Sample the heightfield -#if defined(PLATFORM_PS4) || defined(PLATFORM_PS5) - float4 heightmapValue = 0; // TODO: fix shader compilation error -#else - float4 heightmapValue = ObjectsTextures[i].SampleLevel(SamplerLinearClamp, heightfieldUV, objectData.MipOffset); -#endif - bool isHole = (heightmapValue.b + heightmapValue.a) >= 1.9f; - if (isHole || any(heightfieldUV < 0.0f) || any(heightfieldUV > 1.0f)) - continue; - float height = (float)((int)(heightmapValue.x * 255.0) + ((int)(heightmapValue.y * 255) << 8)) / 65535.0; - float2 positionXZ = volumePos.xz; - float3 position = float3(positionXZ.x, height, positionXZ.y); - float4x4 volumeToWorld = ToMatrix4x4(objectData.VolumeToWorld); - float3 heightfieldPosition = mul(float4(position, 1), volumeToWorld).xyz; - float3 heightfieldNormal = normalize(float3(volumeToWorld[0].y, volumeToWorld[1].y, volumeToWorld[2].y)); + // Sample heightfield around the voxel location (heightmap uses point sampler) + Texture2D heightmap = ObjectsTextures[i]; + float4 localToUV = float4(objectData.VolumeToUVWMul.xz, objectData.VolumeToUVWAdd.xz); + float3 n00, n10, n01, n11; + bool h00, h10, h01, h11; + float offset = CascadeVoxelSize * 2; + float3 p00 = SampleHeightmap(heightmap, volumePos + float3(-offset, 0, 0), localToUV, n00, h00, objectData.MipOffset); + float3 p10 = SampleHeightmap(heightmap, volumePos + float3(+offset, 0, 0), localToUV, n10, h10, objectData.MipOffset); + float3 p01 = SampleHeightmap(heightmap, volumePos + float3(0, 0, -offset), localToUV, n01, h01, objectData.MipOffset); + float3 p11 = SampleHeightmap(heightmap, volumePos + float3(0, 0, +offset), localToUV, n11, h11, objectData.MipOffset); + + // Calculate average sample (linear interpolation) + float3 heightfieldPosition = (p00 + p10 + p01 + p11) * 0.25f; + float3 
heightfieldNormal = (n00 + n10 + n01 + n11) * 0.25f; + heightfieldNormal = normalize(heightfieldNormal); + bool isHole = h00 || h10 || h01 || h11; + + // Skip holes and pixels outside the heightfield + if (isHole) + continue; + + // Transform to world-space + float4x4 localToWorld = ToMatrix4x4(objectData.VolumeToWorld); + heightfieldPosition = mul(float4(heightfieldPosition, 1), localToWorld).xyz; + // TODO: rotate normal vector + //heightfieldNormal = normalize(float3(localToWorld[0].y, localToWorld[1].y, localToWorld[2].y)); + //heightfieldNormal = float3(0, 1, 0); // Calculate distance from voxel center to the heightfield float objectDistance = dot(heightfieldNormal, voxelWorldPos - heightfieldPosition); - if (objectDistance < thickness) + if (objectDistance < thickness * 0.5f) objectDistance = thickness - objectDistance; minDistance = CombineSDF(minDistance, objectDistance); } GlobalSDFTex[voxelCoord] = clamp(minDistance / MaxDistance, -1, 1); +#endif } #endif diff --git a/Source/Shaders/TerrainCommon.hlsl b/Source/Shaders/TerrainCommon.hlsl new file mode 100644 index 000000000..bbd4469f1 --- /dev/null +++ b/Source/Shaders/TerrainCommon.hlsl @@ -0,0 +1,52 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+ +#ifndef __TERRAIN_COMMON__ +#define __TERRAIN_COMMON__ + +#include "./Flax/Common.hlsl" + +float SampleHeightmap(Texture2D heightmap, float2 uv, float mipOffset = 0.0f) +{ + // Sample heightmap + float4 value = heightmap.SampleLevel(SamplerPointClamp, uv, mipOffset); + + // Decode heightmap + float height = (float)((int)(value.x * 255.0) + ((int)(value.y * 255) << 8)) / 65535.0; + return height; +} + +float SampleHeightmap(Texture2D heightmap, float2 uv, out float3 normal, out bool isHole, float mipOffset = 0.0f) +{ + // Sample heightmap + float4 value = heightmap.SampleLevel(SamplerPointClamp, uv, mipOffset); + + // Decode heightmap + float height = (float)((int)(value.x * 255.0) + ((int)(value.y * 255) << 8)) / 65535.0; + float2 normalTemp = float2(value.b, value.a) * 2.0f - 1.0f; + normal = float3(normalTemp.x, sqrt(1.0 - saturate(dot(normalTemp, normalTemp))), normalTemp.y); + isHole = (value.b + value.a) >= 1.9f; + normal = normalize(normal); + return height; +} + +float3 SampleHeightmap(Texture2D heightmap, float3 localPosition, float4 localToUV, out float3 normal, out bool isHole, float mipOffset = 0.0f) +{ + // Sample heightmap + float2 uv = localPosition.xz * localToUV.xy + localToUV.zw; + float4 value = heightmap.SampleLevel(SamplerPointClamp, uv, mipOffset); + + // Decode heightmap + isHole = (value.b + value.a) >= 1.9f; + float height = (float)((int)(value.x * 255.0) + ((int)(value.y * 255) << 8)) / 65535.0; + float3 position = float3(localPosition.x, height, localPosition.z); + float2 normalTemp = float2(value.b, value.a) * 2.0f - 1.0f; + normal = float3(normalTemp.x, sqrt(1.0 - saturate(dot(normalTemp, normalTemp))), normalTemp.y); + normal = normalize(normal); + + // UVs outside the heightmap are empty + isHole = isHole || any(uv < 0.0f) || any(uv > 1.0f); + + return position; +} + +#endif From 5c5fad6bb4f672886bcaeab441d2d19a33fa939a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 13 Jun 2024 17:03:23 +0200 Subject: [PATCH 153/292] 
Refactor DDGI probe relocation algorithm to result in smoother and more precise placement #754 #1614 --- Source/Shaders/GI/DDGI.hlsl | 4 +- Source/Shaders/GI/DDGI.shader | 140 ++++++++++-------- Source/Shaders/GlobalSignDistanceField.shader | 1 + 3 files changed, 78 insertions(+), 67 deletions(-) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 6694f1195..19619113d 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -208,7 +208,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Adjust weight curve to inject a small portion of light const float minWeightThreshold = 0.2f; if (weight < minWeightThreshold) - weight *= Square(weight) * (1.0f / (minWeightThreshold * minWeightThreshold)); + weight *= weight * weight * (1.0f / (minWeightThreshold * minWeightThreshold)); // Calculate trilinear weights based on the distance to each probe to smoothly transition between grid of 8 probes float3 trilinear = lerp(1.0f - biasAlpha, biasAlpha, (float3)probeCoordsOffset); @@ -244,7 +244,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T if (irradiance.a > 0.0f) { // Normalize irradiance - irradiance.rgb *= 1.f / irradiance.a; + irradiance.rgb *= 1.0f / irradiance.a; #if DDGI_SRGB_BLENDING irradiance.rgb *= irradiance.rgb; #endif diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 569ede92e..420333bd3 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -21,7 +21,8 @@ #define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 -#define DDGI_PROBE_RELOCATE_ITERATIVE 0 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position +#define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in 
additive way, otherwise all nearby locations are checked to find the best position +#define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area META_CB_BEGIN(0, Data0) DDGIData DDGI; @@ -113,12 +114,10 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Load probe state and position float4 probeData = RWProbesData[probeDataCoords]; uint probeState = DecodeDDGIProbeState(probeData); + uint probeStateOld = probeState; float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing float3 probeOffsetOld = probeOffset; - float3 probePosition = probeBasePosition; -#if DDGI_PROBE_RELOCATE_ITERATIVE - probePosition += probeOffset; -#endif + float3 probePosition = probeBasePosition + probeOffset; // Use Global SDF to quickly get distance and direction to the scene geometry #if DDGI_PROBE_RELOCATE_ITERATIVE @@ -128,10 +127,12 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float sdf = SampleGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, probePosition); #endif float sdfDst = abs(sdf); - float threshold = GlobalSDF.CascadeVoxelSize[CascadeIndex]; - float distanceLimit = length(probesSpacing) * ProbesDistanceLimit; - float relocateLimit = length(probesSpacing) * 0.6f; - if (sdfDst > distanceLimit) // Probe is too far from geometry + const float ProbesDistanceLimits[4] = { 1.1f, 2.3f, 2.5f, 2.5f }; + const float ProbesRelocateLimits[4] = { 0.4f, 0.5f, 0.6f, 0.7f }; + float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex]; + float distanceLimit = length(probesSpacing) * ProbesDistanceLimits[CascadeIndex]; + float relocateLimit = length(probesSpacing) * ProbesRelocateLimits[CascadeIndex]; + if (sdfDst > distanceLimit + length(probeOffset)) // Probe is too far from geometry (or deep inside) { // Disable it probeOffset = float3(0, 0, 0); @@ -139,64 +140,72 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) } 
else { -#if DDGI_PROBE_RELOCATE_ITERATIVE - if (sdf < threshold) // Probe is inside geometry + // Relocate only if probe location is not good enough + probeState = DDGI_PROBE_STATE_ACTIVE; + if (sdf <= voxelLimit) { - if (sdfDst < relocateLimit) +#if DDGI_PROBE_RELOCATE_ITERATIVE { - float3 offsetToAdd = sdfNormal * (sdf + threshold); - if (distance(probeOffset, offsetToAdd) < relocateLimit) + // Use SDF gradient to relocate probe away the surface + float iterativeRelocateSpeed = probeStateOld != DDGI_PROBE_STATE_ACTIVE ? 1.0f : 0.3f; + float3 offsetToSet = probeOffset + sdfNormal * ((sdf + voxelLimit) * iterativeRelocateSpeed); + if (length(offsetToSet) < relocateLimit) { // Relocate it - probeOffset += offsetToAdd; + probeOffset = offsetToSet; } + else + { + // Reset offset + probeOffset = float3(0, 0, 0); + } + + // Read SDF at the new position for additional check + probePosition = probeBasePosition + probeOffset; + sdf = SampleGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, probePosition); + sdfDst = abs(sdf); } - else - { - // Reset relocation - probeOffset = float3(0, 0, 0); - } - } - else if (sdf > threshold * 4.0f) // Probe is far enough from any geometry - { - // Reset relocation - probeOffset = float3(0, 0, 0); - } - - // Check if probe is relocated but the base location is fine - sdf = SampleGlobalSDF(GlobalSDF, GlobalSDFTex, probeBasePosition.xyz); - if (sdf > threshold) - { - // Reset relocation - probeOffset = float3(0, 0, 0); - } -#else - // Sample Global SDF around the probe location - uint sdfCascade = GetGlobalSDFCascade(GlobalSDF, probePosition); - float4 CachedProbeOffsets[64]; - // TODO: test performance diff when using shared memory and larger thread group (is it worth it?) 
- for (uint x = 0; x < 4; x++) - for (uint y = 0; y < 4; y++) - for (uint z = 0; z < 4; z++) - { - float3 offset = Remap(float3(x, y, z), 0, 3, -0.5f, 0.5f) * relocateLimit; - float offsetSdf = SampleGlobalSDFCascade(GlobalSDF, GlobalSDFTex, probeBasePosition + offset, sdfCascade); - CachedProbeOffsets[x * 16 + y * 4 + z] = float4(offset, offsetSdf); - } - - // Select the best probe location around the base position - float4 bestOffset = CachedProbeOffsets[0]; - for (uint i = 1; i < 64; i++) - { - if (CachedProbeOffsets[i].w > bestOffset.w) - bestOffset = CachedProbeOffsets[i]; - } - - // Relocate the probe to the best found location (or zero if nothing good found) - if (bestOffset.w <= threshold) - bestOffset.xyz = float3(0, 0, 0); - probeOffset = bestOffset.xyz; + if (sdf <= voxelLimit * 1.1f) // Add some safe-bias to reduce artifacts #endif + { +#if DDGI_PROBE_RELOCATE_FIND_BEST + // Sample Global SDF around the probe base location + uint sdfCascade = GetGlobalSDFCascade(GlobalSDF, probeBasePosition); + float4 CachedProbeOffsets[64]; + for (uint x = 0; x < 4; x++) + for (uint y = 0; y < 4; y++) + for (uint z = 0; z < 4; z++) + { + float3 offset = Remap(float3(x, y, z), 0, 3, -0.707f, 0.707f) * relocateLimit; + float offsetSdf = SampleGlobalSDFCascade(GlobalSDF, GlobalSDFTex, probeBasePosition + offset, sdfCascade); + CachedProbeOffsets[x * 16 + y * 4 + z] = float4(offset, offsetSdf); + } + + // Select the best probe location around the base position + float4 bestOffset = CachedProbeOffsets[0]; + for (uint i = 1; i < 64; i++) + { + if (CachedProbeOffsets[i].w > bestOffset.w) + bestOffset = CachedProbeOffsets[i]; + } + if (bestOffset.w <= voxelLimit) + { + // Disable probe that is too close to the geometry + probeOffset = float3(0, 0, 0); + probeState = DDGI_PROBE_STATE_INACTIVE; + } + else + { + // Relocate the probe to the best found location + probeOffset = bestOffset.xyz; + } +#elif DDGI_PROBE_RELOCATE_ITERATIVE + // Disable probe + probeOffset = float3(0, 0, 
0); + probeState = DDGI_PROBE_STATE_INACTIVE; +#endif + } + } // Check if probe was scrolled int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz; @@ -210,10 +219,11 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) wasScrolled = true; } - // If probe was in different location or was inactive last frame then mark it as activated - bool wasInactive = probeState == DDGI_PROBE_STATE_INACTIVE; + // If probe was in a different location or was activated now then mark it as activated + bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE; bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f; - probeState = wasInactive || wasScrolled || wasRelocated ? DDGI_PROBE_STATE_ACTIVATED : DDGI_PROBE_STATE_ACTIVE; + if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE) + probeState = DDGI_PROBE_STATE_ACTIVATED; } // Save probe state @@ -302,12 +312,12 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) float4 radiance; if (hit.IsHit()) { - if (hit.HitSDF <= 0.0f && hit.HitTime <= GlobalSDF.CascadeVoxelSize[0]) + /*if (hit.HitSDF <= 0.0f && hit.HitTime <= GlobalSDF.CascadeVoxelSize[0]) { // Ray starts inside geometry (mark as negative distance and reduce it's influence during irradiance blending) radiance = float4(0, 0, 0, hit.HitTime * -0.25f); } - else + else*/ { // Sample Global Surface Atlas to get the lighting at the hit location float3 hitPosition = hit.GetHitPosition(trace); diff --git a/Source/Shaders/GlobalSignDistanceField.shader b/Source/Shaders/GlobalSignDistanceField.shader index c8788e3be..d92f98acf 100644 --- a/Source/Shaders/GlobalSignDistanceField.shader +++ b/Source/Shaders/GlobalSignDistanceField.shader @@ -200,6 +200,7 @@ void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) // Calculate distance from voxel center to the heightfield float objectDistance = dot(heightfieldNormal, voxelWorldPos - heightfieldPosition); + //objectDistance += (1.0f - 
saturate(dot(heightfieldNormal, float3(0, 1, 0)))) * -50.0f; if (objectDistance < thickness * 0.5f) objectDistance = thickness - objectDistance; minDistance = CombineSDF(minDistance, objectDistance); From 2f688892ea7c7b368523b3288e5dd5a31d043140 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 13 Jun 2024 17:04:29 +0200 Subject: [PATCH 154/292] Fix DDGI probes scrolling to properly handle bigger scroll deltas --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 32 +++++++++++-------- Source/Shaders/GI/DDGI.hlsl | 6 ++-- Source/Shaders/GI/DDGI.shader | 32 +++++++++++-------- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 27bc6a33b..238418a47 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -67,14 +67,12 @@ public: Float3 ProbesOrigin; float ProbesSpacing = 0.0f; Int3 ProbeScrollOffsets; - Int3 ProbeScrollDirections; Int3 ProbeScrollClears; void Clear() { ProbesOrigin = Float3::Zero; ProbeScrollOffsets = Int3::Zero; - ProbeScrollDirections = Int3::Zero; ProbeScrollClears = Int3::Zero; } } Cascades[4]; @@ -400,6 +398,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont const uint64 cascadeFrequencies[] = { 2, 3, 5, 7 }; //const uint64 cascadeFrequencies[] = { 1, 2, 3, 5 }; //const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 }; + //const uint64 cascadeFrequencies[] = { 10, 10, 10, 10 }; bool cascadeSkipUpdate[4]; for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { @@ -413,16 +412,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont continue; auto& cascade = ddgiData.Cascades[cascadeIndex]; - // Reset the volume origin and scroll offsets for each axis once it overflows - for (int32 axis = 0; axis < 3; axis++) - { - if 
(cascade.ProbeScrollOffsets.Raw[axis] != 0 && (cascade.ProbeScrollOffsets.Raw[axis] % ddgiData.ProbeCounts.Raw[axis] == 0)) - { - cascade.ProbesOrigin.Raw[axis] += (float)ddgiData.ProbeCounts.Raw[axis] * cascade.ProbesSpacing * (float)cascade.ProbeScrollDirections.Raw[axis]; - cascade.ProbeScrollOffsets.Raw[axis] = 0; - } - } - // Calculate the count of grid cells between the view origin and the scroll anchor const Float3 volumeOrigin = cascade.ProbesOrigin + Float3(cascade.ProbeScrollOffsets) * cascade.ProbesSpacing; const Float3 translation = viewOrigins[cascadeIndex] - volumeOrigin; @@ -432,7 +421,24 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont const int32 scroll = value >= 0.0f ? (int32)Math::Floor(value) : (int32)Math::Ceil(value); cascade.ProbeScrollOffsets.Raw[axis] += scroll; cascade.ProbeScrollClears.Raw[axis] = scroll; - cascade.ProbeScrollDirections.Raw[axis] = translation.Raw[axis] >= 0.0f ? 1 : -1; + } + + // Shift the volume origin based on scroll offsets for each axis once it overflows + for (int32 axis = 0; axis < 3; axis++) + { + // different volume scroll that preserves the scroll offset delta relative to the probe count + const int32 probeCount = ddgiData.ProbeCounts.Raw[axis]; + int32& scrollOffset = cascade.ProbeScrollOffsets.Raw[axis]; + while (scrollOffset >= probeCount) + { + cascade.ProbesOrigin.Raw[axis] += cascade.ProbesSpacing * probeCount; + scrollOffset -= probeCount; + } + while (scrollOffset <= -probeCount) + { + cascade.ProbesOrigin.Raw[axis] -= cascade.ProbesSpacing * probeCount; + scrollOffset += probeCount; + } } } diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 19619113d..16e79df68 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -77,7 +77,8 @@ uint2 GetDDGIProbeTexelCoords(DDGIData data, uint cascadeIndex, uint probeIndex) uint GetDDGIScrollingProbeIndex(DDGIData data, uint cascadeIndex, uint3 probeCoords) { // Probes are scrolled 
on edges to stabilize GI when camera moves - return GetDDGIProbeIndex(data, (probeCoords + data.ProbesCounts + data.ProbesScrollOffsets[cascadeIndex].xyz) % data.ProbesCounts); + int3 probeCoordsOffset = (int3)data.ProbesCounts + data.ProbesScrollOffsets[cascadeIndex].xyz; + return GetDDGIProbeIndex(data, (probeCoords + (uint3)probeCoordsOffset) % data.ProbesCounts); } float3 GetDDGIProbeWorldPosition(DDGIData data, uint cascadeIndex, uint3 probeCoords) @@ -86,7 +87,8 @@ float3 GetDDGIProbeWorldPosition(DDGIData data, uint cascadeIndex, uint3 probeCo float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; float3 probePosition = probeCoords * probesSpacing; float3 probeGridOffset = (probesSpacing * (data.ProbesCounts - 1)) * 0.5f; - return probesOrigin + probePosition - probeGridOffset + (data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing); + float3 probeScrollOffset = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing; + return probesOrigin + probePosition - probeGridOffset + probeScrollOffset; } // Loads probe probe data (encoded) diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 420333bd3..e9ee51687 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -111,11 +111,28 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) } } + // Check if probe was scrolled + int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz; + bool wasScrolled = false; + UNROLL + for (uint planeIndex = 0; planeIndex < 3; planeIndex++) + { + int probeCount = (int)DDGI.ProbesCounts[planeIndex]; + int newCoord = (int)probeCoords[planeIndex] + probeScrollClears[planeIndex]; + if (newCoord < 0 || newCoord >= probeCount) + wasScrolled = true; + newCoord = (int)probeCoords[planeIndex] - probeScrollClears[planeIndex]; + if (newCoord < 0 || newCoord >= probeCount) + wasScrolled = true; + } + // Load probe state and position float4 probeData = RWProbesData[probeDataCoords]; uint probeState = 
DecodeDDGIProbeState(probeData); uint probeStateOld = probeState; float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing + if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE) + probeOffset = float3(0, 0, 0); // Clear offset for a new probe float3 probeOffsetOld = probeOffset; float3 probePosition = probeBasePosition + probeOffset; @@ -207,18 +224,6 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) } } - // Check if probe was scrolled - int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz; - bool wasScrolled = false; - UNROLL - for (uint planeIndex = 0; planeIndex < 3; planeIndex++) - { - int probeCount = (int)DDGI.ProbesCounts[planeIndex]; - int newCord = (int)probeCoords[planeIndex] + probeScrollClears[planeIndex]; - if (newCord < 0 || newCord >= probeCount) - wasScrolled = true; - } - // If probe was in a different location or was activated now then mark it as activated bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE; bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f; @@ -468,8 +473,9 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ // Blend current value with the previous probe data float historyWeight = DDGI.ProbeHistoryWeight; + //historyWeight = 1.0f; //historyWeight = 0.0f; - if (ResetBlend || wasActivated || dot(previous, previous) == 0) + if (ResetBlend || wasActivated) historyWeight = 0.0f; #if DDGI_PROBE_UPDATE_MODE == 0 result *= DDGI.IndirectLightingIntensity; From eeede88fb824a83e5e08d68f0d8184390ab98b1d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 13 Jun 2024 17:05:31 +0200 Subject: [PATCH 155/292] Add more precise Global SDF raycasting nearby geometry (at cost of performance) --- Source/Shaders/GlobalSignDistanceField.hlsl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 
4ecc7b81c..cafb02869 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -250,6 +250,7 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T for (; step < 250 && stepTime < intersections.y && hit.HitTime < 0.0f; step++) { float3 stepPosition = worldPosition + trace.WorldDirection * stepTime; + float stepScale = trace.StepScale; // Sample SDF float cascadeMaxDistance; @@ -260,7 +261,10 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T { float stepDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); if (stepDistanceTex < chunkMarginDistance) + { stepDistance = stepDistanceTex; + stepScale *= 0.63f; // Perform smaller steps nearby geometry + } } else { @@ -292,7 +296,7 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T } // Move forward - stepTime += max(stepDistance * trace.StepScale, voxelSize); + stepTime += max(stepDistance * stepScale, voxelSize); } hit.StepsCount += step; } From d926bdb6096dd5d59fda49b3d2eaf9fc1ca8ffde Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 13 Jun 2024 17:15:30 +0200 Subject: [PATCH 156/292] Update assets --- Content/Editor/Camera/M_Camera.flax | 2 +- Content/Editor/DebugMaterials/DDGIDebugProbes.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Decal.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Particle.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Surface.flax | 2 +- .../Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Terrain.flax | 2 +- Content/Editor/Gizmo/Material.flax | 2 +- Content/Editor/Gizmo/SelectionOutlineMaterial.flax | 2 +- Content/Editor/Highlight Material.flax | 2 +- Content/Editor/Icons/IconsMaterial.flax | 2 +- Content/Editor/Particles/Particle Material Color.flax | 2 +- Content/Editor/Particles/Smoke Material.flax | 4 ++-- Content/Editor/Terrain/Circle Brush Material.flax | 2 +- 
Content/Engine/DefaultMaterial.flax | 2 +- Content/Engine/DefaultTerrainMaterial.flax | 2 +- Content/Engine/SingleColorMaterial.flax | 2 +- Content/Engine/SkyboxMaterial.flax | 2 +- Content/Shaders/GI/DDGI.flax | 4 ++-- Content/Shaders/GlobalSignDistanceField.flax | 4 ++-- 20 files changed, 23 insertions(+), 23 deletions(-) diff --git a/Content/Editor/Camera/M_Camera.flax b/Content/Editor/Camera/M_Camera.flax index 7a1d035cc..0411c1615 100644 --- a/Content/Editor/Camera/M_Camera.flax +++ b/Content/Editor/Camera/M_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec04224ecf3c58c749c902f0ca5419c54896ac17ca48de1e9075afbca23e6a6e +oid sha256:b2a2d03b7e3bbafed896311cf6fabfe2ce301671860b33cdafc4dcd47fddfcbf size 30521 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index 24f908bbc..68d922a96 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7972dcbec033481c0ace42eb0dff24154f0cdb32b4d039200b54d8be932b9806 +oid sha256:6c5d036a454fce8eaad8bb114f588f5566677a2a14a2091061e9c3825a18215d size 41469 diff --git a/Content/Editor/DebugMaterials/SingleColor/Decal.flax b/Content/Editor/DebugMaterials/SingleColor/Decal.flax index fd259c950..bff316f5f 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Decal.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Decal.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e4cd9ca7d54553b805d87d1908d780205b28b52ae6e00dcf70a5aa27d2236be +oid sha256:603d34b26c593725c4a4d65ee06c04c1d8be6bc39bd6d0912f44430ee98452e8 size 7489 diff --git a/Content/Editor/DebugMaterials/SingleColor/Particle.flax b/Content/Editor/DebugMaterials/SingleColor/Particle.flax index 55d544e59..e8f0bd906 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Particle.flax +++ 
b/Content/Editor/DebugMaterials/SingleColor/Particle.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6644f415267c6ccfd4277ef633b165cf21d8081833f8ccd456e8c8ae7defe33e +oid sha256:73e7b3bb45947431e87ea816a7919d994ffe249628648e4c3c8f5fde2618ee84 size 31681 diff --git a/Content/Editor/DebugMaterials/SingleColor/Surface.flax b/Content/Editor/DebugMaterials/SingleColor/Surface.flax index 1a290abf9..434843e5b 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Surface.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Surface.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3cea6b86ff521ecd437c328888c3d8aa06782705fc5608ca5d1f94b5cb727dc5 +oid sha256:ece28c7bee7ab96745f4d80e9f1c1d35739357025959f6a4d2ff642ccca61d8e size 30417 diff --git a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax index ced4d8700..8494e5756 100644 --- a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax +++ b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9f1d68b6af48224eb2ba618b0770234c98cf902821e9aed9bb2a3308a66190d +oid sha256:62b952bfb3f68fb966b1ade28536ee9cfc55fd8189a388fa1437466b0c4578d5 size 32134 diff --git a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax index 95abeb2e7..44dc7be84 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a044ea32b8bbb570481e4913ab479378805b21d5e7cf215525820041ca9778e4 +oid sha256:a73341ea4465e90bf196969a5396ecb12e9001c5d6fabc2c7720deeb5a1f1137 size 20826 diff --git a/Content/Editor/Gizmo/Material.flax b/Content/Editor/Gizmo/Material.flax index 37330d22d..d7dc5f050 100644 --- a/Content/Editor/Gizmo/Material.flax +++ 
b/Content/Editor/Gizmo/Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8824519ee184e8ec041f2e3898cb0cbe0a5771e1bc87945fb12426231070bbd +oid sha256:5f940b3bb183ac25b4dfc896086f4cd5f840659aa0caff8c90d0ebf6695656a1 size 32738 diff --git a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax index 14cafb11c..f376d17ee 100644 --- a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax +++ b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be0bbc49bc0ff36a7043c84e2c046fccc9e9e07fb82557f64df2155c731ad735 +oid sha256:2c431e061d06bba5145820f9d39b0570901c4e169d6d520266f710d0013c3d3f size 16166 diff --git a/Content/Editor/Highlight Material.flax b/Content/Editor/Highlight Material.flax index 0119b246b..e42a57a5e 100644 --- a/Content/Editor/Highlight Material.flax +++ b/Content/Editor/Highlight Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b00eb326111e276edc04f82ac55e79aaa763b1db6335582cc2b626d02fc7c86f +oid sha256:ebee2e0554c42bcef0c2736baafc41779b9ad396fc5328375e3bb7ae9ea99c9a size 30523 diff --git a/Content/Editor/Icons/IconsMaterial.flax b/Content/Editor/Icons/IconsMaterial.flax index 2e26c8088..119d8e67e 100644 --- a/Content/Editor/Icons/IconsMaterial.flax +++ b/Content/Editor/Icons/IconsMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:557850506c6354ec27dde4cb747303ae413c4cda607675936d78fb04691abfb7 +oid sha256:3b87295cd8c906adcb0640aafeeea5aa5fbf6055fbd0ab22507a12d1dc064575 size 30451 diff --git a/Content/Editor/Particles/Particle Material Color.flax b/Content/Editor/Particles/Particle Material Color.flax index 3d3b6733a..bdd0fa4cb 100644 --- a/Content/Editor/Particles/Particle Material Color.flax +++ b/Content/Editor/Particles/Particle Material Color.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:9364ba9a7dd268b15d94caca9b05eb9552ed3113542b9bd4ec6f556d982c6c81 +oid sha256:3e99bffeca23ab3273c284cd2202ffaa65b9dd73c60b22d7e57ad0807cd64b01 size 29912 diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index 1175c0659..de45c178e 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:efb898179f3ce9e2ba6ab7a24194777fd0849003e534e7fd5d0c50447a1608b3 -size 37107 +oid sha256:aa271d248a69f9541679267ca058f7ffcb2d0f136e210e0db41b0e793c3af0b6 +size 38680 diff --git a/Content/Editor/Terrain/Circle Brush Material.flax b/Content/Editor/Terrain/Circle Brush Material.flax index 0a49faf4d..bcaa5b08e 100644 --- a/Content/Editor/Terrain/Circle Brush Material.flax +++ b/Content/Editor/Terrain/Circle Brush Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03e69fdfba63e931c034c46f28e882d5bd03318264110974975521bcdaf5bb80 +oid sha256:83364e5d0f20485779c538e80c0e14a3acdbc5f8ef8dd48378353729b8a72cb4 size 27498 diff --git a/Content/Engine/DefaultMaterial.flax b/Content/Engine/DefaultMaterial.flax index 8e9b8a19d..8e998c38b 100644 --- a/Content/Engine/DefaultMaterial.flax +++ b/Content/Engine/DefaultMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:316320b701074fbc005840741365da69f3fbe0b4e3035b301ed2c1b9178ee940 +oid sha256:3dde40a2494bc7175939d9c4edc2c55ecfdc9142739482cd4303ac7f9c67c798 size 32442 diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index 910a9de77..29aa8424e 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39b75d36a61f2060d556271a60ccd6426011797ae91fce109dce4ac360cf7e64 +oid sha256:9979bb1d7fdd67cf124f01fc0de7eb28666cc04ff5925e9580bc130a8a7adb65 
size 22963 diff --git a/Content/Engine/SingleColorMaterial.flax b/Content/Engine/SingleColorMaterial.flax index 4f09e5897..193639a39 100644 --- a/Content/Engine/SingleColorMaterial.flax +++ b/Content/Engine/SingleColorMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64f1141ed962a6582a320e5fa7e83305d7271593f80cf760dfb8a03b957152d2 +oid sha256:97fa28df30dfa46eb10b78459aaef08b36d9ae170e16f3aa70f71e3529074c2b size 30618 diff --git a/Content/Engine/SkyboxMaterial.flax b/Content/Engine/SkyboxMaterial.flax index 0af37967f..401a4ed74 100644 --- a/Content/Engine/SkyboxMaterial.flax +++ b/Content/Engine/SkyboxMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5aa76eac8fdb989a2f1821b0ee815ec13dc75cda64e0950c3885a00b7bb713b +oid sha256:03b39e366563b7d26d76c18bf48ae414597a8e8c9258d146c0588a753f65bc93 size 31816 diff --git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax index 727527228..8e7bff88e 100644 --- a/Content/Shaders/GI/DDGI.flax +++ b/Content/Shaders/GI/DDGI.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41fa86206efb10cebf8260c5dbac43e07dc0977725c958e4aff9732b44eac0ab -size 23690 +oid sha256:29915a5b5df2982298aa5c5fd585086774ff70eb98a8e6d8e521d3ba261e9a45 +size 25864 diff --git a/Content/Shaders/GlobalSignDistanceField.flax b/Content/Shaders/GlobalSignDistanceField.flax index 0affdb165..5e694f134 100644 --- a/Content/Shaders/GlobalSignDistanceField.flax +++ b/Content/Shaders/GlobalSignDistanceField.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb1ffe921c3a317cf6d90562db6ba897baccc43223111ab74b5d1ac23665264e -size 11827 +oid sha256:ed1f8075002df1e142c9d2dc84e0931bc5dd33e5db13a2a8e0282e13ca716bcf +size 13061 From b760dcdb58b84f6c6954fb72266a0b5c7e2ef6f8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 13 Jun 2024 18:10:18 +0200 Subject: [PATCH 157/292] Fix Global Surface Atlas flickering in scenes with lots of objects --- 
Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index ffd570fa7..cea0cb2ba 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -712,7 +712,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Calculate optimal capacity for the objects buffer objectsBufferCapacity *= sizeof(uint32) * 2; // Convert to bytes and add safe margin - objectsBufferCapacity = Math::Clamp(Math::AlignUp(objectsBufferCapacity, 4096u), 32u * 1024u, 1024u * 1024u); // Align up to 4kB, clamp 32kB - 1MB + objectsBufferCapacity = Math::Clamp(Math::AlignUp(objectsBufferCapacity, 4096u), 32u * 1024u, 16 * 1024u * 1024u); // Align up to 4kB, clamp 32kB - 16MB surfaceAtlasData.CulledObjectsUsageHistory.Add(objectsBufferCapacity); // Record history objectsBufferCapacity = surfaceAtlasData.CulledObjectsUsageHistory.Maximum(); // Use biggest value from history if (surfaceAtlasData.CulledObjectsUsageHistory.Count() == surfaceAtlasData.CulledObjectsUsageHistory.Capacity()) From f02e7d0936f8fd32ed46c1daee73628e2ae37299 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 13 Jun 2024 23:01:51 +0200 Subject: [PATCH 158/292] Fix compilation error --- Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp b/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp index ba3358d35..fed010e4e 100644 --- a/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp +++ b/Source/Engine/Platform/Mac/MacFileSystemWatcher.cpp @@ -39,7 +39,7 @@ void DirectoryWatchCallback( ConstFSEventStreamRef StreamRef, void* FileWatcherP { action = FileSystemAction::Rename; } - if (rmodified) + if (modified) { action = FileSystemAction::Modify; } 
From e3f0991805b879b4b76a3d185699895d16242827 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 13 Jun 2024 23:02:20 +0200 Subject: [PATCH 159/292] Optimize Global Surface Atlas drawing when atlas is full --- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index cea0cb2ba..941290936 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -109,6 +109,7 @@ class GlobalSurfaceAtlasCustomBuffer : public RenderBuffers::CustomBuffer, publi { public: int32 Resolution = 0; + int32 AtlasPixelsTotal = 0; int32 AtlasPixelsUsed = 0; uint64 LastFrameAtlasInsertFail = 0; uint64 LastFrameAtlasDefragmentation = 0; @@ -394,6 +395,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm); #undef INIT_ATLAS_TEXTURE surfaceAtlasData.Resolution = resolution; + surfaceAtlasData.AtlasPixelsTotal = resolution * resolution; if (!surfaceAtlasData.ChunksBuffer) { surfaceAtlasData.ChunksBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.ChunksBuffer")); @@ -409,7 +411,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co constexpr float maxUsageToDefrag = 0.8f; if (currentFrame - surfaceAtlasData.LastFrameAtlasInsertFail < 10 && currentFrame - surfaceAtlasData.LastFrameAtlasDefragmentation > 60 && - (float)surfaceAtlasData.AtlasPixelsUsed / (resolution * resolution) < maxUsageToDefrag) + (float)surfaceAtlasData.AtlasPixelsUsed / surfaceAtlasData.AtlasPixelsTotal < maxUsageToDefrag) { surfaceAtlasData.ClearObjects(); } @@ -1231,7 +1233,10 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con } // Insert tile into atlas - auto* tile = 
surfaceAtlasData.AtlasTiles->Insert(tileResolution, tileResolution, 0, &surfaceAtlasData, actorObject, tileIndex); + uint16 tilePixels = tileResolution * tileResolution; + GlobalSurfaceAtlasTile* tile = nullptr; + if (tilePixels <= surfaceAtlasData.AtlasPixelsTotal - surfaceAtlasData.AtlasPixelsUsed) + tile = surfaceAtlasData.AtlasTiles->Insert(tileResolution, tileResolution, 0, &surfaceAtlasData, actorObject, tileIndex); if (tile) { if (!object) From 6e0dd2064afc3615b2db975deb4bb2c082ba11c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 17 Jun 2024 18:25:11 +0200 Subject: [PATCH 160/292] Add nearby probes search to fill cells with missing GI data #1614 --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 1 - Source/Shaders/GI/DDGI.hlsl | 47 +++++++++++++------ Source/Shaders/GI/DDGI.shader | 17 ++++--- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 238418a47..3fbf2dc47 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -426,7 +426,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Shift the volume origin based on scroll offsets for each axis once it overflows for (int32 axis = 0; axis < 3; axis++) { - // different volume scroll that preserves the scroll offset delta relative to the probe count const int32 probeCount = ddgiData.ProbeCounts.Raw[axis]; int32& scrollOffset = cascade.ProbeScrollOffsets.Raw[axis]; while (scrollOffset >= probeCount) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 16e79df68..1c052e8e0 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -148,13 +148,12 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T float fadeDistance = probesSpacing * 0.5f; float cascadeWeight = 
saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance); if (cascadeWeight > dither) // Use dither to make transition smoother - { break; - } } if (cascadeIndex == data.CascadesCount) return data.FallbackIrradiance; - uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); + uint3 probeCoordsEnd = data.ProbesCounts - uint3(1, 1, 1); + uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), probeCoordsEnd); // Bias the world-space position to reduce artifacts float3 viewDir = normalize(data.ViewPos - worldPosition); @@ -167,17 +166,40 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Loop over the closest probes to accumulate their contributions float4 irradiance = float4(0, 0, 0, 0); + const int3 SearchAxisMasks[3] = { int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1) }; for (uint i = 0; i < 8; i++) { uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; - uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, uint3(0, 0, 0), data.ProbesCounts - uint3(1, 1, 1)); + uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, uint3(0, 0, 0), probeCoordsEnd); uint probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords); // Load probe position and state float4 probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); uint probeState = DecodeDDGIProbeState(probeData); if (probeState == DDGI_PROBE_STATE_INACTIVE) - continue; + { + // Search nearby probes to find any nearby GI sample + for (int searchDistance = 1; searchDistance < 3 && probeState == DDGI_PROBE_STATE_INACTIVE; searchDistance++) + for (uint searchAxis = 0; searchAxis < 3; searchAxis++) + { + int searchAxisDir = probeCoordsOffset[searchAxis] ? 
1 : -1; + int3 searchCoordsOffset = SearchAxisMasks[searchAxis] * searchAxisDir * searchDistance; + uint3 searchCoords = clamp((int3)probeCoords + searchCoordsOffset, int3(0, 0, 0), (int3)probeCoordsEnd); + uint searchIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, searchCoords); + float4 searchData = LoadDDGIProbeData(data, probesData, cascadeIndex, searchIndex); + uint searchState = DecodeDDGIProbeState(searchData); + if (searchState != DDGI_PROBE_STATE_INACTIVE) + { + // Use nearby probe as a fallback (visibility test might ignore it but with smooth gradient) + probeCoords = searchCoords; + probeIndex = searchIndex; + probeData = searchData; + probeState = searchState; + break; + } + } + if (probeState == DDGI_PROBE_STATE_INACTIVE) continue; + } float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * probesSpacing); float3 probePosition = probeBasePosition + probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing @@ -193,15 +215,13 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T float2 octahedralCoords = GetOctahedralCoords(-biasedPosToProbe); float2 uv = GetDDGIProbeUV(data, cascadeIndex, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE); float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f; - float probeDistanceMean = probeDistance.x; // Visibility weight (Chebyshev) - if (biasedPosToProbeDist > probeDistanceMean) + if (biasedPosToProbeDist > probeDistance.x) { - float probeDistanceMean2 = probeDistance.y; - float probeDistanceVariance = abs(Square(probeDistanceMean) - probeDistanceMean2); - float chebyshevWeight = probeDistanceVariance / (probeDistanceVariance + Square(biasedPosToProbeDist - probeDistanceMean)); - weight *= max(chebyshevWeight * chebyshevWeight * chebyshevWeight, 0.05f); + float variance = abs(Square(probeDistance.x) - probeDistance.y); + float visibilityWeight = variance / (variance + Square(biasedPosToProbeDist - 
probeDistance.x)); + weight *= max(visibilityWeight * visibilityWeight * visibilityWeight, 0.05f); } // Avoid a weight of zero @@ -209,8 +229,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Adjust weight curve to inject a small portion of light const float minWeightThreshold = 0.2f; - if (weight < minWeightThreshold) - weight *= weight * weight * (1.0f / (minWeightThreshold * minWeightThreshold)); + if (weight < minWeightThreshold) weight *= Square(weight) / Square(minWeightThreshold); // Calculate trilinear weights based on the distance to each probe to smoothly transition between grid of 8 probes float3 trilinear = lerp(1.0f - biasAlpha, biasAlpha, (float3)probeCoordsOffset); @@ -246,7 +265,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T if (irradiance.a > 0.0f) { // Normalize irradiance - irradiance.rgb *= 1.0f / irradiance.a; + irradiance.rgb /= irradiance.a; #if DDGI_SRGB_BLENDING irradiance.rgb *= irradiance.rgb; #endif diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index e9ee51687..f6c00d331 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -19,6 +19,7 @@ // This must match C++ #define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending #define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller) +#define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position @@ -146,9 +147,9 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float sdfDst = abs(sdf); const float ProbesDistanceLimits[4] = { 1.1f, 2.3f, 2.5f, 
2.5f }; const float ProbesRelocateLimits[4] = { 0.4f, 0.5f, 0.6f, 0.7f }; - float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex]; - float distanceLimit = length(probesSpacing) * ProbesDistanceLimits[CascadeIndex]; - float relocateLimit = length(probesSpacing) * ProbesRelocateLimits[CascadeIndex]; + float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.8f; + float distanceLimit = probesSpacing * ProbesDistanceLimits[CascadeIndex]; + float relocateLimit = probesSpacing * ProbesRelocateLimits[CascadeIndex]; if (sdfDst > distanceLimit + length(probeOffset)) // Probe is too far from geometry (or deep inside) { // Disable it @@ -317,12 +318,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) float4 radiance; if (hit.IsHit()) { - /*if (hit.HitSDF <= 0.0f && hit.HitTime <= GlobalSDF.CascadeVoxelSize[0]) +#if DDGI_TRACE_NEGATIVE + if (hit.HitSDF <= 0.0f && hit.HitTime <= GlobalSDF.CascadeVoxelSize[0]) { // Ray starts inside geometry (mark as negative distance and reduce it's influence during irradiance blending) radiance = float4(0, 0, 0, hit.HitTime * -0.25f); } - else*/ + else +#endif { // Sample Global Surface Atlas to get the lighting at the hit location float3 hitPosition = hit.GetHitPosition(trace); @@ -393,7 +396,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ uint backfacesLimit = uint(probeRaysCount * 0.1f); #else float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; - float distanceLimit = length(probesSpacing) * 1.5f; + float distanceLimit = probesSpacing * 1.5f; #endif BRANCH @@ -435,6 +438,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ #if DDGI_PROBE_UPDATE_MODE == 0 float4 rayRadiance = CachedProbesTraceRadiance[rayIndex]; +#if DDGI_TRACE_NEGATIVE if (rayRadiance.w < 0.0f) { // Count backface hits @@ -448,6 +452,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ } continue; } +#endif // Add radiance 
(RGB) and weight (A) result += float4(rayRadiance.rgb * rayWeight, rayWeight); From 87aa23107d8e362f20287a94fa5e58b0baac2b0f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 17 Jun 2024 18:59:13 +0200 Subject: [PATCH 161/292] Fix shadow sharpness processing --- Source/Shaders/ShadowsCommon.hlsl | 2 +- Source/Shaders/ShadowsSampling.hlsl | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/Source/Shaders/ShadowsCommon.hlsl b/Source/Shaders/ShadowsCommon.hlsl index 43fd0c2d5..dc449a9a5 100644 --- a/Source/Shaders/ShadowsCommon.hlsl +++ b/Source/Shaders/ShadowsCommon.hlsl @@ -80,7 +80,7 @@ float CalculateSubsurfaceOcclusion(float opacity, float sceneDepth, float shadow float PostProcessShadow(ShadowData lightShadow, float shadow) { // Apply shadow fade and sharpness - shadow = saturate((shadow - 0.5) * lightShadow.Sharpness + 0.5); + shadow = saturate((shadow - 0.51f) * lightShadow.Sharpness + 0.5f); shadow = lerp(1.0f, shadow, lightShadow.Fade); return shadow; } diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index e53cab08d..393234fc4 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -284,10 +284,6 @@ ShadowSample SampleDirectionalLightShadow(LightData light, Buffer shadow result.SurfaceShadow = PostProcessShadow(shadow, result.SurfaceShadow); - // Fix shadow intensity that got reduced by cascades sharpness stabilization (see above) - if (cascadeIndex != 0 && result.SurfaceShadow <= 0.1f) - result.SurfaceShadow += 0.01f; - return result; } From e08b57e814682c4d8bafab992db3aa513b562662 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Mon, 17 Jun 2024 20:00:30 +0300 Subject: [PATCH 162/292] Support custom deprecation messages in bindings generator --- Source/Engine/Content/Asset.h | 16 ++++++- Source/Engine/Core/Compiler.h | 8 ++-- Source/Engine/Core/Config/GraphicsSettings.h | 2 +- Source/Engine/Core/Math/Matrix.h | 2 +- Source/Engine/Core/Types/StringView.h | 2 
+- Source/Engine/Debug/DebugDraw.h | 2 +- Source/Engine/Graphics/Graphics.h | 2 +- Source/Engine/Graphics/RenderView.h | 4 +- Source/Engine/Level/Actor.h | 4 +- Source/Engine/Level/Actors/AnimatedModel.h | 2 +- Source/Engine/Navigation/NavMeshRuntime.h | 2 +- Source/Engine/Navigation/Navigation.h | 2 +- Source/Engine/Networking/NetworkConfig.h | 4 +- Source/Engine/Platform/Base/PlatformBase.h | 6 +-- .../Tools/Flax.Build/Bindings/ApiTypeInfo.cs | 7 +-- .../Bindings/BindingsGenerator.CSharp.cs | 15 ++++-- .../Bindings/BindingsGenerator.Cache.cs | 2 +- .../Bindings/BindingsGenerator.Parsing.cs | 46 +++++++++++++++---- .../Tools/Flax.Build/Bindings/MemberInfo.cs | 8 ++-- .../Tools/Flax.Build/Bindings/TypedefInfo.cs | 2 +- .../Platforms/Windows/WindowsToolchainBase.cs | 3 ++ 21 files changed, 98 insertions(+), 43 deletions(-) diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index 15c5cae6f..125abdda2 100644 --- a/Source/Engine/Content/Asset.h +++ b/Source/Engine/Content/Asset.h @@ -184,8 +184,20 @@ public: /// The output collection of the asset ids referenced by this asset. /// The output list of file paths referenced by this asset. Files might come from project Content folder (relative path is preserved in cooked game), or external location (copied into Content root folder of cooked game). virtual void GetReferences(Array& assets, Array& files) const; - // [Deprecated in v1.9] - DEPRECATED virtual void GetReferences(Array& output) const; + + + /// + /// Gets the asset references. Supported only in Editor. + /// [Deprecated in v1.9] + /// + /// + /// For some asset types (e.g. scene or prefab) it may contain invalid asset ids due to not perfect gather method, + /// which is optimized to perform scan very quickly. Before using those ids perform simple validation via Content cache API. + /// The result collection contains only 1-level-deep references (only direct ones) and is invalid if asset is not loaded. 
+ /// Also, the output data may have duplicated asset ids or even invalid ids (Guid::Empty). + /// + /// The output collection of the asset ids referenced by this asset. + DEPRECATED("Use GetReferences with assets and files parameter instead") virtual void GetReferences(Array& output) const; /// /// Gets the asset references. Supported only in Editor. diff --git a/Source/Engine/Core/Compiler.h b/Source/Engine/Core/Compiler.h index 9c736aa14..3e178143a 100644 --- a/Source/Engine/Core/Compiler.h +++ b/Source/Engine/Core/Compiler.h @@ -21,7 +21,6 @@ #define ALIGN_BEGIN(_align) #define ALIGN_END(_align) __attribute__( (aligned(_align) ) ) #define OFFSET_OF(X, Y) __builtin_offsetof(X, Y) -#define DEPRECATED [[deprecated]] #define PRAGMA_DISABLE_DEPRECATION_WARNINGS \ _Pragma("clang diagnostic push") \ _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") @@ -53,7 +52,6 @@ #define ALIGN_BEGIN(_align) #define ALIGN_END(_align) __attribute__( (aligned(_align) ) ) #define OFFSET_OF(X, Y) __builtin_offsetof(X, Y) -#define DEPRECATED [[deprecated]] #define PRAGMA_DISABLE_DEPRECATION_WARNINGS #define PRAGMA_ENABLE_DEPRECATION_WARNINGS @@ -80,7 +78,6 @@ #define ALIGN_BEGIN(_align) __declspec(align(_align)) #define ALIGN_END(_align) #define OFFSET_OF(X, Y) offsetof(X, Y) -#define DEPRECATED __declspec(deprecated) #undef __PRETTY_FUNCTION__ #define __PRETTY_FUNCTION__ __FUNCSIG__ #define PRAGMA_DISABLE_DEPRECATION_WARNINGS \ @@ -100,6 +97,11 @@ #define PACK_STRUCT(_declaration) PACK_BEGIN() _declaration PACK_END() +#define _DEPRECATED_0() [[deprecated]] +#define _DEPRECATED_1(msg) [[deprecated(msg)]] +#define _DEPRECATED(_0, _1, LASTARG, ...) LASTARG +#define DEPRECATED(...) 
_DEPRECATED(, ##__VA_ARGS__, _DEPRECATED_1(__VA_ARGS__), _DEPRECATED_0()) + // C++ 17 #if __cplusplus >= 201703L #define IF_CONSTEXPR constexpr diff --git a/Source/Engine/Core/Config/GraphicsSettings.h b/Source/Engine/Core/Config/GraphicsSettings.h index 9fef78f74..4f837fd52 100644 --- a/Source/Engine/Core/Config/GraphicsSettings.h +++ b/Source/Engine/Core/Config/GraphicsSettings.h @@ -64,7 +64,7 @@ public: /// [Deprecated in v1.9] /// API_FIELD(Attributes="EditorOrder(1320), DefaultValue(false), EditorDisplay(\"Quality\", \"Allow CSM Blending\")") - DEPRECATED bool AllowCSMBlending = false; + DEPRECATED() bool AllowCSMBlending = false; /// /// Default probes cubemap resolution (use for Environment Probes, can be overriden per-actor). diff --git a/Source/Engine/Core/Math/Matrix.h b/Source/Engine/Core/Math/Matrix.h index b7f5ef8cf..0ee31df4f 100644 --- a/Source/Engine/Core/Math/Matrix.h +++ b/Source/Engine/Core/Math/Matrix.h @@ -481,7 +481,7 @@ public: /// When the method completes, contains the translation component of the decomposed matrix. /// This method is designed to decompose an SRT transformation matrix only. void Decompose(Float3& scale, Matrix3x3& rotation, Float3& translation) const; - DEPRECATED void Decompose(Float3& scale, Matrix& rotation, Float3& translation) const; + DEPRECATED("Use Decompose with 'Matrix3x3& rotation' parameter instead") void Decompose(Float3& scale, Matrix& rotation, Float3& translation) const; public: Matrix operator*(const float scale) const diff --git a/Source/Engine/Core/Types/StringView.h b/Source/Engine/Core/Types/StringView.h index 052bc1d54..2e9e9aaf5 100644 --- a/Source/Engine/Core/Types/StringView.h +++ b/Source/Engine/Core/Types/StringView.h @@ -115,7 +115,7 @@ public: /// Gets the pointer to the string or to the static empty text if string is null. Returned pointer is always non-null, but is not null-terminated. 
/// [Deprecated on 26.10.2022, expires on 26.10.2024] Use GetText() /// - DEPRECATED const T* GetNonTerminatedText() const + DEPRECATED("Use GetText instead") const T* GetNonTerminatedText() const { return _data ? _data : (const T*)TEXT(""); } diff --git a/Source/Engine/Debug/DebugDraw.h b/Source/Engine/Debug/DebugDraw.h index 30e4c905f..93718abe0 100644 --- a/Source/Engine/Debug/DebugDraw.h +++ b/Source/Engine/Debug/DebugDraw.h @@ -96,7 +96,7 @@ API_CLASS(Static) class FLAXENGINE_API DebugDraw /// The color. /// The duration (in seconds). Use 0 to draw it only once. /// If set to true depth test will be performed, otherwise depth will be ignored. - API_FUNCTION() DEPRECATED static void DrawRay(const Vector3& origin, const Vector3& direction, const Color& color = Color::White, float duration = 0.0f, bool depthTest = true); + API_FUNCTION() DEPRECATED("Use DrawRay with length parameter instead") static void DrawRay(const Vector3& origin, const Vector3& direction, const Color& color, float duration, bool depthTest); /// /// Draws the line in a direction. diff --git a/Source/Engine/Graphics/Graphics.h b/Source/Engine/Graphics/Graphics.h index 55ded56e5..be9ec9ac3 100644 --- a/Source/Engine/Graphics/Graphics.h +++ b/Source/Engine/Graphics/Graphics.h @@ -52,7 +52,7 @@ public: /// Enables cascades splits blending for directional light shadows. /// [Deprecated in v1.9] /// - API_FIELD() DEPRECATED static bool AllowCSMBlending; + API_FIELD() DEPRECATED() static bool AllowCSMBlending; /// /// The Global SDF quality. Controls the volume texture resolution and amount of cascades to use. diff --git a/Source/Engine/Graphics/RenderView.h b/Source/Engine/Graphics/RenderView.h index f2411ae2a..0b9f931e7 100644 --- a/Source/Engine/Graphics/RenderView.h +++ b/Source/Engine/Graphics/RenderView.h @@ -161,13 +161,13 @@ public: /// The model LOD bias. Default is 0. Applied to all the objects in the shadow maps render views. 
Can be used to improve shadows rendering performance or increase quality. /// [Deprecated on 26.10.2022, expires on 26.10.2024] /// - API_FIELD() DEPRECATED int32 ShadowModelLODBias = 0; + API_FIELD() DEPRECATED() int32 ShadowModelLODBias = 0; /// /// The model LOD distance scale factor. Default is 1. Applied to all the objects in the shadow maps render views. Higher values increase LODs quality. Can be used to improve shadows rendering performance or increase quality. /// [Deprecated on 26.10.2022, expires on 26.10.2024] /// - API_FIELD() DEPRECATED float ShadowModelLODDistanceFactor = 1.0f; + API_FIELD() DEPRECATED() float ShadowModelLODDistanceFactor = 1.0f; /// /// Temporal Anti-Aliasing jitter frame index. diff --git a/Source/Engine/Level/Actor.h b/Source/Engine/Level/Actor.h index dc77cdbee..5a792858d 100644 --- a/Source/Engine/Level/Actor.h +++ b/Source/Engine/Level/Actor.h @@ -164,13 +164,13 @@ public: /// [Deprecated in v1.5] /// API_PROPERTY(Attributes="HideInEditor, NoSerialize, NoAnimate") - DEPRECATED const String& GetTag() const; + DEPRECATED("Use HasTag instead") const String& GetTag() const; /// /// Sets the name of the tag. /// [Deprecated in v1.5] /// - API_PROPERTY() DEPRECATED void SetTag(const StringView& value); + API_PROPERTY() DEPRECATED("Use AddTag instead") void SetTag(const StringView& value); /// /// Gets the actor name. diff --git a/Source/Engine/Level/Actors/AnimatedModel.h b/Source/Engine/Level/Actors/AnimatedModel.h index bb4b3a8f0..cf70ec220 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.h +++ b/Source/Engine/Level/Actors/AnimatedModel.h @@ -160,7 +160,7 @@ public: /// [Deprecated on 26.10.2022, expires on 26.10.2024] /// API_FIELD(Attributes="EditorOrder(110), DefaultValue(ShadowsCastingMode.All), EditorDisplay(\"Skinned Model\")") - DEPRECATED ShadowsCastingMode ShadowsMode = ShadowsCastingMode::All; + DEPRECATED() ShadowsCastingMode ShadowsMode = ShadowsCastingMode::All; /// /// The animation root motion apply target. 
If not specified the animated model will apply it itself. diff --git a/Source/Engine/Navigation/NavMeshRuntime.h b/Source/Engine/Navigation/NavMeshRuntime.h index 40ea0959e..a3d7d9d53 100644 --- a/Source/Engine/Navigation/NavMeshRuntime.h +++ b/Source/Engine/Navigation/NavMeshRuntime.h @@ -160,7 +160,7 @@ public: /// The source point. /// The result position on the navmesh (valid only if method returns true). /// True if found valid location on the navmesh, otherwise false. - API_FUNCTION() DEPRECATED bool ProjectPoint(const Vector3& point, API_PARAM(Out) Vector3& result) const + API_FUNCTION() DEPRECATED("Use FindClosestPoint instead") bool ProjectPoint(const Vector3& point, API_PARAM(Out) Vector3& result) const { return FindClosestPoint(point, result); } diff --git a/Source/Engine/Navigation/Navigation.h b/Source/Engine/Navigation/Navigation.h index 7628aa59a..4f920b673 100644 --- a/Source/Engine/Navigation/Navigation.h +++ b/Source/Engine/Navigation/Navigation.h @@ -54,7 +54,7 @@ public: /// The source point. /// The result position on the navmesh (valid only if method returns true). /// True if found valid location on the navmesh, otherwise false. - API_FUNCTION() DEPRECATED static bool ProjectPoint(const Vector3& point, API_PARAM(Out) Vector3& result) + API_FUNCTION() DEPRECATED("Use FindClosestPoint instead") static bool ProjectPoint(const Vector3& point, API_PARAM(Out) Vector3& result) { return FindClosestPoint(point, result); } diff --git a/Source/Engine/Networking/NetworkConfig.h b/Source/Engine/Networking/NetworkConfig.h index e9827e536..0559dd34b 100644 --- a/Source/Engine/Networking/NetworkConfig.h +++ b/Source/Engine/Networking/NetworkConfig.h @@ -10,7 +10,7 @@ class ScriptingObject; /// Network driver implementations enum. 
/// [Deprecated in v1.3] /// -API_ENUM(Namespace="FlaxEngine.Networking") enum class DEPRECATED NetworkDriverType +API_ENUM(Namespace="FlaxEngine.Networking") enum class DEPRECATED() NetworkDriverType { /// /// Invalid network driver implementation. @@ -36,7 +36,7 @@ API_STRUCT(Namespace="FlaxEngine.Networking") struct FLAXENGINE_API NetworkConfi /// [Deprecated in v1.3] /// API_FIELD() - DEPRECATED NetworkDriverType NetworkDriverType; + DEPRECATED("Use NetworkDriver field instead") NetworkDriverType NetworkDriverType; /// /// The network driver instance (implements INetworkDriver) that will be used to create and manage the peer, send and receive messages. diff --git a/Source/Engine/Platform/Base/PlatformBase.h b/Source/Engine/Platform/Base/PlatformBase.h index 35cfd8d17..d5ff4c59f 100644 --- a/Source/Engine/Platform/Base/PlatformBase.h +++ b/Source/Engine/Platform/Base/PlatformBase.h @@ -753,7 +753,7 @@ public: /// True if start process with hidden window /// True if wait for process competition /// Retrieves the termination status of the specified process. Valid only if processed ended. - API_FUNCTION() DEPRECATED static int32 StartProcess(const StringView& filename, const StringView& args, const StringView& workingDir, bool hiddenWindow = false, bool waitForEnd = false); + API_FUNCTION() DEPRECATED("Use CreateProcess instead") static int32 StartProcess(const StringView& filename, const StringView& args, const StringView& workingDir, bool hiddenWindow = false, bool waitForEnd = false); /// /// Starts a new process (runs commandline). Waits for it's end and captures its output. @@ -763,7 +763,7 @@ public: /// The custom path of the working directory. /// True if start process with hidden window. /// Retrieves the termination status of the specified process. Valid only if processed ended. 
- API_FUNCTION() DEPRECATED static int32 RunProcess(const StringView& cmdLine, const StringView& workingDir, bool hiddenWindow = true); + API_FUNCTION() DEPRECATED("Use CreateProcess instead") static int32 RunProcess(const StringView& cmdLine, const StringView& workingDir, bool hiddenWindow = true); /// /// Starts a new process (runs commandline). Waits for it's end and captures its output. @@ -774,7 +774,7 @@ public: /// The process environment variables. If null the current process environment is used. /// True if start process with hidden window. /// Retrieves the termination status of the specified process. Valid only if processed ended. - API_FUNCTION() DEPRECATED static int32 RunProcess(const StringView& cmdLine, const StringView& workingDir, const Dictionary& environment, bool hiddenWindow = true); + API_FUNCTION() DEPRECATED("Use CreateProcess instead") static int32 RunProcess(const StringView& cmdLine, const StringView& workingDir, const Dictionary& environment, bool hiddenWindow = true); /// /// Creates a new process. 
diff --git a/Source/Tools/Flax.Build/Bindings/ApiTypeInfo.cs b/Source/Tools/Flax.Build/Bindings/ApiTypeInfo.cs index ee1004213..9c3fe6dd8 100644 --- a/Source/Tools/Flax.Build/Bindings/ApiTypeInfo.cs +++ b/Source/Tools/Flax.Build/Bindings/ApiTypeInfo.cs @@ -21,7 +21,7 @@ namespace Flax.Build.Bindings public string Attributes; public string[] Comment; public bool IsInBuild; - public bool IsDeprecated; + public string DeprecatedMessage; public TypeInfo MarshalAs; internal bool IsInited; internal TypedefInfo Instigator; @@ -34,6 +34,7 @@ namespace Flax.Build.Bindings public virtual bool IsScriptingObject => false; public virtual bool IsPod => false; public virtual bool SkipGeneration => IsInBuild; + public virtual bool IsDeprecated => DeprecatedMessage != null; public FileInfo File { @@ -154,7 +155,7 @@ namespace Flax.Build.Bindings BindingsGenerator.Write(writer, Comment); BindingsGenerator.Write(writer, MarshalAs); writer.Write(IsInBuild); - writer.Write(IsDeprecated); + BindingsGenerator.Write(writer, DeprecatedMessage); BindingsGenerator.Write(writer, Tags); BindingsGenerator.Write(writer, Children); } @@ -168,7 +169,7 @@ namespace Flax.Build.Bindings Comment = BindingsGenerator.Read(reader, Comment); MarshalAs = BindingsGenerator.Read(reader, MarshalAs); IsInBuild = reader.ReadBoolean(); - IsDeprecated = reader.ReadBoolean(); + DeprecatedMessage = BindingsGenerator.Read(reader, DeprecatedMessage); Tags = BindingsGenerator.Read(reader, Tags); Children = BindingsGenerator.Read(reader, Children); diff --git a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs index 70b23502b..5274ad43b 100644 --- a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs +++ b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs @@ -829,7 +829,7 @@ namespace Flax.Build.Bindings } } - private static void GenerateCSharpAttributes(BuildData buildData, StringBuilder contents, string indent, ApiTypeInfo 
apiTypeInfo, string attributes = null, string[] comment = null, bool canUseTooltip = false, bool useUnmanaged = false, string defaultValue = null, bool isDeprecated = false, TypeInfo defaultValueType = null) + private static void GenerateCSharpAttributes(BuildData buildData, StringBuilder contents, string indent, ApiTypeInfo apiTypeInfo, string attributes = null, string[] comment = null, bool canUseTooltip = false, bool useUnmanaged = false, string defaultValue = null, string deprecatedMessage = null, TypeInfo defaultValueType = null) { #if AUTO_DOC_TOOLTIPS var writeTooltip = true; @@ -853,10 +853,15 @@ namespace Flax.Build.Bindings // Skip boilerplate code when using debugger //contents.Append(indent).AppendLine("[System.Diagnostics.DebuggerStepThrough]"); } - if (isDeprecated || apiTypeInfo.IsDeprecated) + if (deprecatedMessage != null || apiTypeInfo.IsDeprecated) { // Deprecated type - contents.Append(indent).AppendLine("[Obsolete]"); + if (!string.IsNullOrEmpty(apiTypeInfo.DeprecatedMessage)) + contents.Append(indent).AppendLine($"[Obsolete(\"{apiTypeInfo.DeprecatedMessage}\")]"); + else if (!string.IsNullOrEmpty(deprecatedMessage)) + contents.Append(indent).AppendLine($"[Obsolete(\"{deprecatedMessage}\")]"); + else + contents.Append(indent).AppendLine("[Obsolete]"); } #if AUTO_DOC_TOOLTIPS @@ -901,12 +906,12 @@ namespace Flax.Build.Bindings private static void GenerateCSharpAttributes(BuildData buildData, StringBuilder contents, string indent, ApiTypeInfo apiTypeInfo, bool useUnmanaged, string defaultValue = null, TypeInfo defaultValueType = null) { - GenerateCSharpAttributes(buildData, contents, indent, apiTypeInfo, apiTypeInfo.Attributes, apiTypeInfo.Comment, true, useUnmanaged, defaultValue, false, defaultValueType); + GenerateCSharpAttributes(buildData, contents, indent, apiTypeInfo, apiTypeInfo.Attributes, apiTypeInfo.Comment, true, useUnmanaged, defaultValue, null, defaultValueType); } private static void GenerateCSharpAttributes(BuildData buildData, 
StringBuilder contents, string indent, ApiTypeInfo apiTypeInfo, MemberInfo memberInfo, bool useUnmanaged, string defaultValue = null, TypeInfo defaultValueType = null) { - GenerateCSharpAttributes(buildData, contents, indent, apiTypeInfo, memberInfo.Attributes, memberInfo.Comment, true, useUnmanaged, defaultValue, memberInfo.IsDeprecated, defaultValueType); + GenerateCSharpAttributes(buildData, contents, indent, apiTypeInfo, memberInfo.Attributes, memberInfo.Comment, true, useUnmanaged, defaultValue, memberInfo.DeprecatedMessage, defaultValueType); } private static bool GenerateCSharpStructureUseDefaultInitialize(BuildData buildData, StructureInfo structureInfo) diff --git a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Cache.cs b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Cache.cs index d9fca9fca..a94307ed6 100644 --- a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Cache.cs +++ b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Cache.cs @@ -19,7 +19,7 @@ namespace Flax.Build.Bindings partial class BindingsGenerator { private static readonly Dictionary TypeCache = new Dictionary(); - private const int CacheVersion = 22; + private const int CacheVersion = 23; internal static void Write(BinaryWriter writer, string e) { diff --git a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Parsing.cs b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Parsing.cs index 46eeeb572..15fe42bfd 100644 --- a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Parsing.cs +++ b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.Parsing.cs @@ -589,7 +589,13 @@ namespace Flax.Build.Bindings token = context.Tokenizer.NextToken(); if (!desc.IsDeprecated && token.Value == "DEPRECATED") { - desc.IsDeprecated = true; + token = context.Tokenizer.NextToken(); + string message = ""; + if (token.Type == TokenType.LeftParent) + context.Tokenizer.SkipUntil(TokenType.RightParent, out message); + else + context.Tokenizer.PreviousToken(); + desc.DeprecatedMessage = 
message.Trim('"'); } else { @@ -698,7 +704,13 @@ namespace Flax.Build.Bindings token = context.Tokenizer.NextToken(); if (!desc.IsDeprecated && token.Value == "DEPRECATED") { - desc.IsDeprecated = true; + token = context.Tokenizer.NextToken(); + string message = ""; + if (token.Type == TokenType.LeftParent) + context.Tokenizer.SkipUntil(TokenType.RightParent, out message); + else + context.Tokenizer.PreviousToken(); + desc.DeprecatedMessage = message.Trim('"'); } else { @@ -797,8 +809,14 @@ namespace Flax.Build.Bindings } else if (!desc.IsDeprecated && token.Value == "DEPRECATED") { - desc.IsDeprecated = true; - context.Tokenizer.NextToken(); + token = context.Tokenizer.NextToken(); + string message = ""; + if (token.Type == TokenType.LeftParent) + { + context.Tokenizer.SkipUntil(TokenType.RightParent, out message); + context.Tokenizer.NextToken(); + } + desc.DeprecatedMessage = message.Trim('"'); } else { @@ -960,7 +978,7 @@ namespace Flax.Build.Bindings propertyInfo.Getter = functionInfo; else propertyInfo.Setter = functionInfo; - propertyInfo.IsDeprecated |= functionInfo.IsDeprecated; + propertyInfo.DeprecatedMessage = functionInfo.DeprecatedMessage; propertyInfo.IsHidden |= functionInfo.IsHidden; if (propertyInfo.Getter != null && propertyInfo.Setter != null) @@ -1025,7 +1043,13 @@ namespace Flax.Build.Bindings token = context.Tokenizer.NextToken(); if (!desc.IsDeprecated && token.Value == "DEPRECATED") { - desc.IsDeprecated = true; + token = context.Tokenizer.NextToken(); + string message = ""; + if (token.Type == TokenType.LeftParent) + context.Tokenizer.SkipUntil(TokenType.RightParent, out message); + else + context.Tokenizer.PreviousToken(); + desc.DeprecatedMessage = message.Trim('"'); } else { @@ -1300,8 +1324,14 @@ namespace Flax.Build.Bindings } else if (!desc.IsDeprecated && token.Value == "DEPRECATED") { - desc.IsDeprecated = true; - context.Tokenizer.NextToken(); + token = context.Tokenizer.NextToken(); + string message = ""; + if (token.Type == 
TokenType.LeftParent) + { + context.Tokenizer.SkipUntil(TokenType.RightParent, out message); + context.Tokenizer.NextToken(); + } + desc.DeprecatedMessage = message.Trim('"'); } else { diff --git a/Source/Tools/Flax.Build/Bindings/MemberInfo.cs b/Source/Tools/Flax.Build/Bindings/MemberInfo.cs index b0eb00313..5d2a5644d 100644 --- a/Source/Tools/Flax.Build/Bindings/MemberInfo.cs +++ b/Source/Tools/Flax.Build/Bindings/MemberInfo.cs @@ -14,12 +14,14 @@ namespace Flax.Build.Bindings public string[] Comment; public bool IsStatic; public bool IsConstexpr; - public bool IsDeprecated; + public string DeprecatedMessage; public bool IsHidden; public AccessLevel Access; public string Attributes; public Dictionary Tags; + public virtual bool IsDeprecated => DeprecatedMessage != null; + public bool HasAttribute(string name) { return Attributes != null && Attributes.Contains(name); @@ -46,7 +48,7 @@ namespace Flax.Build.Bindings BindingsGenerator.Write(writer, Comment); writer.Write(IsStatic); writer.Write(IsConstexpr); - writer.Write(IsDeprecated); + BindingsGenerator.Write(writer, DeprecatedMessage); writer.Write(IsHidden); writer.Write((byte)Access); BindingsGenerator.Write(writer, Attributes); @@ -59,7 +61,7 @@ namespace Flax.Build.Bindings Comment = BindingsGenerator.Read(reader, Comment); IsStatic = reader.ReadBoolean(); IsConstexpr = reader.ReadBoolean(); - IsDeprecated = reader.ReadBoolean(); + DeprecatedMessage = BindingsGenerator.Read(reader, DeprecatedMessage); IsHidden = reader.ReadBoolean(); Access = (AccessLevel)reader.ReadByte(); Attributes = BindingsGenerator.Read(reader, Attributes); diff --git a/Source/Tools/Flax.Build/Bindings/TypedefInfo.cs b/Source/Tools/Flax.Build/Bindings/TypedefInfo.cs index 661ca51a4..3d85fb1b3 100644 --- a/Source/Tools/Flax.Build/Bindings/TypedefInfo.cs +++ b/Source/Tools/Flax.Build/Bindings/TypedefInfo.cs @@ -76,7 +76,7 @@ namespace Flax.Build.Bindings if (Comment != null && Comment.Length != 0) typedef.Comment = Comment; 
typedef.IsInBuild |= IsInBuild; - typedef.IsDeprecated |= IsDeprecated; + typedef.DeprecatedMessage = DeprecatedMessage; if (typedef is ClassStructInfo typedefClassStruct && typedefClassStruct.IsTemplate) { // Inflate template type diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs index 252d9134d..adda38e93 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs @@ -617,6 +617,9 @@ namespace Flax.Build.Platforms commonArgs.Add(string.Format("/FU\"{0}\\platform.winmd\"", dir)); } } + + // Preprocessor conformance mode + commonArgs.Add("/Zc:preprocessor"); } // Add preprocessor definitions From 6c8288f38a1ba40897799af50793c84e78be8d97 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Mon, 17 Jun 2024 20:13:53 +0300 Subject: [PATCH 163/292] Fix deprecation warnings in generated bindings code --- .../Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs index 5274ad43b..2b97a7168 100644 --- a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs +++ b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs @@ -1431,6 +1431,7 @@ namespace Flax.Build.Bindings {{GenerateCSharpAccessLevel(classInfo.Access)}}static class {{marshallerName}} { #pragma warning disable 1591 + #pragma warning disable 618 #if FLAX_EDITOR [HideInEditor] #endif @@ -1467,6 +1468,7 @@ namespace Flax.Build.Bindings internal static {{classInfo.Name}} ToManaged(IntPtr managed) => Unsafe.As<{{classInfo.Name}}>(ManagedHandleMarshaller.ToManaged(managed)); internal static IntPtr ToNative({{classInfo.Name}} managed) => ManagedHandleMarshaller.ToNative(managed); + #pragma warning restore 618 #pragma warning restore 1591 } 
""").Split(new char[] { '\n' }))); @@ -1764,6 +1766,7 @@ namespace Flax.Build.Bindings {{GenerateCSharpAccessLevel(structureInfo.Access)}}static unsafe class {{marshallerName}} { #pragma warning disable 1591 + #pragma warning disable 618 {{structContents.Replace("\n", "\n" + " ").ToString().TrimEnd()}} {{InsertHideInEditorSection()}} @@ -1810,6 +1813,7 @@ namespace Flax.Build.Bindings { {{toNativeContent.Replace("\n", "\n" + " ").ToString().TrimEnd()}} } + #pragma warning restore 618 #pragma warning restore 1591 } """).Split(new char[] { '\n' }))); @@ -2170,8 +2174,10 @@ namespace Flax.Build.Bindings contents.Append(indent).AppendLine($"public static class {marshallerName}"); contents.Append(indent).AppendLine("{"); contents.AppendLine("#pragma warning disable 1591"); + contents.AppendLine("#pragma warning disable 618"); contents.Append(indent).Append(" ").AppendLine($"internal static {interfaceInfo.Name} ConvertToManaged(IntPtr unmanaged) => ({interfaceInfo.Name})ManagedHandleMarshaller.ConvertToManaged(unmanaged);"); contents.Append(indent).Append(" ").AppendLine($"internal static IntPtr ConvertToUnmanaged({interfaceInfo.Name} managed) => ManagedHandleMarshaller.ConvertToUnmanaged(managed);"); + contents.AppendLine("#pragma warning restore 618"); contents.AppendLine("#pragma warning restore 1591"); contents.Append(indent).AppendLine("}"); } From 22b4f33984953070a700f0ec0bf77792dfb7b4e0 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Mon, 17 Jun 2024 20:35:24 +0300 Subject: [PATCH 164/292] Use consistent and less verbose deprecation messages --- .../CustomEditors/Elements/DoubleValueElement.cs | 2 +- .../CustomEditors/Elements/FloatValueElement.cs | 2 +- Source/Editor/Editor.cs | 4 ++-- Source/Engine/Core/Math/BoundingBox.cs | 2 +- Source/Engine/Core/Math/CollisionsHelper.cs | 2 +- Source/Engine/Core/Math/Mathd.cs | 4 ++-- Source/Engine/Core/Math/Mathf.cs | 4 ++-- Source/Engine/Core/Math/Matrix.cs | 2 +- Source/Engine/Core/Math/Ray.cs | 6 +++--- 
Source/Engine/Engine/InputEvent.cs | 2 +- Source/Engine/Graphics/Mesh.cs | 12 ++++++------ Source/Engine/Graphics/SkinnedMesh.cs | 6 +++--- Source/Engine/Physics/CollisionData.cs | 6 +++--- Source/Engine/Utilities/Utils.cs | 2 +- 14 files changed, 28 insertions(+), 28 deletions(-) diff --git a/Source/Editor/CustomEditors/Elements/DoubleValueElement.cs b/Source/Editor/CustomEditors/Elements/DoubleValueElement.cs index cf16a1194..eb0bdd823 100644 --- a/Source/Editor/CustomEditors/Elements/DoubleValueElement.cs +++ b/Source/Editor/CustomEditors/Elements/DoubleValueElement.cs @@ -22,7 +22,7 @@ namespace FlaxEditor.CustomEditors.Elements /// /// [Deprecated on 26.05.2022, expires on 26.05.2024] /// - [System.Obsolete("Deprecated in 1.4, use ValueBox instead")] + [System.Obsolete("Use ValueBox instead")] public DoubleValueBox DoubleValue => ValueBox; /// diff --git a/Source/Editor/CustomEditors/Elements/FloatValueElement.cs b/Source/Editor/CustomEditors/Elements/FloatValueElement.cs index aabdb79e4..32a8b42c3 100644 --- a/Source/Editor/CustomEditors/Elements/FloatValueElement.cs +++ b/Source/Editor/CustomEditors/Elements/FloatValueElement.cs @@ -22,7 +22,7 @@ namespace FlaxEditor.CustomEditors.Elements /// /// [Deprecated on 26.05.2022, expires on 26.05.2024] /// - [System.Obsolete("Deprecated in 1.4, use ValueBox instead")] + [System.Obsolete("Use ValueBox instead")] public FloatValueBox FloatValue => ValueBox; /// diff --git a/Source/Editor/Editor.cs b/Source/Editor/Editor.cs index 6a1e804b8..e64be977b 100644 --- a/Source/Editor/Editor.cs +++ b/Source/Editor/Editor.cs @@ -850,7 +850,7 @@ namespace FlaxEditor /// New asset types allowed to create. /// [Deprecated in v1.8] /// - [Obsolete("Use CreateAsset with named tag.")] + [Obsolete("Use CreateAsset with named tag instead")] public enum NewAssetType { /// @@ -1031,7 +1031,7 @@ namespace FlaxEditor /// /// New asset type. /// Output asset path. 
- [Obsolete("Use CreateAsset with named tag.")] + [Obsolete("Use CreateAsset with named tag instead")] public static bool CreateAsset(NewAssetType type, string outputPath) { // [Deprecated on 18.02.2024, expires on 18.02.2025] diff --git a/Source/Engine/Core/Math/BoundingBox.cs b/Source/Engine/Core/Math/BoundingBox.cs index d6c3a54b1..7f871a8a1 100644 --- a/Source/Engine/Core/Math/BoundingBox.cs +++ b/Source/Engine/Core/Math/BoundingBox.cs @@ -171,7 +171,7 @@ namespace FlaxEngine /// The ray to test. /// When the method completes, contains the distance of the intersection, or 0 if there was no intersection. /// Whether the two objects intersected. - [Obsolete("Deprecated in v1.4")] + [Obsolete("Use Intersects with 'out Real distance' parameter instead")] public bool Intersects(ref Ray ray, out float distance) { var result = CollisionsHelper.RayIntersectsBox(ref ray, ref this, out Real dst); diff --git a/Source/Engine/Core/Math/CollisionsHelper.cs b/Source/Engine/Core/Math/CollisionsHelper.cs index d8393a507..e51aaf7f3 100644 --- a/Source/Engine/Core/Math/CollisionsHelper.cs +++ b/Source/Engine/Core/Math/CollisionsHelper.cs @@ -659,7 +659,7 @@ namespace FlaxEngine /// The plane to test. /// When the method completes, contains the distance of the intersection, or 0 if there was no intersection. /// Whether the two objects intersect. - [Obsolete("Deprecated in 1.4")] + [Obsolete("Use RayIntersectsPlane with 'out Real distance' parameter instead")] public static bool RayIntersectsPlane(ref Ray ray, ref Plane plane, out float distance) { //Source: Real-Time Collision Detection by Christer Ericson diff --git a/Source/Engine/Core/Math/Mathd.cs b/Source/Engine/Core/Math/Mathd.cs index 98f215cdf..556300451 100644 --- a/Source/Engine/Core/Math/Mathd.cs +++ b/Source/Engine/Core/Math/Mathd.cs @@ -645,6 +645,7 @@ namespace FlaxEngine /// /// Maps the specified value from the given range into another. 
+ /// [Deprecated on 17.04.2023, expires on 17.04.2024] /// /// The value to map from range [fromMin; fromMax]. /// The source range minimum value. @@ -652,8 +653,7 @@ namespace FlaxEngine /// The destination range minimum value. /// The destination range maximum value. /// The mapped value in range [toMin; toMax]. - // [Deprecated on 17.04.2023, expires on 17.04.2024] - [Obsolete("Please use Remap to upkeep the API consistency")] + [Obsolete("Use Remap instead")] public static double Map(double value, double fromMin, double fromMax, double toMin, double toMax) { double t = (value - fromMin) / (fromMax - fromMin); diff --git a/Source/Engine/Core/Math/Mathf.cs b/Source/Engine/Core/Math/Mathf.cs index 2ea80cbb3..31f1e4569 100644 --- a/Source/Engine/Core/Math/Mathf.cs +++ b/Source/Engine/Core/Math/Mathf.cs @@ -808,6 +808,7 @@ namespace FlaxEngine /// /// Maps the specified value from the given range into another. + /// [Deprecated on 17.04.2023, expires on 17.04.2024] /// /// The value to map from range [fromMin; fromMax]. /// The source range minimum value. @@ -815,8 +816,7 @@ namespace FlaxEngine /// The destination range minimum value. /// The destination range maximum value. /// The mapped value in range [toMin; toMax]. - // [Deprecated on 17.04.2023, expires on 17.04.2024] - [Obsolete("Please use Remap to upkeep the API consistency")] + [Obsolete("Use Remap instead")] public static float Map(float value, float fromMin, float fromMax, float toMin, float toMax) { float t = (value - fromMin) / (fromMax - fromMin); diff --git a/Source/Engine/Core/Math/Matrix.cs b/Source/Engine/Core/Math/Matrix.cs index 34e6b4bec..c065379d3 100644 --- a/Source/Engine/Core/Math/Matrix.cs +++ b/Source/Engine/Core/Math/Matrix.cs @@ -825,7 +825,7 @@ namespace FlaxEngine /// When the method completes, contains the rotation component of the decomposed matrix. /// When the method completes, contains the translation component of the decomposed matrix. 
/// This method is designed to decompose an SRT transformation matrix only. - [Obsolete("Deprecated in v1.8")] + [Obsolete("Use Decompose with 'out Matrix3x3 rotation' parameter instead")] public void Decompose(out Float3 scale, out Matrix rotation, out Float3 translation) { Decompose(out scale, out Matrix3x3 r, out translation); diff --git a/Source/Engine/Core/Math/Ray.cs b/Source/Engine/Core/Math/Ray.cs index 3ed5e8257..268bfddcf 100644 --- a/Source/Engine/Core/Math/Ray.cs +++ b/Source/Engine/Core/Math/Ray.cs @@ -149,7 +149,7 @@ namespace FlaxEngine /// The plane to test. /// When the method completes, contains the distance of the intersection, or 0 if there was no intersection. /// Whether the two objects intersected. - [Obsolete("Deprecated in 1.4")] + [Obsolete("Use Intersects with 'out Real distance' parameter instead")] public bool Intersects(ref Plane plane, out float distance) { return CollisionsHelper.RayIntersectsPlane(ref this, ref plane, out distance); @@ -202,7 +202,7 @@ namespace FlaxEngine /// The third vertex of the triangle to test. /// When the method completes, contains the distance of the intersection, or 0 if there was no intersection. /// Whether the two objects intersected. - [Obsolete("Deprecated in 1.4")] + [Obsolete("Use Intersects with 'out Real distance' parameter instead")] public bool Intersects(ref Vector3 vertex1, ref Vector3 vertex2, ref Vector3 vertex3, out float distance) { var result = CollisionsHelper.RayIntersectsTriangle(ref this, ref vertex1, ref vertex2, ref vertex3, out Real dst); @@ -305,7 +305,7 @@ namespace FlaxEngine /// The sphere to test. /// When the method completes, contains the distance of the intersection, or 0 if there was no intersection. /// Whether the two objects intersected. 
- [Obsolete("Deprecated in 1.4")] + [Obsolete("Use Intersects with 'out Real distance' parameter instead")] public bool Intersects(ref BoundingSphere sphere, out float distance) { var result = CollisionsHelper.RayIntersectsSphere(ref this, ref sphere, out Real dst); diff --git a/Source/Engine/Engine/InputEvent.cs b/Source/Engine/Engine/InputEvent.cs index e5653631f..287e044f1 100644 --- a/Source/Engine/Engine/InputEvent.cs +++ b/Source/Engine/Engine/InputEvent.cs @@ -28,7 +28,7 @@ namespace FlaxEngine /// /// Occurs when event is triggered (e.g. user pressed a key). Called before scripts update. /// - [System.Obsolete("Depreciated in 1.7, use Pressed Action.")] + [System.Obsolete("Use Pressed instead")] public event Action Triggered; /// diff --git a/Source/Engine/Graphics/Mesh.cs b/Source/Engine/Graphics/Mesh.cs index 384bb5d4f..42604c872 100644 --- a/Source/Engine/Graphics/Mesh.cs +++ b/Source/Engine/Graphics/Mesh.cs @@ -339,7 +339,7 @@ namespace FlaxEngine /// The normal vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). /// The vertex colors (per vertex). - [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(Vector3[] vertices, int[] triangles, Vector3[] normals = null, Vector3[] tangents = null, Vector2[] uv = null, Color32[] colors = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv), colors); @@ -357,7 +357,7 @@ namespace FlaxEngine /// The normal vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). /// The vertex colors (per vertex). 
- [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(List vertices, List triangles, List normals = null, List tangents = null, List uv = null, List colors = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv), colors); @@ -375,7 +375,7 @@ namespace FlaxEngine /// The normal vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). /// The vertex colors (per vertex). - [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(Vector3[] vertices, uint[] triangles, Vector3[] normals = null, Vector3[] tangents = null, Vector2[] uv = null, Color32[] colors = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv), colors); @@ -393,7 +393,7 @@ namespace FlaxEngine /// The normal vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). /// The vertex colors (per vertex). - [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(List vertices, List triangles, List normals = null, List tangents = null, List uv = null, List colors = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv), colors); @@ -411,7 +411,7 @@ namespace FlaxEngine /// The tangent vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). /// The vertex colors (per vertex). 
- [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(Vector3[] vertices, ushort[] triangles, Vector3[] normals = null, Vector3[] tangents = null, Vector2[] uv = null, Color32[] colors = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv), colors); @@ -429,7 +429,7 @@ namespace FlaxEngine /// The tangent vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). /// The vertex colors (per vertex). - [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(List vertices, List triangles, List normals = null, List tangents = null, List uv = null, List colors = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv), colors); diff --git a/Source/Engine/Graphics/SkinnedMesh.cs b/Source/Engine/Graphics/SkinnedMesh.cs index 6c9594bed..0381622b2 100644 --- a/Source/Engine/Graphics/SkinnedMesh.cs +++ b/Source/Engine/Graphics/SkinnedMesh.cs @@ -216,7 +216,7 @@ namespace FlaxEngine /// The normal vectors (per vertex). /// The normal vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). 
- [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(Vector3[] vertices, int[] triangles, Int4[] blendIndices, Vector4[] blendWeights, Vector3[] normals = null, Vector3[] tangents = null, Vector2[] uv = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, blendIndices, Utils.ConvertCollection(blendWeights), Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv)); @@ -235,7 +235,7 @@ namespace FlaxEngine /// The normal vectors (per vertex). /// The normal vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). - [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(Vector3[] vertices, uint[] triangles, Int4[] blendIndices, Vector4[] blendWeights, Vector3[] normals = null, Vector3[] tangents = null, Vector2[] uv = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, blendIndices, Utils.ConvertCollection(blendWeights), Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv)); @@ -254,7 +254,7 @@ namespace FlaxEngine /// The normal vectors (per vertex). /// The tangent vectors (per vertex). Use null to compute them from normal vectors. /// The texture coordinates (per vertex). 
- [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use UpdateMesh with Float3 and Float2 parameters instead")] public void UpdateMesh(Vector3[] vertices, ushort[] triangles, Int4[] blendIndices, Vector4[] blendWeights, Vector3[] normals = null, Vector3[] tangents = null, Vector2[] uv = null) { UpdateMesh(Utils.ConvertCollection(vertices), triangles, blendIndices, Utils.ConvertCollection(blendWeights), Utils.ConvertCollection(normals), Utils.ConvertCollection(tangents), Utils.ConvertCollection(uv)); diff --git a/Source/Engine/Physics/CollisionData.cs b/Source/Engine/Physics/CollisionData.cs index 3eb5543be..c17e452e4 100644 --- a/Source/Engine/Physics/CollisionData.cs +++ b/Source/Engine/Physics/CollisionData.cs @@ -19,7 +19,7 @@ namespace FlaxEngine /// The convex mesh generation flags. /// The convex mesh vertex limit. Use values in range [8;255] /// True if failed, otherwise false. - [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use CookCollision with Float3 and Float2 parameters instead")] public bool CookCollision(CollisionDataType type, Vector3[] vertices, uint[] triangles, ConvexMeshGenerationFlags convexFlags = ConvexMeshGenerationFlags.None, int convexVertexLimit = 255) { if (vertices == null) @@ -43,7 +43,7 @@ namespace FlaxEngine /// The convex mesh generation flags. /// The convex mesh vertex limit. Use values in range [8;255] /// True if failed, otherwise false. - [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use CookCollision with Float3 and Float2 parameters instead")] public bool CookCollision(CollisionDataType type, Vector3[] vertices, int[] triangles, ConvexMeshGenerationFlags convexFlags = ConvexMeshGenerationFlags.None, int convexVertexLimit = 255) { if (vertices == null) @@ -60,7 +60,7 @@ namespace FlaxEngine /// /// The output vertex buffer. /// The output index buffer. 
- [Obsolete("Deprecated in 1.4, use overload with Float3 and Float2 parameters")] + [Obsolete("Use ExtractGeometry with Float3 and Float2 parameters instead")] public void ExtractGeometry(out Vector3[] vertexBuffer, out int[] indexBuffer) { ExtractGeometry(out Float3[] tmp, out indexBuffer); diff --git a/Source/Engine/Utilities/Utils.cs b/Source/Engine/Utilities/Utils.cs index 0767ab8f4..890bbcfb9 100644 --- a/Source/Engine/Utilities/Utils.cs +++ b/Source/Engine/Utilities/Utils.cs @@ -23,7 +23,7 @@ namespace FlaxEngine /// The source location. /// The destination location. /// The length (amount of bytes to copy). - [Obsolete("Use MemoryCopy with long length and source/destination swapped to match C++ API.")] + [Obsolete("Use MemoryCopy with long length and source/destination parameters swapped instead")] public static void MemoryCopy(IntPtr source, IntPtr destination, int length) { // [Deprecated on 30.05.2021, expires on 30.05.2022] From 611961cefedf273fd920aef3820d163ac2d59f0c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 18 Jun 2024 10:52:28 +0200 Subject: [PATCH 165/292] Improve DDGI cascade selection by using biased world position --- Source/Shaders/GI/DDGI.hlsl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 1c052e8e0..63f70b15a 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -139,14 +139,16 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Select the highest cascade that contains the sample location uint cascadeIndex = 0; float probesSpacing = 0; - float3 probesOrigin = (float3)0, probesExtent = (float3)0; + float3 probesOrigin = (float3)0, probesExtent = (float3)0, biasedWorldPosition = (float3)0; + float3 viewDir = normalize(data.ViewPos - worldPosition); for (; cascadeIndex < data.CascadesCount; cascadeIndex++) { probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; probesOrigin = 
data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); + biasedWorldPosition = worldPosition + (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); float fadeDistance = probesSpacing * 0.5f; - float cascadeWeight = saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance); + float cascadeWeight = saturate(Min3(probesExtent - abs(biasedWorldPosition - probesOrigin)) / fadeDistance); if (cascadeWeight > dither) // Use dither to make transition smoother break; } @@ -155,11 +157,6 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T uint3 probeCoordsEnd = data.ProbesCounts - uint3(1, 1, 1); uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), probeCoordsEnd); - // Bias the world-space position to reduce artifacts - float3 viewDir = normalize(data.ViewPos - worldPosition); - float3 surfaceBias = (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); - float3 biasedWorldPosition = worldPosition + surfaceBias; - // Get the grid coordinates of the probe nearest the biased world position float3 baseProbeWorldPosition = GetDDGIProbeWorldPosition(data, cascadeIndex, baseProbeCoords); float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / probesSpacing); From 2c4de9878eabdcfbfe4c46f5c16aefc67d6bd0b0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 18 Jun 2024 13:22:10 +0200 Subject: [PATCH 166/292] Improve DDGI cascade transitions to be smoother --- Source/Shaders/GI/DDGI.hlsl | 17 +++++++++++++---- Source/Shaders/GI/DDGI.shader | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 63f70b15a..d2649f805 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -18,6 +18,7 @@ #define 
DDGI_PROBE_STATE_ACTIVE 2 #define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) +#define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens #define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space // DDGI data for a constant buffer @@ -143,13 +144,21 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T float3 viewDir = normalize(data.ViewPos - worldPosition); for (; cascadeIndex < data.CascadesCount; cascadeIndex++) { + // Get cascade data probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); - biasedWorldPosition = worldPosition + (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); - float fadeDistance = probesSpacing * 0.5f; - float cascadeWeight = saturate(Min3(probesExtent - abs(biasedWorldPosition - probesOrigin)) / fadeDistance); - if (cascadeWeight > dither) // Use dither to make transition smoother + + // Bias the world-space position to reduce artifacts + float3 surfaceBias = (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); + biasedWorldPosition = worldPosition + surfaceBias; + + // Calculate cascade blending weight (use input bias to smooth transition) + float cascadeBlendSmooth = frac(max(distance(data.ViewPos, worldPosition) - probesExtent, 0) / probesSpacing) * 0.1f; + float3 cascadeBlendPoint = worldPosition - probesOrigin - cascadeBlendSmooth * probesSpacing; + float fadeDistance = probesSpacing * DDGI_CASCADE_BLEND_SIZE; + float cascadeWeight = saturate(Min3(probesExtent - 
abs(cascadeBlendPoint)) / fadeDistance); + if (cascadeWeight > dither) break; } if (cascadeIndex == data.CascadesCount) diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index f6c00d331..203a84d3a 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -102,7 +102,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float prevProbesSpacing = DDGI.ProbesOriginAndSpacing[prevCascade].w; float3 prevProbesOrigin = DDGI.ProbesScrollOffsets[prevCascade].xyz * prevProbesSpacing + DDGI.ProbesOriginAndSpacing[prevCascade].xyz; float3 prevProbesExtent = (DDGI.ProbesCounts - 1) * (prevProbesSpacing * 0.5f); - prevProbesExtent -= probesSpacing; // Apply safe margin to allow probes on cascade edges + prevProbesExtent -= probesSpacing * ceil(DDGI_CASCADE_BLEND_SIZE); // Apply safe margin to allow probes on cascade edges float prevCascadeWeight = Min3(prevProbesExtent - abs(probeBasePosition - prevProbesOrigin)); if (prevCascadeWeight > 0.1f) { From 7f2464684ae6484945159b4d2d026ac7fd7921c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 18 Jun 2024 14:17:50 +0200 Subject: [PATCH 167/292] Fix single-frame DDGI black artifacts when Global Surface Atlas got defragmented --- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 941290936..a85269fcd 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -719,6 +719,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co objectsBufferCapacity = surfaceAtlasData.CulledObjectsUsageHistory.Maximum(); // Use biggest value from history if (surfaceAtlasData.CulledObjectsUsageHistory.Count() == surfaceAtlasData.CulledObjectsUsageHistory.Capacity()) notReady = false; // Always ready when rendering for some 
time + else if (currentFrame != 0 && surfaceAtlasData.LastFrameAtlasDefragmentation == currentFrame) + notReady = false; // Always ready when did atlas defragmentation during this frame (prevent 1 black frame) // Allocate buffer for culled objects (estimated size) if (!surfaceAtlasData.CulledObjectsBuffer) From 0d48ac8fc27e58ddf8b0771d07ed4a1778ea7fbc Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 18 Jun 2024 15:05:21 +0200 Subject: [PATCH 168/292] Add smoother DDGi probe relocation when old position is visible from new position --- Source/Shaders/GI/DDGI.shader | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 203a84d3a..92df2c043 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -228,6 +228,20 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // If probe was in a different location or was activated now then mark it as activated bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE; bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f; +#if DDGI_PROBE_RELOCATE_FIND_BEST || DDGI_PROBE_RELOCATE_ITERATIVE + if (wasRelocated && !wasActivated) + { + // If probe was relocated but the previous location is visible from the new one, then don't re-activate it for smoother blend + float3 diff = probeOffsetOld - probeOffset; + float diffLen = length(diff); + float3 diffDir = diff / diffLen; + GlobalSDFTrace trace; + trace.Init(probeBasePosition + probeOffset, diffDir, 0.0f, diffLen); + GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace); + if (!hit.IsHit()) + wasRelocated = false; + } +#endif if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE) probeState = DDGI_PROBE_STATE_ACTIVATED; } From 9c161121b37da41b2cfd237e7d37c2c067b0ef0c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 19 Jun 2024 14:03:55 +0200 Subject: [PATCH 169/292] Add 
`Graphics::SpreadWorkload` to disable graphics workloads amortization over several frames when debugging graphics perf --- Source/Engine/Graphics/Graphics.cpp | 1 + Source/Engine/Graphics/Graphics.h | 13 +++++++++++++ .../GI/DynamicDiffuseGlobalIllumination.cpp | 2 +- .../Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 5 +++-- .../Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 2 +- Source/Engine/Renderer/ShadowsPass.cpp | 5 +++++ 6 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 8483d1f6c..9535f3ecf 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -22,6 +22,7 @@ bool Graphics::AllowCSMBlending = false; Quality Graphics::GlobalSDFQuality = Quality::High; Quality Graphics::GIQuality = Quality::High; PostProcessSettings Graphics::PostProcessSettings; +bool Graphics::SpreadWorkload = true; #if GRAPHICS_API_NULL extern GPUDevice* CreateGPUDeviceNull(); diff --git a/Source/Engine/Graphics/Graphics.h b/Source/Engine/Graphics/Graphics.h index 55ded56e5..b7f1fbcd7 100644 --- a/Source/Engine/Graphics/Graphics.h +++ b/Source/Engine/Graphics/Graphics.h @@ -69,9 +69,22 @@ public: /// API_FIELD() static PostProcessSettings PostProcessSettings; +public: + /// + /// Debug utility to toggle graphics workloads amortization over several frames by systems such as shadow mapping, global illumination or surface atlas. Can be used to test performance in the worst-case scenario (e.g. camera-cut). + /// + API_FIELD() static bool SpreadWorkload; + public: /// /// Disposes the device.
/// static void DisposeDevice(); }; + +// Skip disabling workload spreading in Release builds +#if BUILD_RELEASE +#define GPU_SPREAD_WORKLOAD true +#else +#define GPU_SPREAD_WORKLOAD Graphics::SpreadWorkload +#endif diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 3fbf2dc47..f82123a46 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -402,7 +402,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont bool cascadeSkipUpdate[4]; for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { - cascadeSkipUpdate[cascadeIndex] = !clear && (ddgiData.LastFrameUsed % cascadeFrequencies[cascadeIndex]) != 0; + cascadeSkipUpdate[cascadeIndex] = !clear && (ddgiData.LastFrameUsed % cascadeFrequencies[cascadeIndex]) != 0 && GPU_SPREAD_WORKLOAD; } // Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index a85269fcd..b1ed55eec 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -13,6 +13,7 @@ #include "Engine/Core/Config/GraphicsSettings.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderTargetPool.h" @@ -536,7 +537,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers))); { PROFILE_GPU_CPU_NAMED("Clear"); - if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES) + if (noCache || 
GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD) { // Full-atlas hardware clear context->ClearDepth(depthBuffer); @@ -1268,7 +1269,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con object->Bounds = OrientedBoundingBox(localBounds); object->Bounds.Transform(localToWorld); object->Radius = (float)actorObjectBounds.Radius; - if (dirty || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES) + if (dirty || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD) { object->LastFrameUpdated = surfaceAtlasData.CurrentFrame; object->LightingUpdateFrame = surfaceAtlasData.CurrentFrame; diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 4b5ee7816..2d35ac09e 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -493,7 +493,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex // Rasterize world geometry into Global SDF renderContext.View.Pass = DrawPass::GlobalSDF; uint32 viewMask = renderContext.View.RenderLayersMask; - const bool useCache = !updated && !GLOBAL_SDF_DEBUG_FORCE_REDRAW; + const bool useCache = !updated && !GLOBAL_SDF_DEBUG_FORCE_REDRAW && GPU_SPREAD_WORKLOAD; static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_GROUP_SIZE == 0, "Invalid chunk size for Global SDF rasterization group size."); const int32 rasterizeChunks = Math::CeilToInt((float)resolution / (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE); auto& chunks = ChunksCache; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index d779e0b83..25c4b993d 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -231,6 +231,11 @@ struct ShadowAtlasLight float CalculateUpdateRateInv(const RenderLightData& light, float distanceFromView, bool& freezeUpdate) const { + if 
(!GPU_SPREAD_WORKLOAD) + { + freezeUpdate = false; + return 1.0f; + } const float shadowsUpdateRate = light.ShadowsUpdateRate; const float shadowsUpdateRateAtDistance = shadowsUpdateRate * light.ShadowsUpdateRateAtDistance; float updateRate = Math::Lerp(shadowsUpdateRate, shadowsUpdateRateAtDistance, Math::Saturate(distanceFromView / Distance)); From a377933884f2fd206ba3c6345ce3ef8d281fb27a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 19 Jun 2024 14:10:23 +0200 Subject: [PATCH 170/292] Fix d3d12 build regression --- Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h index d818e3f2b..5cfb5b4eb 100644 --- a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h +++ b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.h @@ -129,7 +129,7 @@ namespace RenderToolsDX #endif -#if GPU_ENABLE_DIAGNOSTICS || COMPILE_WITH_SHADER_COMPILER +#if GPU_ENABLE_DIAGNOSTICS || COMPILE_WITH_SHADER_COMPILER || GPU_ENABLE_RESOURCE_NAMING #include "Engine/Utilities/StringConverter.h" From 9ec23559afe090950061af02940d887390559b73 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 19 Jun 2024 15:03:38 +0200 Subject: [PATCH 171/292] Optimize GPU memory usage in Editor when viewport is inactive --- Source/Engine/Graphics/RenderBuffers.cpp | 2 +- Source/Engine/Graphics/RenderBuffers.h | 4 ++-- Source/Engine/Graphics/RenderTask.cpp | 19 +++++++++++++++++-- Source/Engine/Graphics/RenderTask.h | 8 +++++++- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Graphics/RenderBuffers.cpp b/Source/Engine/Graphics/RenderBuffers.cpp index 16a7af7a9..f95f8ec74 100644 --- a/Source/Engine/Graphics/RenderBuffers.cpp +++ b/Source/Engine/Graphics/RenderBuffers.cpp @@ -34,7 +34,7 @@ RenderBuffers::~RenderBuffers() _resources.ClearDelete(); } -void RenderBuffers::Prepare() +void 
RenderBuffers::ReleaseUnusedMemory() { // Auto release temporal buffer if not used for some time const uint64 frameIndex = Engine::FrameCount; diff --git a/Source/Engine/Graphics/RenderBuffers.h b/Source/Engine/Graphics/RenderBuffers.h index 617388d67..2cb78cc07 100644 --- a/Source/Engine/Graphics/RenderBuffers.h +++ b/Source/Engine/Graphics/RenderBuffers.h @@ -100,9 +100,9 @@ public: public: /// - /// Prepares buffers for rendering a scene. Called before rendering so other parts can reuse calculated value. + /// Frees unused buffers to reduce memory usage for certain drawing effects that are state-dependent but unused for multiple frames. /// - void Prepare(); + void ReleaseUnusedMemory(); /// /// Requests the half-resolution depth to be prepared for the current frame. diff --git a/Source/Engine/Graphics/RenderTask.cpp b/Source/Engine/Graphics/RenderTask.cpp index e3cb6bb0c..22fb9a9d6 100644 --- a/Source/Engine/Graphics/RenderTask.cpp +++ b/Source/Engine/Graphics/RenderTask.cpp @@ -46,9 +46,9 @@ void RenderTask::DrawAll() for (auto task : Tasks) { if (task->CanDraw()) - { task->OnDraw(); - } + else + task->OnIdle(); } } @@ -84,6 +84,10 @@ void RenderTask::OnDraw() OnEnd(context); } +void RenderTask::OnIdle() +{ +} + void RenderTask::OnBegin(GPUContext* context) { Begin(this, context); @@ -325,6 +329,9 @@ void SceneRenderTask::OnPostRender(GPUContext* context, RenderContext& renderCon OnCollectDrawCalls(renderContextBatch, SceneRendering::PostRender); PostRender(context, renderContext); + + if (Buffers) + Buffers->ReleaseUnusedMemory(); } Viewport SceneRenderTask::GetViewport() const @@ -424,6 +431,14 @@ bool SceneRenderTask::CanDraw() const return RenderTask::CanDraw(); } +void SceneRenderTask::OnIdle() +{ + RenderTask::OnIdle(); + + if (Buffers) + Buffers->ReleaseUnusedMemory(); +} + MainRenderTask::MainRenderTask(const SpawnParams& params) : SceneRenderTask(params) { diff --git a/Source/Engine/Graphics/RenderTask.h b/Source/Engine/Graphics/RenderTask.h index
fc7270c9e..853a5c14f 100644 --- a/Source/Engine/Graphics/RenderTask.h +++ b/Source/Engine/Graphics/RenderTask.h @@ -112,10 +112,15 @@ public: API_PROPERTY() virtual bool CanDraw() const; /// - /// Called by graphics device to draw this task. Can be used to invoke task rendering nested inside another task - use on own risk! + /// Called by graphics device to draw this task. /// API_FUNCTION() virtual void OnDraw(); + /// + /// Called by graphics device to idle task that has not been selected for drawing this frame (CanDraw returned false). Can be used to recycle cached memory if task is idle for many frames in a row. + /// + virtual void OnIdle(); + /// /// Called on task rendering begin. /// @@ -407,6 +412,7 @@ public: // [RenderTask] bool Resize(int32 width, int32 height) override; bool CanDraw() const override; + void OnIdle() override; void OnBegin(GPUContext* context) override; void OnRender(GPUContext* context) override; void OnEnd(GPUContext* context) override; From a6324c8f657a4c32dbd8884b02cc1f079231e41a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 19 Jun 2024 15:45:45 +0200 Subject: [PATCH 172/292] Missing change --- Source/Engine/Renderer/Renderer.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index bf2941104..b199fc231 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -370,10 +370,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont for (PostProcessEffect* e : renderContext.List->PostFx) e->PreRender(context, renderContext); } - - // Prepare renderContext.View.Prepare(renderContext); - renderContext.Buffers->Prepare(); // Build batch of render contexts (main view and shadow projections) { From 8e49a35e0e454215f3a33431a3a2d546e98dd282 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 20 Jun 2024 15:26:58 +0200 Subject: [PATCH 173/292] Refactor `RectPack` into new `RectPackNode` and 
`RectPackAtlas` that uses more optimized memory allocations --- Source/Engine/CSG/CSGData.cpp | 15 +- .../Engine/ContentImporters/ImportModel.cpp | 11 +- Source/Engine/Render2D/FontManager.cpp | 31 +-- Source/Engine/Render2D/FontTextureAtlas.cpp | 145 +++++------- Source/Engine/Render2D/FontTextureAtlas.h | 30 +-- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 21 +- Source/Engine/Renderer/ShadowsPass.cpp | 70 +++--- .../ShadowsOfMordor/AtlasChartsPacker.h | 12 +- Source/Engine/Utilities/RectPack.h | 210 +++++++++++++++++- 9 files changed, 351 insertions(+), 194 deletions(-) diff --git a/Source/Engine/CSG/CSGData.cpp b/Source/Engine/CSG/CSGData.cpp index 991d502eb..219ea4b73 100644 --- a/Source/Engine/CSG/CSGData.cpp +++ b/Source/Engine/CSG/CSGData.cpp @@ -15,15 +15,10 @@ namespace CSG public: - struct Node : RectPack + struct Node : RectPackNode { - Node(float xy, float wh) - : RectPack(xy, xy, wh, wh) - { - } - Node(float x, float y, float width, float height) - : RectPack(x, y, width, height) + : RectPackNode(x, y, width, height) { } @@ -36,17 +31,17 @@ namespace CSG private: - Node _root; + RectPackAtlas _root; const float _atlasSize; const float _chartsPadding; public: LightmapUVsPacker(float atlasSize, float chartsPadding) - : _root(chartsPadding, atlasSize - chartsPadding) - , _atlasSize(atlasSize) + : _atlasSize(atlasSize) , _chartsPadding(chartsPadding) { + _root.Init(atlasSize, atlasSize, chartsPadding); } ~LightmapUVsPacker() diff --git a/Source/Engine/ContentImporters/ImportModel.cpp b/Source/Engine/ContentImporters/ImportModel.cpp index 6f0a9c626..568abfbeb 100644 --- a/Source/Engine/ContentImporters/ImportModel.cpp +++ b/Source/Engine/ContentImporters/ImportModel.cpp @@ -70,10 +70,10 @@ void RepackMeshLightmapUVs(ModelData& data) auto& lod = data.LODs[lodIndex]; // Build list of meshes with their area - struct LightmapUVsPack : RectPack + struct LightmapUVsPack : RectPackNode { LightmapUVsPack(float x, float y, float width, float height) - : RectPack(x, 
y, width, height) + : RectPackNode(x, y, width, height) { } @@ -109,10 +109,11 @@ void RepackMeshLightmapUVs(ModelData& data) { bool failed = false; const float chartsPadding = (4.0f / 256.0f) * atlasSize; - LightmapUVsPack root(chartsPadding, chartsPadding, atlasSize - chartsPadding, atlasSize - chartsPadding); + RectPackAtlas atlas; + atlas.Init(chartsPadding, chartsPadding); for (auto& entry : entries) { - entry.Slot = root.Insert(entry.Size, entry.Size, chartsPadding); + entry.Slot = atlas.Insert(entry.Size, entry.Size, chartsPadding); if (entry.Slot == nullptr) { // Failed to insert surface, increase atlas size and try again @@ -129,7 +130,7 @@ void RepackMeshLightmapUVs(ModelData& data) for (const auto& entry : entries) { Float2 uvOffset(entry.Slot->X * atlasSizeInv, entry.Slot->Y * atlasSizeInv); - Float2 uvScale((entry.Slot->Width - chartsPadding) * atlasSizeInv, (entry.Slot->Height - chartsPadding) * atlasSizeInv); + Float2 uvScale(entry.Slot->Width * atlasSizeInv, entry.Slot->Height * atlasSizeInv); // TODO: SIMD for (auto& uv : entry.Mesh->LightmapUVs) { diff --git a/Source/Engine/Render2D/FontManager.cpp b/Source/Engine/Render2D/FontManager.cpp index 80d260e20..6e3c3b42b 100644 --- a/Source/Engine/Render2D/FontManager.cpp +++ b/Source/Engine/Render2D/FontManager.cpp @@ -252,17 +252,9 @@ bool FontManager::AddNewEntry(Font* font, Char c, FontCharacterEntry& entry) // Find atlas for the character texture int32 atlasIndex = 0; const FontTextureAtlasSlot* slot = nullptr; - for (; atlasIndex < Atlases.Count(); atlasIndex++) - { - // Add the character to the texture + for (; atlasIndex < Atlases.Count() && slot == nullptr; atlasIndex++) slot = Atlases[atlasIndex]->AddEntry(glyphWidth, glyphHeight, GlyphImageData); - - // Check result, if not null char has been added - if (slot) - { - break; - } - } + atlasIndex--; // Check if there is no atlas for this character if (!slot) @@ -271,6 +263,7 @@ bool FontManager::AddNewEntry(Font* font, Char c, 
FontCharacterEntry& entry) auto atlas = Content::CreateVirtualAsset(); atlas->Setup(PixelFormat::R8_UNorm, FontTextureAtlas::PaddingStyle::PadWithZero); Atlases.Add(atlas); + atlasIndex++; // Init atlas const int32 fontAtlasSize = 512; // TODO: make it a configuration variable @@ -286,12 +279,11 @@ bool FontManager::AddNewEntry(Font* font, Char c, FontCharacterEntry& entry) } // Fill with atlas dependant data - const uint32 padding = Atlases[atlasIndex]->GetPaddingAmount(); - entry.TextureIndex = atlasIndex; - entry.UV.X = static_cast(slot->X + padding); - entry.UV.Y = static_cast(slot->Y + padding); - entry.UVSize.X = static_cast(slot->Width - 2 * padding); - entry.UVSize.Y = static_cast(slot->Height - 2 * padding); + entry.TextureIndex = (byte)atlasIndex; + entry.UV.X = (float)slot->X; + entry.UV.Y = (float)slot->Y; + entry.UVSize.X = (float)slot->Width; + entry.UVSize.Y = (float)slot->Height; entry.Slot = slot; return false; @@ -302,12 +294,7 @@ void FontManager::Invalidate(FontCharacterEntry& entry) if (entry.TextureIndex == MAX_uint8) return; auto atlas = Atlases[entry.TextureIndex]; - const uint32 padding = atlas->GetPaddingAmount(); - const uint32 slotX = static_cast(entry.UV.X - padding); - const uint32 slotY = static_cast(entry.UV.Y - padding); - const uint32 slotSizeX = static_cast(entry.UVSize.X + 2 * padding); - const uint32 slotSizeY = static_cast(entry.UVSize.Y + 2 * padding); - atlas->Invalidate(slotX, slotY, slotSizeX, slotSizeY); + atlas->Invalidate(entry.Slot); } void FontManager::Flush() diff --git a/Source/Engine/Render2D/FontTextureAtlas.cpp b/Source/Engine/Render2D/FontTextureAtlas.cpp index 9ed871364..b1c8e6c3f 100644 --- a/Source/Engine/Render2D/FontTextureAtlas.cpp +++ b/Source/Engine/Render2D/FontTextureAtlas.cpp @@ -16,7 +16,6 @@ FontTextureAtlas::FontTextureAtlas(const SpawnParams& params, const AssetInfo* i , _width(0) , _height(0) , _isDirty(true) - , _root(nullptr) { } @@ -34,13 +33,10 @@ void FontTextureAtlas::Setup(PixelFormat 
format, PaddingStyle paddingStyle) void FontTextureAtlas::Init(uint32 width, uint32 height) { - ASSERT(_root == nullptr); - // Setup - uint32 padding = GetPaddingAmount(); _width = width; _height = height; - _root = New(padding, padding, _width - padding, _height - padding); + _atlas.Init(_width, _height, GetPaddingAmount()); _isDirty = false; // Reserve upload data memory @@ -50,18 +46,15 @@ void FontTextureAtlas::Init(uint32 width, uint32 height) FontTextureAtlasSlot* FontTextureAtlas::AddEntry(uint32 targetWidth, uint32 targetHeight, const Array& data) { - // Check for invalid size if (targetWidth == 0 || targetHeight == 0) return nullptr; // Try to find slot for the texture FontTextureAtlasSlot* slot = nullptr; - const uint32 padding = GetPaddingAmount(); - const uint32 allPadding = padding * 2; for (int32 i = 0; i < _freeSlots.Count(); i++) { FontTextureAtlasSlot* e = _freeSlots[i]; - if (e->Width == targetWidth + allPadding && e->Height == targetHeight + allPadding) + if (e->Width == targetWidth && e->Height == targetHeight) { slot = e; _freeSlots.RemoveAt(i); @@ -70,77 +63,74 @@ FontTextureAtlasSlot* FontTextureAtlas::AddEntry(uint32 targetWidth, uint32 targ } if (!slot) { - slot = _root->Insert(targetWidth, targetHeight, GetPaddingAmount() * 2); + slot = _atlas.Insert(targetWidth, targetHeight, GetPaddingAmount()); } - // Check if can fit it if (slot) { // Copy data to into the atlas memory CopyDataIntoSlot(slot, data); - - // Set dirty state - markAsDirty(); + _isDirty = true; } - // Returns result return slot; } +bool FontTextureAtlas::Invalidate(const FontTextureAtlasSlot* slot) +{ + if (slot) + { + // Push back to free slots list to be used on the next insert (in theory slot handle is still valid but we keep it free) + _freeSlots.AddUnique((FontTextureAtlasSlot*)slot); + return true; + } + return false; +} + bool FontTextureAtlas::Invalidate(uint32 x, uint32 y, uint32 width, uint32 height) { - FontTextureAtlasSlot* slot = invalidate(_root, x, y, width, 
height); - if (slot) + for (const FontTextureAtlasSlot& node : _atlas.Nodes) { - _freeSlots.Add(slot); + if (node.X == x && node.Y == y && node.Width == width && node.Height == height) + return Invalidate(&node); } - return slot != nullptr; + return false; } void FontTextureAtlas::CopyDataIntoSlot(const FontTextureAtlasSlot* slot, const Array& data) { - uint8* start = &_data[slot->Y * _width * _bytesPerPixel + slot->X * _bytesPerPixel]; - const uint32 padding = GetPaddingAmount(); - const uint32 allPadding = padding * 2; - const uint32 srcWidth = slot->Width - allPadding; - const uint32 srcHeight = slot->Height - allPadding; - RowData rowData; - rowData.DstData = start; + rowData.DstData = &_data[slot->Y * _width * _bytesPerPixel + slot->X * _bytesPerPixel]; rowData.SrcData = data.Get(); rowData.DstTextureWidth = _width; - rowData.SrcTextureWidth = srcWidth; + rowData.SrcTextureWidth = slot->Width; rowData.RowWidth = slot->Width; + rowData.Padding = GetPaddingAmount(); // Start with padding - if (padding > 0) + if (rowData.Padding > 0) { rowData.SrcRow = 0; - rowData.DstRow = 0; - + rowData.DstRow = -1; if (_paddingStyle == DilateBorder) - { copyRow(rowData); - } else - { zeroRow(rowData); - } } // Actual data copy - for (uint32 row = padding; row < slot->Height - padding; row++) + for (uint32 row = 0; row < slot->Height; row++) { - rowData.SrcRow = row - padding; + rowData.SrcRow = row; rowData.DstRow = row; copyRow(rowData); } // Finish with padding - if (padding > 0) + if (rowData.Padding > 0) { - rowData.SrcRow = srcHeight - 1; - rowData.DstRow = slot->Height - padding; + rowData.SrcRow = slot->Height - 1; + rowData.DstRow = slot->Height; if (_paddingStyle == DilateBorder) copyRow(rowData); else @@ -150,54 +140,52 @@ void FontTextureAtlas::CopyDataIntoSlot(const FontTextureAtlasSlot* slot, const byte* FontTextureAtlas::GetSlotData(const FontTextureAtlasSlot* slot, uint32& width, uint32& height, uint32& stride) { - const uint32 padding = GetPaddingAmount(); - 
uint32 x = slot->X + padding; - uint32 y = slot->Y + padding; - width = slot->Width - padding * 2; - height = slot->Height - padding * 2; + width = slot->Width; + height = slot->Height; stride = _width * _bytesPerPixel; - return &_data[y * _width * _bytesPerPixel + x * _bytesPerPixel]; + return &_data[slot->Y * _width * _bytesPerPixel + slot->X * _bytesPerPixel]; } void FontTextureAtlas::copyRow(const RowData& copyRowData) const { - const byte* data = copyRowData.SrcData; - byte* start = copyRowData.DstData; - const uint32 srdWidth = copyRowData.SrcTextureWidth; - const uint32 dstWidth = copyRowData.DstTextureWidth; - const uint32 srcRow = copyRowData.SrcRow; - const uint32 dstRow = copyRowData.DstRow; - const uint32 padding = GetPaddingAmount(); + const byte* srcData = (const byte*)((intptr)copyRowData.SrcData + (intptr)copyRowData.SrcRow * copyRowData.SrcTextureWidth * _bytesPerPixel); + byte* dstData = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstTextureWidth * _bytesPerPixel); + Platform::MemoryCopy(dstData, srcData, copyRowData.SrcTextureWidth * _bytesPerPixel); - const byte* srcData = &data[srcRow * srdWidth * _bytesPerPixel]; - byte* dstData = &start[(dstRow * dstWidth + padding) * _bytesPerPixel]; - Platform::MemoryCopy(dstData, srcData, srdWidth * _bytesPerPixel); - - if (padding > 0) + if (copyRowData.Padding > 0) { - byte* dstPaddingPixelLeft = &start[dstRow * dstWidth * _bytesPerPixel]; - byte* dstPaddingPixelRight = dstPaddingPixelLeft + (copyRowData.RowWidth - 1) * _bytesPerPixel; + const uint32 padSize = copyRowData.Padding * _bytesPerPixel; + byte* dstPaddingPixelLeft = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstTextureWidth * _bytesPerPixel - padSize); + byte* dstPaddingPixelRight = dstPaddingPixelLeft + copyRowData.RowWidth * _bytesPerPixel + padSize; if (_paddingStyle == DilateBorder) { + // Dilate left and right sides of the padded row const byte* firstPixel = srcData; - 
const byte* lastPixel = srcData + (srdWidth - 1) * _bytesPerPixel; - Platform::MemoryCopy(dstPaddingPixelLeft, firstPixel, _bytesPerPixel); - Platform::MemoryCopy(dstPaddingPixelRight, lastPixel, _bytesPerPixel); + const byte* lastPixel = srcData + (copyRowData.SrcTextureWidth - 1) * _bytesPerPixel; + Platform::MemoryCopy(dstPaddingPixelLeft, firstPixel, padSize); + Platform::MemoryCopy(dstPaddingPixelRight, lastPixel, padSize); } else { - Platform::MemoryClear(dstPaddingPixelLeft, _bytesPerPixel); - Platform::MemoryClear(dstPaddingPixelRight, _bytesPerPixel); + // Clear left and right sides of the padded row + Platform::MemoryClear(dstPaddingPixelLeft, padSize); + Platform::MemoryClear(dstPaddingPixelRight, padSize); } } } void FontTextureAtlas::zeroRow(const RowData& copyRowData) const { - const uint32 dstWidth = copyRowData.DstTextureWidth; - const uint32 dstRow = copyRowData.DstRow; - byte* dstData = ©RowData.DstData[dstRow * dstWidth * _bytesPerPixel]; - Platform::MemoryClear(dstData, copyRowData.RowWidth * _bytesPerPixel); + byte* dstData = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstTextureWidth * _bytesPerPixel); + uint32 dstSize = copyRowData.RowWidth * _bytesPerPixel; + if (copyRowData.Padding > 0) + { + // Extend clear by left and right borders of the padded row + const uint32 padSize = copyRowData.Padding * _bytesPerPixel; + dstData -= padSize; + dstSize += padSize * 2; + } + Platform::MemoryClear(dstData, dstSize); } void FontTextureAtlas::unload(bool isReloading) @@ -205,13 +193,13 @@ void FontTextureAtlas::unload(bool isReloading) Texture::unload(isReloading); Clear(); - _data.Clear(); + _data.Resize(0); } void FontTextureAtlas::Clear() { - SAFE_DELETE(_root); _freeSlots.Clear(); + _atlas.Clear(); } void FontTextureAtlas::Flush() @@ -248,20 +236,3 @@ bool FontTextureAtlas::HasDataSyncWithGPU() const { return _isDirty == false; } - -FontTextureAtlasSlot* FontTextureAtlas::invalidate(FontTextureAtlasSlot* parent, 
uint32 x, uint32 y, uint32 width, uint32 height) -{ - if (parent->X == x && parent->Y == y && parent->Width == width && parent->Height == height) - { - return parent; - } - FontTextureAtlasSlot* result = parent->Left ? invalidate(parent->Left, x, y, width, height) : nullptr; - if (result) - return result; - return parent->Right ? invalidate(parent->Right, x, y, width, height) : nullptr; -} - -void FontTextureAtlas::markAsDirty() -{ - _isDirty = true; -} diff --git a/Source/Engine/Render2D/FontTextureAtlas.h b/Source/Engine/Render2D/FontTextureAtlas.h index ae4197b10..b63c7b705 100644 --- a/Source/Engine/Render2D/FontTextureAtlas.h +++ b/Source/Engine/Render2D/FontTextureAtlas.h @@ -11,10 +11,10 @@ /// /// Contains information about single texture atlas slot. /// -struct FontTextureAtlasSlot : RectPack +struct FontTextureAtlasSlot : RectPackNode<> { FontTextureAtlasSlot(uint32 x, uint32 y, uint32 width, uint32 height) - : RectPack(x, y, width, height) + : RectPackNode<>(x, y, width, height) { } @@ -35,11 +35,12 @@ private: { const byte* SrcData; uint8* DstData; - uint32 SrcRow; - uint32 DstRow; - uint32 RowWidth; - uint32 SrcTextureWidth; - uint32 DstTextureWidth; + int32 SrcRow; + int32 DstRow; + int32 RowWidth; + int32 SrcTextureWidth; + int32 DstTextureWidth; + uint32 Padding; }; public: @@ -74,7 +75,7 @@ private: uint32 _bytesPerPixel; PaddingStyle _paddingStyle; bool _isDirty; - FontTextureAtlasSlot* _root; + RectPackAtlas _atlas; Array _freeSlots; public: @@ -159,6 +160,13 @@ public: /// The atlas slot occupied by the new entry. FontTextureAtlasSlot* AddEntry(uint32 targetWidth, uint32 targetHeight, const Array& data); + /// + /// Invalidates the cached dynamic entry from the atlas. + /// + /// The slot to invalidate. + /// True if slot has been freed, otherwise false. + bool Invalidate(const FontTextureAtlasSlot* slot); + /// /// Invalidates the cached dynamic entry from the atlas. 
/// @@ -191,11 +199,6 @@ public: /// void Clear(); - /// - /// Disposed whole atlas data (texture, nodes etc.). - /// - void Dispose(); - /// /// Flushes this atlas data to the GPU /// @@ -214,7 +217,6 @@ public: private: - FontTextureAtlasSlot* invalidate(FontTextureAtlasSlot* parent, uint32 x, uint32 y, uint32 width, uint32 height); void markAsDirty(); void copyRow(const RowData& copyRowData) const; void zeroRow(const RowData& copyRowData) const; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index b1ed55eec..20288cfe0 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -64,7 +64,7 @@ PACK_STRUCT(struct AtlasTileVertex uint32 TileAddress; }); -struct GlobalSurfaceAtlasTile : RectPack +struct GlobalSurfaceAtlasTile : RectPackNode { Float3 ViewDirection; Float3 ViewPosition; @@ -74,7 +74,7 @@ struct GlobalSurfaceAtlasTile : RectPack uint32 ObjectAddressOffset; GlobalSurfaceAtlasTile(uint16 x, uint16 y, uint16 width, uint16 height) - : RectPack(x, y, width, height) + : RectPackNode(x, y, width, height) { } @@ -125,7 +125,7 @@ public: DynamicTypedBuffer ObjectsBuffer; int32 CulledObjectsCounterIndex = -1; GlobalSurfaceAtlasPass::BindingData Result; - GlobalSurfaceAtlasTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles + RectPackAtlas Atlas; Dictionary Objects; Dictionary Lights; SamplesBuffer CulledObjectsUsageHistory; @@ -150,7 +150,7 @@ public: CulledObjectsUsageHistory.Clear(); LastFrameAtlasDefragmentation = Engine::FrameCount; AtlasPixelsUsed = 0; - SAFE_DELETE(AtlasTiles); + Atlas.Clear(); Objects.Clear(); Lights.Clear(); } @@ -382,6 +382,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co if (noCache) { surfaceAtlasData.Reset(); + surfaceAtlasData.Atlas.Init(resolution, resolution); auto desc = GPUTextureDescription::New2D(resolution, 
resolution, PixelFormat::Unknown); uint64 memUsage = 0; @@ -419,8 +420,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co } for (SceneRendering* scene : renderContext.List->Scenes) surfaceAtlasData.ListenSceneRendering(scene); - if (!surfaceAtlasData.AtlasTiles) - surfaceAtlasData.AtlasTiles = New(0, 0, resolution, resolution); if (!_vertexBuffer) _vertexBuffer = New(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer")); @@ -504,7 +503,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co for (auto& tile : it->Value.Tiles) { if (tile) - tile->Free(&surfaceAtlasData); + surfaceAtlasData.Atlas.Free(tile, &surfaceAtlasData); } surfaceAtlasData.Objects.Remove(it); } @@ -1076,8 +1075,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co } } - // TODO: explore atlas tiles optimization with feedback from renderer (eg. when tile is sampled by GI/Reflections mark it as used, then sort tiles by importance and prioritize updates for ones frequently used) - #undef WRITE_TILE context->ResetSR(); context->ResetRenderTarget(); @@ -1209,7 +1206,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con // Skip too small surfaces if (object && object->Tiles[tileIndex]) { - object->Tiles[tileIndex]->Free(&surfaceAtlasData); + surfaceAtlasData.Atlas.Free(object->Tiles[tileIndex], &surfaceAtlasData); object->Tiles[tileIndex] = nullptr; } continue; @@ -1232,14 +1229,14 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con anyTile = true; continue; } - object->Tiles[tileIndex]->Free(&surfaceAtlasData); + surfaceAtlasData.Atlas.Free(object->Tiles[tileIndex], &surfaceAtlasData); } // Insert tile into atlas uint16 tilePixels = tileResolution * tileResolution; GlobalSurfaceAtlasTile* tile = nullptr; if (tilePixels <= surfaceAtlasData.AtlasPixelsTotal - surfaceAtlasData.AtlasPixelsUsed) - tile = 
surfaceAtlasData.AtlasTiles->Insert(tileResolution, tileResolution, 0, &surfaceAtlasData, actorObject, tileIndex); + tile = surfaceAtlasData.Atlas.Insert(tileResolution, tileResolution, 0, &surfaceAtlasData, actorObject, tileIndex); if (tile) { if (!object) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 25c4b993d..dbd9c7fdd 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -39,12 +39,12 @@ GPU_CB_STRUCT(Data { float ContactShadowsLength; }); -struct ShadowsAtlasRectTile : RectPack +struct ShadowsAtlasRectTile : RectPackNode { bool IsStatic; ShadowsAtlasRectTile(uint16 x, uint16 y, uint16 width, uint16 height) - : RectPack(x, y, width, height) + : RectPackNode(x, y, width, height) { } @@ -75,23 +75,8 @@ struct ShadowAtlasLightTile bool HasStaticGeometry; Viewport CachedViewport; // The viewport used the last time to render shadow to the atlas - void FreeDynamic(ShadowsCustomBuffer* buffer) - { - if (RectTile) - { - RectTile->Free(buffer); - RectTile = nullptr; - } - } - - void FreeStatic(ShadowsCustomBuffer* buffer) - { - if (StaticRectTile) - { - StaticRectTile->Free(buffer); - StaticRectTile = nullptr; - } - } + void FreeDynamic(ShadowsCustomBuffer* buffer); + void FreeStatic(ShadowsCustomBuffer* buffer); void Free(ShadowsCustomBuffer* buffer) { @@ -315,8 +300,8 @@ public: GPUTexture* StaticShadowMapAtlas = nullptr; DynamicTypedBuffer ShadowsBuffer; GPUBufferView* ShadowsBufferView = nullptr; - ShadowsAtlasRectTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles - ShadowsAtlasRectTile* StaticAtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles + RectPackAtlas Atlas; + RectPackAtlas StaticAtlas; Dictionary Lights; ShadowsCustomBuffer() @@ -335,7 +320,7 @@ public: for (int32 i = 0; i < atlasLight.TilesCount; i++) atlasLight.Tiles[i].ClearDynamic(); } - SAFE_DELETE(AtlasTiles); + Atlas.Clear(); 
AtlasPixelsUsed = 0; } @@ -349,7 +334,7 @@ public: for (int32 i = 0; i < atlasLight.TilesCount; i++) atlasLight.Tiles[i].ClearDynamic(); } - SAFE_DELETE(StaticAtlasTiles); + StaticAtlas.Clear(); StaticAtlasPixelsUsed = 0; } @@ -357,17 +342,17 @@ public: { Lights.Clear(); StaticAtlasPixelsUsed = 0; - SAFE_DELETE(StaticAtlasTiles); + StaticAtlas.Clear(); ClearDynamic(); ViewOrigin = Vector3::Zero; } void InitStaticAtlas() { - if (StaticAtlasTiles) - return; const int32 atlasResolution = Resolution * 2; - StaticAtlasTiles = New(0, 0, atlasResolution, atlasResolution); + if (StaticAtlas.Width == atlasResolution) + return; + StaticAtlas.Init(atlasResolution, atlasResolution); if (!StaticShadowMapAtlas) StaticShadowMapAtlas = GPUDevice::Instance->CreateTexture(TEXT("Static Shadow Map Atlas")); auto desc = ShadowMapAtlas->GetDescription(); @@ -449,6 +434,24 @@ void ShadowsAtlasRectTile::OnFree(ShadowsCustomBuffer* buffer) buffer->AtlasPixelsUsed -= pixels; } +void ShadowAtlasLightTile::FreeDynamic(ShadowsCustomBuffer* buffer) +{ + if (RectTile) + { + buffer->Atlas.Free(RectTile, buffer); + RectTile = nullptr; + } +} + +void ShadowAtlasLightTile::FreeStatic(ShadowsCustomBuffer* buffer) +{ + if (StaticRectTile) + { + buffer->StaticAtlas.Free(StaticRectTile, buffer); + StaticRectTile = nullptr; + } +} + String ShadowsPass::ToString() const { return TEXT("ShadowsPass"); @@ -679,7 +682,7 @@ bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render auto& tile = atlasLight.Tiles[tileIndex]; if (tile.StaticRectTile == nullptr) { - tile.StaticRectTile = shadows.StaticAtlasTiles->Insert(atlasLight.StaticResolution, atlasLight.StaticResolution, 0, &shadows, true); + tile.StaticRectTile = shadows.StaticAtlas.Insert(atlasLight.StaticResolution, atlasLight.StaticResolution, 0, &shadows, true); if (!tile.StaticRectTile) { // Failed to insert tile to switch back to the default rendering @@ -1100,7 +1103,7 @@ void ShadowsPass::SetupShadows(RenderContext& 
renderContext, RenderContextBatch& shadows.Reset(); shadows.ViewOrigin = renderContext.View.Origin; } - if (shadows.StaticAtlasTiles && (float)shadows.StaticAtlasPixelsUsed / (shadows.StaticAtlasTiles->Width * shadows.StaticAtlasTiles->Height) < SHADOWS_MAX_STATIC_ATLAS_CAPACITY_TO_DEFRAG) + if (shadows.StaticAtlas.Width != 0 && (float)shadows.StaticAtlasPixelsUsed / (shadows.StaticAtlas.Width * shadows.StaticAtlas.Height) < SHADOWS_MAX_STATIC_ATLAS_CAPACITY_TO_DEFRAG) { // Defragment static shadow atlas if it failed to insert any light but it's still should have space bool anyStaticFailed = false; @@ -1117,8 +1120,8 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& shadows.ClearStatic(); } } - if (!shadows.AtlasTiles) - shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); + if (!shadows.Atlas.IsInitialized()) + shadows.Atlas.Init(atlasResolution, atlasResolution); // Update/add lights const int32 baseLightResolution = SHADOWS_BASE_LIGHT_RESOLUTION(atlasResolution); @@ -1222,7 +1225,7 @@ RETRY_ATLAS_SETUP: bool failedToInsert = false; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesNeeded; tileIndex++) { - auto rectTile = shadows.AtlasTiles->Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows, false); + auto rectTile = shadows.Atlas.Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows, false); if (!rectTile) { // Free any previous tiles that were added @@ -1249,7 +1252,6 @@ RETRY_ATLAS_SETUP: // Rebuild atlas shadows.ClearDynamic(); - shadows.AtlasTiles = New(0, 0, atlasResolution, atlasResolution); goto RETRY_ATLAS_SETUP; } } @@ -1276,7 +1278,7 @@ RETRY_ATLAS_SETUP: SetupLight(shadows, renderContext, renderContextBatch, *(RenderDirectionalLightData*)light, atlasLight); } } - if (shadows.StaticAtlasTiles) + if (shadows.StaticAtlas.IsInitialized()) { // Register for active scenes changes to invalidate static shadows for (SceneRendering* scene : renderContext.List->Scenes) diff --git 
a/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h b/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h index bf940630d..e28a53f0a 100644 --- a/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h +++ b/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h @@ -13,12 +13,12 @@ namespace ShadowsOfMordor { public: - struct Node : RectPack + struct Node : RectPackNode { Builder::LightmapUVsChart* Chart = nullptr; - Node(uint32 x, uint32 y, uint32 width, uint32 height) - : RectPack(x, y, width, height) + Node(int32 x, int32 y, int32 width, int32 height) + : RectPackNode(x, y, width, height) { } @@ -32,7 +32,7 @@ namespace ShadowsOfMordor private: - Node _root; + RectPackAtlas _root; const LightmapSettings* _settings; public: @@ -42,9 +42,9 @@ namespace ShadowsOfMordor /// /// The settings. AtlasChartsPacker(const LightmapSettings* settings) - : _root(settings->ChartsPadding, settings->ChartsPadding, (int32)settings->AtlasSize - settings->ChartsPadding, (int32)settings->AtlasSize - settings->ChartsPadding) - , _settings(settings) + : _settings(settings) { + _root.Init((int32)settings->AtlasSize, (int32)settings->AtlasSize, settings->ChartsPadding); } /// diff --git a/Source/Engine/Utilities/RectPack.h b/Source/Engine/Utilities/RectPack.h index 974295c0a..a3ed9d3fc 100644 --- a/Source/Engine/Utilities/RectPack.h +++ b/Source/Engine/Utilities/RectPack.h @@ -3,15 +3,217 @@ #pragma once #include "Engine/Core/Templates.h" +#include "Engine/Core/Collections/Array.h" +#include "Engine/Core/Collections/ChunkedArray.h" +#include "Engine/Core/Collections/Sorting.h" #include "Engine/Core/Types/BaseTypes.h" #include "Engine/Core/Memory/Memory.h" #include "Engine/Core/Math/Math.h" /// -/// Implementation of the rectangles packing into 2D atlas with padding. Uses simple space division. +/// Implementation of the rectangles packing node into 2D atlas with padding. Uses simple space division via Binary Tree. 
/// +template +struct RectPackNode +{ + typedef SizeType Size; + + // Position of the node in the atlas. + Size X; + Size Y; + + // Size of the node. + Size Width; + Size Height; + + RectPackNode(Size x, Size y, Size width, Size height) + : X(x) + , Y(y) + , Width(width) + , Height(height) + { + } + + bool operator<(const RectPackNode& other) const + { + // Sort largest to smallest + return Width * Height > other.Width * other.Height; + } +}; + +/// +/// Implementation of the rectangles packing atlas into 2D atlas with padding. Uses simple space division via Binary Tree but stored in a linear memory allocation. +/// +/// Implementation based on https://github.com/TeamHypersomnia/rectpack2D?tab=readme-ov-file#algorithm and https://blackpawn.com/texts/lightmaps/default.html. +template +struct RectPackAtlas +{ + typedef typename NodeType::Size Size; + + // Atlas width (in pixels). + Size Width = 0; + // Atlas height (in pixels). + Size Height = 0; + // Atlas borders padding (in pixels). + Size BordersPadding = 0; + // List with all allocated nodes (chunked array to ensure node pointers are always valid). + ChunkedArray Nodes; + +private: + Array FreeNodes; + bool FreeNodesDirty = false; + + struct SizeRect + { + Size X, Y, W, H; + + FORCE_INLINE SizeRect() + { + } + + FORCE_INLINE SizeRect(Size x, Size y, Size w, Size h) + : X(x) + , Y(y) + , W(w) + , H(h) + { + } + }; + +public: + FORCE_INLINE bool IsInitialized() + { + return Width != 0; + } + + /// + /// Initializes the atlas of a given size. Clears any previously added nodes. This won't invoke OnFree for atlas tiles. + /// + /// The atlas width (in pixels). + /// The atlas height (in pixels). + /// The atlas borders padding (in pixels). 
+ void Init(Size atlasWidth, Size atlasHeight, Size bordersPadding = 0) + { + Width = atlasWidth; + Height = atlasHeight; + BordersPadding = bordersPadding; + Nodes.Clear(); + FreeNodes.Clear(); + Nodes.Add(NodeType(bordersPadding, bordersPadding, atlasWidth - bordersPadding * 2, atlasHeight - bordersPadding * 2)); + FreeNodes.Add(&Nodes[0]); + FreeNodesDirty = false; + } + + /// + /// Clears the atlas. This won't invoke OnFree for atlas tiles. + /// + void Clear() + { + if (Width == 0) + return; + Init(Width, Height, BordersPadding); + } + + /// + /// Tries to insert a node into the atlas using rectangle pack algorithm. + /// + /// The node width (in pixels). + /// The node height (in pixels). + /// The node padding margin (in pixels) around its contents. + /// The additional arguments. + /// The node that contains inserted an item or null if failed to find a free space. + template + NodeType* Insert(Size width, Size height, Size padding, Args&&... args) + { + // Ensure that free nodes list can be iterated from smallest to biggest nodes for efficient packing + if (FreeNodesDirty) + { + FreeNodesDirty = false; + Sorting::QuickSortObj(FreeNodes.Get(), FreeNodes.Count()); + } + + NodeType* result = nullptr; + const Size paddedWidth = width + padding; + const Size paddedHeight = height + padding; + + // Search free nodes from back to front and find the one that fits requested item size + for (int32 i = FreeNodes.Count() - 1; i >= 0; i--) + { + NodeType& freeNode = *FreeNodes.Get()[i]; + if (paddedWidth > freeNode.Width || paddedHeight > freeNode.Height) + { + // Not enough space + continue; + } + + // Check if there will be some remaining space left in this node + if (freeNode.Width != paddedWidth || freeNode.Height != paddedHeight) + { + // Subdivide this node into up to 2 additional nodes + const Size remainingWidth = freeNode.Width - paddedWidth; + const Size remainingHeight = freeNode.Height - paddedHeight; + + // Split the remaining area around this node into two 
children + SizeRect bigger, smaller; + if (remainingHeight <= remainingWidth) + { + // Split vertically + smaller = SizeRect(freeNode.X, freeNode.Y + paddedHeight, width, remainingHeight); + bigger = SizeRect(freeNode.X + paddedWidth, freeNode.Y, remainingWidth, freeNode.Height); + } + else + { + // Split horizontally + smaller = SizeRect(freeNode.X + paddedWidth, freeNode.Y, remainingWidth, height); + bigger = SizeRect(freeNode.X, freeNode.Y + paddedHeight, freeNode.Width, remainingHeight); + } + if (smaller.W * smaller.H > bigger.W * bigger.H) + Swap(bigger, smaller); + if (bigger.W * bigger.H > padding) + FreeNodes.Add(Nodes.Add(NodeType(bigger.X, bigger.Y, bigger.W, bigger.H))); + if (smaller.W * smaller.H > padding) + FreeNodes.Add(Nodes.Add(NodeType(smaller.X, smaller.Y, smaller.W, smaller.H))); + FreeNodesDirty = true; + + // Shrink to the actual area + freeNode.Width = width; + freeNode.Height = height; + } + + // Insert into this node + result = &freeNode; + if (FreeNodesDirty) + FreeNodes.RemoveAt(i); + else + FreeNodes.RemoveAtKeepOrder(i); + result->OnInsert(Forward(args)...); + break; + } + + return result; + } + + /// + /// Frees the node. + /// + /// The node to remove from atlas. + template + void Free(NodeType* node, Args&&... args) + { + ASSERT_LOW_LAYER(node); + node->OnFree(Forward(args)...); + FreeNodes.Add(node); + FreeNodesDirty = true; + } +}; + +/// +/// Implementation of the rectangles packing node into 2D atlas with padding. Uses simple space division via Binary Tree. +/// [Deprecated on 19.06.2024 expires on 19.06.2025] Use RectPackNode and RectPackAtlas instead. +/// +/// Implementation based on https://blackpawn.com/texts/lightmaps/default.html. template -struct RectPack +struct DEPRECATED RectPack { // Left and Right slots allow to easily move around the atlas like in a tree structure. NodeType* Left; @@ -66,7 +268,7 @@ struct RectPack /// The additional arguments. 
/// The node that contains inserted an item or null if failed to find a free space. template - NodeType* Insert(SizeType itemWidth, SizeType itemHeight, SizeType itemPadding, Args&&...args) + NodeType* Insert(SizeType itemWidth, SizeType itemHeight, SizeType itemPadding, Args&&... args) { NodeType* result; const SizeType paddedWidth = itemWidth + itemPadding; @@ -140,7 +342,7 @@ struct RectPack /// /// The node that contains inserted an item or null if failed to find a free space. template - void Free(Args&&...args) + void Free(Args&&... args) { if (!IsUsed) return; From 6d9f504639b5f10f4c7fc2d0b87a0e2959810314 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 20 Jun 2024 22:33:57 +0200 Subject: [PATCH 174/292] Optimize new rectangle packing to use binary search for faster inserting --- Source/Engine/Utilities/RectPack.h | 59 +++++++++++++++++++----------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/Source/Engine/Utilities/RectPack.h b/Source/Engine/Utilities/RectPack.h index a3ed9d3fc..6e51ac35a 100644 --- a/Source/Engine/Utilities/RectPack.h +++ b/Source/Engine/Utilities/RectPack.h @@ -5,10 +5,8 @@ #include "Engine/Core/Templates.h" #include "Engine/Core/Collections/Array.h" #include "Engine/Core/Collections/ChunkedArray.h" -#include "Engine/Core/Collections/Sorting.h" #include "Engine/Core/Types/BaseTypes.h" #include "Engine/Core/Memory/Memory.h" -#include "Engine/Core/Math/Math.h" /// /// Implementation of the rectangles packing node into 2D atlas with padding. Uses simple space division via Binary Tree. 
@@ -61,7 +59,6 @@ struct RectPackAtlas private: Array FreeNodes; - bool FreeNodesDirty = false; struct SizeRect { @@ -80,6 +77,36 @@ private: } }; + void AddFreeNode(NodeType* node) + { + // Use binary search to find the insert location (assumes that FreeNodes are always sorted) + int32 left = 0, right = FreeNodes.Count() - 1; + const uint32 nodeSize = (uint32)node->Width * (uint32)node->Height; + while (left <= right) + { + int32 mid = left + (right - left) / 2; + const NodeType* midNode = FreeNodes.Get()[mid]; + const uint32 midSize = (uint32)midNode->Width * (uint32)midNode->Height; + if (nodeSize == midSize) + { + // Insert right after node of the same size + left = mid; + break; + } + if (nodeSize > midSize) + { + // Go to the left half (contains nodes with higher sizes) + right = mid - 1; + } + else + { + // Go to the right half (contains nodes with lower sizes) + left = mid + 1; + } + } + FreeNodes.Insert(left, node); + } + public: FORCE_INLINE bool IsInitialized() { @@ -101,7 +128,6 @@ public: FreeNodes.Clear(); Nodes.Add(NodeType(bordersPadding, bordersPadding, atlasWidth - bordersPadding * 2, atlasHeight - bordersPadding * 2)); FreeNodes.Add(&Nodes[0]); - FreeNodesDirty = false; } /// @@ -125,18 +151,12 @@ public: template NodeType* Insert(Size width, Size height, Size padding, Args&&... 
args) { - // Ensure that free nodes list can be iterated from smallest to biggest nodes for efficient packing - if (FreeNodesDirty) - { - FreeNodesDirty = false; - Sorting::QuickSortObj(FreeNodes.Get(), FreeNodes.Count()); - } - NodeType* result = nullptr; const Size paddedWidth = width + padding; const Size paddedHeight = height + padding; // Search free nodes from back to front and find the one that fits requested item size + // TODO: FreeNodes are sorted so use Binary Search to quickly find the first tile that might have enough space for insert for (int32 i = FreeNodes.Count() - 1; i >= 0; i--) { NodeType& freeNode = *FreeNodes.Get()[i]; @@ -170,10 +190,9 @@ public: if (smaller.W * smaller.H > bigger.W * bigger.H) Swap(bigger, smaller); if (bigger.W * bigger.H > padding) - FreeNodes.Add(Nodes.Add(NodeType(bigger.X, bigger.Y, bigger.W, bigger.H))); + AddFreeNode(Nodes.Add(NodeType(bigger.X, bigger.Y, bigger.W, bigger.H))); if (smaller.W * smaller.H > padding) - FreeNodes.Add(Nodes.Add(NodeType(smaller.X, smaller.Y, smaller.W, smaller.H))); - FreeNodesDirty = true; + AddFreeNode(Nodes.Add(NodeType(smaller.X, smaller.Y, smaller.W, smaller.H))); // Shrink to the actual area freeNode.Width = width; @@ -182,10 +201,7 @@ public: // Insert into this node result = &freeNode; - if (FreeNodesDirty) - FreeNodes.RemoveAt(i); - else - FreeNodes.RemoveAtKeepOrder(i); + FreeNodes.RemoveAtKeepOrder(i); result->OnInsert(Forward(args)...); break; } @@ -202,8 +218,7 @@ public: { ASSERT_LOW_LAYER(node); node->OnFree(Forward(args)...); - FreeNodes.Add(node); - FreeNodesDirty = true; + AddFreeNode(node); } }; @@ -309,8 +324,8 @@ struct DEPRECATED RectPack } // The width and height of the new child node - const SizeType remainingWidth = Math::Max(0, Width - paddedWidth); - const SizeType remainingHeight = Math::Max(0, Height - paddedHeight); + const SizeType remainingWidth = Width - paddedWidth; + const SizeType remainingHeight = Height - paddedHeight; // Split the remaining area 
around this slot into two children if (remainingHeight <= remainingWidth) From c119750896db6eef56d3bfdd90a5ae84b2afd8e2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 21 Jun 2024 16:24:47 +0200 Subject: [PATCH 175/292] Fix RectAtlas regression --- Source/Engine/CSG/CSGData.cpp | 6 +-- .../Engine/ContentImporters/ImportModel.cpp | 8 ++-- Source/Engine/Render2D/FontTextureAtlas.cpp | 34 +++++++------- Source/Engine/Render2D/FontTextureAtlas.h | 15 +++---- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 6 +-- Source/Engine/Renderer/ShadowsPass.cpp | 8 ++-- .../ShadowsOfMordor/AtlasChartsPacker.h | 6 +-- Source/Engine/Utilities/RectPack.h | 45 ++++++++----------- 8 files changed, 60 insertions(+), 68 deletions(-) diff --git a/Source/Engine/CSG/CSGData.cpp b/Source/Engine/CSG/CSGData.cpp index 219ea4b73..a5da3fce1 100644 --- a/Source/Engine/CSG/CSGData.cpp +++ b/Source/Engine/CSG/CSGData.cpp @@ -17,8 +17,8 @@ namespace CSG struct Node : RectPackNode { - Node(float x, float y, float width, float height) - : RectPackNode(x, y, width, height) + Node(Size x, Size y, Size width, Size height) + : RectPackNode(x, y, width, height) { } @@ -50,7 +50,7 @@ namespace CSG Node* Insert(ChartType chart) { - return _root.Insert(chart->Size.X, chart->Size.Y, _chartsPadding, chart, _atlasSize); + return _root.Insert(chart->Size.X, chart->Size.Y, chart, _atlasSize); } }; } diff --git a/Source/Engine/ContentImporters/ImportModel.cpp b/Source/Engine/ContentImporters/ImportModel.cpp index 568abfbeb..d057bae0c 100644 --- a/Source/Engine/ContentImporters/ImportModel.cpp +++ b/Source/Engine/ContentImporters/ImportModel.cpp @@ -72,8 +72,8 @@ void RepackMeshLightmapUVs(ModelData& data) // Build list of meshes with their area struct LightmapUVsPack : RectPackNode { - LightmapUVsPack(float x, float y, float width, float height) - : RectPackNode(x, y, width, height) + LightmapUVsPack(Size x, Size y, Size width, Size height) + : RectPackNode(x, y, width, height) { } @@ -110,10 +110,10 @@ void 
RepackMeshLightmapUVs(ModelData& data) bool failed = false; const float chartsPadding = (4.0f / 256.0f) * atlasSize; RectPackAtlas atlas; - atlas.Init(chartsPadding, chartsPadding); + atlas.Init(atlasSize, atlasSize, chartsPadding); for (auto& entry : entries) { - entry.Slot = atlas.Insert(entry.Size, entry.Size, chartsPadding); + entry.Slot = atlas.Insert(entry.Size, entry.Size); if (entry.Slot == nullptr) { // Failed to insert surface, increase atlas size and try again diff --git a/Source/Engine/Render2D/FontTextureAtlas.cpp b/Source/Engine/Render2D/FontTextureAtlas.cpp index b1c8e6c3f..01112305c 100644 --- a/Source/Engine/Render2D/FontTextureAtlas.cpp +++ b/Source/Engine/Render2D/FontTextureAtlas.cpp @@ -36,7 +36,8 @@ void FontTextureAtlas::Init(uint32 width, uint32 height) // Setup _width = width; _height = height; - _atlas.Init(_width, _height, GetPaddingAmount()); + const uint32 padding = GetPaddingAmount() * 2; // Double the padding so each slot has own border around it + _atlas.Init(_width, _height, padding); _isDirty = false; // Reserve upload data memory @@ -44,9 +45,9 @@ void FontTextureAtlas::Init(uint32 width, uint32 height) Platform::MemoryClear(_data.Get(), _data.Capacity()); } -FontTextureAtlasSlot* FontTextureAtlas::AddEntry(uint32 targetWidth, uint32 targetHeight, const Array& data) +FontTextureAtlasSlot* FontTextureAtlas::AddEntry(uint32 width, uint32 height, const Array& data) { - if (targetWidth == 0 || targetHeight == 0) + if (width == 0 || height == 0) return nullptr; // Try to find slot for the texture @@ -54,7 +55,7 @@ FontTextureAtlasSlot* FontTextureAtlas::AddEntry(uint32 targetWidth, uint32 targ for (int32 i = 0; i < _freeSlots.Count(); i++) { FontTextureAtlasSlot* e = _freeSlots[i]; - if (e->Width == targetWidth && e->Height == targetHeight) + if (e->Width == width && e->Height == height) { slot = e; _freeSlots.RemoveAt(i); @@ -63,7 +64,7 @@ FontTextureAtlasSlot* FontTextureAtlas::AddEntry(uint32 targetWidth, uint32 targ } if (!slot) { 
- slot = _atlas.Insert(targetWidth, targetHeight, GetPaddingAmount()); + slot = _atlas.Insert(width, height); } if (slot) @@ -100,11 +101,10 @@ bool FontTextureAtlas::Invalidate(uint32 x, uint32 y, uint32 width, uint32 heigh void FontTextureAtlas::CopyDataIntoSlot(const FontTextureAtlasSlot* slot, const Array& data) { RowData rowData; - rowData.DstData = &_data[slot->Y * _width * _bytesPerPixel + slot->X * _bytesPerPixel]; + rowData.DstData = _data.Get() + (slot->Y * _width + slot->X) * _bytesPerPixel; rowData.SrcData = data.Get(); - rowData.DstTextureWidth = _width; - rowData.SrcTextureWidth = slot->Width; - rowData.RowWidth = slot->Width; + rowData.DstWidth = _width; + rowData.SrcWidth = slot->Width; rowData.Padding = GetPaddingAmount(); // Start with padding @@ -148,20 +148,20 @@ byte* FontTextureAtlas::GetSlotData(const FontTextureAtlasSlot* slot, uint32& wi void FontTextureAtlas::copyRow(const RowData& copyRowData) const { - const byte* srcData = (const byte*)((intptr)copyRowData.SrcData + (intptr)copyRowData.SrcRow * copyRowData.SrcTextureWidth * _bytesPerPixel); - byte* dstData = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstTextureWidth * _bytesPerPixel); - Platform::MemoryCopy(dstData, srcData, copyRowData.SrcTextureWidth * _bytesPerPixel); + const byte* srcData = (const byte*)((intptr)copyRowData.SrcData + (intptr)copyRowData.SrcRow * copyRowData.SrcWidth * _bytesPerPixel); + byte* dstData = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstWidth * _bytesPerPixel); + Platform::MemoryCopy(dstData, srcData, copyRowData.SrcWidth * _bytesPerPixel); if (copyRowData.Padding > 0) { const uint32 padSize = copyRowData.Padding * _bytesPerPixel; - byte* dstPaddingPixelLeft = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstTextureWidth * _bytesPerPixel - padSize); - byte* dstPaddingPixelRight = dstPaddingPixelLeft + copyRowData.RowWidth * _bytesPerPixel + padSize; + 
byte* dstPaddingPixelLeft = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstWidth * _bytesPerPixel - padSize); + byte* dstPaddingPixelRight = dstPaddingPixelLeft + copyRowData.SrcWidth * _bytesPerPixel + padSize; if (_paddingStyle == DilateBorder) { // Dilate left and right sides of the padded row const byte* firstPixel = srcData; - const byte* lastPixel = srcData + (copyRowData.SrcTextureWidth - 1) * _bytesPerPixel; + const byte* lastPixel = srcData + (copyRowData.SrcWidth - 1) * _bytesPerPixel; Platform::MemoryCopy(dstPaddingPixelLeft, firstPixel, padSize); Platform::MemoryCopy(dstPaddingPixelRight, lastPixel, padSize); } @@ -176,8 +176,8 @@ void FontTextureAtlas::copyRow(const RowData& copyRowData) const void FontTextureAtlas::zeroRow(const RowData& copyRowData) const { - byte* dstData = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstTextureWidth * _bytesPerPixel); - uint32 dstSize = copyRowData.RowWidth * _bytesPerPixel; + byte* dstData = (byte*)((intptr)copyRowData.DstData + (intptr)copyRowData.DstRow * copyRowData.DstWidth * _bytesPerPixel); + uint32 dstSize = copyRowData.SrcWidth * _bytesPerPixel; if (copyRowData.Padding > 0) { // Extend clear by left and right borders of the padded row diff --git a/Source/Engine/Render2D/FontTextureAtlas.h b/Source/Engine/Render2D/FontTextureAtlas.h index b63c7b705..adb46652a 100644 --- a/Source/Engine/Render2D/FontTextureAtlas.h +++ b/Source/Engine/Render2D/FontTextureAtlas.h @@ -13,8 +13,8 @@ /// struct FontTextureAtlasSlot : RectPackNode<> { - FontTextureAtlasSlot(uint32 x, uint32 y, uint32 width, uint32 height) - : RectPackNode<>(x, y, width, height) + FontTextureAtlasSlot(Size x, Size y, Size width, Size height) + : RectPackNode(x, y, width, height) { } @@ -37,9 +37,8 @@ private: uint8* DstData; int32 SrcRow; int32 DstRow; - int32 RowWidth; - int32 SrcTextureWidth; - int32 DstTextureWidth; + int32 SrcWidth; + int32 DstWidth; uint32 Padding; }; @@ 
-154,11 +153,11 @@ public: /// /// Adds the new entry to the atlas /// - /// Width of the entry. - /// Height of the entry. + /// Width of the entry. + /// Height of the entry. /// The data. /// The atlas slot occupied by the new entry. - FontTextureAtlasSlot* AddEntry(uint32 targetWidth, uint32 targetHeight, const Array& data); + FontTextureAtlasSlot* AddEntry(uint32 width, uint32 height, const Array& data); /// /// Invalidates the cached dynamic entry from the atlas. diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 20288cfe0..6dbe014f9 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -73,8 +73,8 @@ struct GlobalSurfaceAtlasTile : RectPackNode uint32 Address; uint32 ObjectAddressOffset; - GlobalSurfaceAtlasTile(uint16 x, uint16 y, uint16 width, uint16 height) - : RectPackNode(x, y, width, height) + GlobalSurfaceAtlasTile(Size x, Size y, Size width, Size height) + : RectPackNode(x, y, width, height) { } @@ -1236,7 +1236,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con uint16 tilePixels = tileResolution * tileResolution; GlobalSurfaceAtlasTile* tile = nullptr; if (tilePixels <= surfaceAtlasData.AtlasPixelsTotal - surfaceAtlasData.AtlasPixelsUsed) - tile = surfaceAtlasData.Atlas.Insert(tileResolution, tileResolution, 0, &surfaceAtlasData, actorObject, tileIndex); + tile = surfaceAtlasData.Atlas.Insert(tileResolution, tileResolution, &surfaceAtlasData, actorObject, tileIndex); if (tile) { if (!object) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index dbd9c7fdd..15ac7a097 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -43,8 +43,8 @@ struct ShadowsAtlasRectTile : RectPackNode { bool IsStatic; - ShadowsAtlasRectTile(uint16 x, uint16 y, uint16 width, uint16 height) - : 
RectPackNode(x, y, width, height) + ShadowsAtlasRectTile(Size x, Size y, Size width, Size height) + : RectPackNode(x, y, width, height) { } @@ -682,7 +682,7 @@ bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render auto& tile = atlasLight.Tiles[tileIndex]; if (tile.StaticRectTile == nullptr) { - tile.StaticRectTile = shadows.StaticAtlas.Insert(atlasLight.StaticResolution, atlasLight.StaticResolution, 0, &shadows, true); + tile.StaticRectTile = shadows.StaticAtlas.Insert(atlasLight.StaticResolution, atlasLight.StaticResolution, &shadows, true); if (!tile.StaticRectTile) { // Failed to insert tile to switch back to the default rendering @@ -1225,7 +1225,7 @@ RETRY_ATLAS_SETUP: bool failedToInsert = false; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesNeeded; tileIndex++) { - auto rectTile = shadows.Atlas.Insert(atlasLight.Resolution, atlasLight.Resolution, 0, &shadows, false); + auto rectTile = shadows.Atlas.Insert(atlasLight.Resolution, atlasLight.Resolution, &shadows, false); if (!rectTile) { // Free any previous tiles that were added diff --git a/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h b/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h index e28a53f0a..f94e7b5c1 100644 --- a/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h +++ b/Source/Engine/ShadowsOfMordor/AtlasChartsPacker.h @@ -17,8 +17,8 @@ namespace ShadowsOfMordor { Builder::LightmapUVsChart* Chart = nullptr; - Node(int32 x, int32 y, int32 width, int32 height) - : RectPackNode(x, y, width, height) + Node(Size x, Size y, Size width, Size height) + : RectPackNode(x, y, width, height) { } @@ -63,7 +63,7 @@ namespace ShadowsOfMordor /// Node* Insert(Builder::LightmapUVsChart* chart) { - return _root.Insert(chart->Width, chart->Height, _settings->ChartsPadding, chart, _settings); + return _root.Insert(chart->Width, chart->Height, chart, _settings); } }; }; diff --git a/Source/Engine/Utilities/RectPack.h b/Source/Engine/Utilities/RectPack.h index 6e51ac35a..a187e4c3b 
100644 --- a/Source/Engine/Utilities/RectPack.h +++ b/Source/Engine/Utilities/RectPack.h @@ -31,12 +31,6 @@ struct RectPackNode , Height(height) { } - - bool operator<(const RectPackNode& other) const - { - // Sort largest to smallest - return Width * Height > other.Width * other.Height; - } }; /// @@ -118,7 +112,7 @@ public: /// /// The atlas width (in pixels). /// The atlas height (in pixels). - /// The atlas borders padding (in pixels). + /// The nodes padding (in pixels). Distance from node contents to atlas borders or other nodes. void Init(Size atlasWidth, Size atlasHeight, Size bordersPadding = 0) { Width = atlasWidth; @@ -126,7 +120,7 @@ public: BordersPadding = bordersPadding; Nodes.Clear(); FreeNodes.Clear(); - Nodes.Add(NodeType(bordersPadding, bordersPadding, atlasWidth - bordersPadding * 2, atlasHeight - bordersPadding * 2)); + Nodes.Add(NodeType(bordersPadding, bordersPadding, atlasWidth - bordersPadding, atlasHeight - bordersPadding)); FreeNodes.Add(&Nodes[0]); } @@ -145,62 +139,61 @@ public: /// /// The node width (in pixels). /// The node height (in pixels). - /// The node padding margin (in pixels) around its contents. /// The additional arguments. /// The node that contains inserted an item or null if failed to find a free space. template - NodeType* Insert(Size width, Size height, Size padding, Args&&... args) + NodeType* Insert(Size width, Size height, Args&&... 
args) { NodeType* result = nullptr; - const Size paddedWidth = width + padding; - const Size paddedHeight = height + padding; + const Size paddedWidth = width + BordersPadding; + const Size paddedHeight = height + BordersPadding; // Search free nodes from back to front and find the one that fits requested item size // TODO: FreeNodes are sorted so use Binary Search to quickly find the first tile that might have enough space for insert for (int32 i = FreeNodes.Count() - 1; i >= 0; i--) { - NodeType& freeNode = *FreeNodes.Get()[i]; - if (paddedWidth > freeNode.Width || paddedHeight > freeNode.Height) + NodeType* freeNode = FreeNodes.Get()[i]; + if (paddedWidth > freeNode->Width || paddedHeight > freeNode->Height) { // Not enough space continue; } // Check if there will be some remaining space left in this node - if (freeNode.Width != paddedWidth || freeNode.Height != paddedHeight) + if (freeNode->Width != width || freeNode->Height != height) { // Subdivide this node into up to 2 additional nodes - const Size remainingWidth = freeNode.Width - paddedWidth; - const Size remainingHeight = freeNode.Height - paddedHeight; + const Size remainingWidth = freeNode->Width - paddedWidth; + const Size remainingHeight = freeNode->Height - paddedHeight; // Split the remaining area around this node into two children SizeRect bigger, smaller; if (remainingHeight <= remainingWidth) { // Split vertically - smaller = SizeRect(freeNode.X, freeNode.Y + paddedHeight, width, remainingHeight); - bigger = SizeRect(freeNode.X + paddedWidth, freeNode.Y, remainingWidth, freeNode.Height); + smaller = SizeRect(freeNode->X, freeNode->Y + paddedHeight, width, remainingHeight); + bigger = SizeRect(freeNode->X + paddedWidth, freeNode->Y, remainingWidth, freeNode->Height); } else { // Split horizontally - smaller = SizeRect(freeNode.X + paddedWidth, freeNode.Y, remainingWidth, height); - bigger = SizeRect(freeNode.X, freeNode.Y + paddedHeight, freeNode.Width, remainingHeight); + smaller = 
SizeRect(freeNode->X + paddedWidth, freeNode->Y, remainingWidth, height); + bigger = SizeRect(freeNode->X, freeNode->Y + paddedHeight, freeNode->Width, remainingHeight); } if (smaller.W * smaller.H > bigger.W * bigger.H) Swap(bigger, smaller); - if (bigger.W * bigger.H > padding) + if (bigger.W * bigger.H > BordersPadding) AddFreeNode(Nodes.Add(NodeType(bigger.X, bigger.Y, bigger.W, bigger.H))); - if (smaller.W * smaller.H > padding) + if (smaller.W * smaller.H > BordersPadding) AddFreeNode(Nodes.Add(NodeType(smaller.X, smaller.Y, smaller.W, smaller.H))); // Shrink to the actual area - freeNode.Width = width; - freeNode.Height = height; + freeNode->Width = width; + freeNode->Height = height; } // Insert into this node - result = &freeNode; + result = freeNode; FreeNodes.RemoveAtKeepOrder(i); result->OnInsert(Forward(args)...); break; From 4d0d08f245a630049e3cbdf12432f007810cd35a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 21 Jun 2024 17:55:02 +0200 Subject: [PATCH 176/292] Optimize Global Surface Atlas drawing with async job system --- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 502 ++++++++++++------ .../Renderer/GI/GlobalSurfaceAtlasPass.h | 15 +- Source/Engine/Renderer/RenderSetup.h | 4 +- Source/Engine/Renderer/Renderer.cpp | 9 +- 4 files changed, 363 insertions(+), 167 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 6dbe014f9..f92e0ca52 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -8,9 +8,10 @@ #include "../ShadowsPass.h" #include "Engine/Core/Math/Matrix3x3.h" #include "Engine/Core/Math/OrientedBoundingBox.h" -#include "Engine/Engine/Engine.h" -#include "Engine/Content/Content.h" #include "Engine/Core/Config/GraphicsSettings.h" +#include "Engine/Engine/Engine.h" +#include "Engine/Engine/Units.h" +#include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" 
#include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/Graphics.h" @@ -24,6 +25,7 @@ #include "Engine/Renderer/ColorGradingPass.h" #include "Engine/Renderer/EyeAdaptationPass.h" #include "Engine/Renderer/PostProcessingPass.h" +#include "Engine/Threading/JobSystem.h" #include "Engine/Utilities/RectPack.h" // This must match HLSL @@ -64,6 +66,22 @@ PACK_STRUCT(struct AtlasTileVertex uint32 TileAddress; }); +struct GlobalSurfaceAtlasNewObject +{ + void* ActorObject; + Actor* Actor; + OrientedBoundingBox Bounds; + BoundingSphere ActorObjectBounds; + bool UseVisibility; +}; + +struct GlobalSurfaceAtlasNewTile +{ + void* ActorObject; + uint16 TileIndex; + uint16 TileResolution; +}; + struct GlobalSurfaceAtlasTile : RectPackNode { Float3 ViewDirection; @@ -89,7 +107,10 @@ struct GlobalSurfaceAtlasObject uint64 LightingUpdateFrame; // Index of the frame to update lighting for this object (calculated when object gets dirty or overriden by dynamic lights) Actor* Actor; GlobalSurfaceAtlasTile* Tiles[6]; + Float3 Position; float Radius; + mutable bool Dirty; + bool UseVisibility; // TODO: merge into bit flags OrientedBoundingBox Bounds; GlobalSurfaceAtlasObject() @@ -131,6 +152,8 @@ public: SamplesBuffer CulledObjectsUsageHistory; // Cached data to be reused during RasterizeActor + Array DirtyObjectsBuffer; + Vector4 CullingPosDistance; uint64 CurrentFrame; float ResolutionInv; Float3 ViewPosition; @@ -138,6 +161,15 @@ public: float DistanceScalingStart; float DistanceScalingEnd; float DistanceScaling; + float MinObjectRadius; + + // Async objects drawing cache + Array> AsyncDrawWaitLabels; + RenderListBuffer AsyncFreeTiles; + RenderListBuffer AsyncNewObjects; + RenderListBuffer AsyncNewTiles; + Array AsyncScenesDrawCounters[2]; + RenderContext AsyncRenderContext; GlobalSurfaceAtlasCustomBuffer() : ObjectsBuffer(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, 
TEXT("GlobalSurfaceAtlas.ObjectsBuffer")) @@ -146,6 +178,7 @@ public: void ClearObjects() { + WaitForDrawActors(); CulledObjectsCounterIndex = -1; CulledObjectsUsageHistory.Clear(); LastFrameAtlasDefragmentation = Engine::FrameCount; @@ -173,6 +206,164 @@ public: Reset(); } + void GetOptions(const RenderContext& renderContext, int32& resolution, float& distance) + { + auto* graphicsSettings = GraphicsSettings::Get(); + resolution = Math::Clamp(graphicsSettings->GlobalSurfaceAtlasResolution, 256, GPU_MAX_TEXTURE_SIZE); + auto& giSettings = renderContext.List->Settings.GlobalIllumination; + distance = giSettings.Distance; + } + + void DrawActorsJobSync(int32) + { + DrawActorsJob(-1); + } + + void DrawActorsJob(int32 index) + { + PROFILE_CPU(); + + // Cache local data for the worker + auto drawCategory = index >= 0 ? SceneRendering::SceneDrawAsync : SceneRendering::SceneDraw; + const Vector3 cullingPos(CullingPosDistance); + const Real cullingDistance = CullingPosDistance.W; + const uint32 viewMask = AsyncRenderContext.View.RenderLayersMask; + const float minObjectRadius = MinObjectRadius; + auto& scenes = AsyncRenderContext.List->Scenes; + auto& drawCounters = AsyncScenesDrawCounters[index >= 0 ? 
1 : 0]; + + // Draw all scenes and all actors (cooperative with other jobs) + for (int32 sceneIndex = 0; sceneIndex < drawCounters.Count(); sceneIndex++) + { + volatile int64* drawCounter = &drawCounters[sceneIndex]; + auto& list = scenes[sceneIndex]->Actors[drawCategory]; + int64 drawIndex; + while ((drawIndex = Platform::InterlockedIncrement(drawCounter)) < list.Count()) + { + auto& e = list.Get()[drawIndex]; + if (e.Bounds.Radius >= minObjectRadius && viewMask & e.LayerMask && Vector3::Distance(e.Bounds.Center, cullingPos) - e.Bounds.Radius < cullingDistance) + { + //PROFILE_CPU_ACTOR(e.Actor); + e.Actor->Draw(AsyncRenderContext); + } + } + } + } + + void StartDrawActors(const RenderContext& renderContext, bool enableAsync = false) + { + if (AsyncDrawWaitLabels.HasItems()) + return; // Already started earlier this frame + int32 resolution; + float distance; + GetOptions(renderContext, resolution, distance); + const float resolutionInv = 1.0f / (float)resolution; + const auto currentFrame = Engine::FrameCount; + if (Resolution == resolution) + { + // Perform atlas defragmentation if needed + constexpr float maxUsageToDefrag = 0.8f; + if (currentFrame - LastFrameAtlasInsertFail < 10 && + currentFrame - LastFrameAtlasDefragmentation > 60 && + (float)AtlasPixelsUsed / AtlasPixelsTotal < maxUsageToDefrag) + { + PROFILE_CPU_NAMED("Defragment Atlas"); + ClearObjects(); + } + } + + // Setup data for rendering + CurrentFrame = currentFrame; + ResolutionInv = resolutionInv; + ViewPosition = renderContext.View.Position; + TileTexelsPerWorldUnit = 1.0f / METERS_TO_UNITS(0.1f); // Scales the tiles resolution + DistanceScalingStart = METERS_TO_UNITS(20.0f); // Distance from camera at which the tiles resolution starts to be scaled down + DistanceScalingEnd = METERS_TO_UNITS(50.0f); // Distance from camera at which the tiles resolution end to be scaled down + DistanceScaling = 0.2f; // The scale for tiles at distanceScalingEnd and further away + // TODO: add DetailsScale param 
to adjust quality of scene details in Global Surface Atlas + MinObjectRadius = 20.0f; // Skip too small objects + CullingPosDistance = Vector4(renderContext.View.Position, distance); + AsyncRenderContext = renderContext; + AsyncRenderContext.View.Pass = DrawPass::GlobalSurfaceAtlas; + + // Each scene uses own atomic counter to draw all actors + AsyncScenesDrawCounters[0].Resize(renderContext.List->Scenes.Count()); + AsyncScenesDrawCounters[1].Resize(renderContext.List->Scenes.Count()); + AsyncScenesDrawCounters[0].SetAll(-1); + AsyncScenesDrawCounters[1].SetAll(-1); + + if (enableAsync) + { + // Run in async via Job System + Function func; + func.Bind(this); + const int32 jobCount = Math::Max(JobSystem::GetThreadsCount() - 1, 1); // Leave 1 thread unused to not block the main-thread (jobs will overlap with rendering) + AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount)); + + // Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing) + func.Bind(this); + AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount)); + } + else + { + DrawActorsJob(-1); + DrawActorsJob(0); + } + } + + void WaitForDrawActors() + { + for (int64 label : AsyncDrawWaitLabels) + JobSystem::Wait(label); + AsyncDrawWaitLabels.Clear(); + } + + void PostDrawActors() + { + PROFILE_CPU_NAMED("Post Draw"); + + // Flush atlas tiles freeing + for (auto* tile : AsyncFreeTiles) + { + Atlas.Free(tile, this); + } + AsyncFreeTiles.Clear(); + + // Flush new objects adding + for (auto& newObject : AsyncNewObjects) + { + auto& object = Objects[newObject.ActorObject]; + object.Actor = newObject.Actor; + object.LastFrameUsed = CurrentFrame; + object.Position = (Float3)newObject.ActorObjectBounds.Center; + object.Radius = (float)newObject.ActorObjectBounds.Radius; + object.Dirty = true; + object.UseVisibility = newObject.UseVisibility; + object.Bounds = newObject.Bounds; + } + AsyncNewObjects.Clear(); + + // Flush new tiles adding + for (auto& 
newTile : AsyncNewTiles) + { + auto& object = Objects[newTile.ActorObject]; + int32 tilePixels = newTile.TileResolution * newTile.TileResolution; + GlobalSurfaceAtlasTile* tile = nullptr; + if (tilePixels <= AtlasPixelsTotal - AtlasPixelsUsed) + tile = Atlas.Insert(newTile.TileResolution, newTile.TileResolution, this, newTile.ActorObject, newTile.TileIndex); + if (tile) + { + object.Tiles[newTile.TileIndex] = tile; + object.Dirty = true; + } + else + { + LastFrameAtlasInsertFail = CurrentFrame; + } + } + AsyncNewTiles.Clear(); + } + // [ISceneRenderingListener] void OnSceneRenderingAddActor(Actor* a) override { @@ -346,6 +537,25 @@ void GlobalSurfaceAtlasPass::Dispose() _shader = nullptr; } +void GlobalSurfaceAtlasPass::OnCollectDrawCalls(RenderContextBatch& renderContextBatch) +{ + // Check if Global Surface Atlas will be used this frame + PROFILE_GPU_CPU_NAMED("Global Surface Atlas"); + if (checkIfSkipPass()) + return; + RenderContext& renderContext = renderContextBatch.GetMainContext(); + if (renderContext.List->Scenes.Count() == 0) + return; + if (GBufferPass::IsDebugView(renderContext.View.Mode) || + renderContext.View.Mode == ViewMode::GlobalSDF || + renderContext.View.Mode == ViewMode::QuadOverdraw || + renderContext.View.Mode == ViewMode::MaterialComplexity) + return; + auto& surfaceAtlasData = *renderContext.Buffers->GetCustomBuffer(TEXT("GlobalSurfaceAtlas")); + _surfaceAtlasData = &surfaceAtlasData; + surfaceAtlasData.StartDrawActors(renderContext, renderContextBatch.EnableAsync); +} + bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* context, BindingData& result) { // Skip if not supported @@ -370,12 +580,15 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co surfaceAtlasData.LastFrameUsed = currentFrame; PROFILE_GPU_CPU_NAMED("Global Surface Atlas"); + // Start objects drawing (in case not yet started earlier this frame) + _surfaceAtlasData = &surfaceAtlasData; + 
surfaceAtlasData.StartDrawActors(renderContext); + // Setup options - auto* graphicsSettings = GraphicsSettings::Get(); - const int32 resolution = Math::Clamp(graphicsSettings->GlobalSurfaceAtlasResolution, 256, GPU_MAX_TEXTURE_SIZE); + int32 resolution; + float distance; + surfaceAtlasData.GetOptions(renderContext, resolution, distance); const float resolutionInv = 1.0f / (float)resolution; - auto& giSettings = renderContext.List->Settings.GlobalIllumination; - const float distance = giSettings.Distance; // Initialize buffers bool noCache = surfaceAtlasData.Resolution != resolution; @@ -407,17 +620,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co } LOG(Info, "Global Surface Atlas resolution: {0}, memory usage: {1} MB", resolution, memUsage / 1024 / 1024); } - else - { - // Perform atlas defragmentation if needed - constexpr float maxUsageToDefrag = 0.8f; - if (currentFrame - surfaceAtlasData.LastFrameAtlasInsertFail < 10 && - currentFrame - surfaceAtlasData.LastFrameAtlasDefragmentation > 60 && - (float)surfaceAtlasData.AtlasPixelsUsed / surfaceAtlasData.AtlasPixelsTotal < maxUsageToDefrag) - { - surfaceAtlasData.ClearObjects(); - } - } for (SceneRendering* scene : renderContext.List->Scenes) surfaceAtlasData.ListenSceneRendering(scene); if (!_vertexBuffer) @@ -453,45 +655,9 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->BindVB(ToSpan(&vb, 1)); \ context->DrawInstanced(_vertexBuffer->Data.Count() / sizeof(AtlasTileVertex), 1); - // Add objects into the atlas - { - PROFILE_CPU_NAMED("Draw"); - surfaceAtlasData.ObjectsBuffer.Clear(); - _dirtyObjectsBuffer.Clear(); - _surfaceAtlasData = &surfaceAtlasData; - renderContext.View.Pass = DrawPass::GlobalSurfaceAtlas; - surfaceAtlasData.CurrentFrame = currentFrame; - surfaceAtlasData.ResolutionInv = resolutionInv; - surfaceAtlasData.ViewPosition = renderContext.View.Position; - surfaceAtlasData.TileTexelsPerWorldUnit = 1.0f / 10.0f; // 
Scales the tiles resolution - surfaceAtlasData.DistanceScalingStart = 2000.0f; // Distance from camera at which the tiles resolution starts to be scaled down - surfaceAtlasData.DistanceScalingEnd = 5000.0f; // Distance from camera at which the tiles resolution end to be scaled down - surfaceAtlasData.DistanceScaling = 0.2f; // The scale for tiles at distanceScalingEnd and further away - // TODO: add DetailsScale param to adjust quality of scene details in Global Surface Atlas - const uint32 viewMask = renderContext.View.RenderLayersMask; - const Float3 viewPosition = renderContext.View.Position; - const float minObjectRadius = 20.0f; // Skip too small objects - _cullingPosDistance = Vector4(viewPosition, distance); - int32 actorsDrawn = 0; - SceneRendering::DrawCategory drawCategories[] = { SceneRendering::SceneDraw, SceneRendering::SceneDrawAsync }; - for (auto* scene : renderContext.List->Scenes) - { - for (SceneRendering::DrawCategory drawCategory : drawCategories) - { - auto& list = scene->Actors[drawCategory]; - for (auto& e : list) - { - if (e.Bounds.Radius >= minObjectRadius && viewMask & e.LayerMask && CollisionsHelper::DistanceSpherePoint(e.Bounds, viewPosition) < distance) - { - //PROFILE_CPU_ACTOR(e.Actor); - e.Actor->Draw(renderContext); - actorsDrawn++; - } - } - } - } - ZoneValue(actorsDrawn); - } + // Ensure that async objects drawing ended + surfaceAtlasData.WaitForDrawActors(); + surfaceAtlasData.PostDrawActors(); // Remove unused objects { @@ -510,8 +676,91 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co } } + // Write objects to the data buffer + { + PROFILE_CPU_NAMED("Write Objects"); + surfaceAtlasData.DirtyObjectsBuffer.Clear(); + surfaceAtlasData.ObjectsBuffer.Clear(); + for (auto& e : surfaceAtlasData.Objects) + { + auto& object = e.Value; + if (object.Dirty) + { + // Collect dirty objects + object.LastFrameUpdated = surfaceAtlasData.CurrentFrame; + object.LightingUpdateFrame = 
surfaceAtlasData.CurrentFrame; + surfaceAtlasData.DirtyObjectsBuffer.Add(e.Key); + } + + Matrix3x3 worldToLocalRotation; + Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation); + Float3 worldPosition = object.Bounds.Transformation.Translation; + Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale; + + // Write to objects buffer (this must match unpacking logic in HLSL) + uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4); + auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); + objectData[0] = Float4(object.Position, object.Radius); + objectData[1] = Float4::Zero; + objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X); + objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y); + objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z); + objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f); + auto tileOffsets = reinterpret_cast(&objectData[1]); // xyz used for tile offsets packed into uint16 + auto objectDataSize = reinterpret_cast(&objectData[1].W); // w used for object size (count of Float4s for object+tiles) + *objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE; + for (int32 tileIndex = 0; tileIndex < 6; tileIndex++) + { + auto* tile = object.Tiles[tileIndex]; + if (!tile) + continue; + tile->ObjectAddressOffset = *objectDataSize; + tile->Address = objectAddress + tile->ObjectAddressOffset; + tileOffsets[tileIndex] = tile->ObjectAddressOffset; + *objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE; + + // Setup view to render object from the side + Float3 xAxis, yAxis, zAxis = Float3::Zero; + zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 
1.0f : -1.0f; + yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up; + Float3::Cross(yAxis, zAxis, xAxis); + Float3 localSpaceOffset = -zAxis * object.Bounds.Extents; + xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis); + yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis); + zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis); + xAxis.NormalizeFast(); + yAxis.NormalizeFast(); + zAxis.NormalizeFast(); + tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset); + tile->ViewDirection = zAxis; + + // Create view matrix + tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition))); + tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition))); + tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition))); + tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1)); + + // Calculate object bounds size in the view + OrientedBoundingBox viewBounds(object.Bounds); + viewBounds.Transform(tile->ViewMatrix); + Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents); + tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f; + + // Per-tile data + const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING; + const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING; + auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE); + tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv; + tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41); + tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42); + tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43); + tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused + } + } + } + 
// Rasterize world geometry material properties into Global Surface Atlas - if (_dirtyObjectsBuffer.Count() != 0) + if (surfaceAtlasData.DirtyObjectsBuffer.Count() != 0) { PROFILE_GPU_CPU_NAMED("Rasterize Tiles"); @@ -549,8 +798,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { // Per-tile clear (with a single draw call) _vertexBuffer->Clear(); - _vertexBuffer->Data.EnsureCapacity(_dirtyObjectsBuffer.Count() * 6 * sizeof(AtlasTileVertex)); - for (void* actorObject : _dirtyObjectsBuffer) + _vertexBuffer->Data.EnsureCapacity(surfaceAtlasData.DirtyObjectsBuffer.Count() * 6 * sizeof(AtlasTileVertex)); + for (void* actorObject : surfaceAtlasData.DirtyObjectsBuffer) { const GlobalSurfaceAtlasObject* objectPtr = surfaceAtlasData.Objects.TryGet(actorObject); if (!objectPtr) @@ -575,12 +824,13 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co drawCallsListGBuffer.CanUseInstancing = false; drawCallsListGBufferNoDecals.CanUseInstancing = false; int32 tilesDrawn = 0; - for (void* actorObject : _dirtyObjectsBuffer) + for (void* actorObject : surfaceAtlasData.DirtyObjectsBuffer) { const GlobalSurfaceAtlasObject* objectPtr = surfaceAtlasData.Objects.TryGet(actorObject); if (!objectPtr) continue; const GlobalSurfaceAtlasObject& object = *objectPtr; + object.Dirty = false; // Clear draw calls list renderContextTiles.List->DrawCalls.Clear(); @@ -1036,6 +1286,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Draw indirect light from Global Illumination if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::GI)) { + auto& giSettings = renderContext.List->Settings.GlobalIllumination; switch (giSettings.Mode) { case GlobalIlluminationMode::DDGI: @@ -1184,6 +1435,11 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex } } +void GlobalSurfaceAtlasPass::GetCullingData(Vector4& cullingPosDistance) const +{ + cullingPosDistance = 
_surfaceAtlasData->CullingPosDistance; +} + void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, const BoundingSphere& actorObjectBounds, const Transform& localToWorld, const BoundingBox& localBounds, uint32 tilesMask, bool useVisibility, float qualityScale) { GlobalSurfaceAtlasCustomBuffer& surfaceAtlasData = *_surfaceAtlasData; @@ -1191,7 +1447,9 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con const float distanceScale = Math::Lerp(1.0f, surfaceAtlasData.DistanceScaling, Math::InverseLerp(surfaceAtlasData.DistanceScalingStart, surfaceAtlasData.DistanceScalingEnd, (float)CollisionsHelper::DistanceSpherePoint(actorObjectBounds, surfaceAtlasData.ViewPosition))); const float tilesScale = surfaceAtlasData.TileTexelsPerWorldUnit * distanceScale * qualityScale; GlobalSurfaceAtlasObject* object = surfaceAtlasData.Objects.TryGet(actorObject); - bool anyTile = false, dirty = false; + if (!object && surfaceAtlasData.AsyncNewObjects.Count() >= 512) + return; // Reduce load on 1st frame and add more objects during next frames to balance performance + bool anyTile = false, dirty = GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD; for (int32 tileIndex = 0; tileIndex < 6; tileIndex++) { if (((1 << tileIndex) & tilesMask) == 0) @@ -1206,7 +1464,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con // Skip too small surfaces if (object && object->Tiles[tileIndex]) { - surfaceAtlasData.Atlas.Free(object->Tiles[tileIndex], &surfaceAtlasData); + surfaceAtlasData.AsyncFreeTiles.Add(object->Tiles[tileIndex]); object->Tiles[tileIndex] = nullptr; } continue; @@ -1229,112 +1487,40 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con anyTile = true; continue; } - surfaceAtlasData.Atlas.Free(object->Tiles[tileIndex], &surfaceAtlasData); + surfaceAtlasData.AsyncFreeTiles.Add(object->Tiles[tileIndex]); + object->Tiles[tileIndex] = nullptr; } // Insert 
tile into atlas - uint16 tilePixels = tileResolution * tileResolution; - GlobalSurfaceAtlasTile* tile = nullptr; - if (tilePixels <= surfaceAtlasData.AtlasPixelsTotal - surfaceAtlasData.AtlasPixelsUsed) - tile = surfaceAtlasData.Atlas.Insert(tileResolution, tileResolution, &surfaceAtlasData, actorObject, tileIndex); - if (tile) - { - if (!object) - object = &surfaceAtlasData.Objects[actorObject]; - object->Tiles[tileIndex] = tile; - anyTile = true; - dirty = true; - } - else - { - if (object) - object->Tiles[tileIndex] = nullptr; - surfaceAtlasData.LastFrameAtlasInsertFail = surfaceAtlasData.CurrentFrame; - } + surfaceAtlasData.AsyncNewTiles.Add({ actorObject, (uint16)tileIndex, tileResolution }); + anyTile = true; } if (!anyTile) return; - // Redraw objects from time-to-time (dynamic objects can be animated, static objects can have textures streamed) - uint32 redrawFramesCount = actor->HasStaticFlag(StaticFlags::Lightmap) ? 120 : 4; - if (surfaceAtlasData.CurrentFrame - object->LastFrameUpdated >= (redrawFramesCount + (actor->GetID().D & redrawFramesCount))) - dirty = true; + // Calculate world-space bounds + OrientedBoundingBox bounds(localBounds); + bounds.Transform(localToWorld); - // Mark object as used - object->Actor = actor; - object->LastFrameUsed = surfaceAtlasData.CurrentFrame; - object->Bounds = OrientedBoundingBox(localBounds); - object->Bounds.Transform(localToWorld); - object->Radius = (float)actorObjectBounds.Radius; - if (dirty || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD) + if (object) { - object->LastFrameUpdated = surfaceAtlasData.CurrentFrame; - object->LightingUpdateFrame = surfaceAtlasData.CurrentFrame; - _dirtyObjectsBuffer.Add(actorObject); + // Redraw objects from time-to-time (dynamic objects can be animated, static objects can have textures streamed) + uint32 redrawFramesCount = actor->HasStaticFlag(StaticFlags::Lightmap) ? 
120 : 4; + if (surfaceAtlasData.CurrentFrame - object->LastFrameUpdated >= (redrawFramesCount + (actor->GetID().D & redrawFramesCount))) + dirty = true; + + // Mark object as used + object->Actor = actor; + object->LastFrameUsed = surfaceAtlasData.CurrentFrame; + object->Bounds = bounds; + object->Position = (Float3)actorObjectBounds.Center; // TODO: large worlds + object->Radius = (float)actorObjectBounds.Radius; + object->Dirty = dirty; + object->UseVisibility = useVisibility; } - - Matrix3x3 worldToLocalRotation; - Matrix3x3::RotationQuaternion(object->Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation); - Float3 worldPosition = object->Bounds.Transformation.Translation; - Float3 worldExtents = object->Bounds.Extents * object->Bounds.Transformation.Scale; - - // Write to objects buffer (this must match unpacking logic in HLSL) - uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4); - auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); - objectData[0] = *(Float4*)&actorObjectBounds; - objectData[1] = Float4::Zero; - objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X); - objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y); - objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z); - objectData[5] = Float4(worldExtents, useVisibility ? 
1.0f : 0.0f); - auto tileOffsets = reinterpret_cast(&objectData[1]); // xyz used for tile offsets packed into uint16 - auto objectDataSize = reinterpret_cast(&objectData[1].W); // w used for object size (count of Float4s for object+tiles) - *objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE; - for (int32 tileIndex = 0; tileIndex < 6; tileIndex++) + else { - auto* tile = object->Tiles[tileIndex]; - if (!tile) - continue; - tile->ObjectAddressOffset = *objectDataSize; - tile->Address = objectAddress + tile->ObjectAddressOffset; - tileOffsets[tileIndex] = tile->ObjectAddressOffset; - *objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE; - - // Setup view to render object from the side - Float3 xAxis, yAxis, zAxis = Float3::Zero; - zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f; - yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up; - Float3::Cross(yAxis, zAxis, xAxis); - Float3 localSpaceOffset = -zAxis * object->Bounds.Extents; - xAxis = object->Bounds.Transformation.LocalToWorldVector(xAxis); - yAxis = object->Bounds.Transformation.LocalToWorldVector(yAxis); - zAxis = object->Bounds.Transformation.LocalToWorldVector(zAxis); - xAxis.NormalizeFast(); - yAxis.NormalizeFast(); - zAxis.NormalizeFast(); - tile->ViewPosition = object->Bounds.Transformation.LocalToWorld(localSpaceOffset); - tile->ViewDirection = zAxis; - - // Create view matrix - tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition))); - tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition))); - tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition))); - tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1)); - - // Calculate object bounds size in the view - OrientedBoundingBox viewBounds(object->Bounds); - viewBounds.Transform(tile->ViewMatrix); - Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents); - tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f; - - 
// Per-tile data - const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING; - const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING; - auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE); - tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv; - tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41); - tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42); - tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43); - tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused + // Add new object + surfaceAtlasData.AsyncNewObjects.Add({ actorObject, actor, bounds, actorObjectBounds, useVisibility }); } } diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h index a498b3767..89ce6875c 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h @@ -58,12 +58,16 @@ private: class GPUBuffer* _culledObjectsSizeBuffer = nullptr; class DynamicVertexBuffer* _vertexBuffer = nullptr; class GlobalSurfaceAtlasCustomBuffer* _surfaceAtlasData; - Array _dirtyObjectsBuffer; uint64 _culledObjectsSizeFrames[8]; - Vector4 _cullingPosDistance; void* _currentActorObject; public: + /// + /// Calls drawing scene objects in async early in the frame. + /// + /// The rendering context batch. + void OnCollectDrawCalls(RenderContextBatch& renderContextBatch); + /// /// Renders the Global Surface Atlas. 
/// @@ -82,11 +86,8 @@ public: void RenderDebug(RenderContext& renderContext, GPUContext* context, GPUTexture* output); // Gets the culling view position (xyz) and view distance (w) - void GetCullingData(Vector4& cullingPosDistance) const - { - cullingPosDistance = _cullingPosDistance; - } - + void GetCullingData(Vector4& cullingPosDistance) const; + // Gets the current object of the actor that is drawn into atlas. void* GetCurrentActorObject() const { diff --git a/Source/Engine/Renderer/RenderSetup.h b/Source/Engine/Renderer/RenderSetup.h index da4835493..e0a07a7a1 100644 --- a/Source/Engine/Renderer/RenderSetup.h +++ b/Source/Engine/Renderer/RenderSetup.h @@ -1,4 +1,4 @@ -// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. #pragma once @@ -12,4 +12,6 @@ struct FLAXENGINE_API RenderSetup RenderingUpscaleLocation UpscaleLocation = RenderingUpscaleLocation::AfterAntiAliasingPass; bool UseMotionVectors = false; bool UseTemporalAAJitter = false; + bool UseGlobalSDF = false; + bool UseGlobalSurfaceAtlas = false; }; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index b199fc231..b33e3ad47 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -338,6 +338,11 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont renderContext.List->Settings.AntiAliasing.Mode == AntialiasingMode::TemporalAntialiasing; } setup.UseTemporalAAJitter = aaMode == AntialiasingMode::TemporalAntialiasing; + setup.UseGlobalSurfaceAtlas = renderContext.View.Mode == ViewMode::GlobalSurfaceAtlas || + (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::GI) && renderContext.List->Settings.GlobalIllumination.Mode == GlobalIlluminationMode::DDGI); + setup.UseGlobalSDF = (graphicsSettings->EnableGlobalSDF && EnumHasAnyFlags(view.Flags, ViewFlags::GlobalSDF)) || + renderContext.View.Mode == ViewMode::GlobalSDF || + 
setup.UseGlobalSurfaceAtlas; // Disable TAA jitter in debug modes switch (renderContext.View.Mode) @@ -404,6 +409,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont JobSystem::SetJobStartingOnDispatch(false); task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDraw); task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDrawAsync); + if (setup.UseGlobalSurfaceAtlas) + GlobalSurfaceAtlasPass::Instance()->OnCollectDrawCalls(renderContextBatch); // Wait for async jobs to finish JobSystem::SetJobStartingOnDispatch(true); @@ -456,7 +463,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont #endif // Global SDF rendering (can be used by materials later on) - if (graphicsSettings->EnableGlobalSDF && EnumHasAnyFlags(view.Flags, ViewFlags::GlobalSDF)) + if (setup.UseGlobalSDF) { GlobalSignDistanceFieldPass::BindingData bindingData; GlobalSignDistanceFieldPass::Instance()->Render(renderContext, context, bindingData); From 3b3cd5ade4a7d5ef502b8256323e76bd2263dd3f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 21 Jun 2024 23:01:32 +0200 Subject: [PATCH 177/292] Optimize Global Surface Atlas defragmenting to just reset tiles, not whole objects list --- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index f92e0ca52..3fd2060d9 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -268,7 +268,14 @@ public: (float)AtlasPixelsUsed / AtlasPixelsTotal < maxUsageToDefrag) { PROFILE_CPU_NAMED("Defragment Atlas"); - ClearObjects(); + LastFrameAtlasDefragmentation = Engine::FrameCount; + for (auto& e : Objects) + { + auto& object = e.Value; + Platform::MemoryClear(object.Tiles, sizeof(object.Tiles)); + } 
+ Atlas.Clear(); + AtlasPixelsUsed = 0; } } From 27739491977c643916f4ca30f71043d74b448f90 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 22 Jun 2024 09:58:20 +0200 Subject: [PATCH 178/292] Optimize wait signal in Job System to wake waiting threads only when job batch ends --- Source/Engine/Threading/JobSystem.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index f278d7d02..090bab2ea 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -200,16 +200,19 @@ int32 JobSystemThread::Run() data.Job(data.Index); // Move forward with the job queue + bool notifyWaiting = false; JobsLocker.Lock(); JobContext& context = JobContexts.At(data.JobKey); if (Platform::InterlockedDecrement(&context.JobsLeft) <= 0) { ASSERT_LOW_LAYER(context.JobsLeft <= 0); JobContexts.Remove(data.JobKey); + notifyWaiting = true; } JobsLocker.Unlock(); - WaitSignal.NotifyAll(); + if (notifyWaiting) + WaitSignal.NotifyAll(); data.Job.Unbind(); } From 861d8a683f8158efb5dba5ec73457e9a7ca644c0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 24 Jun 2024 13:12:48 +0200 Subject: [PATCH 179/292] Add `JobSystem::Dispatch` that accepts dependent jobs that needs to be completed before --- Source/Engine/Threading/JobSystem.cpp | 131 +++++++++++++++++++++----- Source/Engine/Threading/JobSystem.h | 12 +++ 2 files changed, 119 insertions(+), 24 deletions(-) diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 090bab2ea..d3f879124 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -5,6 +5,7 @@ #include "Engine/Platform/CPUInfo.h" #include "Engine/Platform/Thread.h" #include "Engine/Platform/ConditionVariable.h" +#include "Engine/Core/Types/Span.h" #include "Engine/Core/Collections/Dictionary.h" #include "Engine/Engine/EngineService.h" #include 
"Engine/Profiler/ProfilerCPU.h" @@ -48,13 +49,26 @@ public: struct JobData { - Function Job; int32 Index; int64 JobKey; }; template<> struct TIsPODType +{ + enum { Value = true }; +}; + +struct JobContext +{ + volatile int64 JobsLeft; + volatile int64 DependenciesLeft; + Function Job; + Array Dependants; +}; + +template<> +struct TIsPODType { enum { Value = false }; }; @@ -79,17 +93,6 @@ public: } }; -struct JobContext -{ - volatile int64 JobsLeft; -}; - -template<> -struct TIsPODType -{ - enum { Value = true }; -}; - namespace { JobSystemService JobSystemInstance; @@ -158,6 +161,7 @@ int32 JobSystemThread::Run() Platform::SetThreadAffinityMask(1ull << Index); JobData data; + Function job; bool attachCSharpThread = true; #if !JOB_SYSTEM_USE_MUTEX moodycamel::ConsumerToken consumerToken(Jobs); @@ -174,18 +178,23 @@ int32 JobSystemThread::Run() { data = Jobs.PeekFront(); Jobs.PopFront(); + const JobContext& context = ((const Dictionary&)JobContexts).At(data.JobKey); + job = context.Job; } JobsLocker.Unlock(); #else - if (!Jobs.try_dequeue(consumerToken, data)) - data.Job.Unbind(); + if (Jobs.try_dequeue(consumerToken, data)) + { + const JobContext& context = ((const Dictionary&)JobContexts).At(data.JobKey); + job = context.Job; + } #endif #if JOB_SYSTEM_USE_STATS Platform::InterlockedIncrement(&DequeueCount); Platform::InterlockedAdd(&DequeueSum, Platform::GetTimeCycles() - start); #endif - if (data.Job.IsBinded()) + if (job.IsBinded()) { #if USE_CSHARP // Ensure to have C# thread attached to this thead (late init due to MCore being initialized after Job System) @@ -197,7 +206,7 @@ int32 JobSystemThread::Run() #endif // Run job - data.Job(data.Index); + job(data.Index); // Move forward with the job queue bool notifyWaiting = false; @@ -205,16 +214,33 @@ int32 JobSystemThread::Run() JobContext& context = JobContexts.At(data.JobKey); if (Platform::InterlockedDecrement(&context.JobsLeft) <= 0) { - ASSERT_LOW_LAYER(context.JobsLeft <= 0); + // Update any dependant jobs + 
for (int64 dependant : context.Dependants) + { + JobContext& dependantContext = JobContexts.At(dependant); + if (Platform::InterlockedDecrement(&dependantContext.DependenciesLeft) <= 0) + { + // Dispatch dependency when it's ready + JobData dependantData; + dependantData.JobKey = dependant; + for (dependantData.Index = 0; dependantData.Index < dependantContext.JobsLeft; dependantData.Index++) +#if JOB_SYSTEM_USE_MUTEX + Jobs.PushBack(dependantData); +#else + Jobs.enqueue(dependantData); +#endif + } + } + + // Remove completed context JobContexts.Remove(data.JobKey); notifyWaiting = true; } JobsLocker.Unlock(); - if (notifyWaiting) WaitSignal.NotifyAll(); - data.Job.Unbind(); + job.Unbind(); } else { @@ -250,9 +276,9 @@ void JobSystem::Execute(const Function& job, int32 jobCount) int64 JobSystem::Dispatch(const Function& job, int32 jobCount) { - PROFILE_CPU(); if (jobCount <= 0) return 0; + PROFILE_CPU(); #if JOB_SYSTEM_ENABLED #if JOB_SYSTEM_USE_STATS const auto start = Platform::GetTimeCycles(); @@ -260,21 +286,20 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount; JobData data; - data.Job = job; data.JobKey = label; JobContext context; + context.Job = job; context.JobsLeft = jobCount; + context.DependenciesLeft = 0; -#if JOB_SYSTEM_USE_MUTEX JobsLocker.Lock(); JobContexts.Add(label, context); +#if JOB_SYSTEM_USE_MUTEX for (data.Index = 0; data.Index < jobCount; data.Index++) Jobs.PushBack(data); JobsLocker.Unlock(); #else - JobsLocker.Lock(); - JobContexts.Add(label, context); JobsLocker.Unlock(); for (data.Index = 0; data.Index < jobCount; data.Index++) Jobs.enqueue(data); @@ -300,6 +325,64 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) #endif } +int64 JobSystem::Dispatch(const Function& job, Span dependencies, int32 jobCount) +{ + if (jobCount <= 0) + return 0; + PROFILE_CPU(); +#if JOB_SYSTEM_ENABLED + const auto label = 
Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount; + + JobData data; + data.JobKey = label; + + JobContext context; + context.Job = job; + context.JobsLeft = jobCount; + context.DependenciesLeft = 0; + + JobsLocker.Lock(); + for (int64 dependency : dependencies) + { + if (JobContext* dependencyContext = JobContexts.TryGet(dependency)) + { + context.DependenciesLeft++; + dependencyContext->Dependants.Add(label); + } + } + JobContexts.Add(label, context); +#if JOB_SYSTEM_USE_MUTEX + if (context.DependenciesLeft == 0) + { + for (data.Index = 0; data.Index < jobCount; data.Index++) + Jobs.PushBack(data); + } + JobsLocker.Unlock(); +#else + JobsLocker.Unlock(); + if (context.DependenciesLeft == 0) + { + for (data.Index = 0; data.Index < jobCount; data.Index++) + Jobs.enqueue(data); + } +#endif + + if (context.DependenciesLeft == 0 && JobStartingOnDispatch) + { + if (jobCount == 1) + JobsSignal.NotifyOne(); + else + JobsSignal.NotifyAll(); + } + + return label; +#else + for (int32 i = 0; i < jobCount; i++) + job(i); + return 0; +#endif +} + void JobSystem::Wait() { #if JOB_SYSTEM_ENABLED diff --git a/Source/Engine/Threading/JobSystem.h b/Source/Engine/Threading/JobSystem.h index d269aa196..c6b4500fa 100644 --- a/Source/Engine/Threading/JobSystem.h +++ b/Source/Engine/Threading/JobSystem.h @@ -4,6 +4,9 @@ #include "Engine/Core/Delegate.h" +template +class Span; + /// /// Lightweight multi-threaded jobs execution scheduler. Uses a pool of threads and supports work-stealing concept. /// @@ -26,6 +29,15 @@ API_CLASS(Static) class FLAXENGINE_API JobSystem /// The label identifying this dispatch. Can be used to wait for the execution end. API_FUNCTION() static int64 Dispatch(const Function& job, int32 jobCount = 1); + /// + /// Dispatches the job for execution after all of the jobs it depends on have completed. + /// + /// The job. Argument is an index of the job execution. + /// The list of jobs that must complete before this job starts executing.
+ /// The job executions count. + /// The label identifying this dispatch. Can be used to wait for the execution end. + API_FUNCTION() static int64 Dispatch(const Function& job, Span dependencies, int32 jobCount = 1); + /// /// Waits for all dispatched jobs to finish. /// From 59bbb9e0584f916c6e2a54e98418c6aa76faa5d8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 24 Jun 2024 13:15:05 +0200 Subject: [PATCH 180/292] Fix error when reopening project --- Source/Editor/Modules/UIModule.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Editor/Modules/UIModule.cs b/Source/Editor/Modules/UIModule.cs index 00200900d..4282d5305 100644 --- a/Source/Editor/Modules/UIModule.cs +++ b/Source/Editor/Modules/UIModule.cs @@ -841,7 +841,6 @@ namespace FlaxEditor.Modules { // Open project, then close it Editor.OpenProject(Editor.GameProject.ProjectPath); - Editor.Windows.MainWindow.Close(ClosingReason.User); } private void OnMenuFileShowHide(Control control) From 8190d7f171ccc323909f1a9067311f8afdf7b876 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 24 Jun 2024 13:15:45 +0200 Subject: [PATCH 181/292] Optimize Global Surface Atlas setup and objects buffer writing to be async --- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 245 +++++++++--------- 1 file changed, 125 insertions(+), 120 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 3fd2060d9..0fb63af62 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -131,6 +131,7 @@ class GlobalSurfaceAtlasCustomBuffer : public RenderBuffers::CustomBuffer, publi { public: int32 Resolution = 0; + float ResolutionInv; int32 AtlasPixelsTotal = 0; int32 AtlasPixelsUsed = 0; uint64 LastFrameAtlasInsertFail = 0; @@ -155,7 +156,6 @@ public: Array DirtyObjectsBuffer; Vector4 CullingPosDistance; uint64 CurrentFrame; - float ResolutionInv; Float3 ViewPosition; float 
TileTexelsPerWorldUnit; float DistanceScalingStart; @@ -164,7 +164,7 @@ public: float MinObjectRadius; // Async objects drawing cache - Array> AsyncDrawWaitLabels; + Array> AsyncDrawWaitLabels; RenderListBuffer AsyncFreeTiles; RenderListBuffer AsyncNewObjects; RenderListBuffer AsyncNewTiles; @@ -257,9 +257,9 @@ public: int32 resolution; float distance; GetOptions(renderContext, resolution, distance); - const float resolutionInv = 1.0f / (float)resolution; + if (Resolution != resolution) + return; // Not yet initialized const auto currentFrame = Engine::FrameCount; - if (Resolution == resolution) { // Perform atlas defragmentation if needed constexpr float maxUsageToDefrag = 0.8f; @@ -281,7 +281,6 @@ public: // Setup data for rendering CurrentFrame = currentFrame; - ResolutionInv = resolutionInv; ViewPosition = renderContext.View.Position; TileTexelsPerWorldUnit = 1.0f / METERS_TO_UNITS(0.1f); // Scales the tiles resolution DistanceScalingStart = METERS_TO_UNITS(20.0f); // Distance from camera at which the tiles resolution starts to be scaled down @@ -310,11 +309,16 @@ public: // Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing) func.Bind(this); AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount)); + + // Run dependant job that will process objects data in async + func.Bind(this); + AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, ToSpan(AsyncDrawWaitLabels))); } else { DrawActorsJob(-1); DrawActorsJob(0); + SetupJob(0); } } @@ -325,18 +329,14 @@ public: AsyncDrawWaitLabels.Clear(); } - void PostDrawActors() + void FlushNewObjects() { - PROFILE_CPU_NAMED("Post Draw"); + PROFILE_CPU_NAMED("Flush Atlas"); - // Flush atlas tiles freeing for (auto* tile : AsyncFreeTiles) - { Atlas.Free(tile, this); - } AsyncFreeTiles.Clear(); - // Flush new objects adding for (auto& newObject : AsyncNewObjects) { auto& object = Objects[newObject.ActorObject]; @@ -350,7 +350,6 @@ public: } AsyncNewObjects.Clear(); 
- // Flush new tiles adding for (auto& newTile : AsyncNewTiles) { auto& object = Objects[newTile.ActorObject]; @@ -371,6 +370,114 @@ public: AsyncNewTiles.Clear(); } + void CompactObjects() + { + PROFILE_CPU_NAMED("Compact Objects"); + for (auto it = Objects.Begin(); it.IsNotEnd(); ++it) + { + if (it->Value.LastFrameUsed != CurrentFrame) + { + for (auto& tile : it->Value.Tiles) + { + if (tile) + Atlas.Free(tile, this); + } + Objects.Remove(it); + } + } + } + + void WriteObjects() + { + PROFILE_CPU_NAMED("Write Objects"); + DirtyObjectsBuffer.Clear(); + ObjectsBuffer.Clear(); + for (auto& e : Objects) + { + auto& object = e.Value; + if (object.Dirty) + { + // Collect dirty objects + object.LastFrameUpdated = CurrentFrame; + object.LightingUpdateFrame = CurrentFrame; + DirtyObjectsBuffer.Add(e.Key); + } + + Matrix3x3 worldToLocalRotation; + Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation); + Float3 worldPosition = object.Bounds.Transformation.Translation; + Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale; + + // Write to objects buffer (this must match unpacking logic in HLSL) + uint32 objectAddress = ObjectsBuffer.Data.Count() / sizeof(Float4); + auto* objectData = ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); + objectData[0] = Float4(object.Position, object.Radius); + objectData[1] = Float4::Zero; + objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X); + objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y); + objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z); + objectData[5] = Float4(worldExtents, object.UseVisibility ? 
1.0f : 0.0f); + auto tileOffsets = reinterpret_cast(&objectData[1]); // xyz used for tile offsets packed into uint16 + auto objectDataSize = reinterpret_cast(&objectData[1].W); // w used for object size (count of Float4s for object+tiles) + *objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE; + for (int32 tileIndex = 0; tileIndex < 6; tileIndex++) + { + auto* tile = object.Tiles[tileIndex]; + if (!tile) + continue; + tile->ObjectAddressOffset = *objectDataSize; + tile->Address = objectAddress + tile->ObjectAddressOffset; + tileOffsets[tileIndex] = tile->ObjectAddressOffset; + *objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE; + + // Setup view to render object from the side + Float3 xAxis, yAxis, zAxis = Float3::Zero; + zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f; + yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up; + Float3::Cross(yAxis, zAxis, xAxis); + Float3 localSpaceOffset = -zAxis * object.Bounds.Extents; + xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis); + yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis); + zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis); + xAxis.NormalizeFast(); + yAxis.NormalizeFast(); + zAxis.NormalizeFast(); + tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset); + tile->ViewDirection = zAxis; + + // Create view matrix + tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition))); + tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition))); + tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition))); + tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1)); + + // Calculate object bounds size in the view + OrientedBoundingBox viewBounds(object.Bounds); + viewBounds.Transform(tile->ViewMatrix); + Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents); + tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f; + + // Per-tile 
data + const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING; + const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING; + auto* tileData = ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE); + tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * ResolutionInv; + tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41); + tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42); + tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43); + tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused + } + } + } + + void SetupJob(int32) + { + PROFILE_CPU(); + FlushNewObjects(); + CompactObjects(); + WriteObjects(); + } + // [ISceneRenderingListener] void OnSceneRenderingAddActor(Actor* a) override { @@ -587,10 +694,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co surfaceAtlasData.LastFrameUsed = currentFrame; PROFILE_GPU_CPU_NAMED("Global Surface Atlas"); - // Start objects drawing (in case not et started earlier this frame) - _surfaceAtlasData = &surfaceAtlasData; - surfaceAtlasData.StartDrawActors(renderContext); - // Setup options int32 resolution; float distance; @@ -617,6 +720,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm); #undef INIT_ATLAS_TEXTURE surfaceAtlasData.Resolution = resolution; + surfaceAtlasData.ResolutionInv = resolutionInv; surfaceAtlasData.AtlasPixelsTotal = resolution * resolution; if (!surfaceAtlasData.ChunksBuffer) { @@ -632,6 +736,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co if (!_vertexBuffer) _vertexBuffer = New(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer")); + // Ensure that async objects drawing ended + 
_surfaceAtlasData = &surfaceAtlasData; + surfaceAtlasData.StartDrawActors(renderContext); // (ignored if not started earlier this frame) + surfaceAtlasData.WaitForDrawActors(); + // Utility for writing into tiles vertex buffer const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv); const Float2 posToClipAdd(-1.0f, 1.0f); @@ -662,110 +771,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->BindVB(ToSpan(&vb, 1)); \ context->DrawInstanced(_vertexBuffer->Data.Count() / sizeof(AtlasTileVertex), 1); - // Ensure that async objects drawing ended - surfaceAtlasData.WaitForDrawActors(); - surfaceAtlasData.PostDrawActors(); - - // Remove unused objects - { - PROFILE_GPU_CPU_NAMED("Compact Objects"); - for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it) - { - if (it->Value.LastFrameUsed != currentFrame) - { - for (auto& tile : it->Value.Tiles) - { - if (tile) - surfaceAtlasData.Atlas.Free(tile, &surfaceAtlasData); - } - surfaceAtlasData.Objects.Remove(it); - } - } - } - - // Write objects to the data buffer - { - PROFILE_CPU_NAMED("Write Objects"); - surfaceAtlasData.DirtyObjectsBuffer.Clear(); - surfaceAtlasData.ObjectsBuffer.Clear(); - for (auto& e : surfaceAtlasData.Objects) - { - auto& object = e.Value; - if (object.Dirty) - { - // Collect dirty objects - object.LastFrameUpdated = surfaceAtlasData.CurrentFrame; - object.LightingUpdateFrame = surfaceAtlasData.CurrentFrame; - surfaceAtlasData.DirtyObjectsBuffer.Add(e.Key); - } - - Matrix3x3 worldToLocalRotation; - Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation); - Float3 worldPosition = object.Bounds.Transformation.Translation; - Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale; - - // Write to objects buffer (this must match unpacking logic in HLSL) - uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4); - auto* objectData = 
surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); - objectData[0] = Float4(object.Position, object.Radius); - objectData[1] = Float4::Zero; - objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X); - objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y); - objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z); - objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f); - auto tileOffsets = reinterpret_cast(&objectData[1]); // xyz used for tile offsets packed into uint16 - auto objectDataSize = reinterpret_cast(&objectData[1].W); // w used for object size (count of Float4s for object+tiles) - *objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE; - for (int32 tileIndex = 0; tileIndex < 6; tileIndex++) - { - auto* tile = object.Tiles[tileIndex]; - if (!tile) - continue; - tile->ObjectAddressOffset = *objectDataSize; - tile->Address = objectAddress + tile->ObjectAddressOffset; - tileOffsets[tileIndex] = tile->ObjectAddressOffset; - *objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE; - - // Setup view to render object from the side - Float3 xAxis, yAxis, zAxis = Float3::Zero; - zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f; - yAxis = tileIndex == 2 || tileIndex == 3 ? 
Float3::Right : Float3::Up; - Float3::Cross(yAxis, zAxis, xAxis); - Float3 localSpaceOffset = -zAxis * object.Bounds.Extents; - xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis); - yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis); - zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis); - xAxis.NormalizeFast(); - yAxis.NormalizeFast(); - zAxis.NormalizeFast(); - tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset); - tile->ViewDirection = zAxis; - - // Create view matrix - tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition))); - tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition))); - tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition))); - tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1)); - - // Calculate object bounds size in the view - OrientedBoundingBox viewBounds(object.Bounds); - viewBounds.Transform(tile->ViewMatrix); - Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents); - tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f; - - // Per-tile data - const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING; - const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING; - auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE); - tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv; - tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41); - tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42); - tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43); - tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused - } - } - } - // Rasterize world geometry material properties into Global 
Surface Atlas if (surfaceAtlasData.DirtyObjectsBuffer.Count() != 0) { From a1c251c3b7ec7c65caac848e7488a8fe646cf351 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 24 Jun 2024 19:01:35 +0200 Subject: [PATCH 182/292] Add various optimizations to Global Surface Atlas --- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 0fb63af62..a64a06635 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -14,7 +14,6 @@ #include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" -#include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderTargetPool.h" @@ -40,6 +39,8 @@ #define GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES 0 // Forces to redraw all object tiles every frame #define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS 0 // Debug draws object bounds on redraw (and tile draw projection locations) #define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty) +#define GLOBAL_SURFACE_ATLAS_MAX_NEW_OBJECTS_PER_FRAME 500 // Limits the amount of newly added objects to atlas per-frame to reduce hitches on 1st frame or camera-cut +#define GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(flags) (EnumHasAnyFlags(flags, StaticFlags::Lightmap) ? 
200 : 10) // Amount of frames after which update object (less frequent updates for static scenes) #if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS || GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS #include "Engine/Debug/DebugDraw.h" @@ -300,14 +301,14 @@ public: if (enableAsync) { - // Run in async via Job System + // Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing) Function func; - func.Bind(this); + func.Bind(this); const int32 jobCount = Math::Max(JobSystem::GetThreadsCount() - 1, 1); // Leave 1 thread unused to not block the main-thread (jobs will overlap with rendering) AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount)); - // Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing) - func.Bind(this); + // Run in async via Job System + func.Bind(this); AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount)); // Run dependant job that will process objects data in async @@ -797,7 +798,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers))); { PROFILE_GPU_CPU_NAMED("Clear"); - if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD) + if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES) { // Full-atlas hardware clear context->ClearDepth(depthBuffer); @@ -1086,7 +1087,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID]; lightData.LastFrameUsed = currentFrame; - uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 
120 : 4; + uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags); if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount))) continue; lightData.LastFrameUpdated = currentFrame; @@ -1121,7 +1122,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID]; lightData.LastFrameUsed = currentFrame; - uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 120 : 4; + uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags); if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount))) continue; lightData.LastFrameUpdated = currentFrame; @@ -1143,7 +1144,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID]; lightData.LastFrameUsed = currentFrame; - uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 
120 : 4; + uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags); if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount))) continue; lightData.LastFrameUpdated = currentFrame; @@ -1459,9 +1460,9 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con const float distanceScale = Math::Lerp(1.0f, surfaceAtlasData.DistanceScaling, Math::InverseLerp(surfaceAtlasData.DistanceScalingStart, surfaceAtlasData.DistanceScalingEnd, (float)CollisionsHelper::DistanceSpherePoint(actorObjectBounds, surfaceAtlasData.ViewPosition))); const float tilesScale = surfaceAtlasData.TileTexelsPerWorldUnit * distanceScale * qualityScale; GlobalSurfaceAtlasObject* object = surfaceAtlasData.Objects.TryGet(actorObject); - if (!object && surfaceAtlasData.AsyncNewObjects.Count() >= 512) + if (!object && surfaceAtlasData.AsyncNewObjects.Count() >= GLOBAL_SURFACE_ATLAS_MAX_NEW_OBJECTS_PER_FRAME) return; // Reduce load on 1st frame and add more objects during next frames to balance performance - bool anyTile = false, dirty = GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD; + bool anyTile = false, dirty = GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES; for (int32 tileIndex = 0; tileIndex < 6; tileIndex++) { if (((1 << tileIndex) & tilesMask) == 0) @@ -1517,7 +1518,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con if (object) { // Redraw objects from time-to-time (dynamic objects can be animated, static objects can have textures streamed) - uint32 redrawFramesCount = actor->HasStaticFlag(StaticFlags::Lightmap) ? 
120 : 4; + uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(actor->GetStaticFlags()); if (surfaceAtlasData.CurrentFrame - object->LastFrameUpdated >= (redrawFramesCount + (actor->GetID().D & redrawFramesCount))) dirty = true; @@ -1527,7 +1528,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con object->Bounds = bounds; object->Position = (Float3)actorObjectBounds.Center; // TODO: large worlds object->Radius = (float)actorObjectBounds.Radius; - object->Dirty = dirty; + object->Dirty |= dirty; object->UseVisibility = useVisibility; } else From 3bbaa8dad0c774d8090eca0dae50000b32d9565e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 24 Jun 2024 19:02:38 +0200 Subject: [PATCH 183/292] Remove unused job system stats and concurrent queue code --- Source/Engine/Threading/JobSystem.cpp | 80 +-------------------------- 1 file changed, 1 insertion(+), 79 deletions(-) diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index d3f879124..34e2b4b9a 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -7,30 +7,14 @@ #include "Engine/Platform/ConditionVariable.h" #include "Engine/Core/Types/Span.h" #include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Core/Collections/RingBuffer.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" #if USE_CSHARP #include "Engine/Scripting/ManagedCLR/MCore.h" #endif -// Jobs storage perf info: -// (500 jobs, i7 9th gen) -// JOB_SYSTEM_USE_MUTEX=1, enqueue=130-280 cycles, dequeue=2-6 cycles -// JOB_SYSTEM_USE_MUTEX=0, enqueue=300-700 cycles, dequeue=10-16 cycles -// So using RingBuffer+Mutex+Signals is better than moodycamel::ConcurrentQueue - #define JOB_SYSTEM_ENABLED 1 -#define JOB_SYSTEM_USE_MUTEX 1 -#define JOB_SYSTEM_USE_STATS 0 - -#if JOB_SYSTEM_USE_STATS -#include "Engine/Core/Log.h" -#endif -#if JOB_SYSTEM_USE_MUTEX -#include "Engine/Core/Collections/RingBuffer.h" 
-#else -#include "ConcurrentQueue.h" -#endif #if JOB_SYSTEM_ENABLED @@ -107,15 +91,7 @@ namespace ConditionVariable WaitSignal; CriticalSection WaitMutex; CriticalSection JobsLocker; -#if JOB_SYSTEM_USE_MUTEX RingBuffer Jobs; -#else - ConcurrentQueue Jobs; -#endif -#if JOB_SYSTEM_USE_STATS - int64 DequeueCount = 0; - int64 DequeueSum = 0; -#endif } bool JobSystemService::Init() @@ -163,16 +139,9 @@ int32 JobSystemThread::Run() JobData data; Function job; bool attachCSharpThread = true; -#if !JOB_SYSTEM_USE_MUTEX - moodycamel::ConsumerToken consumerToken(Jobs); -#endif while (Platform::AtomicRead(&ExitFlag) == 0) { // Try to get a job -#if JOB_SYSTEM_USE_STATS - const auto start = Platform::GetTimeCycles(); -#endif -#if JOB_SYSTEM_USE_MUTEX JobsLocker.Lock(); if (Jobs.Count() != 0) { @@ -182,17 +151,6 @@ int32 JobSystemThread::Run() job = context.Job; } JobsLocker.Unlock(); -#else - if (Jobs.try_dequeue(consumerToken, data)) - { - const JobContext& context = ((const Dictionary&)JobContexts).At(data.JobKey); - job = context.Job; - } -#endif -#if JOB_SYSTEM_USE_STATS - Platform::InterlockedIncrement(&DequeueCount); - Platform::InterlockedAdd(&DequeueSum, Platform::GetTimeCycles() - start); -#endif if (job.IsBinded()) { @@ -224,11 +182,7 @@ int32 JobSystemThread::Run() JobData dependantData; dependantData.JobKey = dependant; for (dependantData.Index = 0; dependantData.Index < dependantContext.JobsLeft; dependantData.Index++) -#if JOB_SYSTEM_USE_MUTEX Jobs.PushBack(dependantData); -#else - Jobs.enqueue(dependantData); -#endif } } @@ -280,9 +234,6 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) return 0; PROFILE_CPU(); #if JOB_SYSTEM_ENABLED -#if JOB_SYSTEM_USE_STATS - const auto start = Platform::GetTimeCycles(); -#endif const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount; JobData data; @@ -295,19 +246,9 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) JobsLocker.Lock(); JobContexts.Add(label, context); 
-#if JOB_SYSTEM_USE_MUTEX for (data.Index = 0; data.Index < jobCount; data.Index++) Jobs.PushBack(data); JobsLocker.Unlock(); -#else - JobsLocker.Unlock(); - for (data.Index = 0; data.Index < jobCount; data.Index++) - Jobs.enqueue(data); -#endif - -#if JOB_SYSTEM_USE_STATS - LOG(Info, "Job enqueue time: {0} cycles", (int64)(Platform::GetTimeCycles() - start)); -#endif if (JobStartingOnDispatch) { @@ -351,21 +292,12 @@ int64 JobSystem::Dispatch(const Function& job, Span dependen } } JobContexts.Add(label, context); -#if JOB_SYSTEM_USE_MUTEX if (context.DependenciesLeft == 0) { for (data.Index = 0; data.Index < jobCount; data.Index++) Jobs.PushBack(data); } JobsLocker.Unlock(); -#else - JobsLocker.Unlock(); - if (dispatchNow) - { - for (data.Index = 0; data.Index < jobCount; data.Index++) - Jobs.enqueue(data); - } -#endif if (context.DependenciesLeft == 0 && JobStartingOnDispatch) { @@ -426,11 +358,6 @@ void JobSystem::Wait(int64 label) // Wake up any thread to prevent stalling in highly multi-threaded environment JobsSignal.NotifyOne(); } - -#if JOB_SYSTEM_USE_STATS - LOG(Info, "Job average dequeue time: {0} cycles", DequeueSum / DequeueCount); - DequeueSum = DequeueCount = 0; -#endif #endif } @@ -438,16 +365,11 @@ void JobSystem::SetJobStartingOnDispatch(bool value) { #if JOB_SYSTEM_ENABLED JobStartingOnDispatch = value; - if (value) { -#if JOB_SYSTEM_USE_MUTEX JobsLocker.Lock(); const int32 count = Jobs.Count(); JobsLocker.Unlock(); -#else - const int32 count = Jobs.Count(); -#endif if (count == 1) JobsSignal.NotifyOne(); else if (count != 0) From b545d8800c0b5c4683f4ec9418c75f636f2130cb Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 24 Jun 2024 23:19:01 +0200 Subject: [PATCH 184/292] Optimize job system memory allocations --- Source/Engine/Core/Collections/RingBuffer.h | 7 ++ .../Engine/Core/Memory/SimpleHeapAllocation.h | 86 +++++++++++++++++++ Source/Engine/Renderer/RenderList.cpp | 16 +--- Source/Engine/Renderer/RendererAllocation.h | 79 
+---------------- Source/Engine/Threading/JobSystem.cpp | 52 +++++++++-- 5 files changed, 145 insertions(+), 95 deletions(-) create mode 100644 Source/Engine/Core/Memory/SimpleHeapAllocation.h diff --git a/Source/Engine/Core/Collections/RingBuffer.h b/Source/Engine/Core/Collections/RingBuffer.h index 898c51c67..4dcc75e33 100644 --- a/Source/Engine/Core/Collections/RingBuffer.h +++ b/Source/Engine/Core/Collections/RingBuffer.h @@ -5,6 +5,7 @@ #include "Engine/Platform/Platform.h" #include "Engine/Core/Memory/Memory.h" #include "Engine/Core/Memory/Allocation.h" +#include "Engine/Core/Math/Math.h" /// /// Template for ring buffer with variable capacity. @@ -98,4 +99,10 @@ public: Memory::DestructItems(Get() + Math::Min(_front, _back), _count); _front = _back = _count = 0; } + + void Release() + { + Clear(); + _allocation.Free(); + } }; diff --git a/Source/Engine/Core/Memory/SimpleHeapAllocation.h b/Source/Engine/Core/Memory/SimpleHeapAllocation.h new file mode 100644 index 000000000..0428d1d7f --- /dev/null +++ b/Source/Engine/Core/Memory/SimpleHeapAllocation.h @@ -0,0 +1,86 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Core/Memory/Memory.h" +#include "Engine/Core/Types/BaseTypes.h" + +// Base class for custom heap-based allocators (eg. with local pooling/paging). Expects only Allocate/Free methods to be provided. +template +class SimpleHeapAllocation +{ +public: + enum { HasSwap = true }; + + template + class Data + { + T* _data = nullptr; + uintptr _size; + + public: + FORCE_INLINE Data() + { + } + + FORCE_INLINE ~Data() + { + if (_data) + This::Free(_data, _size); + } + + FORCE_INLINE T* Get() + { + return _data; + } + + FORCE_INLINE const T* Get() const + { + return _data; + } + + FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const + { + capacity = capacity ? 
capacity * 2 : InitialCapacity; + if (capacity < minCapacity) + capacity = minCapacity; + return capacity; + } + + FORCE_INLINE void Allocate(uint64 capacity) + { + _size = capacity * sizeof(T); + _data = (T*)This::Allocate(_size); + } + + FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount) + { + T* newData = capacity != 0 ? (T*)This::Allocate(capacity * sizeof(T)) : nullptr; + if (oldCount) + { + if (newCount > 0) + Memory::MoveItems(newData, _data, newCount); + Memory::DestructItems(_data, oldCount); + } + if (_data) + This::Free(_data, _size); + _data = newData; + _size = capacity * sizeof(T); + } + + FORCE_INLINE void Free() + { + if (_data) + { + This::Free(_data, _size); + _data = nullptr; + } + } + + FORCE_INLINE void Swap(Data& other) + { + ::Swap(_data, other._data); + ::Swap(_size, other._size); + } + }; +}; diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 09e7f9b62..1ca5cf244 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -30,13 +30,7 @@ namespace Array SortingBatches; Array FreeRenderList; - struct MemPoolEntry - { - void* Ptr; - uintptr Size; - }; - - Array MemPool; + Array> MemPool; CriticalSection MemPoolLocker; } @@ -147,18 +141,16 @@ void* RendererAllocation::Allocate(uintptr size) MemPoolLocker.Lock(); for (int32 i = 0; i < MemPool.Count(); i++) { - if (MemPool[i].Size == size) + if (MemPool.Get()[i].Second == size) { - result = MemPool[i].Ptr; + result = MemPool.Get()[i].First; MemPool.RemoveAt(i); break; } } MemPoolLocker.Unlock(); if (!result) - { result = Platform::Allocate(size, 16); - } return result; } @@ -201,7 +193,7 @@ void RenderList::CleanupCache() SortingIndices.Resize(0); FreeRenderList.ClearDelete(); for (auto& e : MemPool) - Platform::Free(e.Ptr); + Platform::Free(e.First); MemPool.Clear(); } diff --git a/Source/Engine/Renderer/RendererAllocation.h b/Source/Engine/Renderer/RendererAllocation.h index 
42cd5e755..c0ef46a91 100644 --- a/Source/Engine/Renderer/RendererAllocation.h +++ b/Source/Engine/Renderer/RendererAllocation.h @@ -2,86 +2,11 @@ #pragma once -#include "Engine/Core/Memory/Memory.h" -#include "Engine/Core/Types/BaseTypes.h" +#include "Engine/Core/Memory/SimpleHeapAllocation.h" -class RendererAllocation +class RendererAllocation : public SimpleHeapAllocation { public: static FLAXENGINE_API void* Allocate(uintptr size); static FLAXENGINE_API void Free(void* ptr, uintptr size); - - enum { HasSwap = true }; - - template - class Data - { - T* _data = nullptr; - uintptr _size; - - public: - FORCE_INLINE Data() - { - } - - FORCE_INLINE ~Data() - { - if (_data) - RendererAllocation::Free(_data, _size); - } - - FORCE_INLINE T* Get() - { - return _data; - } - - FORCE_INLINE const T* Get() const - { - return _data; - } - - FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const - { - capacity = capacity ? capacity * 2 : 64; - if (capacity < minCapacity) - capacity = minCapacity; - return capacity; - } - - FORCE_INLINE void Allocate(uint64 capacity) - { - _size = capacity * sizeof(T); - _data = (T*)RendererAllocation::Allocate(_size); - } - - FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount) - { - T* newData = capacity != 0 ? 
(T*)RendererAllocation::Allocate(capacity * sizeof(T)) : nullptr; - if (oldCount) - { - if (newCount > 0) - Memory::MoveItems(newData, _data, newCount); - Memory::DestructItems(_data, oldCount); - } - if (_data) - RendererAllocation::Free(_data, _size); - _data = newData; - _size = capacity * sizeof(T); - } - - FORCE_INLINE void Free() - { - if (_data) - { - RendererAllocation::Free(_data, _size); - _data = nullptr; - } - } - - FORCE_INLINE void Swap(Data& other) - { - ::Swap(_data, other._data); - ::Swap(_size, other._size); - } - }; }; diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 34e2b4b9a..847291bbc 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -6,6 +6,8 @@ #include "Engine/Platform/Thread.h" #include "Engine/Platform/ConditionVariable.h" #include "Engine/Core/Types/Span.h" +#include "Engine/Core/Types/Pair.h" +#include "Engine/Core/Memory/SimpleHeapAllocation.h" #include "Engine/Core/Collections/Dictionary.h" #include "Engine/Core/Collections/RingBuffer.h" #include "Engine/Engine/EngineService.h" @@ -18,6 +20,14 @@ #if JOB_SYSTEM_ENABLED +// Local allocator for job system memory that uses internal pooling and assumes that JobsLocker is taken (write access owned by the calling thread). 
+class JobSystemAllocation : public SimpleHeapAllocation +{ +public: + static void* Allocate(uintptr size); + static void Free(void* ptr, uintptr size); +}; + class JobSystemService : public EngineService { public: @@ -46,9 +56,9 @@ struct TIsPODType struct JobContext { volatile int64 JobsLeft; - volatile int64 DependenciesLeft; + int32 DependenciesLeft; Function Job; - Array Dependants; + Array Dependants; }; template<> @@ -80,12 +90,13 @@ public: namespace { JobSystemService JobSystemInstance; + Array> MemPool; Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {}; int32 ThreadsCount = 0; bool JobStartingOnDispatch = true; volatile int64 ExitFlag = 0; volatile int64 JobLabel = 0; - Dictionary JobContexts; + Dictionary JobContexts; ConditionVariable JobsSignal; CriticalSection JobsMutex; ConditionVariable WaitSignal; @@ -94,6 +105,28 @@ namespace RingBuffer Jobs; } +void* JobSystemAllocation::Allocate(uintptr size) +{ + void* result = nullptr; + for (int32 i = 0; i < MemPool.Count(); i++) + { + if (MemPool.Get()[i].Second == size) + { + result = MemPool.Get()[i].First; + MemPool.RemoveAt(i); + break; + } + } + if (!result) + result = Platform::Allocate(size, 16); + return result; +} + +void JobSystemAllocation::Free(void* ptr, uintptr size) +{ + MemPool.Add({ ptr, size }); +} + bool JobSystemService::Init() { ThreadsCount = Math::Min(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads)); @@ -130,6 +163,12 @@ void JobSystemService::Dispose() Threads[i] = nullptr; } } + + JobContexts.SetCapacity(0); + Jobs.Release(); + for (auto& e : MemPool) + Platform::Free(e.First); + MemPool.Clear(); } int32 JobSystemThread::Run() @@ -176,7 +215,7 @@ int32 JobSystemThread::Run() for (int64 dependant : context.Dependants) { JobContext& dependantContext = JobContexts.At(dependant); - if (Platform::InterlockedDecrement(&dependantContext.DependenciesLeft) <= 0) + if (--dependantContext.DependenciesLeft <= 0) { // Dispatch dependency when it's ready JobData dependantData; @@ 
-245,7 +284,7 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) context.DependenciesLeft = 0; JobsLocker.Lock(); - JobContexts.Add(label, context); + JobContexts.Add(label, MoveTemp(context)); for (data.Index = 0; data.Index < jobCount; data.Index++) Jobs.PushBack(data); JobsLocker.Unlock(); @@ -291,9 +330,10 @@ int64 JobSystem::Dispatch(const Function& job, Span dependen dependencyContext->Dependants.Add(label); } } - JobContexts.Add(label, context); + JobContexts.Add(label, MoveTemp(context)); if (context.DependenciesLeft == 0) { + // No dependencies left to complete so dispatch now for (data.Index = 0; data.Index < jobCount; data.Index++) Jobs.PushBack(data); } From 18c3f274f8b3b10243f39ac72934ecf812705e09 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 26 Jun 2024 18:16:58 +0200 Subject: [PATCH 185/292] Optimize Global SDF drawing with async job system --- .../Renderer/GlobalSignDistanceFieldPass.cpp | 704 +++++++++++------- .../Renderer/GlobalSignDistanceFieldPass.h | 22 +- Source/Engine/Renderer/Renderer.cpp | 2 + 3 files changed, 428 insertions(+), 300 deletions(-) diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 2d35ac09e..7a676f584 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -18,9 +18,9 @@ #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Level/Scene/SceneRendering.h" #include "Engine/Level/Actors/StaticModel.h" +#include "Engine/Threading/JobSystem.h" // Some of those constants must match in shader -// TODO: try using R8 format for Global SDF #define GLOBAL_SDF_FORMAT PixelFormat::R16_Float #define GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT 28 // The maximum amount of models to rasterize at once as a batch into Global SDF. #define GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT 2 // The maximum amount of heightfields to store in a single chunk. 
@@ -30,7 +30,7 @@ #define GLOBAL_SDF_RASTERIZE_MIP_FACTOR 4 // Global SDF mip resolution downscale factor. #define GLOBAL_SDF_MIP_GROUP_SIZE 4 #define GLOBAL_SDF_MIP_FLOODS 5 // Amount of flood fill passes for mip. -#define GLOBAL_SDF_DEBUG_CHUNKS 0 +#define GLOBAL_SDF_DEBUG_CHUNKS 0 // Toggles debug drawing of Global SDF chunks bounds including objects count label (only for the first cascade) #define GLOBAL_SDF_DEBUG_FORCE_REDRAW 0 // Forces to redraw all SDF cascades every frame #define GLOBAL_SDF_ACTOR_IS_STATIC(actor) EnumHasAllFlags(actor->GetStaticFlags(), StaticFlags::Lightmap | StaticFlags::Transform) @@ -130,13 +130,30 @@ uint32 GetHash(const RasterizeChunkKey& key) struct CascadeData { + bool Dirty; + int32 Index; + float ChunkSize; + float MaxDistance; Float3 Position; float VoxelSize; BoundingBox Bounds; + BoundingBox CullingBounds; + BoundingBox RasterizeBounds; + Vector3 OriginMin; + Vector3 OriginMax; HashSet NonEmptyChunks; HashSet StaticChunks; - FORCE_INLINE void OnSceneRenderingDirty(const BoundingBox& objectBounds) + // Cache + Dictionary Chunks; + Array RasterizeObjects; + Array ObjectsData; + Array ObjectsTextures; + Dictionary ObjectIndexToDataIndex; + HashSet PendingSDFTextures; + HashSet PendingObjectTypes; + + void OnSceneRenderingDirty(const BoundingBox& objectBounds) { if (StaticChunks.IsEmpty() || !Bounds.Intersects(objectBounds)) return; @@ -147,9 +164,8 @@ struct CascadeData Vector3::Subtract(objectBoundsCascade.Minimum, Bounds.Minimum, objectBoundsCascade.Minimum); Vector3::Clamp(objectBounds.Maximum + objectMargin, Bounds.Minimum, Bounds.Maximum, objectBoundsCascade.Maximum); Vector3::Subtract(objectBoundsCascade.Maximum, Bounds.Minimum, objectBoundsCascade.Maximum); - const float chunkSize = VoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; - const Int3 objectChunkMin(objectBoundsCascade.Minimum / chunkSize); - const Int3 objectChunkMax(objectBoundsCascade.Maximum / chunkSize); + const Int3 objectChunkMin(objectBoundsCascade.Minimum / 
ChunkSize); + const Int3 objectChunkMax(objectBoundsCascade.Maximum / ChunkSize); // Invalidate static chunks intersecting with dirty bounds RasterizeChunkKey key; @@ -181,8 +197,13 @@ public: HashSet SDFTextures; GlobalSignDistanceFieldPass::BindingData Result; + // Async objects drawing cache + Array> AsyncDrawWaitLabels; + RenderContext AsyncRenderContext; + ~GlobalSignDistanceFieldCustomBuffer() { + WaitForDrawing(); for (const auto& e : SDFTextures) { e.Item->Deleted.Unbind(this); @@ -215,6 +236,145 @@ public: } } + const float CascadesDistanceScales[4] = { 1.0f, 2.5f, 5.0f, 10.0f }; + + void GetOptions(const RenderContext& renderContext, int32& resolution, int32& cascadesCount, int32& resolutionMip, float& distance) + { + switch (Graphics::GlobalSDFQuality) + { + case Quality::Low: + resolution = 128; + cascadesCount = 2; + break; + case Quality::Medium: + resolution = 128; + cascadesCount = 3; + break; + case Quality::High: + resolution = 192; + cascadesCount = 4; + break; + case Quality::Ultra: + default: + resolution = 256; + cascadesCount = 4; + break; + } + resolutionMip = Math::DivideAndRoundUp(resolution, GLOBAL_SDF_RASTERIZE_MIP_FACTOR); + auto& giSettings = renderContext.List->Settings.GlobalIllumination; + distance = GraphicsSettings::Get()->GlobalSDFDistance; + if (giSettings.Mode == GlobalIlluminationMode::DDGI) + distance = Math::Max(distance, giSettings.Distance); + distance = Math::Min(distance, renderContext.View.Far); + } + + void DrawCascadeActors(const CascadeData& cascade); + void UpdateCascadeChunks(CascadeData& cascade); + void WriteCascadeObjects(CascadeData& cascade); + void DrawCascadeJob(int32 cascadeIndex); + + void StartDrawing(const RenderContext& renderContext, bool enableAsync = false, bool reset = false) + { + if (AsyncDrawWaitLabels.HasItems()) + return; // Already started earlier this frame + int32 resolution, cascadesCount, resolutionMip; + float distance; + GetOptions(renderContext, resolution, cascadesCount, resolutionMip, 
distance); + if (Cascades.Count() != cascadesCount || Resolution != resolution || Origin != renderContext.View.Origin) + return; // Not yet initialized + PROFILE_CPU(); + + // Calculate origin for Global SDF by shifting it towards the view direction to account for better view frustum coverage + const float distanceExtent = distance / CascadesDistanceScales[cascadesCount - 1]; + Float3 viewPosition = renderContext.View.Position; + { + Float3 viewDirection = renderContext.View.Direction; + const float cascade0Distance = distanceExtent * CascadesDistanceScales[0]; + const Vector2 viewRayHit = CollisionsHelper::LineHitsBox(viewPosition, viewPosition + viewDirection * (cascade0Distance * 2.0f), viewPosition - cascade0Distance, viewPosition + cascade0Distance); + const float viewOriginOffset = (float)viewRayHit.Y * cascade0Distance * 0.6f; + viewPosition += viewDirection * viewOriginOffset; + } + + // Setup data for rendering + if (FrameIndex++ > 128) + FrameIndex = 0; + AsyncRenderContext = renderContext; + AsyncRenderContext.View.Pass = DrawPass::GlobalSDF; + const bool useCache = !reset && !GLOBAL_SDF_DEBUG_FORCE_REDRAW && GPU_SPREAD_WORKLOAD; + static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_GROUP_SIZE == 0, "Invalid chunk size for Global SDF rasterization group size."); + const int32 rasterizeChunks = Math::CeilToInt((float)resolution / (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE); + const bool updateEveryFrame = false; // true if update all cascades every frame + const int32 maxCascadeUpdatesPerFrame = 1; // maximum cascades to update at a single frame + + // Rasterize world geometry into Global SDF + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) + { + // Reduce frequency of the updates + auto& cascade = Cascades[cascadeIndex]; + cascade.Index = cascadeIndex; + cascade.Dirty = !useCache || RenderTools::ShouldUpdateCascade(FrameIndex, cascadeIndex, cascadesCount, maxCascadeUpdatesPerFrame, updateEveryFrame); + if 
(!cascade.Dirty) + continue; + const float cascadeDistance = distanceExtent * CascadesDistanceScales[cascadeIndex]; + const float cascadeMaxDistance = cascadeDistance * 2; + const float cascadeVoxelSize = cascadeMaxDistance / (float)resolution; + const float cascadeChunkSize = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_MIP_FACTOR == 0, "Adjust chunk size to match the mip factor scale."); + const Float3 center = Float3::Floor(viewPosition / cascadeChunkSize) * cascadeChunkSize; + //const Float3 center = Float3::Zero; + BoundingBox cascadeBounds(center - cascadeDistance, center + cascadeDistance); + + // Clear cascade before rasterization + cascade.Chunks.Clear(); + // TODO: consider using for RendererAllocation Chunks and RasterizeObjects to share memory with other rendering internals (ensure to release memory after SDF draw ends) + cascade.Chunks.EnsureCapacity(rasterizeChunks * rasterizeChunks, false); + // TODO: cache RasterizeObjects size from the previous frame (for this cascade) and preallocate it here once RendererAllocation is used + cascade.RasterizeObjects.Clear(); + cascade.PendingSDFTextures.Clear(); + + // Check if cascade center has been moved + if (!(useCache && Float3::NearEqual(cascade.Position, center, cascadeVoxelSize))) + { + // TODO: optimize for moving camera (use chunkCoords scrolling) + cascade.StaticChunks.Clear(); + } + + // Setup cascade info + cascade.Position = center; + cascade.VoxelSize = cascadeVoxelSize; + cascade.ChunkSize = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + cascade.MaxDistance = cascadeMaxDistance; + cascade.Bounds = cascadeBounds; + cascade.RasterizeBounds = cascadeBounds; + cascade.RasterizeBounds.Minimum += 0.1f; // Adjust to prevent overflowing chunk keys (cascade bounds are used for clamping object bounds) + cascade.RasterizeBounds.Maximum -= 0.1f; // Adjust to prevent overflowing chunk keys (cascade bounds are used for clamping 
object bounds) + cascade.CullingBounds = cascadeBounds.MakeOffsetted(Origin); + const float objectMargin = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN; + cascade.OriginMin = -Origin - objectMargin; + cascade.OriginMax = -Origin + objectMargin; + } + if (enableAsync) + { + // Draw all dirty cascades in async (separate job for each cascade) + Function func; + func.Bind(this); + AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, cascadesCount)); + } + else + { + // Synchronized drawing in sequence + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) + DrawCascadeJob(cascadeIndex); + } + } + + void WaitForDrawing() + { + for (int64 label : AsyncDrawWaitLabels) + JobSystem::Wait(label); + AsyncDrawWaitLabels.Clear(); + } + FORCE_INLINE void OnSceneRenderingDirty(const BoundingBox& objectBounds) { for (auto& cascade : Cascades) @@ -256,9 +416,161 @@ public: namespace { - Dictionary ChunksCache; - Array RasterizeObjectsCache; - Dictionary ObjectIndexToDataIndexCache; + GlobalSignDistanceFieldCustomBuffer* Current = nullptr; + ThreadLocal CurrentCascade; +} + +void GlobalSignDistanceFieldCustomBuffer::DrawCascadeActors(const CascadeData& cascade) +{ + PROFILE_CPU(); + const BoundingBox cullingBounds = cascade.CullingBounds; + const uint32 viewMask = AsyncRenderContext.View.RenderLayersMask; + // TODO: add scene detail scale factor to PostFx settings (eg. 
to increase or decrease scene details and quality) + const float minObjectRadius = Math::Max(20.0f, cascade.VoxelSize * 2.0f); // Skip too small objects for this cascade + int32 actorsDrawn = 0; + SceneRendering::DrawCategory drawCategories[] = { SceneRendering::SceneDraw, SceneRendering::SceneDrawAsync }; + for (auto* scene : AsyncRenderContext.List->Scenes) + { + for (SceneRendering::DrawCategory drawCategory : drawCategories) + { + auto& list = scene->Actors[drawCategory]; + for (const auto& e : list) + { + if (e.Bounds.Radius >= minObjectRadius && viewMask & e.LayerMask && CollisionsHelper::BoxIntersectsSphere(cullingBounds, e.Bounds)) + { + //PROFILE_CPU_ACTOR(e.Actor); + e.Actor->Draw(AsyncRenderContext); +#if COMPILE_WITH_PROFILER + actorsDrawn++; +#endif + } + } + } + } + ZoneValue(actorsDrawn); +} + +void GlobalSignDistanceFieldCustomBuffer::UpdateCascadeChunks(CascadeData& cascade) +{ + PROFILE_CPU(); + + // Update static chunks + for (auto it = cascade.Chunks.Begin(); it.IsNotEnd(); ++it) + { + auto& e = *it; + if (e.Key.Layer != 0) + continue; + if (e.Value.Dynamic) + { + // Remove static chunk with dynamic objects + cascade.StaticChunks.Remove(e.Key); + } + else if (cascade.StaticChunks.Contains(e.Key)) + { + // Skip updating static chunk + auto key = e.Key; + while (cascade.Chunks.Remove(key)) + key.NextLayer(); + } + else + { + // Add to cache (render now but skip next frame) + cascade.StaticChunks.Add(e.Key); + } + } +} + +void GlobalSignDistanceFieldCustomBuffer::WriteCascadeObjects(CascadeData& cascade) +{ + PROFILE_CPU(); + + // Write all objects to the buffer + int32 objectsBufferCount = 0; + cascade.ObjectsData.Clear(); + cascade.ObjectsTextures.Clear(); + cascade.ObjectIndexToDataIndex.Clear(); + for (const auto& e : cascade.Chunks) + { + auto& chunk = e.Value; + for (int32 i = 0; i < chunk.ModelsCount; i++) + { + auto objectIndex = chunk.Models[i]; + if (cascade.ObjectIndexToDataIndex.ContainsKey(objectIndex)) + continue; + const auto& object 
= cascade.RasterizeObjects.Get()[objectIndex]; + + // Pick the SDF mip for the cascade + int32 mipLevelIndex = 1; + float worldUnitsPerVoxel = object.SDF->WorldUnitsPerVoxel * object.LocalToWorld.Scale.MaxValue() * 4; + const int32 mipLevels = object.SDF->Texture->MipLevels(); + while (cascade.VoxelSize > worldUnitsPerVoxel && mipLevelIndex < mipLevels) + { + mipLevelIndex++; + worldUnitsPerVoxel *= 2.0f; + } + mipLevelIndex--; + + // Add object data for the GPU buffer + uint16 dataIndex = objectsBufferCount++; + ObjectRasterizeData objectData; + Platform::MemoryClear(&objectData, sizeof(objectData)); + Matrix localToWorld, worldToLocal, volumeToWorld; + Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - Origin, localToWorld); + Matrix::Invert(localToWorld, worldToLocal); + BoundingBox localVolumeBounds(object.SDF->LocalBoundsMin, object.SDF->LocalBoundsMax); + Float3 volumeLocalBoundsExtent = localVolumeBounds.GetSize() * 0.5f; + Matrix worldToVolume = worldToLocal * Matrix::Translation(-(localVolumeBounds.Minimum + volumeLocalBoundsExtent)); + Matrix::Invert(worldToVolume, volumeToWorld); + objectData.WorldToVolume.SetMatrixTranspose(worldToVolume); + objectData.VolumeToWorld.SetMatrixTranspose(volumeToWorld); + objectData.VolumeLocalBoundsExtent = volumeLocalBoundsExtent; + objectData.VolumeToUVWMul = object.SDF->LocalToUVWMul; + objectData.VolumeToUVWAdd = object.SDF->LocalToUVWAdd + (localVolumeBounds.Minimum + volumeLocalBoundsExtent) * object.SDF->LocalToUVWMul; + objectData.MipOffset = (float)mipLevelIndex; + objectData.DecodeMul = 2.0f * object.SDF->MaxDistance; + objectData.DecodeAdd = -object.SDF->MaxDistance; + cascade.ObjectsData.Add((const byte*)&objectData, sizeof(objectData)); + cascade.ObjectsTextures.Add(object.SDF->Texture->ViewVolume()); + cascade.PendingObjectTypes.Add(object.Actor->GetTypeHandle()); + cascade.ObjectIndexToDataIndex.Add(objectIndex, dataIndex); + } + for (int32 i 
= 0; i < chunk.HeightfieldsCount; i++) + { + auto objectIndex = chunk.Heightfields[i]; + if (cascade.ObjectIndexToDataIndex.ContainsKey(objectIndex)) + continue; + const auto& object = cascade.RasterizeObjects.Get()[objectIndex]; + + // Add object data for the GPU buffer + uint16 dataIndex = objectsBufferCount++; + ObjectRasterizeData objectData; + Platform::MemoryClear(&objectData, sizeof(objectData)); + Matrix localToWorld, worldToLocal; + Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - Origin, localToWorld); + Matrix::Invert(localToWorld, worldToLocal); + objectData.WorldToVolume.SetMatrixTranspose(worldToLocal); + objectData.VolumeToWorld.SetMatrixTranspose(localToWorld); + objectData.VolumeToUVWMul = Float3(object.LocalToUV.X, 1.0f, object.LocalToUV.Y); + objectData.VolumeToUVWAdd = Float3(object.LocalToUV.Z, 0.0f, object.LocalToUV.W); + objectData.MipOffset = (float)cascade.Index * 0.5f; // Use lower-quality mip for far cascades + cascade.ObjectsData.Add((const byte*)&objectData, sizeof(objectData)); + cascade.ObjectsTextures.Add(object.Heightfield->View()); + cascade.PendingObjectTypes.Add(object.Actor->GetTypeHandle()); + cascade.ObjectIndexToDataIndex.Add(objectIndex, dataIndex); + } + } +} + +void GlobalSignDistanceFieldCustomBuffer::DrawCascadeJob(int32 cascadeIndex) +{ + auto& cascade = Cascades[cascadeIndex]; + if (!cascade.Dirty) + return; + PROFILE_CPU(); + CurrentCascade.Set(&cascade); + DrawCascadeActors(cascade); + UpdateCascadeChunks(cascade); + WriteCascadeObjects(cascade); } String GlobalSignDistanceFieldPass::ToString() const @@ -309,7 +621,7 @@ bool GlobalSignDistanceFieldPass::setupResources() // Init buffer if (!_objectsBuffer) - _objectsBuffer = New(64u * (uint32)sizeof(ObjectRasterizeData), (uint32)sizeof(ObjectRasterizeData), false, TEXT("GlobalSDF.ObjectsBuffer")); + _objectsBuffer = New(0, (uint32)sizeof(ObjectRasterizeData), false, TEXT("GlobalSDF.ObjectsBuffer")); 
// Create pipeline state GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; @@ -347,12 +659,22 @@ void GlobalSignDistanceFieldPass::Dispose() // Cleanup SAFE_DELETE(_objectsBuffer); - _objectsTextures.Resize(0); SAFE_DELETE_GPU_RESOURCE(_psDebug); _shader = nullptr; - ChunksCache.SetCapacity(0); - RasterizeObjectsCache.SetCapacity(0); - ObjectIndexToDataIndexCache.SetCapacity(0); +} + +void GlobalSignDistanceFieldPass::OnCollectDrawCalls(RenderContextBatch& renderContextBatch) +{ + // Check if Global SDF will be used this frame + PROFILE_CPU_NAMED("Global SDF"); + if (checkIfSkipPass()) + return; + RenderContext& renderContext = renderContextBatch.GetMainContext(); + if (renderContext.List->Scenes.Count() == 0) + return; + auto& sdfData = *renderContext.Buffers->GetCustomBuffer(TEXT("GlobalSignDistanceField")); + Current = &sdfData; + sdfData.StartDrawing(renderContext, renderContextBatch.EnableAsync); } bool GlobalSignDistanceFieldPass::Get(const RenderBuffers* buffers, BindingData& result) @@ -386,44 +708,19 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex PROFILE_GPU_CPU("Global SDF"); // Setup options - int32 resolution, cascadesCount; - switch (Graphics::GlobalSDFQuality) - { - case Quality::Low: - resolution = 128; - cascadesCount = 2; - break; - case Quality::Medium: - resolution = 128; - cascadesCount = 3; - break; - case Quality::High: - resolution = 192; - cascadesCount = 4; - break; - case Quality::Ultra: - default: - resolution = 256; - cascadesCount = 4; - break; - } - const int32 resolutionMip = Math::DivideAndRoundUp(resolution, GLOBAL_SDF_RASTERIZE_MIP_FACTOR); - auto& giSettings = renderContext.List->Settings.GlobalIllumination; - float distance = GraphicsSettings::Get()->GlobalSDFDistance; - if (giSettings.Mode == GlobalIlluminationMode::DDGI) - distance = Math::Max(distance, giSettings.Distance); - distance = Math::Min(distance, renderContext.View.Far); - const float 
cascadesDistanceScales[] = { 1.0f, 2.5f, 5.0f, 10.0f }; - const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1]; + int32 resolution, cascadesCount, resolutionMip; + float distance; + sdfData.GetOptions(renderContext, resolution, cascadesCount, resolutionMip, distance); + const float distanceExtent = distance / sdfData.CascadesDistanceScales[cascadesCount - 1]; // Initialize buffers - bool updated = false; + bool reset = false; if (sdfData.Cascades.Count() != cascadesCount || sdfData.Resolution != resolution) { sdfData.Cascades.Resize(cascadesCount); sdfData.Resolution = resolution; sdfData.FrameIndex = 0; - updated = true; + reset = true; auto desc = GPUTextureDescription::New3D(resolution * cascadesCount, resolution, resolution, GLOBAL_SDF_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess, 1); { GPUTexture*& texture = sdfData.Texture; @@ -463,10 +760,10 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex if (sdfData.Origin != renderContext.View.Origin) { sdfData.Origin = renderContext.View.Origin; - updated = true; + reset = true; } GPUTexture* tmpMip = nullptr; - if (updated) + if (reset) { PROFILE_GPU_CPU_NAMED("Init"); for (auto& cascade : sdfData.Cascades) @@ -480,126 +777,60 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex for (SceneRendering* scene : renderContext.List->Scenes) sdfData.ListenSceneRendering(scene); - // Calculate origin for Global SDF by shifting it towards the view direction to account for better view frustum coverage - Float3 viewPosition = renderContext.View.Position; - { - Float3 viewDirection = renderContext.View.Direction; - const float cascade0Distance = distanceExtent * cascadesDistanceScales[0]; - const Vector2 viewRayHit = CollisionsHelper::LineHitsBox(viewPosition, viewPosition + viewDirection * (cascade0Distance * 2.0f), viewPosition - cascade0Distance, viewPosition + cascade0Distance); - const float 
viewOriginOffset = (float)viewRayHit.Y * cascade0Distance * 0.6f; - viewPosition += viewDirection * viewOriginOffset; - } + // Ensure that async objects drawing ended + Current = &sdfData; + sdfData.StartDrawing(renderContext, false, reset); // (ignored if not started earlier this frame) + sdfData.WaitForDrawing(); // Rasterize world geometry into Global SDF - renderContext.View.Pass = DrawPass::GlobalSDF; - uint32 viewMask = renderContext.View.RenderLayersMask; - const bool useCache = !updated && !GLOBAL_SDF_DEBUG_FORCE_REDRAW && GPU_SPREAD_WORKLOAD; - static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_GROUP_SIZE == 0, "Invalid chunk size for Global SDF rasterization group size."); - const int32 rasterizeChunks = Math::CeilToInt((float)resolution / (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE); - auto& chunks = ChunksCache; - chunks.EnsureCapacity(rasterizeChunks * rasterizeChunks, false); bool anyDraw = false; - const bool updateEveryFrame = false; // true if update all cascades every frame - const int32 maxCascadeUpdatesPerFrame = 1; // maximum cascades to update at a single frame GPUTextureView* textureView = sdfData.Texture->ViewVolume(); GPUTextureView* textureMipView = sdfData.TextureMip->ViewVolume(); - if (sdfData.FrameIndex++ > 128) - sdfData.FrameIndex = 0; for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { - // Reduce frequency of the updates - if (useCache && !RenderTools::ShouldUpdateCascade(sdfData.FrameIndex, cascadeIndex, cascadesCount, maxCascadeUpdatesPerFrame, updateEveryFrame)) - continue; auto& cascade = sdfData.Cascades[cascadeIndex]; - const float cascadeDistance = distanceExtent * cascadesDistanceScales[cascadeIndex]; - const float cascadeMaxDistance = cascadeDistance * 2; - const float cascadeVoxelSize = cascadeMaxDistance / (float)resolution; - const float cascadeChunkSize = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; - static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % 
GLOBAL_SDF_RASTERIZE_MIP_FACTOR == 0, "Adjust chunk size to match the mip factor scale."); - const Float3 center = Float3::Floor(viewPosition / cascadeChunkSize) * cascadeChunkSize; - //const Float3 center = Float3::Zero; - BoundingBox cascadeBounds(center - cascadeDistance, center + cascadeDistance); - // TODO: add scene detail scale factor to PostFx settings (eg. to increase or decrease scene details and quality) - const float minObjectRadius = Math::Max(20.0f, cascadeVoxelSize * 2.0f); // Skip too small objects for this cascade + if (!cascade.Dirty) + continue; - // Clear cascade before rasterization + // Process all pending SDF textures tracking + for (auto& e : cascade.PendingSDFTextures) { - PROFILE_CPU_NAMED("Clear"); - chunks.Clear(); - RasterizeObjectsCache.Clear(); - _objectsBuffer->Clear(); - _objectsTextures.Clear(); - } - - // Check if cascade center has been moved - if (!(useCache && Float3::NearEqual(cascade.Position, center, cascadeVoxelSize))) - { - // TODO: optimize for moving camera (copy sdf for cached chunks) - cascade.StaticChunks.Clear(); - } - cascade.Position = center; - cascade.VoxelSize = cascadeVoxelSize; - cascade.Bounds = cascadeBounds; - - // Draw all objects from all scenes into the cascade - _objectsBufferCount = 0; - _voxelSize = cascadeVoxelSize; - _chunkSize = _voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; - _cascadeBounds = cascadeBounds; - _cascadeBounds.Minimum += 0.1f; // Adjust to prevent overflowing chunk keys (cascade bounds are used for clamping object bounds) - _cascadeBounds.Maximum -= 0.1f; // Adjust to prevent overflowing chunk keys (cascade bounds are used for clamping object bounds) - _cascadeIndex = cascadeIndex; - _sdfData = &sdfData; - const float objectMargin = _voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN; - _sdfDataOriginMin = -sdfData.Origin - objectMargin; - _sdfDataOriginMax = -sdfData.Origin + objectMargin; - { - PROFILE_CPU_NAMED("Draw"); - BoundingBox cascadeBoundsWorld = 
cascadeBounds.MakeOffsetted(sdfData.Origin); - _cascadeCullingBounds = cascadeBoundsWorld; - int32 actorsDrawn = 0; - SceneRendering::DrawCategory drawCategories[] = { SceneRendering::SceneDraw, SceneRendering::SceneDrawAsync }; - for (auto* scene : renderContext.List->Scenes) + GPUTexture* texture = e.Item; + if (Current->SDFTextures.Add(texture)) { - for (SceneRendering::DrawCategory drawCategory : drawCategories) - { - auto& list = scene->Actors[drawCategory]; - for (const auto& e : list) - { - if (e.Bounds.Radius >= minObjectRadius && viewMask & e.LayerMask && CollisionsHelper::BoxIntersectsSphere(cascadeBoundsWorld, e.Bounds)) - { - //PROFILE_CPU_ACTOR(e.Actor); - e.Actor->Draw(renderContext); - actorsDrawn++; - } - } - } + texture->Deleted.Bind(Current); + texture->ResidentMipsChanged.Bind(Current); } - ZoneValue(actorsDrawn); } + cascade.PendingSDFTextures.Clear(); + + // Process all pending object types tracking + for (auto& e : cascade.PendingObjectTypes) + sdfData.ObjectTypes.Add(e.Item); // Perform batched chunks rasterization anyDraw = true; context->ResetSR(); ModelsRasterizeData data; - data.CascadeCoordToPosMul = (Float3)cascadeBounds.GetSize() / (float)resolution; - data.CascadeCoordToPosAdd = (Float3)cascadeBounds.Minimum + cascadeVoxelSize * 0.5f; - data.MaxDistance = cascadeMaxDistance; + data.CascadeCoordToPosMul = (Float3)cascade.Bounds.GetSize() / (float)resolution; + data.CascadeCoordToPosAdd = (Float3)cascade.Bounds.Minimum + cascade.VoxelSize * 0.5f; + data.MaxDistance = cascade.MaxDistance; data.CascadeResolution = resolution; data.CascadeMipResolution = resolutionMip; data.CascadeIndex = cascadeIndex; data.CascadeMipFactor = GLOBAL_SDF_RASTERIZE_MIP_FACTOR; - data.CascadeVoxelSize = cascadeVoxelSize; + data.CascadeVoxelSize = cascade.VoxelSize; context->BindUA(0, textureView); context->BindCB(1, _cb1); - const int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE; + constexpr int32 
chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE; bool anyChunkDispatch = false; + if (!reset) { PROFILE_GPU_CPU_NAMED("Clear Chunks"); for (auto it = cascade.NonEmptyChunks.Begin(); it.IsNotEnd(); ++it) { auto& key = it->Item; - if (chunks.ContainsKey(key)) + if (cascade.Chunks.ContainsKey(key) || cascade.StaticChunks.Contains(key)) continue; // Clear empty chunk @@ -614,121 +845,21 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex { PROFILE_GPU_CPU_NAMED("Rasterize Chunks"); - // Update static chunks - for (auto it = chunks.Begin(); it.IsNotEnd(); ++it) - { - auto& e = *it; - if (e.Key.Layer != 0) - continue; - if (e.Value.Dynamic) - { - // Remove static chunk with dynamic objects - cascade.StaticChunks.Remove(e.Key); - } - else if (cascade.StaticChunks.Contains(e.Key)) - { - // Skip updating static chunk - auto key = e.Key; - while (chunks.Remove(key)) - key.NextLayer(); - } - else - { - // Add to cache (render now but skip next frame) - cascade.StaticChunks.Add(e.Key); - } - } - // Send models data to the GPU - const auto& objectIndexToDataIndex = ObjectIndexToDataIndexCache; - if (chunks.Count() != 0) + const auto& objectIndexToDataIndex = cascade.ObjectIndexToDataIndex; + GPUTextureView** objectsTextures = cascade.ObjectsTextures.Get(); + if (cascade.Chunks.Count() != 0) { - PROFILE_GPU_CPU_NAMED("Update Objects"); - auto& objectIndexToDataIndexCache = ObjectIndexToDataIndexCache; - objectIndexToDataIndexCache.Clear(); - - // Write used objects to the buffer - const auto& rasterizeObjectsCache = RasterizeObjectsCache; - for (const auto& e : chunks) - { - auto& chunk = e.Value; - for (int32 i = 0; i < chunk.ModelsCount; i++) - { - auto objectIndex = chunk.Models[i]; - if (objectIndexToDataIndexCache.ContainsKey(objectIndex)) - continue; - const auto& object = rasterizeObjectsCache.Get()[objectIndex]; - - // Pick the SDF mip for the cascade - int32 mipLevelIndex = 1; - float 
worldUnitsPerVoxel = object.SDF->WorldUnitsPerVoxel * object.LocalToWorld.Scale.MaxValue() * 4; - const int32 mipLevels = object.SDF->Texture->MipLevels(); - while (_voxelSize > worldUnitsPerVoxel && mipLevelIndex < mipLevels) - { - mipLevelIndex++; - worldUnitsPerVoxel *= 2.0f; - } - mipLevelIndex--; - - // Add object data for the GPU buffer - uint16 dataIndex = _objectsBufferCount++; - ObjectRasterizeData objectData; - Matrix localToWorld, worldToLocal, volumeToWorld; - Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorld); - Matrix::Invert(localToWorld, worldToLocal); - BoundingBox localVolumeBounds(object.SDF->LocalBoundsMin, object.SDF->LocalBoundsMax); - Float3 volumeLocalBoundsExtent = localVolumeBounds.GetSize() * 0.5f; - Matrix worldToVolume = worldToLocal * Matrix::Translation(-(localVolumeBounds.Minimum + volumeLocalBoundsExtent)); - Matrix::Invert(worldToVolume, volumeToWorld); - objectData.WorldToVolume.SetMatrixTranspose(worldToVolume); - objectData.VolumeToWorld.SetMatrixTranspose(volumeToWorld); - objectData.VolumeLocalBoundsExtent = volumeLocalBoundsExtent; - objectData.VolumeToUVWMul = object.SDF->LocalToUVWMul; - objectData.VolumeToUVWAdd = object.SDF->LocalToUVWAdd + (localVolumeBounds.Minimum + volumeLocalBoundsExtent) * object.SDF->LocalToUVWMul; - objectData.MipOffset = (float)mipLevelIndex; - objectData.DecodeMul = 2.0f * object.SDF->MaxDistance; - objectData.DecodeAdd = -object.SDF->MaxDistance; - _objectsBuffer->Write(objectData); - _objectsTextures.Add(object.SDF->Texture->ViewVolume()); - _sdfData->ObjectTypes.Add(object.Actor->GetTypeHandle()); - - // Cache the mapping - objectIndexToDataIndexCache.Add(objectIndex, dataIndex); - } - for (int32 i = 0; i < chunk.HeightfieldsCount; i++) - { - auto objectIndex = chunk.Heightfields[i]; - if (objectIndexToDataIndexCache.ContainsKey(objectIndex)) - continue; - const auto& object = 
rasterizeObjectsCache.Get()[objectIndex]; - - // Add object data for the GPU buffer - uint16 dataIndex = _objectsBufferCount++; - ObjectRasterizeData objectData; - Matrix localToWorld, worldToLocal; - Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorld); - Matrix::Invert(localToWorld, worldToLocal); - objectData.WorldToVolume.SetMatrixTranspose(worldToLocal); - objectData.VolumeToWorld.SetMatrixTranspose(localToWorld); - objectData.VolumeToUVWMul = Float3(object.LocalToUV.X, 1.0f, object.LocalToUV.Y); - objectData.VolumeToUVWAdd = Float3(object.LocalToUV.Z, 0.0f, object.LocalToUV.W); - objectData.MipOffset = (float)_cascadeIndex * 0.5f; // Use lower-quality mip for far cascades - _objectsBuffer->Write(objectData); - _objectsTextures.Add(object.Heightfield->View()); - _sdfData->ObjectTypes.Add(object.Actor->GetTypeHandle()); - - // Cache the mapping - objectIndexToDataIndexCache.Add(objectIndex, dataIndex); - } - } - - // Flush buffer + // Flush buffer but don't allocate any CPU memory by swapping Data pointer with the cascade ObjectsData + PROFILE_CPU_NAMED("Update Objects"); + _objectsBuffer->Data.Swap(cascade.ObjectsData); _objectsBuffer->Flush(context); + _objectsBuffer->Data.Swap(cascade.ObjectsData); } context->BindSR(0, _objectsBuffer->GetBuffer() ? 
_objectsBuffer->GetBuffer()->View() : nullptr); // Rasterize non-empty chunks (first layer so can override existing chunk data) - for (const auto& e : chunks) + for (const auto& e : cascade.Chunks) { if (e.Key.Layer != 0) continue; @@ -739,7 +870,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex { auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]); data.Objects[i] = objectIndex; - context->BindSR(i + 1, _objectsTextures[objectIndex]); + context->BindSR(i + 1, objectsTextures[objectIndex]); } for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) context->UnBindSR(i + 1); @@ -758,7 +889,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex { auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]); data.Objects[i] = objectIndex; - context->BindSR(i + 1, _objectsTextures[objectIndex]); + context->BindSR(i + 1, objectsTextures[objectIndex]); } for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) context->UnBindSR(i + 1); @@ -774,21 +905,21 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex int32 count = chunk.ModelsCount + chunk.HeightfieldsCount; RasterizeChunkKey tmp = e.Key; tmp.NextLayer(); - while (chunks.ContainsKey(tmp)) + while (cascade.Chunks.ContainsKey(tmp)) { - count += chunks[tmp].ModelsCount + chunks[tmp].HeightfieldsCount; + count += cascade.Chunks[tmp].ModelsCount + cascade.Chunks[tmp].HeightfieldsCount; tmp.NextLayer(); } - Float3 chunkMin = cascadeBounds.Minimum + Float3(e.Key.Coord) * cascadeChunkSize; - BoundingBox chunkBounds(chunkMin, chunkMin + cascadeChunkSize); + Float3 chunkMin = cascade.Bounds.Minimum + Float3(e.Key.Coord) * cascade.ChunkSize; + BoundingBox chunkBounds(chunkMin, chunkMin + cascade.ChunkSize); DebugDraw::DrawWireBox(chunkBounds, Color::Red, 0, false); DebugDraw::DrawText(StringUtils::ToString(count), chunkBounds.GetCenter(), 
Color::Red); } #endif } - // Rasterize non-empty chunks (additive layers so so need combine with existing chunk data) - for (const auto& e : chunks) + // Rasterize non-empty chunks (additive layers so need combine with existing chunk data) + for (const auto& e : cascade.Chunks) { if (e.Key.Layer == 0) continue; @@ -802,7 +933,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex { auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]); data.Objects[i] = objectIndex; - context->BindSR(i + 1, _objectsTextures[objectIndex]); + context->BindSR(i + 1, objectsTextures[objectIndex]); } for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) context->UnBindSR(i + 1); @@ -818,7 +949,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex { auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]); data.Objects[i] = objectIndex; - context->BindSR(i + 1, _objectsTextures[objectIndex]); + context->BindSR(i + 1, objectsTextures[objectIndex]); } for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) context->UnBindSR(i + 1); @@ -831,13 +962,13 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } // Generate mip out of cascade (empty chunks have distance value 1 which is incorrect so mip will be used as a fallback - lower res) - if (updated || anyChunkDispatch) + if (reset || anyChunkDispatch) { PROFILE_GPU_CPU_NAMED("Generate Mip"); context->ResetUA(); const int32 mipDispatchGroups = Math::DivideAndRoundUp(resolutionMip, GLOBAL_SDF_MIP_GROUP_SIZE); static_assert((GLOBAL_SDF_MIP_FLOODS % 2) == 1, "Invalid Global SDF mip flood iterations count."); - int32 floodFillIterations = chunks.Count() == 0 ? 1 : GLOBAL_SDF_MIP_FLOODS; + int32 floodFillIterations = cascade.Chunks.Count() == 0 ? 
1 : GLOBAL_SDF_MIP_FLOODS; if (!tmpMip) { // Use temporary texture to flood fill mip @@ -850,7 +981,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex GPUTextureView* tmpMipView = tmpMip->ViewVolume(); // Tex -> Mip - // TODO: use push constants on DX12/Vulkan to provide those 4 uints to the shader data.GenerateMipTexResolution = data.CascadeResolution; data.GenerateMipCoordScale = data.CascadeMipFactor; data.GenerateMipTexOffsetX = data.CascadeIndex * data.CascadeResolution; @@ -903,7 +1033,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { auto& cascade = sdfData.Cascades[cascadeIndex]; - const float cascadeDistance = distanceExtent * cascadesDistanceScales[cascadeIndex]; + const float cascadeDistance = distanceExtent * sdfData.CascadesDistanceScales[cascadeIndex]; const float cascadeMaxDistance = cascadeDistance * 2; const float cascadeVoxelSize = cascadeMaxDistance / (float)resolution; const Float3 center = cascade.Position; @@ -952,26 +1082,33 @@ void GlobalSignDistanceFieldPass::RenderDebug(RenderContext& renderContext, GPUC context->DrawFullscreenTriangle(); } +void GlobalSignDistanceFieldPass::GetCullingData(BoundingBox& bounds) const +{ + auto& cascade = *CurrentCascade.Get(); + bounds = cascade.CullingBounds; +} + void GlobalSignDistanceFieldPass::RasterizeModelSDF(Actor* actor, const ModelBase::SDFData& sdf, const Transform& localToWorld, const BoundingBox& objectBounds) { if (!sdf.Texture) return; + auto& cascade = *CurrentCascade.Get(); const bool dynamic = !GLOBAL_SDF_ACTOR_IS_STATIC(actor); const int32 residentMipLevels = sdf.Texture->ResidentMipLevels(); if (residentMipLevels != 0) { // Setup object data BoundingBox objectBoundsCascade; - Vector3::Clamp(objectBounds.Minimum + _sdfDataOriginMin, _cascadeBounds.Minimum, _cascadeBounds.Maximum, objectBoundsCascade.Minimum); - 
Vector3::Subtract(objectBoundsCascade.Minimum, _cascadeBounds.Minimum, objectBoundsCascade.Minimum); - Vector3::Clamp(objectBounds.Maximum + _sdfDataOriginMax, _cascadeBounds.Minimum, _cascadeBounds.Maximum, objectBoundsCascade.Maximum); - Vector3::Subtract(objectBoundsCascade.Maximum, _cascadeBounds.Minimum, objectBoundsCascade.Maximum); - const Int3 objectChunkMin(objectBoundsCascade.Minimum / _chunkSize); - const Int3 objectChunkMax(objectBoundsCascade.Maximum / _chunkSize); + Vector3::Clamp(objectBounds.Minimum + cascade.OriginMin, cascade.RasterizeBounds.Minimum, cascade.RasterizeBounds.Maximum, objectBoundsCascade.Minimum); + Vector3::Subtract(objectBoundsCascade.Minimum, cascade.RasterizeBounds.Minimum, objectBoundsCascade.Minimum); + Vector3::Clamp(objectBounds.Maximum + cascade.OriginMax, cascade.RasterizeBounds.Minimum, cascade.RasterizeBounds.Maximum, objectBoundsCascade.Maximum); + Vector3::Subtract(objectBoundsCascade.Maximum, cascade.RasterizeBounds.Minimum, objectBoundsCascade.Maximum); + const Int3 objectChunkMin(objectBoundsCascade.Minimum / cascade.ChunkSize); + const Int3 objectChunkMax(objectBoundsCascade.Maximum / cascade.ChunkSize); // Add object data - const uint16 dataIndex = RasterizeObjectsCache.Count(); - auto& data = RasterizeObjectsCache.AddOne(); + const uint16 dataIndex = cascade.RasterizeObjects.Count(); + auto& data = cascade.RasterizeObjects.AddOne(); data.Actor = actor; data.SDF = &sdf; data.LocalToWorld = localToWorld; @@ -979,7 +1116,7 @@ void GlobalSignDistanceFieldPass::RasterizeModelSDF(Actor* actor, const ModelBas // Inject object into the intersecting cascade chunks RasterizeChunkKey key; - auto& chunks = ChunksCache; + auto& chunks = cascade.Chunks; for (key.Coord.Z = objectChunkMin.Z; key.Coord.Z <= objectChunkMax.Z; key.Coord.Z++) { for (key.Coord.Y = objectChunkMin.Y; key.Coord.Y <= objectChunkMax.Y; key.Coord.Y++) @@ -1005,11 +1142,9 @@ void GlobalSignDistanceFieldPass::RasterizeModelSDF(Actor* actor, const ModelBas } 
// Track streaming for textures used in static chunks to invalidate cache - if (!dynamic && residentMipLevels != sdf.Texture->MipLevels() && !_sdfData->SDFTextures.Contains(sdf.Texture)) + if (!dynamic && residentMipLevels != sdf.Texture->MipLevels() && !Current->SDFTextures.Contains(sdf.Texture)) { - sdf.Texture->Deleted.Bind(_sdfData); - sdf.Texture->ResidentMipsChanged.Bind(_sdfData); - _sdfData->SDFTextures.Add(sdf.Texture); + cascade.PendingSDFTextures.Add(sdf.Texture); } } @@ -1017,22 +1152,23 @@ void GlobalSignDistanceFieldPass::RasterizeHeightfield(Actor* actor, GPUTexture* { if (!heightfield) return; + auto& cascade = *CurrentCascade.Get(); const bool dynamic = !GLOBAL_SDF_ACTOR_IS_STATIC(actor); const int32 residentMipLevels = heightfield->ResidentMipLevels(); if (residentMipLevels != 0) { // Setup object data BoundingBox objectBoundsCascade; - Vector3::Clamp(objectBounds.Minimum + _sdfDataOriginMin, _cascadeBounds.Minimum, _cascadeBounds.Maximum, objectBoundsCascade.Minimum); - Vector3::Subtract(objectBoundsCascade.Minimum, _cascadeBounds.Minimum, objectBoundsCascade.Minimum); - Vector3::Clamp(objectBounds.Maximum + _sdfDataOriginMax, _cascadeBounds.Minimum, _cascadeBounds.Maximum, objectBoundsCascade.Maximum); - Vector3::Subtract(objectBoundsCascade.Maximum, _cascadeBounds.Minimum, objectBoundsCascade.Maximum); - const Int3 objectChunkMin(objectBoundsCascade.Minimum / _chunkSize); - const Int3 objectChunkMax(objectBoundsCascade.Maximum / _chunkSize); + Vector3::Clamp(objectBounds.Minimum + cascade.OriginMin, cascade.RasterizeBounds.Minimum, cascade.RasterizeBounds.Maximum, objectBoundsCascade.Minimum); + Vector3::Subtract(objectBoundsCascade.Minimum, cascade.RasterizeBounds.Minimum, objectBoundsCascade.Minimum); + Vector3::Clamp(objectBounds.Maximum + cascade.OriginMax, cascade.RasterizeBounds.Minimum, cascade.RasterizeBounds.Maximum, objectBoundsCascade.Maximum); + Vector3::Subtract(objectBoundsCascade.Maximum, cascade.RasterizeBounds.Minimum, 
objectBoundsCascade.Maximum); + const Int3 objectChunkMin(objectBoundsCascade.Minimum / cascade.ChunkSize); + const Int3 objectChunkMax(objectBoundsCascade.Maximum / cascade.ChunkSize); // Add object data - const uint16 dataIndex = RasterizeObjectsCache.Count(); - auto& data = RasterizeObjectsCache.AddOne(); + const uint16 dataIndex = cascade.RasterizeObjects.Count(); + auto& data = cascade.RasterizeObjects.AddOne(); data.Actor = actor; data.Heightfield = heightfield; data.LocalToWorld = localToWorld; @@ -1041,7 +1177,7 @@ void GlobalSignDistanceFieldPass::RasterizeHeightfield(Actor* actor, GPUTexture* // Inject object into the intersecting cascade chunks RasterizeChunkKey key; - auto& chunks = ChunksCache; + auto& chunks = cascade.Chunks; for (key.Coord.Z = objectChunkMin.Z; key.Coord.Z <= objectChunkMax.Z; key.Coord.Z++) { for (key.Coord.Y = objectChunkMin.Y; key.Coord.Y <= objectChunkMax.Y; key.Coord.Y++) @@ -1067,10 +1203,8 @@ void GlobalSignDistanceFieldPass::RasterizeHeightfield(Actor* actor, GPUTexture* } // Track streaming for textures used in static chunks to invalidate cache - if (!dynamic && residentMipLevels != heightfield->MipLevels() && !_sdfData->SDFTextures.Contains(heightfield)) + if (!dynamic && residentMipLevels != heightfield->MipLevels() && !Current->SDFTextures.Contains(heightfield)) { - heightfield->Deleted.Bind(_sdfData); - heightfield->ResidentMipsChanged.Bind(_sdfData); - _sdfData->SDFTextures.Add(heightfield); + cascade.PendingSDFTextures.Add(heightfield); } } diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h index 71ee97bea..104aae790 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h @@ -39,20 +39,15 @@ private: GPUShaderProgramCS* _csGenerateMip = nullptr; GPUConstantBuffer* _cb0 = nullptr; GPUConstantBuffer* _cb1 = nullptr; - - // Rasterization cache class DynamicStructuredBuffer* 
_objectsBuffer = nullptr; - Array _objectsTextures; - uint16 _objectsBufferCount; - int32 _cascadeIndex; - float _voxelSize, _chunkSize; - BoundingBox _cascadeBounds; - BoundingBox _cascadeCullingBounds; - class GlobalSignDistanceFieldCustomBuffer* _sdfData; - Vector3 _sdfDataOriginMin; - Vector3 _sdfDataOriginMax; public: + /// + /// Calls drawing scene objects in async early in the frame. + /// + /// The rendering context batch. + void OnCollectDrawCalls(RenderContextBatch& renderContextBatch); + /// /// Gets the Global SDF (only if enabled in Graphics Settings). /// @@ -78,10 +73,7 @@ public: /// The output buffer. void RenderDebug(RenderContext& renderContext, GPUContext* context, GPUTexture* output); - void GetCullingData(BoundingBox& bounds) const - { - bounds = _cascadeCullingBounds; - } + void GetCullingData(BoundingBox& bounds) const; // Rasterize Model SDF into the Global SDF. Call it from actor Draw() method during DrawPass::GlobalSDF. void RasterizeModelSDF(Actor* actor, const ModelBase::SDFData& sdf, const Transform& localToWorld, const BoundingBox& objectBounds); diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index b33e3ad47..d6fbb17cc 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -409,6 +409,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont JobSystem::SetJobStartingOnDispatch(false); task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDraw); task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDrawAsync); + if (setup.UseGlobalSDF) + GlobalSignDistanceFieldPass::Instance()->OnCollectDrawCalls(renderContextBatch); if (setup.UseGlobalSurfaceAtlas) GlobalSurfaceAtlasPass::Instance()->OnCollectDrawCalls(renderContextBatch); From 91d3216a005b4179ce4448d2537d80ae72ad9126 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 26 Jun 2024 18:19:36 +0200 Subject: [PATCH 186/292] Fix 
crash on shutdown when physical material asset was left alone --- Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index 80d264857..8bdb87122 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -4476,6 +4476,8 @@ void PhysicsBackend::DestroyController(void* controller) void PhysicsBackend::DestroyMaterial(void* material) { + if (!PhysX) + return; // Skip when called by Content unload after Physics is disposed ASSERT_LOW_LAYER(material); auto materialPhysX = (PxMaterial*)material; materialPhysX->userData = nullptr; @@ -4486,6 +4488,8 @@ void PhysicsBackend::DestroyMaterial(void* material) void PhysicsBackend::DestroyObject(void* object) { + if (!PhysX) + return; // Skip when called by Content unload after Physics is disposed ASSERT_LOW_LAYER(object); auto objectPhysX = (PxBase*)object; FlushLocker.Lock(); From 8eaa635385f5bf96ad6d9c024e57684963a5f2bf Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 26 Jun 2024 18:19:48 +0200 Subject: [PATCH 187/292] Minor tweaks to GlobalSA --- .../Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index a64a06635..e76c19b4e 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -179,7 +179,7 @@ public: void ClearObjects() { - WaitForDrawActors(); + WaitForDrawing(); CulledObjectsCounterIndex = -1; CulledObjectsUsageHistory.Clear(); LastFrameAtlasDefragmentation = Engine::FrameCount; @@ -251,7 +251,7 @@ public: } } - void StartDrawActors(const RenderContext& renderContext, bool enableAsync = false) + void StartDrawing(const RenderContext& 
renderContext, bool enableAsync = false) { if (AsyncDrawWaitLabels.HasItems()) return; // Already started earlier this frame @@ -260,6 +260,7 @@ public: GetOptions(renderContext, resolution, distance); if (Resolution != resolution) return; // Not yet initialized + PROFILE_CPU(); const auto currentFrame = Engine::FrameCount; { // Perform atlas defragmentation if needed @@ -323,7 +324,7 @@ public: } } - void WaitForDrawActors() + void WaitForDrawing() { for (int64 label : AsyncDrawWaitLabels) JobSystem::Wait(label); @@ -655,7 +656,7 @@ void GlobalSurfaceAtlasPass::Dispose() void GlobalSurfaceAtlasPass::OnCollectDrawCalls(RenderContextBatch& renderContextBatch) { // Check if Global Surface Atlas will be used this frame - PROFILE_GPU_CPU_NAMED("Global Surface Atlas"); + PROFILE_CPU_NAMED("Global Surface Atlas"); if (checkIfSkipPass()) return; RenderContext& renderContext = renderContextBatch.GetMainContext(); @@ -668,7 +669,7 @@ void GlobalSurfaceAtlasPass::OnCollectDrawCalls(RenderContextBatch& renderContex return; auto& surfaceAtlasData = *renderContext.Buffers->GetCustomBuffer(TEXT("GlobalSurfaceAtlas")); _surfaceAtlasData = &surfaceAtlasData; - surfaceAtlasData.StartDrawActors(renderContext, renderContextBatch.EnableAsync); + surfaceAtlasData.StartDrawing(renderContext, renderContextBatch.EnableAsync); } bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* context, BindingData& result) @@ -739,8 +740,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Ensure that async objects drawing ended _surfaceAtlasData = &surfaceAtlasData; - surfaceAtlasData.StartDrawActors(renderContext); // (ignored if not started earlier this frame) - surfaceAtlasData.WaitForDrawActors(); + surfaceAtlasData.StartDrawing(renderContext); // (ignored if not started earlier this frame) + surfaceAtlasData.WaitForDrawing(); // Utility for writing into tiles vertex buffer const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * 
resolutionInv); From 7b5edc363a7b064a7c233e92d757e735247e061a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 27 Jun 2024 09:29:09 +0200 Subject: [PATCH 188/292] Fix GLobalSDF update when not using workload spread and add dispatches count to profiler zone data --- .../Renderer/GlobalSignDistanceFieldPass.cpp | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 7a676f584..8e176f849 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -300,10 +300,10 @@ public: FrameIndex = 0; AsyncRenderContext = renderContext; AsyncRenderContext.View.Pass = DrawPass::GlobalSDF; - const bool useCache = !reset && !GLOBAL_SDF_DEBUG_FORCE_REDRAW && GPU_SPREAD_WORKLOAD; + const bool useCache = !reset && !GLOBAL_SDF_DEBUG_FORCE_REDRAW; static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_GROUP_SIZE == 0, "Invalid chunk size for Global SDF rasterization group size."); const int32 rasterizeChunks = Math::CeilToInt((float)resolution / (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE); - const bool updateEveryFrame = false; // true if update all cascades every frame + const bool updateEveryFrame = !GPU_SPREAD_WORKLOAD; // true if update all cascades every frame const int32 maxCascadeUpdatesPerFrame = 1; // maximum cascades to update at a single frame // Rasterize world geometry into Global SDF @@ -462,19 +462,19 @@ void GlobalSignDistanceFieldCustomBuffer::UpdateCascadeChunks(CascadeData& casca continue; if (e.Value.Dynamic) { - // Remove static chunk with dynamic objects + // Remove static chunk if it contains any dynamic object cascade.StaticChunks.Remove(e.Key); } else if (cascade.StaticChunks.Contains(e.Key)) { - // Skip updating static chunk + // Remove chunk from update since it's static auto key = e.Key; while (cascade.Chunks.Remove(key)) 
key.NextLayer(); } else { - // Add to cache (render now but skip next frame) + // Add to static cache (render now but skip next frame) cascade.StaticChunks.Add(e.Key); } } @@ -823,7 +823,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex context->BindUA(0, textureView); context->BindCB(1, _cb1); constexpr int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE; - bool anyChunkDispatch = false; + int32 chunkDispatches = 0; if (!reset) { PROFILE_GPU_CPU_NAMED("Clear Chunks"); @@ -838,9 +838,10 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex data.ChunkCoord = key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; context->UpdateCB(_cb1, &data); context->Dispatch(_csClearChunk, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); - anyChunkDispatch = true; + chunkDispatches++; // TODO: don't stall with UAV barrier on D3D12/Vulkan if UAVs don't change between dispatches } + ZoneValue(chunkDispatches); } { PROFILE_GPU_CPU_NAMED("Rasterize Chunks"); @@ -879,7 +880,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex context->UpdateCB(_cb1, &data); auto cs = data.ObjectsCount != 0 ? _csRasterizeModel0 : _csClearChunk; // Terrain-only chunk can be quickly cleared context->Dispatch(cs, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); - anyChunkDispatch = true; + chunkDispatches++; // TODO: don't stall with UAV barrier on D3D12/Vulkan if UAVs don't change between dispatches (maybe cache per-shader write/read flags for all UAVs?) 
if (chunk.HeightfieldsCount != 0) @@ -896,6 +897,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex data.ObjectsCount = chunk.HeightfieldsCount; context->UpdateCB(_cb1, &data); context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); + chunkDispatches++; } #if GLOBAL_SDF_DEBUG_CHUNKS @@ -940,6 +942,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex data.ObjectsCount = chunk.ModelsCount; context->UpdateCB(_cb1, &data); context->Dispatch(_csRasterizeModel1, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); + chunkDispatches++; } if (chunk.HeightfieldsCount != 0) @@ -956,13 +959,15 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex data.ObjectsCount = chunk.HeightfieldsCount; context->UpdateCB(_cb1, &data); context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); + chunkDispatches++; } - anyChunkDispatch = true; } + + ZoneValue(chunkDispatches); } // Generate mip out of cascade (empty chunks have distance value 1 which is incorrect so mip will be used as a fallback - lower res) - if (reset || anyChunkDispatch) + if (reset || chunkDispatches != 0) { PROFILE_GPU_CPU_NAMED("Generate Mip"); context->ResetUA(); From 138e17508b4334f40cc614bf8b7c28570b361129 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 27 Jun 2024 21:03:52 +0200 Subject: [PATCH 189/292] Add profile event to hardware instancing building and insert draw count into profiler zone data --- Source/Engine/Renderer/RenderList.cpp | 69 ++++++++++++++------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 1ca5cf244..bf38675c7 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -691,7 +691,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& 
renderContext, DrawCallsL // Prepare instance buffer if (useInstancing) { - // Prepare buffer memory + PROFILE_CPU_NAMED("Build Instancing"); int32 instancedBatchesCount = 0; for (int32 i = 0; i < list.Batches.Count(); i++) { @@ -705,49 +705,50 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL if (batch.Instances.Count() > 1) instancedBatchesCount += batch.Instances.Count(); } - if (instancedBatchesCount == 0) + if (instancedBatchesCount != 0) { - // Faster path if none of the draw batches requires instancing - useInstancing = false; - goto DRAW; - } - _instanceBuffer.Clear(); - _instanceBuffer.Data.Resize(instancedBatchesCount * sizeof(InstanceData)); - auto instanceData = (InstanceData*)_instanceBuffer.Data.Get(); + _instanceBuffer.Clear(); + _instanceBuffer.Data.Resize(instancedBatchesCount * sizeof(InstanceData)); + auto instanceData = (InstanceData*)_instanceBuffer.Data.Get(); - // Write to instance buffer - for (int32 i = 0; i < list.Batches.Count(); i++) - { - auto& batch = batchesData[i]; - if (batch.BatchSize > 1) + // Write to instance buffer + for (int32 i = 0; i < list.Batches.Count(); i++) { - IMaterial::InstancingHandler handler; - drawCallsData[listData[batch.StartIndex]].Material->CanUseInstancing(handler); - for (int32 j = 0; j < batch.BatchSize; j++) + auto& batch = batchesData[i]; + if (batch.BatchSize > 1) { - auto& drawCall = drawCallsData[listData[batch.StartIndex + j]]; - handler.WriteDrawCall(instanceData, drawCall); - instanceData++; + IMaterial::InstancingHandler handler; + drawCallsData[listData[batch.StartIndex]].Material->CanUseInstancing(handler); + for (int32 j = 0; j < batch.BatchSize; j++) + { + auto& drawCall = drawCallsData[listData[batch.StartIndex + j]]; + handler.WriteDrawCall(instanceData, drawCall); + instanceData++; + } } } - } - for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++) - { - auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; - if 
(batch.Instances.Count() > 1) + for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++) { - Platform::MemoryCopy(instanceData, batch.Instances.Get(), batch.Instances.Count() * sizeof(InstanceData)); - instanceData += batch.Instances.Count(); + auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; + if (batch.Instances.Count() > 1) + { + Platform::MemoryCopy(instanceData, batch.Instances.Get(), batch.Instances.Count() * sizeof(InstanceData)); + instanceData += batch.Instances.Count(); + } } - } - // Upload data - _instanceBuffer.Flush(context); + // Upload data + _instanceBuffer.Flush(context); + } + else + { + // No batches so no instancing + useInstancing = false; + } } -DRAW: - // Execute draw calls + int32 draws = list.Batches.Count(); MaterialBase::BindParameters bindParams(context, renderContext); bindParams.Input = input; bindParams.BindViewData(); @@ -856,6 +857,7 @@ DRAW: } } } + draws += list.PreBatchedDrawCalls.Count(); } else { @@ -909,6 +911,7 @@ DRAW: context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3), drawCall.Geometry.VertexBuffersOffsets); context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex); } + draws += batch.Instances.Count(); } if (list.Batches.IsEmpty() && list.Indices.Count() != 0) { @@ -931,8 +934,10 @@ DRAW: context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex); } } + draws += list.Indices.Count(); } } + ZoneValue(draws); } void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey) From e265b760c1c13311f1c28db9397d55457ce30bf1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 28 Jun 2024 08:50:14 +0200 Subject: [PATCH 190/292] USe local var to simplify code and fix `MaterialShaderDataPerView` struct to use new alignment for constant buffers data --- .../Graphics/Materials/MaterialShader.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) 
diff --git a/Source/Engine/Graphics/Materials/MaterialShader.cpp b/Source/Engine/Graphics/Materials/MaterialShader.cpp index b3512c5c8..beaedc324 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShader.cpp @@ -20,7 +20,7 @@ #include "DeformableMaterialShader.h" #include "VolumeParticleMaterialShader.h" -PACK_STRUCT(struct MaterialShaderDataPerView { +GPU_CB_STRUCT(MaterialShaderDataPerView { Matrix ViewMatrix; Matrix ViewProjectionMatrix; Matrix PrevViewProjectionMatrix; @@ -73,20 +73,20 @@ void IMaterial::BindParameters::BindViewData() } // Setup data + const auto& view = RenderContext.View; MaterialShaderDataPerView cb; - int aa1 = sizeof(MaterialShaderDataPerView); - Matrix::Transpose(RenderContext.View.Frustum.GetMatrix(), cb.ViewProjectionMatrix); - Matrix::Transpose(RenderContext.View.View, cb.ViewMatrix); - Matrix::Transpose(RenderContext.View.PrevViewProjection, cb.PrevViewProjectionMatrix); - Matrix::Transpose(RenderContext.View.MainViewProjection, cb.MainViewProjectionMatrix); - cb.MainScreenSize = RenderContext.View.MainScreenSize; - cb.ViewPos = RenderContext.View.Position; - cb.ViewFar = RenderContext.View.Far; - cb.ViewDir = RenderContext.View.Direction; + Matrix::Transpose(view.Frustum.GetMatrix(), cb.ViewProjectionMatrix); + Matrix::Transpose(view.View, cb.ViewMatrix); + Matrix::Transpose(view.PrevViewProjection, cb.PrevViewProjectionMatrix); + Matrix::Transpose(view.MainViewProjection, cb.MainViewProjectionMatrix); + cb.MainScreenSize = view.MainScreenSize; + cb.ViewPos = view.Position; + cb.ViewFar = view.Far; + cb.ViewDir = view.Direction; cb.TimeParam = TimeParam; - cb.ViewInfo = RenderContext.View.ViewInfo; - cb.ScreenSize = RenderContext.View.ScreenSize; - cb.TemporalAAJitter = RenderContext.View.TemporalAAJitter; + cb.ViewInfo = view.ViewInfo; + cb.ScreenSize = view.ScreenSize; + cb.TemporalAAJitter = view.TemporalAAJitter; // Update constants 
GPUContext->UpdateCB(PerViewConstants, &cb); From dbbc2b70bfccecfe26772d64ef0059dc9d7b86a2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 28 Jun 2024 09:43:18 +0200 Subject: [PATCH 191/292] Disable GlobalSurfaceAtlas on Mac due to GPU crashes in larges scenes --- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 3fd2060d9..4d9e1c1c1 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -426,6 +426,9 @@ bool GlobalSurfaceAtlasPass::Init() // Check platform support const auto device = GPUDevice::Instance; _supported = device->GetFeatureLevel() >= FeatureLevel::SM5 && device->Limits.HasCompute && device->Limits.HasTypedUAVLoad; +#if PLATFORM_APPLE_FAMILY + _supported = false; // Vulkan over Metal has some issues in complex scenes with DDGI +#endif return false; } From 7f482219ef286f8ab4d8012c9dbed9b96e2413e8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 28 Jun 2024 09:51:29 +0200 Subject: [PATCH 192/292] Fix generating project files for VSCode on Mac without .NET x64 SDK installed --- Source/Tools/Flax.Build/Build/Builder.Projects.cs | 4 +--- .../VisualStudioCode/VisualStudioCodeProjectGenerator.cs | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Source/Tools/Flax.Build/Build/Builder.Projects.cs b/Source/Tools/Flax.Build/Build/Builder.Projects.cs index c928861eb..aa78cdb27 100644 --- a/Source/Tools/Flax.Build/Build/Builder.Projects.cs +++ b/Source/Tools/Flax.Build/Build/Builder.Projects.cs @@ -179,8 +179,7 @@ namespace Flax.Build using (new ProfileEventScope("GenerateProjects")) { // Pick the project format - HashSet projectFormats = new HashSet(); - + var projectFormats = new HashSet(); if (Configuration.ProjectFormatVS2022) projectFormats.Add(ProjectFormat.VisualStudio2022); if 
(Configuration.ProjectFormatVS2019) @@ -195,7 +194,6 @@ namespace Flax.Build projectFormats.Add(ProjectFormat.VisualStudio2022); if (!string.IsNullOrEmpty(Configuration.ProjectFormatCustom)) projectFormats.Add(ProjectFormat.Custom); - if (projectFormats.Count == 0) projectFormats.Add(Platform.BuildPlatform.DefaultProjectFormat); diff --git a/Source/Tools/Flax.Build/Projects/VisualStudioCode/VisualStudioCodeProjectGenerator.cs b/Source/Tools/Flax.Build/Projects/VisualStudioCode/VisualStudioCodeProjectGenerator.cs index 284b46e49..1830b8efe 100644 --- a/Source/Tools/Flax.Build/Projects/VisualStudioCode/VisualStudioCodeProjectGenerator.cs +++ b/Source/Tools/Flax.Build/Projects/VisualStudioCode/VisualStudioCodeProjectGenerator.cs @@ -553,6 +553,7 @@ namespace Flax.Build.Projects.VisualStudioCode { var toolchain = platform.GetToolchain(architecture); var targetBuildOptions = Builder.GetBuildOptions(target, platform, toolchain, architecture, configuration, Globals.Root); + targetBuildOptions.Flags |= NativeCpp.BuildFlags.GenerateProject; var modules = Builder.CollectModules(rules, platform, target, targetBuildOptions, toolchain, architecture, configuration); foreach (var module in modules) { From 516ed3e9a0c2ac3e19611cad99709f12a7346d19 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 28 Jun 2024 21:21:17 +0200 Subject: [PATCH 193/292] Cleanup some rendering code --- Source/Engine/Foliage/Foliage.cpp | 1 - Source/Engine/Graphics/Models/Mesh.cpp | 12 +++--------- Source/Engine/Graphics/Models/SkinnedMesh.cpp | 8 ++------ Source/Engine/Graphics/RenderTools.h | 5 +++++ Source/Engine/Level/Actors/Skybox.cpp | 5 +++-- Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp | 3 ++- Source/Engine/Renderer/RenderList.cpp | 2 +- Source/Engine/Terrain/TerrainChunk.cpp | 5 +++-- Source/Engine/UI/TextRender.cpp | 7 ++----- Source/Shaders/MaterialCommon.hlsl | 9 +-------- 10 files changed, 22 insertions(+), 35 deletions(-) diff --git a/Source/Engine/Foliage/Foliage.cpp 
b/Source/Engine/Foliage/Foliage.cpp index 39451207d..562a8b706 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -490,7 +490,6 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr batch.DrawCall.World.SetRow4(Float4(firstInstance.InstanceOrigin, 1.0f)); batch.DrawCall.Surface.PrevWorld = batch.DrawCall.World; batch.DrawCall.Surface.GeometrySize = mesh.GetBox().GetSize(); - batch.DrawCall.Surface.Skinning = nullptr; batch.DrawCall.WorldDeterminantSign = 1; if (EnumHasAnyFlags(drawModes, DrawPass::Forward)) diff --git a/Source/Engine/Graphics/Models/Mesh.cpp b/Source/Engine/Graphics/Models/Mesh.cpp index 39439af5c..fda0550ba 100644 --- a/Source/Engine/Graphics/Models/Mesh.cpp +++ b/Source/Engine/Graphics/Models/Mesh.cpp @@ -407,11 +407,7 @@ void Mesh::Draw(const RenderContext& renderContext, MaterialBase* material, cons drawCall.ObjectRadius = (float)_sphere.Radius * drawCall.World.GetScaleVector().GetAbsolute().MaxValue(); drawCall.Surface.GeometrySize = _box.GetSize(); drawCall.Surface.PrevWorld = world; - drawCall.Surface.Lightmap = nullptr; - drawCall.Surface.LightmapUVsArea = Rectangle::Empty; - drawCall.Surface.Skinning = nullptr; - drawCall.Surface.LODDitherFactor = 0.0f; - drawCall.WorldDeterminantSign = Math::FloatSelect(world.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = perInstanceRandom; #if USE_EDITOR const ViewMode viewMode = renderContext.View.Mode; @@ -477,9 +473,8 @@ void Mesh::Draw(const RenderContext& renderContext, const DrawInfo& info, float drawCall.Surface.PrevWorld = info.DrawState->PrevWorld; drawCall.Surface.Lightmap = (info.Flags & StaticFlags::Lightmap) != StaticFlags::None ? info.Lightmap : nullptr; drawCall.Surface.LightmapUVsArea = info.LightmapUVs ? 
*info.LightmapUVs : Rectangle::Empty; - drawCall.Surface.Skinning = nullptr; drawCall.Surface.LODDitherFactor = lodDitherFactor; - drawCall.WorldDeterminantSign = Math::FloatSelect(drawCall.World.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = info.PerInstanceRandom; #if USE_EDITOR const ViewMode viewMode = renderContext.View.Mode; @@ -539,9 +534,8 @@ void Mesh::Draw(const RenderContextBatch& renderContextBatch, const DrawInfo& in drawCall.Surface.PrevWorld = info.DrawState->PrevWorld; drawCall.Surface.Lightmap = (info.Flags & StaticFlags::Lightmap) != StaticFlags::None ? info.Lightmap : nullptr; drawCall.Surface.LightmapUVsArea = info.LightmapUVs ? *info.LightmapUVs : Rectangle::Empty; - drawCall.Surface.Skinning = nullptr; drawCall.Surface.LODDitherFactor = lodDitherFactor; - drawCall.WorldDeterminantSign = Math::FloatSelect(drawCall.World.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = info.PerInstanceRandom; #if USE_EDITOR const ViewMode viewMode = renderContextBatch.GetMainContext().View.Mode; diff --git a/Source/Engine/Graphics/Models/SkinnedMesh.cpp b/Source/Engine/Graphics/Models/SkinnedMesh.cpp index 06c1201ef..207ff8991 100644 --- a/Source/Engine/Graphics/Models/SkinnedMesh.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMesh.cpp @@ -249,11 +249,9 @@ void SkinnedMesh::Draw(const RenderContext& renderContext, const DrawInfo& info, drawCall.ObjectRadius = (float)info.Bounds.Radius; // TODO: should it be kept in sync with ObjectPosition? 
drawCall.Surface.GeometrySize = _box.GetSize(); drawCall.Surface.PrevWorld = info.DrawState->PrevWorld; - drawCall.Surface.Lightmap = nullptr; - drawCall.Surface.LightmapUVsArea = Rectangle::Empty; drawCall.Surface.Skinning = info.Skinning; drawCall.Surface.LODDitherFactor = lodDitherFactor; - drawCall.WorldDeterminantSign = Math::FloatSelect(drawCall.World.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = info.PerInstanceRandom; // Push draw call to the render list @@ -292,11 +290,9 @@ void SkinnedMesh::Draw(const RenderContextBatch& renderContextBatch, const DrawI drawCall.ObjectRadius = (float)info.Bounds.Radius; // TODO: should it be kept in sync with ObjectPosition? drawCall.Surface.GeometrySize = _box.GetSize(); drawCall.Surface.PrevWorld = info.DrawState->PrevWorld; - drawCall.Surface.Lightmap = nullptr; - drawCall.Surface.LightmapUVsArea = Rectangle::Empty; drawCall.Surface.Skinning = info.Skinning; drawCall.Surface.LODDitherFactor = lodDitherFactor; - drawCall.WorldDeterminantSign = Math::FloatSelect(drawCall.World.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = info.PerInstanceRandom; // Push draw call to the render lists diff --git a/Source/Engine/Graphics/RenderTools.h b/Source/Engine/Graphics/RenderTools.h index 8d2873c29..3f3cab65f 100644 --- a/Source/Engine/Graphics/RenderTools.h +++ b/Source/Engine/Graphics/RenderTools.h @@ -29,6 +29,11 @@ public: return mipSlice + arraySlice * mipLevels; } + FORCE_INLINE static float GetWorldDeterminantSign(const Matrix& worldMatrix) + { + return Math::FloatSelect(worldMatrix.RotDeterminant(), 1, -1); + } + /// /// Computes the feature level for the given shader profile. 
/// diff --git a/Source/Engine/Level/Actors/Skybox.cpp b/Source/Engine/Level/Actors/Skybox.cpp index e61800d0a..e59e3a3e4 100644 --- a/Source/Engine/Level/Actors/Skybox.cpp +++ b/Source/Engine/Level/Actors/Skybox.cpp @@ -3,10 +3,11 @@ #include "Skybox.h" #include "Engine/Core/Math/Color.h" #include "Engine/Core/Types/Variant.h" -#include "Engine/Graphics/RenderView.h" #include "Engine/Renderer/RenderList.h" #include "Engine/Serialization/Serialization.h" +#include "Engine/Graphics/RenderView.h" #include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Level/Scene/SceneRendering.h" #include "Engine/Content/Assets/Material.h" #include "Engine/Content/Content.h" @@ -101,7 +102,7 @@ void Skybox::ApplySky(GPUContext* context, RenderContext& renderContext, const M drawCall.ObjectPosition = drawCall.World.GetTranslation(); drawCall.ObjectRadius = (float)_sphere.Radius; drawCall.Surface.GeometrySize = _box.GetSize(); - drawCall.WorldDeterminantSign = Math::FloatSelect(world.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = GetPerInstanceRandom(); MaterialBase::BindParameters bindParams(context, renderContext, drawCall); bindParams.BindViewData(); diff --git a/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp b/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp index 2674c66bd..fda1bcc4c 100644 --- a/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp +++ b/Source/Engine/Renderer/Editor/QuadOverdrawPass.cpp @@ -14,6 +14,7 @@ #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Renderer/RenderList.h" +#include "Engine/Graphics/RenderTools.h" void QuadOverdrawPass::Render(RenderContext& renderContext, GPUContext* context, GPUTextureView* lightBuffer) { @@ -82,7 +83,7 @@ void QuadOverdrawPass::Render(RenderContext& renderContext, GPUContext* context, m1 *= m2; drawCall.World = m1; 
drawCall.ObjectPosition = drawCall.World.GetTranslation(); - drawCall.WorldDeterminantSign = Math::FloatSelect(drawCall.World.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); skyMaterial->Bind(bindParams); skyModel->Render(context); } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index bf38675c7..154fed7c4 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -691,7 +691,6 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL // Prepare instance buffer if (useInstancing) { - PROFILE_CPU_NAMED("Build Instancing"); int32 instancedBatchesCount = 0; for (int32 i = 0; i < list.Batches.Count(); i++) { @@ -707,6 +706,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL } if (instancedBatchesCount != 0) { + PROFILE_CPU_NAMED("Build Instancing"); _instanceBuffer.Clear(); _instanceBuffer.Data.Resize(instancedBatchesCount * sizeof(InstanceData)); auto instanceData = (InstanceData*)_instanceBuffer.Data.Get(); diff --git a/Source/Engine/Terrain/TerrainChunk.cpp b/Source/Engine/Terrain/TerrainChunk.cpp index 6902b5d8d..fa483eb39 100644 --- a/Source/Engine/Terrain/TerrainChunk.cpp +++ b/Source/Engine/Terrain/TerrainChunk.cpp @@ -9,6 +9,7 @@ #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/Textures/GPUTexture.h" #include "Engine/Renderer/RenderList.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Core/Math/OrientedBoundingBox.h" #include "Engine/Level/Scene/Scene.h" #if USE_EDITOR @@ -121,7 +122,7 @@ void TerrainChunk::Draw(const RenderContext& renderContext) const drawCall.Terrain.Lightmap = nullptr; drawCall.Terrain.LightmapUVsArea = Rectangle::Empty; } - drawCall.WorldDeterminantSign = Math::FloatSelect(drawCall.World.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); 
drawCall.PerInstanceRandom = _perInstanceRandom; // Add half-texel offset for heightmap sampling in vertex shader @@ -178,7 +179,7 @@ void TerrainChunk::Draw(const RenderContext& renderContext, MaterialBase* materi drawCall.Terrain.Lightmap = nullptr; drawCall.Terrain.LightmapUVsArea = Rectangle::Empty; } - drawCall.WorldDeterminantSign = Math::FloatSelect(drawCall.World.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = _perInstanceRandom; // Add half-texel offset for heightmap sampling in vertex shader diff --git a/Source/Engine/UI/TextRender.cpp b/Source/Engine/UI/TextRender.cpp index 8640791f5..5a18d915c 100644 --- a/Source/Engine/UI/TextRender.cpp +++ b/Source/Engine/UI/TextRender.cpp @@ -17,6 +17,7 @@ #include "Engine/Content/Assets/MaterialInstance.h" #include "Engine/Content/Content.h" #include "Engine/Core/Types/Variant.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Localization/Localization.h" #if USE_EDITOR #include "Editor/Editor.h" @@ -369,11 +370,7 @@ void TextRender::Draw(RenderContext& renderContext) drawCall.ObjectRadius = (float)_sphere.Radius; drawCall.Surface.GeometrySize = _localBox.GetSize(); drawCall.Surface.PrevWorld = _drawState.PrevWorld; - drawCall.Surface.Lightmap = nullptr; - drawCall.Surface.LightmapUVsArea = Rectangle::Empty; - drawCall.Surface.Skinning = nullptr; - drawCall.Surface.LODDitherFactor = 0.0f; - drawCall.WorldDeterminantSign = Math::FloatSelect(world.RotDeterminant(), 1, -1); + drawCall.WorldDeterminantSign = RenderTools::GetWorldDeterminantSign(drawCall.World); drawCall.PerInstanceRandom = GetPerInstanceRandom(); drawCall.Geometry.IndexBuffer = _ib.GetBuffer(); drawCall.Geometry.VertexBuffers[0] = _vb0.GetBuffer(); diff --git a/Source/Shaders/MaterialCommon.hlsl b/Source/Shaders/MaterialCommon.hlsl index 7fcfae018..0660e5cf9 100644 --- a/Source/Shaders/MaterialCommon.hlsl +++ b/Source/Shaders/MaterialCommon.hlsl @@ 
-118,7 +118,7 @@ struct ModelInput float4 Tangent : TANGENT; float2 LightmapUV : TEXCOORD1; #if USE_VERTEX_COLOR - half4 Color : COLOR; + half4 Color : COLOR; #endif #if USE_INSTANCING float4 InstanceOrigin : ATTRIBUTE0; // .w contains PerInstanceRandom @@ -149,13 +149,6 @@ struct ModelInput_Skinned float4 Tangent : TANGENT; uint4 BlendIndices : BLENDINDICES; float4 BlendWeights : BLENDWEIGHT; -#if USE_INSTANCING - float4 InstanceOrigin : ATTRIBUTE0; // .w contains PerInstanceRandom - float4 InstanceTransform1 : ATTRIBUTE1; // .w contains LODDitherFactor - float3 InstanceTransform2 : ATTRIBUTE2; - float3 InstanceTransform3 : ATTRIBUTE3; - half4 InstanceLightmapArea : ATTRIBUTE4; -#endif }; struct Model_VS2PS From 78f3248ac9aceb5ecfd0e8fb23a846ff7e1a790a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 29 Jun 2024 13:54:02 +0200 Subject: [PATCH 194/292] Refactor `SortOrder` to use `int8` instead of `int16` due to performance reasons (more efficient sort keys packing in rendering) --- Source/Engine/Content/Assets/Model.cpp | 2 +- Source/Engine/Content/Assets/Model.h | 2 +- Source/Engine/Graphics/Models/Mesh.cpp | 2 +- Source/Engine/Graphics/Models/Mesh.h | 2 +- Source/Engine/Graphics/Models/MeshBase.h | 2 +- Source/Engine/Graphics/Models/ModelLOD.h | 2 +- Source/Engine/Level/Actors/AnimatedModel.h | 4 +- Source/Engine/Level/Actors/StaticModel.cpp | 2 +- Source/Engine/Level/Actors/StaticModel.h | 6 +-- Source/Engine/Particles/ParticleEffect.h | 4 +- Source/Engine/Particles/Particles.cpp | 6 +-- Source/Engine/Renderer/RenderList.cpp | 54 +++++++++++----------- Source/Engine/Renderer/RenderList.h | 4 +- Source/Engine/UI/SpriteRender.h | 4 +- Source/Engine/UI/TextRender.h | 4 +- 15 files changed, 51 insertions(+), 49 deletions(-) diff --git a/Source/Engine/Content/Assets/Model.cpp b/Source/Engine/Content/Assets/Model.cpp index 33d244d0c..22de66263 100644 --- a/Source/Engine/Content/Assets/Model.cpp +++ b/Source/Engine/Content/Assets/Model.cpp @@ -186,7 +186,7 @@
BoundingBox Model::GetBox(int32 lodIndex) const return LODs[lodIndex].GetBox(); } -void Model::Draw(const RenderContext& renderContext, MaterialBase* material, const Matrix& world, StaticFlags flags, bool receiveDecals, int16 sortOrder) const +void Model::Draw(const RenderContext& renderContext, MaterialBase* material, const Matrix& world, StaticFlags flags, bool receiveDecals, int8 sortOrder) const { if (!CanBeRendered()) return; diff --git a/Source/Engine/Content/Assets/Model.h b/Source/Engine/Content/Assets/Model.h index 5c5310bb9..dd52ba34a 100644 --- a/Source/Engine/Content/Assets/Model.h +++ b/Source/Engine/Content/Assets/Model.h @@ -182,7 +182,7 @@ public: /// The object static flags. /// True if rendered geometry can receive decals, otherwise false. /// Object sorting key. - API_FUNCTION() void Draw(API_PARAM(Ref) const RenderContext& renderContext, MaterialBase* material, API_PARAM(Ref) const Matrix& world, StaticFlags flags = StaticFlags::None, bool receiveDecals = true, int16 sortOrder = 0) const; + API_FUNCTION() void Draw(API_PARAM(Ref) const RenderContext& renderContext, MaterialBase* material, API_PARAM(Ref) const Matrix& world, StaticFlags flags = StaticFlags::None, bool receiveDecals = true, int8 sortOrder = 0) const; /// /// Draws the model. 
diff --git a/Source/Engine/Graphics/Models/Mesh.cpp b/Source/Engine/Graphics/Models/Mesh.cpp index fda0550ba..f168ccd79 100644 --- a/Source/Engine/Graphics/Models/Mesh.cpp +++ b/Source/Engine/Graphics/Models/Mesh.cpp @@ -385,7 +385,7 @@ void Mesh::Render(GPUContext* context) const context->DrawIndexedInstanced(_triangles * 3, 1, 0, 0, 0); } -void Mesh::Draw(const RenderContext& renderContext, MaterialBase* material, const Matrix& world, StaticFlags flags, bool receiveDecals, DrawPass drawModes, float perInstanceRandom, int16 sortOrder) const +void Mesh::Draw(const RenderContext& renderContext, MaterialBase* material, const Matrix& world, StaticFlags flags, bool receiveDecals, DrawPass drawModes, float perInstanceRandom, int8 sortOrder) const { if (!material || !material->IsSurface() || !IsInitialized()) return; diff --git a/Source/Engine/Graphics/Models/Mesh.h b/Source/Engine/Graphics/Models/Mesh.h index 9b229fc5b..813836b85 100644 --- a/Source/Engine/Graphics/Models/Mesh.h +++ b/Source/Engine/Graphics/Models/Mesh.h @@ -287,7 +287,7 @@ public: /// The draw passes to use for rendering this object. /// The random per-instance value (normalized to range 0-1). /// Object sorting key. - API_FUNCTION() void Draw(API_PARAM(Ref) const RenderContext& renderContext, MaterialBase* material, API_PARAM(Ref) const Matrix& world, StaticFlags flags = StaticFlags::None, bool receiveDecals = true, DrawPass drawModes = DrawPass::Default, float perInstanceRandom = 0.0f, int16 sortOrder = 0) const; + API_FUNCTION() void Draw(API_PARAM(Ref) const RenderContext& renderContext, MaterialBase* material, API_PARAM(Ref) const Matrix& world, StaticFlags flags = StaticFlags::None, bool receiveDecals = true, DrawPass drawModes = DrawPass::Default, float perInstanceRandom = 0.0f, int8 sortOrder = 0) const; /// /// Draws the mesh. 
diff --git a/Source/Engine/Graphics/Models/MeshBase.h b/Source/Engine/Graphics/Models/MeshBase.h index 9963ec8ba..467a88ea3 100644 --- a/Source/Engine/Graphics/Models/MeshBase.h +++ b/Source/Engine/Graphics/Models/MeshBase.h @@ -240,6 +240,6 @@ public: /// /// The object sorting key. /// - int16 SortOrder; + int8 SortOrder; }; }; diff --git a/Source/Engine/Graphics/Models/ModelLOD.h b/Source/Engine/Graphics/Models/ModelLOD.h index c367a1830..a6937739e 100644 --- a/Source/Engine/Graphics/Models/ModelLOD.h +++ b/Source/Engine/Graphics/Models/ModelLOD.h @@ -139,7 +139,7 @@ public: /// The draw passes to use for rendering this object. /// The random per-instance value (normalized to range 0-1). /// Object sorting key. - API_FUNCTION() void Draw(API_PARAM(Ref) const RenderContext& renderContext, MaterialBase* material, API_PARAM(Ref) const Matrix& world, StaticFlags flags = StaticFlags::None, bool receiveDecals = true, DrawPass drawModes = DrawPass::Default, float perInstanceRandom = 0.0f, int16 sortOrder = 0) const + API_FUNCTION() void Draw(API_PARAM(Ref) const RenderContext& renderContext, MaterialBase* material, API_PARAM(Ref) const Matrix& world, StaticFlags flags = StaticFlags::None, bool receiveDecals = true, DrawPass drawModes = DrawPass::Default, float perInstanceRandom = 0.0f, int8 sortOrder = 0) const { for (int32 i = 0; i < Meshes.Count(); i++) Meshes.Get()[i].Draw(renderContext, material, world, flags, receiveDecals, drawModes, perInstanceRandom, sortOrder); diff --git a/Source/Engine/Level/Actors/AnimatedModel.h b/Source/Engine/Level/Actors/AnimatedModel.h index bb4b3a8f0..753e72080 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.h +++ b/Source/Engine/Level/Actors/AnimatedModel.h @@ -150,10 +150,10 @@ public: DrawPass DrawModes = DrawPass::Default; /// - /// The object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). 
Can be use to control transparency drawing. + /// The object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be used to control transparency drawing. /// API_FIELD(Attributes="EditorDisplay(\"Skinned Model\"), EditorOrder(110), DefaultValue(0)") - int16 SortOrder = 0; + int8 SortOrder = 0; /// /// The shadows casting mode. diff --git a/Source/Engine/Level/Actors/StaticModel.cpp b/Source/Engine/Level/Actors/StaticModel.cpp index dd77692b3..c735b2b5f 100644 --- a/Source/Engine/Level/Actors/StaticModel.cpp +++ b/Source/Engine/Level/Actors/StaticModel.cpp @@ -92,7 +92,7 @@ int32 StaticModel::GetSortOrder() const void StaticModel::SetSortOrder(int32 value) { - _sortOrder = (int16)Math::Clamp(value, MIN_int16, MAX_int16); + _sortOrder = (int8)Math::Clamp(value, MIN_int8, MAX_int8); } bool StaticModel::HasLightmap() const diff --git a/Source/Engine/Level/Actors/StaticModel.h b/Source/Engine/Level/Actors/StaticModel.h index 5a986d0fb..a8171d6ad 100644 --- a/Source/Engine/Level/Actors/StaticModel.h +++ b/Source/Engine/Level/Actors/StaticModel.h @@ -22,7 +22,7 @@ private: char _forcedLod; bool _vertexColorsDirty; byte _vertexColorsCount; - int16 _sortOrder; + int8 _sortOrder; Array _vertexColorsData[MODEL_MAX_LODS]; GPUBuffer* _vertexColorsBuffer[MODEL_MAX_LODS]; Model* _residencyChangedModel = nullptr; @@ -97,13 +97,13 @@ public: API_PROPERTY() void SetForcedLOD(int32 value); /// - /// Gets the model sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be use to control transparency drawing. + /// Gets the model sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be used to control transparency drawing. 
/// API_PROPERTY(Attributes="EditorOrder(60), DefaultValue(0), EditorDisplay(\"Model\")") int32 GetSortOrder() const; /// - /// Sets the model sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be use to control transparency drawing. + /// Sets the model sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be used to control transparency drawing. /// API_PROPERTY() void SetSortOrder(int32 value); diff --git a/Source/Engine/Particles/ParticleEffect.h b/Source/Engine/Particles/ParticleEffect.h index 7cf3abfad..1e2136ff9 100644 --- a/Source/Engine/Particles/ParticleEffect.h +++ b/Source/Engine/Particles/ParticleEffect.h @@ -255,10 +255,10 @@ public: DrawPass DrawModes = DrawPass::Default; /// - /// The object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be use to control transparency drawing. + /// The object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be used to control transparency drawing. 
/// API_FIELD(Attributes="EditorDisplay(\"Particle Effect\"), EditorOrder(80), DefaultValue(0)") - int16 SortOrder = 0; + int8 SortOrder = 0; public: /// diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index c7d9e01d8..4b869bf42 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -161,7 +161,7 @@ void Particles::OnEffectDestroy(ParticleEffect* effect) typedef Array> RenderModulesIndices; -void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int16 sortOrder) +void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) { // Skip if CPU buffer is empty if (buffer->CPU.Count == 0) @@ -598,7 +598,7 @@ void CleanupGPUParticlesSorting() GPUParticlesSorting = nullptr; } -void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int16 sortOrder) +void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) { const auto context = GPUDevice::Instance->GetMainContext(); auto emitter = buffer->Emitter; @@ -921,7 +921,7 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe worldDeterminantSigns[0] = Math::FloatSelect(worlds[0].RotDeterminant(), 1, -1); worldDeterminantSigns[1] = Math::FloatSelect(worlds[1].RotDeterminant(), 1, -1); const StaticFlags staticFlags = 
effect->GetStaticFlags(); - const int16 sortOrder = effect->SortOrder; + const int8 sortOrder = effect->SortOrder; // Draw lights for (int32 emitterIndex = 0; emitterIndex < effect->Instance.Emitters.Count(); emitterIndex++) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 154fed7c4..b85bbed1b 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -449,14 +449,16 @@ struct PackedSortKey struct { + // Sorting order: By Sort Order -> By Material -> By Geometry -> By Distance uint32 DistanceKey; + uint8 DrawKey; uint16 BatchKey; - uint16 SortKey; + uint8 SortKey; }; }; }; -FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall, int16 sortOrder) +FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall, int8 sortOrder) { const Float3 planeNormal = renderContext.View.Direction; const float planePoint = -Float3::Dot(planeNormal, renderContext.View.Position); @@ -464,20 +466,33 @@ FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& PackedSortKey key; key.DistanceKey = RenderTools::ComputeDistanceSortKey(distance); uint32 batchKey = GetHash(drawCall.Material); - batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]); - batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]); - batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[2]); - batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.IndexBuffer); IMaterial::InstancingHandler handler; if (drawCall.Material->CanUseInstancing(handler)) handler.GetHash(drawCall, batchKey); - batchKey += (int32)(471 * drawCall.WorldDeterminantSign); - key.SortKey = (uint16)(sortOrder - MIN_int16); key.BatchKey = (uint16)batchKey; + uint32 drawKey = (uint32)(471 * drawCall.WorldDeterminantSign); + drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]); + drawKey = (drawKey * 397) ^ 
GetHash(drawCall.Geometry.VertexBuffers[1]); + drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[2]); + drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.IndexBuffer); + key.DrawKey = (uint8)drawKey; + key.SortKey = (uint8)(sortOrder - MIN_int8); drawCall.SortKey = key.Data; } -void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals, int16 sortOrder) +FORCE_INLINE bool CanBatchDrawCalls(const DrawCall& a, const DrawCall& b, DrawPass pass) +{ + IMaterial::InstancingHandler handlerA, handlerB; + return a.Material->CanUseInstancing(handlerA) && + b.Material->CanUseInstancing(handlerB) && + a.InstanceCount != 0 && + b.InstanceCount != 0 && + handlerA.CanBatch == handlerB.CanBatch && + handlerA.CanBatch(a, b, pass) && + a.WorldDeterminantSign * b.WorldDeterminantSign > 0; +} + +void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals, int8 sortOrder) { #if ENABLE_ASSERTION_LOW_LAYERS // Ensure that draw modes are non-empty and in conjunction with material draw modes @@ -515,7 +530,7 @@ void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawMo } } -void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawPass drawModes, StaticFlags staticFlags, ShadowsCastingMode shadowsMode, const BoundingSphere& bounds, DrawCall& drawCall, bool receivesDecals, int16 sortOrder) +void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawPass drawModes, StaticFlags staticFlags, ShadowsCastingMode shadowsMode, const BoundingSphere& bounds, DrawCall& drawCall, bool receivesDecals, int8 sortOrder) { #if ENABLE_ASSERTION_LOW_LAYERS // Ensure that draw modes are non-empty and in conjunction with material draw modes @@ -571,19 +586,8 @@ void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawP } } 
-namespace { - FORCE_INLINE bool CanBatchWith(const DrawCall& a, const DrawCall& b, DrawPass pass) { - IMaterial::InstancingHandler handlerA, handlerB; - return a.Material->CanUseInstancing(handlerA) && - b.Material->CanUseInstancing(handlerB) && - Platform::MemoryCompare(&a.Geometry, &b.Geometry, sizeof(a.Geometry)) == 0 && - a.InstanceCount != 0 && - b.InstanceCount != 0 && - handlerA.CanBatch == handlerB.CanBatch && - handlerA.CanBatch(a, b, pass) && - a.WorldDeterminantSign * b.WorldDeterminantSign > 0; } } @@ -639,11 +643,11 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD int32 batchSize = 1; int32 instanceCount = drawCall.InstanceCount; - // Check the following draw calls to merge them (using instancing) + // Check the following draw calls sequence to merge them for (int32 j = i + 1; j < listSize; j++) { const DrawCall& other = drawCallsData[listData[j]]; - if (!CanBatchWith(drawCall, other, pass)) + if (!CanBatchDrawCalls(drawCall, other, pass)) break; batchSize++; instanceCount += other.InstanceCount; @@ -949,9 +953,7 @@ bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b, Draw { // TODO: find reason why batching static meshes with lightmap causes problems with sampling in shader (flickering when meshes in batch order gets changes due to async draw calls collection) if (a.Surface.Lightmap == nullptr && b.Surface.Lightmap == nullptr && - //return a.Surface.Lightmap == b.Surface.Lightmap && - a.Surface.Skinning == nullptr && - b.Surface.Skinning == nullptr) + a.Surface.Skinning == nullptr && b.Surface.Skinning == nullptr) { if (a.Material != b.Material) { diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 8f8e86973..5b8c66647 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -502,7 +502,7 @@ public: /// The draw call data. /// True if the rendered mesh can receive decals. /// Object sorting key. 
- void AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals = true, int16 sortOrder = 0); + void AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals = true, int8 sortOrder = 0); /// /// Adds the draw call to the draw lists and references it in other render contexts. Performs additional per-context frustum culling. @@ -515,7 +515,7 @@ public: /// The draw call data. /// True if the rendered mesh can receive decals. /// Object sorting key. - void AddDrawCall(const RenderContextBatch& renderContextBatch, DrawPass drawModes, StaticFlags staticFlags, ShadowsCastingMode shadowsMode, const BoundingSphere& bounds, DrawCall& drawCall, bool receivesDecals = true, int16 sortOrder = 0); + void AddDrawCall(const RenderContextBatch& renderContextBatch, DrawPass drawModes, StaticFlags staticFlags, ShadowsCastingMode shadowsMode, const BoundingSphere& bounds, DrawCall& drawCall, bool receivesDecals = true, int8 sortOrder = 0); /// /// Sorts the collected draw calls list. diff --git a/Source/Engine/UI/SpriteRender.h b/Source/Engine/UI/SpriteRender.h index d9eb769c2..bd7294c31 100644 --- a/Source/Engine/UI/SpriteRender.h +++ b/Source/Engine/UI/SpriteRender.h @@ -85,10 +85,10 @@ public: DrawPass DrawModes = DrawPass::Default; /// - /// Gets the object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be use to control transparency drawing. + /// Gets the object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be used to control transparency drawing. 
/// API_FIELD(Attributes="EditorOrder(60), DefaultValue(0), EditorDisplay(\"Sprite\")") - int16 SortOrder = 0; + int8 SortOrder = 0; private: void OnMaterialLoaded(); diff --git a/Source/Engine/UI/TextRender.h b/Source/Engine/UI/TextRender.h index cc89fbee1..57e48ac9b 100644 --- a/Source/Engine/UI/TextRender.h +++ b/Source/Engine/UI/TextRender.h @@ -111,10 +111,10 @@ public: ShadowsCastingMode ShadowsMode = ShadowsCastingMode::All; /// - /// The object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be use to control transparency drawing. + /// The object sort order key used when sorting drawable objects during rendering. Use lower values to draw object before others, higher values are rendered later (on top). Can be used to control transparency drawing. /// API_FIELD(Attributes="EditorOrder(85), DefaultValue(0), EditorDisplay(\"Text\")") - int16 SortOrder = 0; + int8 SortOrder = 0; /// /// Gets the layout options. Layout is defined in local space of the object (on XY plane). 
From 08ef7c93ea196c8384dfe4187e56116597a89115 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 00:52:22 +0200 Subject: [PATCH 195/292] Refactor draw calls drawing to use single objects buffer and better materials batching --- .../Features/DeferredShading.hlsl | 6 +- .../Features/Distortion.hlsl | 5 +- .../Features/ForwardShading.hlsl | 6 +- .../MaterialTemplates/Features/Lightmap.hlsl | 1 - .../Features/MotionVectors.hlsl | 7 +- .../Features/Tessellation.hlsl | 7 +- .../Editor/MaterialTemplates/Surface.shader | 161 +++----- .../Editor/MaterialTemplates/Terrain.shader | 17 + Source/Engine/Foliage/Foliage.cpp | 34 +- .../Materials/DecalMaterialShader.cpp | 2 +- .../Materials/DeferredMaterialShader.cpp | 43 +-- .../Materials/DeformableMaterialShader.cpp | 2 +- .../Materials/ForwardMaterialShader.cpp | 39 +- Source/Engine/Graphics/Materials/IMaterial.h | 14 +- .../Graphics/Materials/MaterialShader.cpp | 19 +- .../Graphics/Materials/MaterialShader.h | 8 +- .../Materials/MaterialShaderFeatures.cpp | 17 +- .../Materials/MaterialShaderFeatures.h | 5 - .../Materials/ParticleMaterialShader.cpp | 2 +- .../Materials/TerrainMaterialShader.cpp | 4 +- .../VolumeParticleMaterialShader.cpp | 2 +- Source/Engine/Graphics/Models/Mesh.cpp | 2 +- Source/Engine/Renderer/Editor/LODPreview.cpp | 3 +- .../Renderer/Editor/LightmapUVsDensity.cpp | 2 +- .../Renderer/Editor/MaterialComplexity.cpp | 2 +- .../Engine/Renderer/Editor/VertexColors.cpp | 2 +- Source/Engine/Renderer/GBufferPass.cpp | 2 +- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 1 + Source/Engine/Renderer/RenderList.cpp | 360 ++++++++++-------- Source/Engine/Renderer/RenderList.h | 56 ++- Source/Engine/Renderer/Renderer.cpp | 8 + Source/Engine/Renderer/ShadowsPass.cpp | 6 +- Source/Engine/Renderer/VolumetricFogPass.cpp | 3 +- .../MaterialGenerator/MaterialGenerator.cpp | 2 +- Source/Shaders/MaterialCommon.hlsl | 83 +++- 35 files changed, 491 insertions(+), 442 deletions(-) diff --git 
a/Content/Editor/MaterialTemplates/Features/DeferredShading.hlsl b/Content/Editor/MaterialTemplates/Features/DeferredShading.hlsl index 09b628e54..12f7fa81a 100644 --- a/Content/Editor/MaterialTemplates/Features/DeferredShading.hlsl +++ b/Content/Editor/MaterialTemplates/Features/DeferredShading.hlsl @@ -26,14 +26,12 @@ void PS_GBuffer( ) { Light = float4(0, 0, 0, 1); - + MaterialInput materialInput = GetMaterialInput(input); #if USE_DITHERED_LOD_TRANSITION - // LOD masking - ClipLODTransition(input); + ClipLODTransition(materialInput); #endif // Get material parameters - MaterialInput materialInput = GetMaterialInput(input); Material material = GetMaterialPS(materialInput); // Masking diff --git a/Content/Editor/MaterialTemplates/Features/Distortion.hlsl b/Content/Editor/MaterialTemplates/Features/Distortion.hlsl index 16ed80522..c3a56b2e8 100644 --- a/Content/Editor/MaterialTemplates/Features/Distortion.hlsl +++ b/Content/Editor/MaterialTemplates/Features/Distortion.hlsl @@ -12,13 +12,12 @@ META_PS(USE_DISTORTION, FEATURE_LEVEL_ES2) float4 PS_Distortion(PixelInput input) : SV_Target0 { + MaterialInput materialInput = GetMaterialInput(input); #if USE_DITHERED_LOD_TRANSITION - // LOD masking - ClipLODTransition(input); + ClipLODTransition(materialInput); #endif // Get material parameters - MaterialInput materialInput = GetMaterialInput(input); Material material = GetMaterialPS(materialInput); // Masking diff --git a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl index 7f7d16545..a9a757d87 100644 --- a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl +++ b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl @@ -38,14 +38,12 @@ void PS_Forward( ) { output = 0; - + MaterialInput materialInput = GetMaterialInput(input); #if USE_DITHERED_LOD_TRANSITION - // LOD masking - ClipLODTransition(input); + ClipLODTransition(materialInput); #endif // Get material parameters - 
MaterialInput materialInput = GetMaterialInput(input); Material material = GetMaterialPS(materialInput); // Masking diff --git a/Content/Editor/MaterialTemplates/Features/Lightmap.hlsl b/Content/Editor/MaterialTemplates/Features/Lightmap.hlsl index 46d021711..b47d65a72 100644 --- a/Content/Editor/MaterialTemplates/Features/Lightmap.hlsl +++ b/Content/Editor/MaterialTemplates/Features/Lightmap.hlsl @@ -4,7 +4,6 @@ #define CAN_USE_LIGHTMAP 1 @1// Lightmap: Includes @2// Lightmap: Constants -float4 LightmapArea; @3// Lightmap: Resources #if USE_LIGHTMAP // Irradiance and directionality prebaked lightmaps diff --git a/Content/Editor/MaterialTemplates/Features/MotionVectors.hlsl b/Content/Editor/MaterialTemplates/Features/MotionVectors.hlsl index 587902ae7..8531853f4 100644 --- a/Content/Editor/MaterialTemplates/Features/MotionVectors.hlsl +++ b/Content/Editor/MaterialTemplates/Features/MotionVectors.hlsl @@ -11,14 +11,15 @@ META_PS(true, FEATURE_LEVEL_ES2) float4 PS_MotionVectors(PixelInput input) : SV_Target0 { +#if USE_DITHERED_LOD_TRANSITION || MATERIAL_MASKED + MaterialInput materialInput = GetMaterialInput(input); #if USE_DITHERED_LOD_TRANSITION - // LOD masking - ClipLODTransition(input); + ClipLODTransition(materialInput); +#endif #endif #if MATERIAL_MASKED // Perform per pixel clipping if material requries it - MaterialInput materialInput = GetMaterialInput(input); Material material = GetMaterialPS(materialInput); clip(material.Mask - MATERIAL_MASK_THRESHOLD); #endif diff --git a/Content/Editor/MaterialTemplates/Features/Tessellation.hlsl b/Content/Editor/MaterialTemplates/Features/Tessellation.hlsl index f26fbd515..bc9e50834 100644 --- a/Content/Editor/MaterialTemplates/Features/Tessellation.hlsl +++ b/Content/Editor/MaterialTemplates/Features/Tessellation.hlsl @@ -33,8 +33,13 @@ struct TessalationDSToPS MaterialInput GetMaterialInput(TessalationDSToPS input) { MaterialInput output = GetGeometryMaterialInput(input.Geometry); +#if USE_PER_DRAW_CONSTANTS + 
output.Object = LoadObject(ObjectsBuffer, input.Geometry.ObjectIndex); +#else + LoadObjectFromCB(output.Object); +#endif output.SvPosition = input.Position; - output.TwoSidedSign = WorldDeterminantSign; + output.TwoSidedSign = output.Object.WorldDeterminantSign; #if USE_CUSTOM_VERTEX_INTERPOLATORS output.CustomVSToPS = input.CustomVSToPS; #endif diff --git a/Content/Editor/MaterialTemplates/Surface.shader b/Content/Editor/MaterialTemplates/Surface.shader index 4b3581cee..fd6cb31fa 100644 --- a/Content/Editor/MaterialTemplates/Surface.shader +++ b/Content/Editor/MaterialTemplates/Surface.shader @@ -3,6 +3,7 @@ #define MATERIAL 1 #define USE_PER_VIEW_CONSTANTS 1 +#define USE_PER_DRAW_CONSTANTS 1 @3 #include "./Flax/Common.hlsl" #include "./Flax/MaterialCommon.hlsl" @@ -10,17 +11,19 @@ @7 // Primary constant buffer (with additional material parameters) META_CB_BEGIN(0, Data) -float4x3 WorldMatrix; -float4x3 PrevWorldMatrix; -float2 Dummy0; -float LODDitherFactor; -float PerInstanceRandom; -float3 GeometrySize; -float WorldDeterminantSign; @1META_CB_END // Shader resources @2 +Buffer ObjectsBuffer : register(t0); +#if USE_SKINNING +// The skeletal bones matrix buffer (stored as 4x3, 3 float4 behind each other) +Buffer BoneMatrices : register(t1); +#if PER_BONE_MOTION_BLUR +// The skeletal bones matrix buffer from the previous frame +Buffer PrevBoneMatrices : register(t2); +#endif +#endif // Geometry data passed though the graphics rendering stages up to the pixel shader struct GeometryData { @@ -32,12 +35,8 @@ struct GeometryData #endif float3 WorldNormal : TEXCOORD3; float4 WorldTangent : TEXCOORD4; - nointerpolation float3 InstanceOrigin : TEXCOORD5; - nointerpolation float2 InstanceParams : TEXCOORD6; // x-PerInstanceRandom, y-LODDitherFactor float3 PrevWorldPosition : TEXCOORD7; - nointerpolation float3 InstanceTransform1 : TEXCOORD8; - nointerpolation float3 InstanceTransform2 : TEXCOORD9; - nointerpolation float3 InstanceTransform3 : TEXCOORD10; + nointerpolation 
uint ObjectIndex : TEXCOORD8; }; // Interpolants passed from the vertex shader @@ -80,11 +79,8 @@ struct MaterialInput float4 SvPosition; float3 PreSkinnedPosition; float3 PreSkinnedNormal; - float3 InstanceOrigin; - float2 InstanceParams; - float3 InstanceTransform1; - float3 InstanceTransform2; - float3 InstanceTransform3; + uint ObjectIndex; + ObjectData Object; #if USE_CUSTOM_VERTEX_INTERPOLATORS float4 CustomVSToPS[CUSTOM_VERTEX_INTERPOLATORS_COUNT]; #endif @@ -103,11 +99,7 @@ MaterialInput GetGeometryMaterialInput(GeometryData geometry) output.VertexColor = geometry.VertexColor; #endif output.TBN = CalcTangentBasis(geometry.WorldNormal, geometry.WorldTangent); - output.InstanceOrigin = geometry.InstanceOrigin; - output.InstanceParams = geometry.InstanceParams; - output.InstanceTransform1 = geometry.InstanceTransform1; - output.InstanceTransform2 = geometry.InstanceTransform2; - output.InstanceTransform3 = geometry.InstanceTransform3; + output.ObjectIndex = geometry.ObjectIndex; return output; } @@ -143,11 +135,7 @@ GeometryData InterpolateGeometry(GeometryData p0, float w0, GeometryData p1, flo output.WorldNormal = normalize(output.WorldNormal); output.WorldTangent = p0.WorldTangent * w0 + p1.WorldTangent * w1 + p2.WorldTangent * w2; output.WorldTangent.xyz = normalize(output.WorldTangent.xyz); - output.InstanceOrigin = p0.InstanceOrigin; - output.InstanceParams = p0.InstanceParams; - output.InstanceTransform1 = p0.InstanceTransform1; - output.InstanceTransform2 = p0.InstanceTransform2; - output.InstanceTransform3 = p0.InstanceTransform3; + output.ObjectIndex = p0.ObjectIndex; return output; } @@ -156,7 +144,8 @@ GeometryData InterpolateGeometry(GeometryData p0, float w0, GeometryData p1, flo MaterialInput GetMaterialInput(PixelInput input) { MaterialInput output = GetGeometryMaterialInput(input.Geometry); - output.TwoSidedSign = WorldDeterminantSign * (input.IsFrontFace ? 
1.0 : -1.0); + output.Object = LoadObject(ObjectsBuffer, input.Geometry.ObjectIndex); + output.TwoSidedSign = output.Object.WorldDeterminantSign * (input.IsFrontFace ? 1.0 : -1.0); output.SvPosition = input.Position; #if USE_CUSTOM_VERTEX_INTERPOLATORS output.CustomVSToPS = input.CustomVSToPS; @@ -164,16 +153,6 @@ MaterialInput GetMaterialInput(PixelInput input) return output; } -// Gets the local to world transform matrix -#define GetInstanceTransform(input) float4x4(float4(input.InstanceTransform1.xyz, 0.0f), float4(input.InstanceTransform2.xyz, 0.0f), float4(input.InstanceTransform3.xyz, 0.0f), float4(input.InstanceOrigin.xyz, 1.0f)) - -// Extarcts the world matrix and instancce transform vector -#if USE_INSTANCING -#define CalculateInstanceTransform(input) float4x4 world = GetInstanceTransform(input); output.Geometry.InstanceTransform1 = input.InstanceTransform1.xyz; output.Geometry.InstanceTransform2 = input.InstanceTransform2.xyz; output.Geometry.InstanceTransform3 = input.InstanceTransform3.xyz; -#else -#define CalculateInstanceTransform(input) float4x4 world = ToMatrix4x4(WorldMatrix); output.Geometry.InstanceTransform1 = world[0].xyz; output.Geometry.InstanceTransform2 = world[1].xyz; output.Geometry.InstanceTransform3 = world[2].xyz; -#endif - // Removes the scale vector from the local to world transformation matrix (supports instancing) float3x3 RemoveScaleFromLocalToWorld(float3x3 localToWorld) { @@ -218,7 +197,7 @@ float3 TransformViewVectorToWorld(MaterialInput input, float3 viewVector) // Transforms a vector from local space to world space float3 TransformLocalVectorToWorld(MaterialInput input, float3 localVector) { - float3x3 localToWorld = (float3x3)GetInstanceTransform(input); + float3x3 localToWorld = (float3x3)input.Object.WorldMatrix; //localToWorld = RemoveScaleFromLocalToWorld(localToWorld); return mul(localVector, localToWorld); } @@ -226,7 +205,7 @@ float3 TransformLocalVectorToWorld(MaterialInput input, float3 localVector) // Transforms a 
vector from local space to world space float3 TransformWorldVectorToLocal(MaterialInput input, float3 worldVector) { - float3x3 localToWorld = (float3x3)GetInstanceTransform(input); + float3x3 localToWorld = (float3x3)input.Object.WorldMatrix; //localToWorld = RemoveScaleFromLocalToWorld(localToWorld); return mul(localToWorld, worldVector); } @@ -234,30 +213,26 @@ float3 TransformWorldVectorToLocal(MaterialInput input, float3 worldVector) // Gets the current object position (supports instancing) float3 GetObjectPosition(MaterialInput input) { - return input.InstanceOrigin.xyz; + return input.Object.WorldMatrix[3].xyz; } // Gets the current object size (supports instancing) float3 GetObjectSize(MaterialInput input) { - float4x4 world = GetInstanceTransform(input); - return GeometrySize * float3(world._m00, world._m11, world._m22); + float4x4 world = input.Object.WorldMatrix; + return input.Object.GeometrySize * float3(world._m00, world._m11, world._m22); } // Get the current object random value (supports instancing) float GetPerInstanceRandom(MaterialInput input) { - return input.InstanceParams.x; + return input.Object.PerInstanceRandom; } // Get the current object LOD transition dither factor (supports instancing) float GetLODDitherFactor(MaterialInput input) { -#if USE_DITHERED_LOD_TRANSITION - return input.InstanceParams.y; -#else - return 0; -#endif + return input.Object.LODDitherFactor; } // Gets the interpolated vertex color (in linear space) @@ -316,19 +291,22 @@ META_VS_IN_ELEMENT(NORMAL, 0, R10G10B10A2_UNORM, 1, ALIGN, PER_VERTEX, 0, true META_VS_IN_ELEMENT(TANGENT, 0, R10G10B10A2_UNORM, 1, ALIGN, PER_VERTEX, 0, true) META_VS_IN_ELEMENT(TEXCOORD, 1, R16G16_FLOAT, 1, ALIGN, PER_VERTEX, 0, true) META_VS_IN_ELEMENT(COLOR, 0, R8G8B8A8_UNORM, 2, 0, PER_VERTEX, 0, USE_VERTEX_COLOR) -META_VS_IN_ELEMENT(ATTRIBUTE,0, R32G32B32A32_FLOAT,3, 0, PER_INSTANCE, 1, USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,1, R32G32B32A32_FLOAT,3, ALIGN, PER_INSTANCE, 1, 
USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,2, R32G32B32_FLOAT, 3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,3, R32G32B32_FLOAT, 3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,4, R16G16B16A16_FLOAT,3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING) +META_VS_IN_ELEMENT(ATTRIBUTE,0, R32_UINT, 3, 0, PER_INSTANCE, 1, USE_INSTANCING) VertexOutput VS(ModelInput input) { VertexOutput output; + // Load object data +#if USE_INSTANCING + output.Geometry.ObjectIndex = input.ObjectIndex; +#else + output.Geometry.ObjectIndex = DrawObjectIndex; +#endif + ObjectData object = LoadObject(ObjectsBuffer, output.Geometry.ObjectIndex); + // Compute world space vertex position - CalculateInstanceTransform(input); - output.Geometry.WorldPosition = mul(float4(input.Position.xyz, 1), world).xyz; - output.Geometry.PrevWorldPosition = mul(float4(input.Position.xyz, 1), ToMatrix4x4(PrevWorldMatrix)).xyz; + output.Geometry.WorldPosition = mul(float4(input.Position.xyz, 1), object.WorldMatrix).xyz; + output.Geometry.PrevWorldPosition = mul(float4(input.Position.xyz, 1), object.PrevWorldMatrix).xyz; // Compute clip space position output.Position = mul(float4(output.Geometry.WorldPosition, 1), ViewProjectionMatrix); @@ -338,22 +316,15 @@ VertexOutput VS(ModelInput input) #if USE_VERTEX_COLOR output.Geometry.VertexColor = input.Color; #endif - output.Geometry.InstanceOrigin = world[3].xyz; -#if USE_INSTANCING - output.Geometry.LightmapUV = input.LightmapUV * input.InstanceLightmapArea.zw + input.InstanceLightmapArea.xy; - output.Geometry.InstanceParams = float2(input.InstanceOrigin.w, input.InstanceTransform1.w); -#else #if CAN_USE_LIGHTMAP - output.Geometry.LightmapUV = input.LightmapUV * LightmapArea.zw + LightmapArea.xy; + output.Geometry.LightmapUV = input.LightmapUV * object.LightmapArea.zw + object.LightmapArea.xy; #else output.Geometry.LightmapUV = input.LightmapUV; #endif - output.Geometry.InstanceParams = float2(PerInstanceRandom, 
LODDitherFactor); -#endif // Calculate tanget space to world space transformation matrix for unit vectors float3x3 tangentToLocal = CalcTangentToLocal(input); - float3x3 tangentToWorld = CalcTangentToWorld(world, tangentToLocal); + float3x3 tangentToWorld = CalcTangentToWorld(object.WorldMatrix, tangentToLocal); output.Geometry.WorldNormal = tangentToWorld[2]; output.Geometry.WorldTangent.xyz = tangentToWorld[0]; output.Geometry.WorldTangent.w = input.Tangent.w ? -1.0f : +1.0f; @@ -361,10 +332,11 @@ VertexOutput VS(ModelInput input) // Get material input params if need to evaluate any material property #if USE_POSITION_OFFSET || USE_TESSELLATION || USE_CUSTOM_VERTEX_INTERPOLATORS MaterialInput materialInput = GetGeometryMaterialInput(output.Geometry); - materialInput.TwoSidedSign = WorldDeterminantSign; + materialInput.TwoSidedSign = object.WorldDeterminantSign; materialInput.SvPosition = output.Position; materialInput.PreSkinnedPosition = input.Position.xyz; materialInput.PreSkinnedNormal = tangentToLocal[2].xyz; + materialInput.Object = object; Material material = GetMaterialVS(materialInput); #endif @@ -392,33 +364,27 @@ META_VS(true, FEATURE_LEVEL_ES2) META_PERMUTATION_1(USE_INSTANCING=0) META_PERMUTATION_1(USE_INSTANCING=1) META_VS_IN_ELEMENT(POSITION, 0, R32G32B32_FLOAT, 0, 0, PER_VERTEX, 0, true) -META_VS_IN_ELEMENT(ATTRIBUTE,0, R32G32B32A32_FLOAT,3, 0, PER_INSTANCE, 1, USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,1, R32G32B32A32_FLOAT,3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,2, R32G32B32_FLOAT, 3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,3, R32G32B32_FLOAT, 3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING) -META_VS_IN_ELEMENT(ATTRIBUTE,4, R16G16B16A16_FLOAT,3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING) +META_VS_IN_ELEMENT(ATTRIBUTE,0, R32_UINT, 3, 0, PER_INSTANCE, 1, USE_INSTANCING) float4 VS_Depth(ModelInput_PosOnly input) : SV_Position { + // Load object data #if USE_INSTANCING - float4x4 world = 
GetInstanceTransform(input); + uint objectIndex = input.ObjectIndex; #else - float4x4 world = ToMatrix4x4(WorldMatrix); + uint objectIndex = DrawObjectIndex; #endif - float3 worldPosition = mul(float4(input.Position.xyz, 1), world).xyz; + ObjectData object = LoadObject(ObjectsBuffer, objectIndex); + + // Transform vertex position into the screen + float3 worldPosition = mul(float4(input.Position.xyz, 1), object.WorldMatrix).xyz; float4 position = mul(float4(worldPosition, 1), ViewProjectionMatrix); return position; } #if USE_SKINNING -// The skeletal bones matrix buffer (stored as 4x3, 3 float4 behind each other) -Buffer BoneMatrices : register(t0); - #if PER_BONE_MOTION_BLUR -// The skeletal bones matrix buffer from the previous frame -Buffer PrevBoneMatrices : register(t1); - float3x4 GetPrevBoneMatrix(int index) { float4 a = PrevBoneMatrices[index * 3]; @@ -497,6 +463,10 @@ META_VS_IN_ELEMENT(BLENDWEIGHT, 0, R16G16B16A16_FLOAT,0, ALIGN, PER_VERTEX, 0, VertexOutput VS_Skinned(ModelInput_Skinned input) { VertexOutput output; + + // Load object data + output.Geometry.ObjectIndex = DrawObjectIndex; + ObjectData object = LoadObject(ObjectsBuffer, output.Geometry.ObjectIndex); // Perform skinning float3x4 boneMatrix = GetBoneMatrix(input); @@ -504,13 +474,12 @@ VertexOutput VS_Skinned(ModelInput_Skinned input) float3x3 tangentToLocal = SkinTangents(input, boneMatrix); // Compute world space vertex position - CalculateInstanceTransform(input); - output.Geometry.WorldPosition = mul(float4(position, 1), world).xyz; + output.Geometry.WorldPosition = mul(float4(position, 1), object.WorldMatrix).xyz; #if PER_BONE_MOTION_BLUR float3 prevPosition = SkinPrevPosition(input); - output.Geometry.PrevWorldPosition = mul(float4(prevPosition, 1), ToMatrix4x4(PrevWorldMatrix)).xyz; + output.Geometry.PrevWorldPosition = mul(float4(prevPosition, 1), object.PrevWorldMatrix).xyz; #else - output.Geometry.PrevWorldPosition = mul(float4(position, 1), ToMatrix4x4(PrevWorldMatrix)).xyz; + 
output.Geometry.PrevWorldPosition = mul(float4(position, 1), object.PrevWorldMatrix).xyz; #endif // Compute clip space position @@ -522,15 +491,9 @@ VertexOutput VS_Skinned(ModelInput_Skinned input) output.Geometry.VertexColor = float4(0, 0, 0, 1); #endif output.Geometry.LightmapUV = float2(0, 0); - output.Geometry.InstanceOrigin = world[3].xyz; -#if USE_INSTANCING - output.Geometry.InstanceParams = float2(input.InstanceOrigin.w, input.InstanceTransform1.w); -#else - output.Geometry.InstanceParams = float2(PerInstanceRandom, LODDitherFactor); -#endif // Calculate tanget space to world space transformation matrix for unit vectors - float3x3 tangentToWorld = CalcTangentToWorld(world, tangentToLocal); + float3x3 tangentToWorld = CalcTangentToWorld(object.WorldMatrix, tangentToLocal); output.Geometry.WorldNormal = tangentToWorld[2]; output.Geometry.WorldTangent.xyz = tangentToWorld[0]; output.Geometry.WorldTangent.w = input.Tangent.w ? -1.0f : +1.0f; @@ -538,10 +501,11 @@ VertexOutput VS_Skinned(ModelInput_Skinned input) // Get material input params if need to evaluate any material property #if USE_POSITION_OFFSET || USE_TESSELLATION || USE_CUSTOM_VERTEX_INTERPOLATORS MaterialInput materialInput = GetGeometryMaterialInput(output.Geometry); - materialInput.TwoSidedSign = WorldDeterminantSign; + materialInput.TwoSidedSign = object.WorldDeterminantSign; materialInput.SvPosition = output.Position; materialInput.PreSkinnedPosition = input.Position.xyz; materialInput.PreSkinnedNormal = tangentToLocal[2].xyz; + materialInput.Object = object; Material material = GetMaterialVS(materialInput); #endif @@ -568,12 +532,12 @@ VertexOutput VS_Skinned(ModelInput_Skinned input) #if USE_DITHERED_LOD_TRANSITION -void ClipLODTransition(PixelInput input) +void ClipLODTransition(MaterialInput input) { - float ditherFactor = input.Geometry.InstanceParams.y; + float ditherFactor = input.Object.LODDitherFactor; if (abs(ditherFactor) > 0.001) { - float randGrid = 
cos(dot(floor(input.Position.xy), float2(347.83452793, 3343.28371863))); + float randGrid = cos(dot(floor(input.SvPosition.xy), float2(347.83452793, 3343.28371863))); float randGridFrac = frac(randGrid * 1000.0); half mask = (ditherFactor < 0.0) ? (ditherFactor + 1.0 > randGridFrac) : (ditherFactor < randGridFrac); clip(mask - 0.001); @@ -586,14 +550,13 @@ void ClipLODTransition(PixelInput input) META_PS(true, FEATURE_LEVEL_ES2) void PS_Depth(PixelInput input) { + MaterialInput materialInput = GetMaterialInput(input); #if USE_DITHERED_LOD_TRANSITION - // LOD masking - ClipLODTransition(input); + ClipLODTransition(materialInput); #endif #if MATERIAL_MASKED || MATERIAL_BLEND != MATERIAL_BLEND_OPAQUE // Get material parameters - MaterialInput materialInput = GetMaterialInput(input); Material material = GetMaterialPS(materialInput); // Perform per pixel clipping diff --git a/Content/Editor/MaterialTemplates/Terrain.shader b/Content/Editor/MaterialTemplates/Terrain.shader index 71504f6ed..f7819c863 100644 --- a/Content/Editor/MaterialTemplates/Terrain.shader +++ b/Content/Editor/MaterialTemplates/Terrain.shader @@ -28,6 +28,7 @@ float4 HeightmapUVScaleBias; float4 NeighborLOD; float2 OffsetUV; float2 Dummy0; +float4 LightmapArea; @1META_CB_END // Terrain data @@ -88,6 +89,7 @@ struct MaterialInput float3 PreSkinnedPosition; float3 PreSkinnedNormal; float HolesMask; + ObjectData Object; #if USE_TERRAIN_LAYERS float4 Layers[TERRAIN_LAYERS_DATA_SIZE]; #endif @@ -147,9 +149,23 @@ GeometryData InterpolateGeometry(GeometryData p0, float w0, GeometryData p1, flo #endif +ObjectData GetObject() +{ + ObjectData object = (ObjectData)0; + object.WorldMatrix = ToMatrix4x4(WorldMatrix); + object.PrevWorldMatrix = object.WorldMatrix; + object.GeometrySize = float3(1, 1, 1); + object.PerInstanceRandom = PerInstanceRandom; + object.WorldDeterminantSign = WorldDeterminantSign; + object.LODDitherFactor = 0.0f; + object.LightmapArea = LightmapArea; + return object; +} + MaterialInput 
GetMaterialInput(PixelInput input) { MaterialInput output = GetGeometryMaterialInput(input.Geometry); + output.Object = GetObject(); output.TwoSidedSign = WorldDeterminantSign * (input.IsFrontFace ? 1.0 : -1.0); output.SvPosition = input.Position; #if USE_CUSTOM_VERTEX_INTERPOLATORS @@ -396,6 +412,7 @@ VertexOutput VS(TerrainVertexInput input) // Get material input params if need to evaluate any material property #if USE_POSITION_OFFSET || USE_TESSELLATION || USE_CUSTOM_VERTEX_INTERPOLATORS MaterialInput materialInput = (MaterialInput)0; + materialInput.Object = GetObject(); materialInput.WorldPosition = output.Geometry.WorldPosition; materialInput.TexCoord = output.Geometry.TexCoord; #if USE_LIGHTMAP diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp index 562a8b706..530c5c662 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -116,6 +116,7 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan ASSERT_LOW_LAYER(key.Mat); e->DrawCall.Material = key.Mat; e->DrawCall.Surface.Lightmap = EnumHasAnyFlags(_staticFlags, StaticFlags::Lightmap) && _scene ? 
_scene->LightmapsData.GetReadyLightmap(key.Lightmap) : nullptr; + e->DrawCall.Surface.GeometrySize = key.Geo->GetBox().GetSize(); } // Add instance to the draw batch @@ -124,13 +125,8 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan const Transform transform = _transform.LocalToWorld(instance.Transform); const Float3 translation = transform.Translation - renderContext.View.Origin; Matrix::Transformation(transform.Scale, transform.Orientation, translation, world); - instanceData.InstanceOrigin = Float3(world.M41, world.M42, world.M43); - instanceData.PerInstanceRandom = instance.Random; - instanceData.InstanceTransform1 = Float3(world.M11, world.M12, world.M13); - instanceData.LODDitherFactor = lodDitherFactor; - instanceData.InstanceTransform2 = Float3(world.M21, world.M22, world.M23); - instanceData.InstanceTransform3 = Float3(world.M31, world.M32, world.M33); - instanceData.InstanceLightmapArea = Half4(instance.Lightmap.UVsArea); + constexpr float worldDeterminantSign = 1.0f; + instanceData.Store(world, world, instance.Lightmap.UVsArea, drawCall.DrawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, lodDitherFactor); } } @@ -456,6 +452,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr continue; drawCall.DrawCall.Material = material; + drawCall.DrawCall.Surface.GeometrySize = mesh.GetBox().GetSize(); } } @@ -479,18 +476,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr mesh.GetDrawCallGeometry(batch.DrawCall); batch.DrawCall.InstanceCount = 1; auto& firstInstance = batch.Instances[0]; - batch.DrawCall.ObjectPosition = firstInstance.InstanceOrigin; - batch.DrawCall.PerInstanceRandom = firstInstance.PerInstanceRandom; - auto lightmapArea = firstInstance.InstanceLightmapArea.ToFloat4(); - batch.DrawCall.Surface.LightmapUVsArea = *(Rectangle*)&lightmapArea; - batch.DrawCall.Surface.LODDitherFactor = firstInstance.LODDitherFactor; - 
batch.DrawCall.World.SetRow1(Float4(firstInstance.InstanceTransform1, 0.0f)); - batch.DrawCall.World.SetRow2(Float4(firstInstance.InstanceTransform2, 0.0f)); - batch.DrawCall.World.SetRow3(Float4(firstInstance.InstanceTransform3, 0.0f)); - batch.DrawCall.World.SetRow4(Float4(firstInstance.InstanceOrigin, 1.0f)); - batch.DrawCall.Surface.PrevWorld = batch.DrawCall.World; - batch.DrawCall.Surface.GeometrySize = mesh.GetBox().GetSize(); - batch.DrawCall.WorldDeterminantSign = 1; + firstInstance.Load(batch.DrawCall); if (EnumHasAnyFlags(drawModes, DrawPass::Forward)) { @@ -499,15 +485,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr for (int32 j = 0; j < batch.Instances.Count(); j++) { auto& instance = batch.Instances[j]; - drawCall.ObjectPosition = instance.InstanceOrigin; - drawCall.PerInstanceRandom = instance.PerInstanceRandom; - lightmapArea = instance.InstanceLightmapArea.ToFloat4(); - drawCall.Surface.LightmapUVsArea = *(Rectangle*)&lightmapArea; - drawCall.Surface.LODDitherFactor = instance.LODDitherFactor; - drawCall.World.SetRow1(Float4(instance.InstanceTransform1, 0.0f)); - drawCall.World.SetRow2(Float4(instance.InstanceTransform2, 0.0f)); - drawCall.World.SetRow3(Float4(instance.InstanceTransform3, 0.0f)); - drawCall.World.SetRow4(Float4(instance.InstanceOrigin, 1.0f)); + instance.Load(drawCall); const int32 drawCallIndex = renderContext.List->DrawCalls.Add(drawCall); renderContext.List->DrawCallsLists[(int32)DrawCallsListType::Forward].Indices.Add(drawCallIndex); } diff --git a/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp b/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp index 9b2654f21..714805f45 100644 --- a/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/DecalMaterialShader.cpp @@ -29,7 +29,7 @@ void DecalMaterialShader::Bind(BindParameters& params) // Prepare auto context = params.GPUContext; auto& view = params.RenderContext.View; - auto& drawCall 
= *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; Span cb(_cbData.Get(), _cbData.Count()); ASSERT_LOW_LAYER(cb.Length() >= sizeof(DecalMaterialShaderData)); auto materialData = reinterpret_cast(cb.Get()); diff --git a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp index a08797bdb..152edcfae 100644 --- a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp @@ -3,7 +3,6 @@ #include "DeferredMaterialShader.h" #include "MaterialShaderFeatures.h" #include "MaterialParams.h" -#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderView.h" #include "Engine/Renderer/DrawCall.h" @@ -17,16 +16,6 @@ #include "Engine/Graphics/GPULimits.h" #include "Engine/Graphics/RenderTask.h" -PACK_STRUCT(struct DeferredMaterialShaderData { - Matrix3x4 WorldMatrix; - Matrix3x4 PrevWorldMatrix; - Float2 Dummy0; - float LODDitherFactor; - float PerInstanceRandom; - Float3 GeometrySize; - float WorldDeterminantSign; - }); - DrawPass DeferredMaterialShader::GetDrawModes() const { return DrawPass::Depth | DrawPass::GBuffer | DrawPass::GlobalSurfaceAtlas | DrawPass::MotionVectors | DrawPass::QuadOverdraw; @@ -39,22 +28,17 @@ bool DeferredMaterialShader::CanUseLightmap() const bool DeferredMaterialShader::CanUseInstancing(InstancingHandler& handler) const { - handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, SurfaceDrawCallHandler::WriteDrawCall, }; + handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, }; return true; } void DeferredMaterialShader::Bind(BindParameters& params) { - //PROFILE_CPU(); - // Prepare auto context = params.GPUContext; auto& view = params.RenderContext.View; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; Span cb(_cbData.Get(), _cbData.Count()); - 
ASSERT_LOW_LAYER(cb.Length() >= sizeof(DeferredMaterialShaderData)); - auto materialData = reinterpret_cast(cb.Get()); - cb = Span(cb.Get() + sizeof(DeferredMaterialShaderData), cb.Length() - sizeof(DeferredMaterialShaderData)); - int32 srv = 2; + int32 srv = 3; // Setup features const bool useLightmap = _info.BlendMode == MaterialBlendMode::Opaque && LightmapFeature::Bind(params, cb, srv); @@ -68,28 +52,19 @@ void DeferredMaterialShader::Bind(BindParameters& params) bindMeta.CanSampleDepth = false; bindMeta.CanSampleGBuffer = false; MaterialParams::Bind(params.ParamsLink, bindMeta); + context->BindSR(0, params.ObjectBuffer); - // Setup material constants - { - materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); - materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld); - materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign; - materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor; - materialData->PerInstanceRandom = drawCall.PerInstanceRandom; - materialData->GeometrySize = drawCall.Surface.GeometrySize; - } - - // Check if is using mesh skinning + // Check if using mesh skinning const bool useSkinning = drawCall.Surface.Skinning != nullptr; bool perBoneMotionBlur = false; if (useSkinning) { // Bind skinning buffer ASSERT(drawCall.Surface.Skinning->IsReady()); - context->BindSR(0, drawCall.Surface.Skinning->BoneMatrices->View()); + context->BindSR(1, drawCall.Surface.Skinning->BoneMatrices->View()); if (drawCall.Surface.Skinning->PrevBoneMatrices && drawCall.Surface.Skinning->PrevBoneMatrices->IsAllocated()) { - context->BindSR(1, drawCall.Surface.Skinning->PrevBoneMatrices->View()); + context->BindSR(2, drawCall.Surface.Skinning->PrevBoneMatrices->View()); perBoneMotionBlur = true; } } @@ -116,8 +91,8 @@ void DeferredMaterialShader::Bind(BindParameters& params) else cullMode = CullMode::Normal; } - ASSERT_LOW_LAYER(!(useSkinning && params.DrawCallsCount > 1)); // No support for instancing skinned meshes - 
const auto cache = params.DrawCallsCount == 1 ? &_cache : &_cacheInstanced; + ASSERT_LOW_LAYER(!(useSkinning && params.Instanced)); // No support for instancing skinned meshes + const auto cache = params.Instanced ? &_cacheInstanced : &_cache; PipelineStateCache* psCache = cache->GetPS(view.Pass, useLightmap, useSkinning, perBoneMotionBlur); ASSERT(psCache); GPUPipelineState* state = psCache->GetPS(cullMode, wireframe); diff --git a/Source/Engine/Graphics/Materials/DeformableMaterialShader.cpp b/Source/Engine/Graphics/Materials/DeformableMaterialShader.cpp index f5b7f2368..434d1ef05 100644 --- a/Source/Engine/Graphics/Materials/DeformableMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/DeformableMaterialShader.cpp @@ -37,7 +37,7 @@ void DeformableMaterialShader::Bind(BindParameters& params) // Prepare auto context = params.GPUContext; auto& view = params.RenderContext.View; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; Span cb(_cbData.Get(), _cbData.Count()); ASSERT_LOW_LAYER(cb.Length() >= sizeof(DeformableMaterialShaderData)); auto materialData = reinterpret_cast(cb.Get()); diff --git a/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp b/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp index eb843896c..23e9ec0b7 100644 --- a/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp @@ -3,7 +3,6 @@ #include "ForwardMaterialShader.h" #include "MaterialShaderFeatures.h" #include "MaterialParams.h" -#include "Engine/Core/Math/Matrix3x4.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPULimits.h" @@ -18,16 +17,6 @@ #include "Engine/Renderer/Lightmaps.h" #endif -PACK_STRUCT(struct ForwardMaterialShaderData { - Matrix3x4 WorldMatrix; - Matrix3x4 PrevWorldMatrix; - Float2 Dummy0; - float LODDitherFactor; - float PerInstanceRandom; - Float3 GeometrySize; - float WorldDeterminantSign; - 
}); - DrawPass ForwardMaterialShader::GetDrawModes() const { return _drawModes; @@ -35,7 +24,7 @@ DrawPass ForwardMaterialShader::GetDrawModes() const bool ForwardMaterialShader::CanUseInstancing(InstancingHandler& handler) const { - handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, SurfaceDrawCallHandler::WriteDrawCall, }; + handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, }; return true; } @@ -44,12 +33,9 @@ void ForwardMaterialShader::Bind(BindParameters& params) // Prepare auto context = params.GPUContext; auto& view = params.RenderContext.View; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; Span cb(_cbData.Get(), _cbData.Count()); - ASSERT_LOW_LAYER(cb.Length() >= sizeof(ForwardMaterialShaderData)); - auto materialData = reinterpret_cast(cb.Get()); - cb = Span(cb.Get() + sizeof(ForwardMaterialShaderData), cb.Length() - sizeof(ForwardMaterialShaderData)); - int32 srv = 2; + int32 srv = 3; // Setup features if ((_info.FeaturesFlags & MaterialFeaturesFlags::GlobalIllumination) != MaterialFeaturesFlags::None) @@ -65,24 +51,15 @@ void ForwardMaterialShader::Bind(BindParameters& params) bindMeta.CanSampleDepth = GPUDevice::Instance->Limits.HasReadOnlyDepth; bindMeta.CanSampleGBuffer = true; MaterialParams::Bind(params.ParamsLink, bindMeta); + context->BindSR(0, params.ObjectBuffer); - // Check if is using mesh skinning + // Check if using mesh skinning const bool useSkinning = drawCall.Surface.Skinning != nullptr; if (useSkinning) { // Bind skinning buffer ASSERT(drawCall.Surface.Skinning->IsReady()); - context->BindSR(0, drawCall.Surface.Skinning->BoneMatrices->View()); - } - - // Setup material constants - { - materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); - materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld); - materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign; - materialData->LODDitherFactor = 
drawCall.Surface.LODDitherFactor; - materialData->PerInstanceRandom = drawCall.PerInstanceRandom; - materialData->GeometrySize = drawCall.Surface.GeometrySize; + context->BindSR(1, drawCall.Surface.Skinning->BoneMatrices->View()); } // Bind constants @@ -107,8 +84,8 @@ void ForwardMaterialShader::Bind(BindParameters& params) else cullMode = CullMode::Normal; } - ASSERT_LOW_LAYER(!(useSkinning && params.DrawCallsCount > 1)); // No support for instancing skinned meshes - const auto cacheObj = params.DrawCallsCount == 1 ? &_cache : &_cacheInstanced; + ASSERT_LOW_LAYER(!(useSkinning && params.Instanced)); // No support for instancing skinned meshes + const auto cacheObj = params.Instanced ? &_cacheInstanced : &_cache; PipelineStateCache* psCache = cacheObj->GetPS(view.Pass, useSkinning); ASSERT(psCache); GPUPipelineState* state = psCache->GetPS(cullMode, wireframe); diff --git a/Source/Engine/Graphics/Materials/IMaterial.h b/Source/Engine/Graphics/Materials/IMaterial.h index 5235dd59e..90d44897e 100644 --- a/Source/Engine/Graphics/Materials/IMaterial.h +++ b/Source/Engine/Graphics/Materials/IMaterial.h @@ -8,6 +8,7 @@ struct MaterialParamsLink; class GPUShader; class GPUContext; class GPUTextureView; +class GPUBufferView; class GPUConstantBuffer; class RenderBuffers; class SceneRenderTask; @@ -120,7 +121,6 @@ public: { void (*GetHash)(const DrawCall& drawCall, uint32& batchKey); bool (*CanBatch)(const DrawCall& a, const DrawCall& b, DrawPass pass); - void (*WriteDrawCall)(struct InstanceData* instanceData, const DrawCall& drawCall); }; /// @@ -131,7 +131,7 @@ public: virtual bool CanUseInstancing(InstancingHandler& handler) const { #if BUILD_DEBUG - handler = { nullptr, nullptr, nullptr }; + handler = { nullptr, nullptr }; #endif return false; } @@ -144,11 +144,12 @@ public: { GPUContext* GPUContext; const RenderContext& RenderContext; - const DrawCall* FirstDrawCall; - int32 DrawCallsCount; + GPUBufferView* ObjectBuffer = nullptr; + const ::DrawCall* DrawCall = 
nullptr; MaterialParamsLink* ParamsLink = nullptr; void* CustomData = nullptr; float TimeParam; + bool Instanced = false; /// /// The input scene color. It's optional and used in forward/postFx rendering. @@ -156,11 +157,12 @@ public: GPUTextureView* Input = nullptr; BindParameters(::GPUContext* context, const ::RenderContext& renderContext); - BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall& drawCall); - BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall* firstDrawCall, int32 drawCallsCount); + BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const ::DrawCall& drawCall, bool instanced = false); // Per-view shared constant buffer (see ViewData in MaterialCommon.hlsl). static GPUConstantBuffer* PerViewConstants; + // Per-draw shared constant buffer bound at slot 2 (see MaterialShaderDataPerDraw in MaterialShader.h). + static GPUConstantBuffer* PerDrawConstants; // Binds the shared per-view constant buffer at slot 1 (see ViewData in MaterialCommon.hlsl) void BindViewData(); diff --git a/Source/Engine/Graphics/Materials/MaterialShader.cpp b/Source/Engine/Graphics/Materials/MaterialShader.cpp index beaedc324..fe1d1db2a 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShader.cpp @@ -38,31 +38,21 @@ GPU_CB_STRUCT(MaterialShaderDataPerView { IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext) : GPUContext(context) , RenderContext(renderContext) - , FirstDrawCall(nullptr) - , DrawCallsCount(0) , TimeParam(Time::Draw.UnscaledTime.GetTotalSeconds()) { } -IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall& drawCall) +IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const ::DrawCall& drawCall, bool instanced) : GPUContext(context) , RenderContext(renderContext) - ,
FirstDrawCall(&drawCall) - , DrawCallsCount(1) - , TimeParam(Time::Draw.UnscaledTime.GetTotalSeconds()) -{ -} - -IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall* firstDrawCall, int32 drawCallsCount) - : GPUContext(context) - , RenderContext(renderContext) - , FirstDrawCall(firstDrawCall) - , DrawCallsCount(drawCallsCount) + , DrawCall(&drawCall) , TimeParam(Time::Draw.UnscaledTime.GetTotalSeconds()) + , Instanced(instanced) { } GPUConstantBuffer* IMaterial::BindParameters::PerViewConstants = nullptr; +GPUConstantBuffer* IMaterial::BindParameters::PerDrawConstants = nullptr; void IMaterial::BindParameters::BindViewData() { @@ -70,6 +60,7 @@ void IMaterial::BindParameters::BindViewData() if (!PerViewConstants) { PerViewConstants = GPUDevice::Instance->CreateConstantBuffer(sizeof(MaterialShaderDataPerView), TEXT("PerViewConstants")); + PerDrawConstants = GPUDevice::Instance->CreateConstantBuffer(sizeof(MaterialShaderDataPerDraw), TEXT("PerDrawConstants")); } // Setup data diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 223289a46..3bea7a2ec 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,13 +10,19 @@ /// /// Current materials shader version. /// -#define MATERIAL_GRAPH_VERSION 166 +#define MATERIAL_GRAPH_VERSION 167 class Material; class GPUShader; class GPUConstantBuffer; class MemoryReadStream; +// Draw pipeline constant buffer (with per-draw constants at slot 2) +GPU_CB_STRUCT(MaterialShaderDataPerDraw { + Float3 DrawPadding; + uint32 DrawObjectIndex; + }); + /// /// Represents material shader that can be used to render objects, visuals or effects. Contains a dedicated shader. 
/// diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp index c36fd9758..1582c75da 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp @@ -16,7 +16,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, Span= sizeof(Data)); const int32 envProbeShaderRegisterIndex = srv + 0; @@ -118,8 +118,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, Span& cb, int32& srv) { - auto& drawCall = *params.FirstDrawCall; - ASSERT_LOW_LAYER(cb.Length() >= sizeof(Data)); + auto& drawCall = *params.DrawCall; const bool useLightmap = EnumHasAnyFlags(params.RenderContext.View.Flags, ViewFlags::GI) #if USE_EDITOR @@ -134,13 +133,15 @@ bool LightmapFeature::Bind(MaterialShader::BindParameters& params, Span& c params.GPUContext->BindSR(srv + 0, lightmap0); params.GPUContext->BindSR(srv + 1, lightmap1); params.GPUContext->BindSR(srv + 2, lightmap2); - - // Set lightmap data - auto& data = *(Data*)cb.Get(); - data.LightmapArea = drawCall.Features.LightmapUVsArea; + } + else + { + // Free texture slots + params.GPUContext->UnBindSR(srv + 0); + params.GPUContext->UnBindSR(srv + 1); + params.GPUContext->UnBindSR(srv + 2); } - cb = Span(cb.Get() + sizeof(Data), cb.Length() - sizeof(Data)); srv += SRVs; return useLightmap; } diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h index 81a6e260d..a6c829e12 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.h @@ -63,11 +63,6 @@ struct LightmapFeature : MaterialShaderFeature { enum { SRVs = 3 }; - PACK_STRUCT(struct Data - { - Rectangle LightmapArea; - }); - static bool Bind(MaterialShader::BindParameters& params, Span& cb, int32& srv); #if USE_EDITOR static void 
Generate(GeneratorData& data); diff --git a/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp b/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp index 998a77843..f60742ccc 100644 --- a/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp @@ -48,7 +48,7 @@ void ParticleMaterialShader::Bind(BindParameters& params) // Prepare auto context = params.GPUContext; auto& view = params.RenderContext.View; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; const uint32 sortedIndicesOffset = drawCall.Particle.Module->SortedIndicesOffset; Span cb(_cbData.Get(), _cbData.Count()); ASSERT_LOW_LAYER(cb.Length() >= sizeof(ParticleMaterialShaderData)); diff --git a/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp b/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp index 214284176..2a040b55a 100644 --- a/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/TerrainMaterialShader.cpp @@ -28,6 +28,7 @@ PACK_STRUCT(struct TerrainMaterialShaderData { Float4 NeighborLOD; // Per component LOD index for chunk neighbors ordered: top, left, right, bottom Float2 OffsetUV; // Offset applied to the texture coordinates (used to implement seamless UVs based on chunk location relative to terrain root) Float2 Dummy0; + Float4 LightmapArea; }); DrawPass TerrainMaterialShader::GetDrawModes() const @@ -45,7 +46,7 @@ void TerrainMaterialShader::Bind(BindParameters& params) // Prepare auto context = params.GPUContext; auto& view = params.RenderContext.View; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; Span cb(_cbData.Get(), _cbData.Count()); ASSERT_LOW_LAYER(cb.Length() >= sizeof(TerrainMaterialShaderData)); auto materialData = reinterpret_cast(cb.Get()); @@ -83,6 +84,7 @@ void TerrainMaterialShader::Bind(BindParameters& params) materialData->HeightmapUVScaleBias = 
drawCall.Terrain.HeightmapUVScaleBias; materialData->NeighborLOD = drawCall.Terrain.NeighborLOD; materialData->OffsetUV = drawCall.Terrain.OffsetUV; + materialData->LightmapArea = *(Float4*)&drawCall.Terrain.LightmapUVsArea; } // Bind terrain textures diff --git a/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp b/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp index 585b4bf29..5b9bb27a6 100644 --- a/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/VolumeParticleMaterialShader.cpp @@ -37,7 +37,7 @@ void VolumeParticleMaterialShader::Bind(BindParameters& params) // Prepare auto context = params.GPUContext; const RenderView& view = params.RenderContext.View; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; Span cb(_cbData.Get(), _cbData.Count()); ASSERT_LOW_LAYER(cb.Length() >= sizeof(VolumeParticleMaterialShaderData)); auto materialData = reinterpret_cast(cb.Get()); diff --git a/Source/Engine/Graphics/Models/Mesh.cpp b/Source/Engine/Graphics/Models/Mesh.cpp index f168ccd79..766e08a39 100644 --- a/Source/Engine/Graphics/Models/Mesh.cpp +++ b/Source/Engine/Graphics/Models/Mesh.cpp @@ -382,7 +382,7 @@ void Mesh::Render(GPUContext* context) const context->BindVB(ToSpan((GPUBuffer**)_vertexBuffers, 3)); context->BindIB(_indexBuffer); - context->DrawIndexedInstanced(_triangles * 3, 1, 0, 0, 0); + context->DrawIndexed(_triangles * 3); } void Mesh::Draw(const RenderContext& renderContext, MaterialBase* material, const Matrix& world, StaticFlags flags, bool receiveDecals, DrawPass drawModes, float perInstanceRandom, int8 sortOrder) const diff --git a/Source/Engine/Renderer/Editor/LODPreview.cpp b/Source/Engine/Renderer/Editor/LODPreview.cpp index 2f188bcd0..d3a633405 100644 --- a/Source/Engine/Renderer/Editor/LODPreview.cpp +++ b/Source/Engine/Renderer/Editor/LODPreview.cpp @@ -7,7 +7,6 @@ #include "Engine/Content/Content.h" #include 
"Engine/Content/Assets/Model.h" #include "Engine/Graphics/GPUDevice.h" -#include "Engine/Graphics/RenderTask.h" #include "Engine/Renderer/DrawCall.h" #include "Engine/Renderer/RenderList.h" #include "Engine/Renderer/GBufferPass.h" @@ -47,7 +46,7 @@ void LODPreviewMaterialShader::Bind(BindParameters& params) { // Find the LOD that produced this draw call int32 lodIndex = 0; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; const ModelLOD* drawCallModelLod; if (GBufferPass::IndexBufferToModelLOD.TryGet(drawCall.Geometry.IndexBuffer, drawCallModelLod)) { diff --git a/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp b/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp index 287fd006c..47fbbe3df 100644 --- a/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp +++ b/Source/Engine/Renderer/Editor/LightmapUVsDensity.cpp @@ -108,7 +108,7 @@ void LightmapUVsDensityMaterialShader::Bind(BindParameters& params) { // Prepare auto context = params.GPUContext; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; // Setup auto shader = _shader->GetShader(); diff --git a/Source/Engine/Renderer/Editor/MaterialComplexity.cpp b/Source/Engine/Renderer/Editor/MaterialComplexity.cpp index 1d7534485..afe66a8b2 100644 --- a/Source/Engine/Renderer/Editor/MaterialComplexity.cpp +++ b/Source/Engine/Renderer/Editor/MaterialComplexity.cpp @@ -48,7 +48,7 @@ DrawPass MaterialComplexityMaterialShader::WrapperShader::GetDrawModes() const void MaterialComplexityMaterialShader::WrapperShader::Bind(BindParameters& params) { - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; // Get original material from the draw call IMaterial* material = nullptr; diff --git a/Source/Engine/Renderer/Editor/VertexColors.cpp b/Source/Engine/Renderer/Editor/VertexColors.cpp index 6da039561..1f937439a 100644 --- a/Source/Engine/Renderer/Editor/VertexColors.cpp +++ b/Source/Engine/Renderer/Editor/VertexColors.cpp @@ -61,7 +61,7 @@ void 
VertexColorsMaterialShader::Bind(BindParameters& params) { // Prepare auto context = params.GPUContext; - auto& drawCall = *params.FirstDrawCall; + auto& drawCall = *params.DrawCall; // Setup auto shader = _shader->GetShader(); diff --git a/Source/Engine/Renderer/GBufferPass.cpp b/Source/Engine/Renderer/GBufferPass.cpp index 500e62f12..4d0826476 100644 --- a/Source/Engine/Renderer/GBufferPass.cpp +++ b/Source/Engine/Renderer/GBufferPass.cpp @@ -509,7 +509,7 @@ void GBufferPass::DrawDecals(RenderContext& renderContext, GPUTextureView* light drawCall.World = decal.World; decal.Material->Bind(bindParams); // TODO: use hardware instancing - context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, 1, 0, 0, 0); + context->DrawIndexed(drawCall.Draw.IndicesCount); } context->ResetSR(); diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index e76c19b4e..8077049a9 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -849,6 +849,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Clear draw calls list renderContextTiles.List->DrawCalls.Clear(); renderContextTiles.List->BatchedDrawCalls.Clear(); + renderContextTiles.List->ObjectBuffer.Clear(); drawCallsListGBuffer.Indices.Clear(); drawCallsListGBuffer.PreBatchedDrawCalls.Clear(); drawCallsListGBufferNoDecals.Indices.Clear(); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index b85bbed1b..e96a7a9a9 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -3,6 +3,7 @@ #include "RenderList.h" #include "Engine/Core/Collections/Sorting.h" #include "Engine/Graphics/Materials/IMaterial.h" +#include "Engine/Graphics/Materials/MaterialShader.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" @@ -21,6 
+22,7 @@ static_assert(sizeof(DrawCall) <= 288, "Too big draw call data size."); static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Terrain), "Wrong draw call data size."); static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Particle), "Wrong draw call data size."); static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Custom), "Wrong draw call data size."); +static_assert(sizeof(ShaderObjectData) == sizeof(Float4) * ARRAY_COUNT(ShaderObjectData::Raw), "Wrong object data."); namespace { @@ -34,6 +36,40 @@ namespace CriticalSection MemPoolLocker; } +void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor) +{ + Half4 lightmapUVsAreaPacked(*(Float4*)&lightmapUVsArea); + Float2 lightmapUVsAreaPackedAliased = *(Float2*)&lightmapUVsAreaPacked; + Raw[0] = Float4(worldMatrix.M11, worldMatrix.M12, worldMatrix.M13, worldMatrix.M41); + Raw[1] = Float4(worldMatrix.M21, worldMatrix.M22, worldMatrix.M23, worldMatrix.M42); + Raw[2] = Float4(worldMatrix.M31, worldMatrix.M32, worldMatrix.M33, worldMatrix.M43); + Raw[3] = Float4(prevWorldMatrix.M11, prevWorldMatrix.M12, prevWorldMatrix.M13, prevWorldMatrix.M41); + Raw[4] = Float4(prevWorldMatrix.M21, prevWorldMatrix.M22, prevWorldMatrix.M23, prevWorldMatrix.M42); + Raw[5] = Float4(prevWorldMatrix.M31, prevWorldMatrix.M32, prevWorldMatrix.M33, prevWorldMatrix.M43); + Raw[6] = Float4(geometrySize, perInstanceRandom); + Raw[7] = Float4(worldDeterminantSign, lodDitherFactor, lightmapUVsAreaPackedAliased.X, lightmapUVsAreaPackedAliased.Y); + // TODO: pack WorldDeterminantSign and LODDitherFactor +} + +void ShaderObjectData::Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Rectangle& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, float& lodDitherFactor) const +{ + 
worldMatrix.SetRow1(Float4(Float3(Raw[0]), 0.0f)); + worldMatrix.SetRow2(Float4(Float3(Raw[1]), 0.0f)); + worldMatrix.SetRow3(Float4(Float3(Raw[2]), 0.0f)); + worldMatrix.SetRow4(Float4(Raw[0].W, Raw[1].W, Raw[2].W, 1.0f)); + prevWorldMatrix.SetRow1(Float4(Float3(Raw[3]), 0.0f)); + prevWorldMatrix.SetRow2(Float4(Float3(Raw[4]), 0.0f)); + prevWorldMatrix.SetRow3(Float4(Float3(Raw[5]), 0.0f)); + prevWorldMatrix.SetRow4(Float4(Raw[3].W, Raw[4].W, Raw[5].W, 1.0f)); + geometrySize = Float3(Raw[6]); + perInstanceRandom = Raw[6].W; + worldDeterminantSign = Raw[7].X; + lodDitherFactor = Raw[7].Y; + Float2 lightmapUVsAreaPackedAliased(Raw[7].Z, Raw[7].W); + Half4 lightmapUVsAreaPacked(*(Half4*)&lightmapUVsAreaPackedAliased); + *(Float4*)&lightmapUVsArea = lightmapUVsAreaPacked.ToFloat4(); +} + bool RenderLightData::CanRenderShadow(const RenderView& view) const { bool result = false; @@ -406,7 +442,8 @@ RenderList::RenderList(const SpawnParams& params) , AtmosphericFog(nullptr) , Fog(nullptr) , Blendable(32) - , _instanceBuffer(1024 * sizeof(InstanceData), sizeof(InstanceData), TEXT("Instance Buffer")) + , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Bufffer")) + , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer")) { } @@ -439,6 +476,7 @@ void RenderList::Clear() Settings = PostProcessSettings(); Blendable.Clear(); _instanceBuffer.Clear(); + ObjectBuffer.Clear(); } struct PackedSortKey @@ -480,18 +518,6 @@ FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall.SortKey = key.Data; } -FORCE_INLINE bool CanBatchDrawCalls(const DrawCall& a, const DrawCall& b, DrawPass pass) -{ - IMaterial::InstancingHandler handlerA, handlerB; - return a.Material->CanUseInstancing(handlerA) && - b.Material->CanUseInstancing(handlerB) && - a.InstanceCount != 0 && - b.InstanceCount != 0 && - handlerA.CanBatch == handlerB.CanBatch && - handlerA.CanBatch(a, b, pass) && - a.WorldDeterminantSign * 
b.WorldDeterminantSign > 0; -} - void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals, int8 sortOrder) { #if ENABLE_ASSERTION_LOW_LAYERS @@ -586,9 +612,32 @@ void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawP } } +void RenderList::BuildObjectsBuffer() { + int32 count = DrawCalls.Count(); + for (const auto& e : BatchedDrawCalls) + count += e.Instances.Count(); + ObjectBuffer.Clear(); + if (count == 0) + return; + PROFILE_CPU(); + ObjectBuffer.Data.Resize(count * sizeof(ShaderObjectData)); + auto* src = (const DrawCall*)DrawCalls.Get(); + auto* dst = (ShaderObjectData*)ObjectBuffer.Data.Get(); + for (int32 i = 0; i < DrawCalls.Count(); i++) { + dst->Store(src[i]); + dst++; } + int32 startIndex = DrawCalls.Count(); + for (auto& batch : BatchedDrawCalls) + { + batch.ObjectsStartIndex = startIndex; + Platform::MemoryCopy(dst, batch.Instances.Get(), batch.Instances.Count() * sizeof(ShaderObjectData)); + dst += batch.Instances.Count(); + startIndex += batch.Instances.Count(); + } + ZoneValue(ObjectBuffer.Data.Count() / 1024); // Objects Buffer size in kB } void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawPass pass, bool stable) @@ -642,15 +691,24 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD const DrawCall& drawCall = drawCallsData[listData[i]]; int32 batchSize = 1; int32 instanceCount = drawCall.InstanceCount; - - // Check the following draw calls sequence to merge them - for (int32 j = i + 1; j < listSize; j++) + IMaterial::InstancingHandler drawCallHandler, otherHandler; + if (instanceCount != 0 && drawCall.Material->CanUseInstancing(drawCallHandler)) { - const DrawCall& other = drawCallsData[listData[j]]; - if (!CanBatchDrawCalls(drawCall, other, pass)) - break; - batchSize++; - instanceCount += 
other.InstanceCount; + // Check the following draw calls sequence to merge them + for (int32 j = i + 1; j < listSize; j++) + { + const DrawCall& other = drawCallsData[listData[j]]; + const bool canBatch = + other.Material->CanUseInstancing(otherHandler) && + other.InstanceCount != 0 && + drawCallHandler.CanBatch == otherHandler.CanBatch && + drawCallHandler.CanBatch(drawCall, other, pass) && + drawCall.WorldDeterminantSign * other.WorldDeterminantSign > 0; + if (!canBatch) + break; + batchSize++; + instanceCount += other.InstanceCount; + } } DrawBatch batch; @@ -677,72 +735,86 @@ FORCE_INLINE bool CanUseInstancing(DrawPass pass) return pass == DrawPass::GBuffer || pass == DrawPass::Depth; } -void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer& drawCalls, GPUTextureView* input) +FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b) +{ + return a->Geometry.IndexBuffer == b->Geometry.IndexBuffer && + a->Draw.IndicesCount == b->Draw.IndicesCount && + a->Draw.StartIndex == b->Draw.StartIndex && + Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets)) == 0; +} + +void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input) { if (list.IsEmpty()) return; PROFILE_GPU_CPU("Drawing"); - const auto* drawCallsData = drawCalls.Get(); + const auto* drawCallsData = drawCallsList->DrawCalls.Get(); const auto* listData = list.Indices.Get(); const auto* batchesData = list.Batches.Get(); const auto context = GPUDevice::Instance->GetMainContext(); bool useInstancing = list.CanUseInstancing && CanUseInstancing(renderContext.View.Pass) && GPUDevice::Instance->Limits.HasInstancing; TaaJitterRemoveContext taaJitterRemove(renderContext.View); + // Lazy-init objects buffer (if user didn't do it) + if 
(drawCallsList->ObjectBuffer.Data.IsEmpty()) + { + drawCallsList->BuildObjectsBuffer(); + drawCallsList->ObjectBuffer.Flush(context); + } + // Clear SR slots to prevent any resources binding issues (leftovers from the previous passes) context->ResetSR(); // Prepare instance buffer if (useInstancing) { - int32 instancedBatchesCount = 0; + // Estimate the maximum amount of elements for all instanced draws + int32 instancesCount = 0; for (int32 i = 0; i < list.Batches.Count(); i++) { - auto& batch = batchesData[i]; + const DrawBatch& batch = batchesData[i]; if (batch.BatchSize > 1) - instancedBatchesCount += batch.BatchSize; + instancesCount += batch.BatchSize; } for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++) { - auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; - if (batch.Instances.Count() > 1) - instancedBatchesCount += batch.Instances.Count(); + const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; + instancesCount += batch.Instances.Count(); } - if (instancedBatchesCount != 0) + if (instancesCount != 0) { PROFILE_CPU_NAMED("Build Instancing"); _instanceBuffer.Clear(); - _instanceBuffer.Data.Resize(instancedBatchesCount * sizeof(InstanceData)); - auto instanceData = (InstanceData*)_instanceBuffer.Data.Get(); + _instanceBuffer.Data.Resize(instancesCount * sizeof(ShaderObjectDrawInstanceData)); + auto instanceData = (ShaderObjectDrawInstanceData*)_instanceBuffer.Data.Get(); // Write to instance buffer for (int32 i = 0; i < list.Batches.Count(); i++) { - auto& batch = batchesData[i]; + const DrawBatch& batch = batchesData[i]; if (batch.BatchSize > 1) { - IMaterial::InstancingHandler handler; - drawCallsData[listData[batch.StartIndex]].Material->CanUseInstancing(handler); for (int32 j = 0; j < batch.BatchSize; j++) { - auto& drawCall = drawCallsData[listData[batch.StartIndex + j]]; - handler.WriteDrawCall(instanceData, drawCall); + instanceData->ObjectIndex = listData[batch.StartIndex + j]; 
instanceData++; } } } for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++) { - auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; - if (batch.Instances.Count() > 1) + const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; + for (int32 j = 0; j < batch.Instances.Count(); j++) { - Platform::MemoryCopy(instanceData, batch.Instances.Get(), batch.Instances.Count() * sizeof(InstanceData)); - instanceData += batch.Instances.Count(); + instanceData->ObjectIndex = batch.ObjectsStartIndex + j; + instanceData++; } } + ASSERT((byte*)instanceData == _instanceBuffer.Data.end()); // Upload data _instanceBuffer.Flush(context); + ZoneValue(instancesCount); } else { @@ -752,132 +824,122 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL } // Execute draw calls - int32 draws = list.Batches.Count(); + int32 materialBinds = list.Batches.Count(); MaterialBase::BindParameters bindParams(context, renderContext); + bindParams.ObjectBuffer = drawCallsList->ObjectBuffer.GetBuffer()->View(); bindParams.Input = input; bindParams.BindViewData(); + MaterialShaderDataPerDraw perDraw; + perDraw.DrawPadding = Float3::Zero; + GPUConstantBuffer* perDrawCB = IMaterial::BindParameters::PerDrawConstants; + context->BindCB(2, perDrawCB); // TODO: use rootSignature/pushConstants on D3D12/Vulkan + constexpr int32 vbMax = ARRAY_COUNT(DrawCall::Geometry.VertexBuffers); if (useInstancing) { + GPUBuffer* vb[vbMax + 1]; + uint32 vbOffsets[vbMax + 1]; + vb[3] = _instanceBuffer.GetBuffer(); // Pass object index in a vertex stream at slot 3 (used by VS in Surface.shader) + vbOffsets[3] = 0; int32 instanceBufferOffset = 0; - GPUBuffer* vb[4]; - uint32 vbOffsets[4]; for (int32 i = 0; i < list.Batches.Count(); i++) { - auto& batch = batchesData[i]; - const DrawCall& drawCall = drawCallsData[listData[batch.StartIndex]]; + const DrawBatch& batch = batchesData[i]; + uint32 drawCallIndex = listData[batch.StartIndex]; + const 
DrawCall& drawCall = drawCallsData[drawCallIndex]; - int32 vbCount = 0; - while (vbCount < ARRAY_COUNT(drawCall.Geometry.VertexBuffers) && drawCall.Geometry.VertexBuffers[vbCount]) + bindParams.Instanced = batch.BatchSize != 1; + bindParams.DrawCall = &drawCall; + bindParams.DrawCall->Material->Bind(bindParams); + + if (bindParams.Instanced) { - vb[vbCount] = drawCall.Geometry.VertexBuffers[vbCount]; - vbOffsets[vbCount] = drawCall.Geometry.VertexBuffersOffsets[vbCount]; - vbCount++; - } - for (int32 j = vbCount; j < ARRAY_COUNT(drawCall.Geometry.VertexBuffers); j++) - { - vb[vbCount] = nullptr; - vbOffsets[vbCount] = 0; - } + // One or more draw calls per batch + const DrawCall* activeDraw = &drawCall; + int32 activeCount = 1; + for (int32 j = 1; j <= batch.BatchSize; j++) + { + if (j != batch.BatchSize && DrawsEqual(activeDraw, drawCallsData + listData[batch.StartIndex + j])) + { + // Group two draw calls into active draw call + activeCount++; + continue; + } - bindParams.FirstDrawCall = &drawCall; - bindParams.DrawCallsCount = batch.BatchSize; - drawCall.Material->Bind(bindParams); + // Draw whole active draw (instanced) + Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); + Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); + context->BindIB(activeDraw->Geometry.IndexBuffer); + context->BindVB(ToSpan(vb, ARRAY_COUNT(vb)), vbOffsets); + context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, activeDraw->Draw.StartIndex); + instanceBufferOffset += activeCount; - context->BindIB(drawCall.Geometry.IndexBuffer); - - if (drawCall.InstanceCount == 0) - { - // No support for batching indirect draw calls - ASSERT_LOW_LAYER(batch.BatchSize == 1); - - context->BindVB(ToSpan(vb, vbCount), vbOffsets); - context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, 
drawCall.Draw.IndirectArgsOffset); + // Reset active draw + activeDraw = drawCallsData + listData[batch.StartIndex + j]; + activeCount = 1; + } } else { - if (batch.BatchSize == 1) + // Pass object index in constant buffer + perDraw.DrawObjectIndex = drawCallIndex; + context->UpdateCB(perDrawCB, &perDraw); + + // Single-draw call batch + context->BindIB(drawCall.Geometry.IndexBuffer); + context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + if (drawCall.InstanceCount == 0) { - context->BindVB(ToSpan(vb, vbCount), vbOffsets); - context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.InstanceCount, 0, 0, drawCall.Draw.StartIndex); + context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset); } else { - vbCount = 3; - vb[vbCount] = _instanceBuffer.GetBuffer(); - vbOffsets[vbCount] = 0; - vbCount++; - context->BindVB(ToSpan(vb, vbCount), vbOffsets); - context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.InstanceCount, instanceBufferOffset, 0, drawCall.Draw.StartIndex); - instanceBufferOffset += batch.BatchSize; + context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.InstanceCount, 0, 0, drawCall.Draw.StartIndex); } } } for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++) { - auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; - auto& drawCall = batch.DrawCall; + const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; + const DrawCall& drawCall = batch.DrawCall; - int32 vbCount = 0; - while (vbCount < ARRAY_COUNT(drawCall.Geometry.VertexBuffers) && drawCall.Geometry.VertexBuffers[vbCount]) - { - vb[vbCount] = drawCall.Geometry.VertexBuffers[vbCount]; - vbOffsets[vbCount] = drawCall.Geometry.VertexBuffersOffsets[vbCount]; - vbCount++; - } - for (int32 j = vbCount; j < ARRAY_COUNT(drawCall.Geometry.VertexBuffers); j++) - { - vb[vbCount] = nullptr; - vbOffsets[vbCount] = 0; - } - - 
bindParams.FirstDrawCall = &drawCall; - bindParams.DrawCallsCount = batch.Instances.Count(); - drawCall.Material->Bind(bindParams); + bindParams.Instanced = true; + bindParams.DrawCall = &drawCall; + bindParams.DrawCall->Material->Bind(bindParams); + Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); + Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); context->BindIB(drawCall.Geometry.IndexBuffer); + context->BindVB(ToSpan(vb, vbMax + 1), vbOffsets); if (drawCall.InstanceCount == 0) { - ASSERT_LOW_LAYER(batch.Instances.Count() == 1); - context->BindVB(ToSpan(vb, vbCount), vbOffsets); context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset); } else { - if (batch.Instances.Count() == 1) - { - context->BindVB(ToSpan(vb, vbCount), vbOffsets); - context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.Instances.Count(), 0, 0, drawCall.Draw.StartIndex); - } - else - { - vbCount = 3; - vb[vbCount] = _instanceBuffer.GetBuffer(); - vbOffsets[vbCount] = 0; - vbCount++; - context->BindVB(ToSpan(vb, vbCount), vbOffsets); - context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.Instances.Count(), instanceBufferOffset, 0, drawCall.Draw.StartIndex); - instanceBufferOffset += batch.Instances.Count(); - } + context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.Instances.Count(), instanceBufferOffset, 0, drawCall.Draw.StartIndex); + instanceBufferOffset += batch.Instances.Count(); } } - draws += list.PreBatchedDrawCalls.Count(); + materialBinds += list.PreBatchedDrawCalls.Count(); } else { - bindParams.DrawCallsCount = 1; for (int32 i = 0; i < list.Batches.Count(); i++) { - auto& batch = batchesData[i]; + const DrawBatch& batch = batchesData[i]; + + bindParams.DrawCall = drawCallsData + listData[batch.StartIndex]; + bindParams.DrawCall->Material->Bind(bindParams); for (int32 j = 0; j < 
batch.BatchSize; j++) { - const DrawCall& drawCall = drawCalls[listData[batch.StartIndex + j]]; - bindParams.FirstDrawCall = &drawCall; - drawCall.Material->Bind(bindParams); + perDraw.DrawObjectIndex = listData[batch.StartIndex + j]; + context->UpdateCB(perDrawCB, &perDraw); + const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex]; context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { @@ -891,43 +953,38 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL } for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++) { - auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; - auto drawCall = batch.DrawCall; - drawCall.ObjectRadius = 0.0f; - bindParams.FirstDrawCall = &drawCall; - const auto* instancesData = batch.Instances.Get(); + const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]]; + const DrawCall& drawCall = batch.DrawCall; + + bindParams.DrawCall = &drawCall; + bindParams.DrawCall->Material->Bind(bindParams); + + context->BindIB(drawCall.Geometry.IndexBuffer); + context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); for (int32 j = 0; j < batch.Instances.Count(); j++) { - auto& instance = instancesData[j]; - drawCall.ObjectPosition = instance.InstanceOrigin; - drawCall.PerInstanceRandom = instance.PerInstanceRandom; - auto lightmapArea = instance.InstanceLightmapArea.ToFloat4(); - drawCall.Surface.LightmapUVsArea = *(Rectangle*)&lightmapArea; - drawCall.Surface.LODDitherFactor = instance.LODDitherFactor; - drawCall.World.SetRow1(Float4(instance.InstanceTransform1, 0.0f)); - drawCall.World.SetRow2(Float4(instance.InstanceTransform2, 0.0f)); - 
drawCall.World.SetRow3(Float4(instance.InstanceTransform3, 0.0f)); - drawCall.World.SetRow4(Float4(instance.InstanceOrigin, 1.0f)); - drawCall.Material->Bind(bindParams); + perDraw.DrawObjectIndex = batch.ObjectsStartIndex + j; + context->UpdateCB(perDrawCB, &perDraw); - context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3), drawCall.Geometry.VertexBuffersOffsets); context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex); } - draws += batch.Instances.Count(); } + materialBinds += list.PreBatchedDrawCalls.Count(); if (list.Batches.IsEmpty() && list.Indices.Count() != 0) { - // Draw calls list has nto been batched so execute draw calls separately + // Draw calls list has not been batched so execute draw calls separately for (int32 j = 0; j < list.Indices.Count(); j++) { - const DrawCall& drawCall = drawCalls[listData[j]]; - bindParams.FirstDrawCall = &drawCall; + perDraw.DrawObjectIndex = listData[j]; + context->UpdateCB(perDrawCB, &perDraw); + + const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex]; + bindParams.DrawCall = &drawCall; drawCall.Material->Bind(bindParams); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { @@ -938,10 +995,10 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex); } } - draws += list.Indices.Count(); + materialBinds += list.Indices.Count(); } } - ZoneValue(draws); + ZoneValue(materialBinds); // Material shaders bindings count } void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey) @@ -971,14 +1028,3 @@ bool
SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b, Draw } return false; } - -void SurfaceDrawCallHandler::WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall) -{ - instanceData->InstanceOrigin = Float3(drawCall.World.M41, drawCall.World.M42, drawCall.World.M43); - instanceData->PerInstanceRandom = drawCall.PerInstanceRandom; - instanceData->InstanceTransform1 = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13); - instanceData->LODDitherFactor = drawCall.Surface.LODDitherFactor; - instanceData->InstanceTransform2 = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23); - instanceData->InstanceTransform3 = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33); - instanceData->InstanceLightmapArea = Half4(drawCall.Surface.LightmapUVsArea); -} diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 5b8c66647..aa484fceb 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -239,7 +239,8 @@ struct DrawBatch struct BatchedDrawCall { DrawCall DrawCall; - Array Instances; + uint16 ObjectsStartIndex = 0; // Index of the instances start in the ObjectsBuffer (set internally). + Array Instances; }; /// @@ -413,6 +414,11 @@ public: /// Float3 FrustumCornersVs[8]; + /// + /// Objects buffer that contains ShaderObjectData for each DrawCall. + /// + DynamicTypedBuffer ObjectBuffer; + private: DynamicVertexBuffer _instanceBuffer; @@ -517,6 +523,11 @@ public: /// Object sorting key. void AddDrawCall(const RenderContextBatch& renderContextBatch, DrawPass drawModes, StaticFlags staticFlags, ShadowsCastingMode shadowsMode, const BoundingSphere& bounds, DrawCall& drawCall, bool receivesDecals = true, int8 sortOrder = 0); + /// + /// Writes all draw calls into large objects buffer (used for random-access object data access on a GPU). Can be executed in async. 
+ /// + void BuildObjectsBuffer(); + /// /// Sorts the collected draw calls list. /// @@ -549,7 +560,7 @@ public: /// The input scene color. It's optional and used in forward/postFx rendering. API_FUNCTION() FORCE_INLINE void ExecuteDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, DrawCallsListType listType, GPUTextureView* input = nullptr) { - ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], DrawCalls, input); + ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], this, input); } /// @@ -560,7 +571,7 @@ public: /// The input scene color. It's optional and used in forward/postFx rendering. FORCE_INLINE void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, GPUTextureView* input = nullptr) { - ExecuteDrawCalls(renderContext, list, DrawCalls, input); + ExecuteDrawCalls(renderContext, list, this, input); } /// @@ -568,28 +579,43 @@ public: /// /// The rendering context. /// The collected draw calls indices list. - /// The collected draw calls list. + /// The collected draw calls list owner. /// The input scene color. It's optional and used in forward/postFx rendering. - void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer& drawCalls, GPUTextureView* input); + void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input); }; /// -/// Represents data per instance element used for instanced rendering. +/// Represents a single object information for GPU rendering. 
/// -PACK_STRUCT(struct FLAXENGINE_API InstanceData +GPU_CB_STRUCT(ShaderObjectData { - Float3 InstanceOrigin; - float PerInstanceRandom; - Float3 InstanceTransform1; - float LODDitherFactor; - Float3 InstanceTransform2; - Float3 InstanceTransform3; - Half4 InstanceLightmapArea; + Float4 Raw[8]; + + void FLAXENGINE_API Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom = 0.0f, float worldDeterminantSign = 1.0f, float lodDitherFactor = 0.0f); + void FLAXENGINE_API Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Rectangle& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, float& lodDitherFactor) const; + + FORCE_INLINE void Store(const DrawCall& drawCall) + { + Store(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, drawCall.WorldDeterminantSign, drawCall.Surface.LODDitherFactor); + } + + FORCE_INLINE void Load(DrawCall& drawCall) const + { + Load(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, drawCall.WorldDeterminantSign, drawCall.Surface.LODDitherFactor); + drawCall.ObjectPosition = drawCall.World.GetTranslation(); + } + }); + +/// +/// Represents data passed to Vertex Shader used for instanced rendering (per-instance element). 
+/// +PACK_STRUCT(struct ShaderObjectDrawInstanceData + { + uint32 ObjectIndex; }); struct SurfaceDrawCallHandler { static void GetHash(const DrawCall& drawCall, uint32& batchKey); static bool CanBatch(const DrawCall& a, const DrawCall& b, DrawPass pass); - static void WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall); }; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index d6fbb17cc..d4e5b2590 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -428,6 +428,9 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Sort draw calls { PROFILE_CPU_NAMED("Sort Draw Calls"); + // TODO: run all of these functions in async via jobs + for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) + renderContextBatch.Contexts[i].List->BuildObjectsBuffer(); renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBuffer); renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBufferNoDecals); renderContext.List->SortDrawCalls(renderContext, true, DrawCallsListType::Forward); @@ -440,6 +443,11 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth); } + { + PROFILE_CPU_NAMED("FlushObjectsBuffer"); + for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) + renderContextBatch.Contexts[i].List->ObjectBuffer.Flush(context); + } } // Get the light accumulation buffer diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 15ac7a097..382c0be55 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -1390,7 +1390,7 @@ void 
ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) if (!shadowContextStatic.List->DrawCallsLists[(int32)DrawCallsListType::Depth].IsEmpty() || !shadowContextStatic.List->ShadowDepthDrawCallsList.IsEmpty()) { shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, DrawCallsListType::Depth); - shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List, nullptr); tile.HasStaticGeometry = true; } } @@ -1452,7 +1452,7 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) // Draw objects depth auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth); - shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List, nullptr); if (atlasLight.HasStaticShadowContext) { auto& shadowContextStatic = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++]; @@ -1462,7 +1462,7 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) { // Draw static objects directly to the shadow map shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, DrawCallsListType::Depth); - shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr); + shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List, nullptr); } tile.HasStaticGeometry = true; } diff --git 
a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index 5a6fe1d02..2b9a4b441 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -408,7 +408,6 @@ void VolumetricFogPass::Render(RenderContext& renderContext) InitCircleBuffer(); MaterialBase::BindParameters bindParams(context, renderContext); - bindParams.DrawCallsCount = 1; CustomData customData; customData.Shader = _shader->GetShader(); customData.GridSize = cache.GridSize; @@ -435,7 +434,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) // Setup material shader data customData.ParticleIndex = drawCall.Particle.VolumetricFog.ParticleIndex; - bindParams.FirstDrawCall = &drawCall; + bindParams.DrawCall = &drawCall; drawCall.Material->Bind(bindParams); // Setup volumetric shader data diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp index 8cacddf51..02b5ded87 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp @@ -461,7 +461,7 @@ bool MaterialGenerator::Generate(WriteStream& source, MaterialInfo& materialInfo switch (baseLayer->Domain) { case MaterialDomain::Surface: - srv = 2; // Skinning Bones + Prev Bones + srv = 3; // Objects + Skinning Bones + Prev Bones break; case MaterialDomain::Decal: srv = 1; // Depth buffer diff --git a/Source/Shaders/MaterialCommon.hlsl b/Source/Shaders/MaterialCommon.hlsl index 0660e5cf9..04e3a037e 100644 --- a/Source/Shaders/MaterialCommon.hlsl +++ b/Source/Shaders/MaterialCommon.hlsl @@ -58,6 +58,9 @@ #ifndef USE_PER_VIEW_CONSTANTS #define USE_PER_VIEW_CONSTANTS 0 #endif +#ifndef USE_PER_DRAW_CONSTANTS +#define USE_PER_DRAW_CONSTANTS 0 +#endif #ifndef MATERIAL_TESSELLATION #define MATERIAL_TESSELLATION MATERIAL_TESSELLATION_NONE #endif @@ -68,6 +71,65 @@ #define PER_BONE_MOTION_BLUR 0 #endif +// 
Object properties +struct ObjectData +{ + float4x4 WorldMatrix; + float4x4 PrevWorldMatrix; + float3 GeometrySize; + float WorldDeterminantSign; + float LODDitherFactor; + float PerInstanceRandom; + float4 LightmapArea; +}; + +float2 UnpackHalf2(uint xy) +{ + return float2(f16tof32(xy & 0xffff), f16tof32(xy >> 16)); +} + +// Loads the object data from the global buffer +ObjectData LoadObject(Buffer objectsBuffer, uint objectIndex) +{ + // This must match ShaderObjectData::Store + objectIndex *= 8; + ObjectData object = (ObjectData)0; + float4 vector0 = objectsBuffer.Load(objectIndex + 0); + float4 vector1 = objectsBuffer.Load(objectIndex + 1); + float4 vector2 = objectsBuffer.Load(objectIndex + 2); + float4 vector3 = objectsBuffer.Load(objectIndex + 3); + float4 vector4 = objectsBuffer.Load(objectIndex + 4); + float4 vector5 = objectsBuffer.Load(objectIndex + 5); + float4 vector6 = objectsBuffer.Load(objectIndex + 6); + float4 vector7 = objectsBuffer.Load(objectIndex + 7); + object.WorldMatrix[0] = float4(vector0.xyz, 0.0f); + object.WorldMatrix[1] = float4(vector1.xyz, 0.0f); + object.WorldMatrix[2] = float4(vector2.xyz, 0.0f); + object.WorldMatrix[3] = float4(vector0.w, vector1.w, vector2.w, 1.0f); + object.PrevWorldMatrix[0] = float4(vector3.xyz, 0.0f); + object.PrevWorldMatrix[1] = float4(vector4.xyz, 0.0f); + object.PrevWorldMatrix[2] = float4(vector5.xyz, 0.0f); + object.PrevWorldMatrix[3] = float4(vector3.w, vector4.w, vector5.w, 1.0f); + object.GeometrySize = vector6.xyz; + object.PerInstanceRandom = vector6.w; + object.WorldDeterminantSign = vector7.x; + object.LODDitherFactor = vector7.y; + object.LightmapArea.xy = UnpackHalf2(asuint(vector7.z)); + object.LightmapArea.zw = UnpackHalf2(asuint(vector7.w)); + return object; +} + +// Loads the object data from the constant buffer into the variable +#define LoadObjectFromCB(var) \ + var = (ObjectData)0; \ + var.WorldMatrix = ToMatrix4x4(WorldMatrix); \ + var.PrevWorldMatrix = ToMatrix4x4(PrevWorldMatrix); \ + 
var.GeometrySize = GeometrySize; \ + var.PerInstanceRandom = PerInstanceRandom; \ + var.WorldDeterminantSign = WorldDeterminantSign; \ + var.LODDitherFactor = LODDitherFactor; \ + var.LightmapArea = LightmapArea; + // Material properties struct Material { @@ -110,6 +172,15 @@ cbuffer ViewData : register(b1) }; #endif +// Draw pipeline constant buffer (with per-draw constants at slot 2) +#if USE_PER_DRAW_CONSTANTS +cbuffer DrawData : register(b2) +{ + float3 DrawPadding; + uint DrawObjectIndex; +}; +#endif + struct ModelInput { float3 Position : POSITION; @@ -121,11 +192,7 @@ struct ModelInput half4 Color : COLOR; #endif #if USE_INSTANCING - float4 InstanceOrigin : ATTRIBUTE0; // .w contains PerInstanceRandom - float4 InstanceTransform1 : ATTRIBUTE1; // .w contains LODDitherFactor - float3 InstanceTransform2 : ATTRIBUTE2; - float3 InstanceTransform3 : ATTRIBUTE3; - half4 InstanceLightmapArea : ATTRIBUTE4; + uint ObjectIndex : ATTRIBUTE0; #endif }; @@ -133,11 +200,7 @@ struct ModelInput_PosOnly { float3 Position : POSITION; #if USE_INSTANCING - float4 InstanceOrigin : ATTRIBUTE0; // .w contains PerInstanceRandom - float4 InstanceTransform1 : ATTRIBUTE1; // .w contains LODDitherFactor - float3 InstanceTransform2 : ATTRIBUTE2; - float3 InstanceTransform3 : ATTRIBUTE3; - half4 InstanceLightmapArea : ATTRIBUTE4; + uint ObjectIndex : ATTRIBUTE0; #endif }; From ef129a31353bdaa60202707b9840378142ec5c8d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 00:53:03 +0200 Subject: [PATCH 196/292] Add frustum culling for editor preview camera mesh --- Source/Engine/Level/Actors/Camera.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Engine/Level/Actors/Camera.cpp b/Source/Engine/Level/Actors/Camera.cpp index b2affbc2a..675d57664 100644 --- a/Source/Engine/Level/Actors/Camera.cpp +++ b/Source/Engine/Level/Actors/Camera.cpp @@ -320,6 +320,7 @@ bool Camera::HasContentLoaded() const void Camera::Draw(RenderContext& renderContext) { if 
(EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::EditorSprites) + && renderContext.View.CullingFrustum.Intersects(_previewModelBox) && _previewModel && _previewModel->IsLoaded()) { From fbc648302dbc4b761f24ae6b4fabcf5f3be14088 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 00:53:45 +0200 Subject: [PATCH 197/292] Fix timer queries reset before use on Vulkan --- Source/Engine/GraphicsDevice/Vulkan/Config.h | 2 +- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 18 +++++----- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.h | 5 ++- .../Vulkan/GPUTimerQueryVulkan.cpp | 33 +++++++------------ 4 files changed, 25 insertions(+), 33 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/Config.h b/Source/Engine/GraphicsDevice/Vulkan/Config.h index 16a19030e..e3cf01dba 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/Config.h +++ b/Source/Engine/GraphicsDevice/Vulkan/Config.h @@ -28,7 +28,7 @@ #define VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT 20 #define VULKAN_ENABLE_API_DUMP 0 -#define VULKAN_RESET_QUERY_POOLS 0 +#define VULKAN_RESET_QUERY_POOLS 1 #define VULKAN_HASH_POOLS_WITH_LAYOUT_TYPES 1 #define VULKAN_USE_DEBUG_LAYER GPU_ENABLE_DIAGNOSTICS #define VULKAN_USE_DEBUG_DATA (GPU_ENABLE_DIAGNOSTICS && COMPILE_WITH_DEV_ENV) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index febeb22a1..9b6d4ba2c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -594,8 +594,6 @@ RenderPassVulkan::~RenderPassVulkan() QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type) : _device(device) , _handle(VK_NULL_HANDLE) - , _count(0) - , _capacity(capacity) , _type(type) { VkQueryPoolCreateInfo createInfo; @@ -603,9 +601,11 @@ QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQuer createInfo.queryType = type; createInfo.queryCount = capacity; 
VALIDATE_VULKAN_RESULT(vkCreateQueryPool(device->Device, &createInfo, nullptr, &_handle)); + #if VULKAN_RESET_QUERY_POOLS + // New queries have to be reset before use + ResetBeforeUse = true; _resetRanges.Add(Range{ 0, static_cast(capacity) }); - device->QueriesToReset.Add(this); #endif } @@ -626,6 +626,7 @@ void QueryPoolVulkan::Reset(CmdBufferVulkan* cmdBuffer) vkCmdResetQueryPool(cmdBuffer->GetHandle(), _handle, range.Start, range.Count); } _resetRanges.Clear(); + ResetBeforeUse = false; } #endif @@ -640,9 +641,9 @@ BufferedQueryPoolVulkan::BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 _readResultsBits.AddZeroed((capacity + 63) / 64); } -bool BufferedQueryPoolVulkan::AcquireQuery(uint32& resultIndex) +bool BufferedQueryPoolVulkan::AcquireQuery(CmdBufferVulkan* cmdBuffer, uint32& resultIndex) { - const uint64 allUsedMask = (uint64)-1; + const uint64 allUsedMask = MAX_uint64; for (int32 wordIndex = _lastBeginIndex / 64; wordIndex < _usedQueryBits.Count(); wordIndex++) { uint64 beginQueryWord = _usedQueryBits[wordIndex]; @@ -659,10 +660,11 @@ bool BufferedQueryPoolVulkan::AcquireQuery(uint32& resultIndex) _usedQueryBits[wordIndex] = _usedQueryBits[wordIndex] | bit; _readResultsBits[wordIndex] &= ~bit; _lastBeginIndex = resultIndex + 1; + if (ResetBeforeUse) + Reset(cmdBuffer); return true; } } - return false; } @@ -675,7 +677,7 @@ void BufferedQueryPoolVulkan::ReleaseQuery(uint32 queryIndex) if (queryIndex < (uint32)_lastBeginIndex) { // Use the lowest word available - const uint64 allUsedMask = (uint64)-1; + const uint64 allUsedMask = MAX_uint64; const int32 lastQueryWord = _lastBeginIndex / 64; if (lastQueryWord < _usedQueryBits.Count() && _usedQueryBits[lastQueryWord] == allUsedMask) { @@ -736,7 +738,7 @@ bool BufferedQueryPoolVulkan::GetResults(GPUContextVulkan* context, uint32 index bool BufferedQueryPoolVulkan::HasRoom() const { - const uint64 allUsedMask = (uint64)-1; + const uint64 allUsedMask = MAX_uint64; if (_lastBeginIndex < 
_usedQueryBits.Count() * 64) { ASSERT((_usedQueryBits[_lastBeginIndex / 64] & allUsedMask) != allUsedMask); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h index 054c677a0..d31149d20 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h @@ -259,8 +259,6 @@ protected: GPUDeviceVulkan* _device; VkQueryPool _handle; - volatile int32 _count; - const uint32 _capacity; const VkQueryType _type; #if VULKAN_RESET_QUERY_POOLS Array _resetRanges; @@ -277,6 +275,7 @@ public: } #if VULKAN_RESET_QUERY_POOLS + bool ResetBeforeUse; void Reset(CmdBufferVulkan* cmdBuffer); #endif }; @@ -294,7 +293,7 @@ private: public: BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type); - bool AcquireQuery(uint32& resultIndex); + bool AcquireQuery(CmdBufferVulkan* cmdBuffer, uint32& resultIndex); void ReleaseQuery(uint32 queryIndex); void MarkQueryAsStarted(uint32 queryIndex); bool GetResults(GPUContextVulkan* context, uint32 index, uint64& result); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp index 72107e84a..aee221183 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp @@ -60,13 +60,18 @@ void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& quer { auto pool = _device->FindAvailableTimestampQueryPool(); uint32 index; - pool->AcquireQuery(index); - - vkCmdWriteTimestamp(cmdBuffer->GetHandle(), stage, pool->GetHandle(), index); - pool->MarkQueryAsStarted(index); - - query.Pool = pool; - query.Index = index; + if (pool->AcquireQuery(cmdBuffer, index)) + { + vkCmdWriteTimestamp(cmdBuffer->GetHandle(), stage, pool->GetHandle(), index); + pool->MarkQueryAsStarted(index); + query.Pool = pool; + query.Index = index; + } + else + { + 
query.Pool = nullptr; + query.Index = 0; + } } bool GPUTimerQueryVulkan::TryGetResult() @@ -104,16 +109,10 @@ bool GPUTimerQueryVulkan::TryGetResult() for (int32 i = 0; i < _queries.Count(); i++) { auto& e = _queries[i]; - if (e.Begin.Pool) - { e.Begin.Pool->ReleaseQuery(e.Begin.Index); - } - if (e.End.Pool) - { e.End.Pool->ReleaseQuery(e.End.Index); - } } _queries.Clear(); #else @@ -141,16 +140,10 @@ void GPUTimerQueryVulkan::OnReleaseGPU() for (int32 i = 0; i < _queries.Count(); i++) { auto& e = _queries[i]; - if (e.Begin.Pool) - { e.Begin.Pool->ReleaseQuery(e.Begin.Index); - } - if (e.End.Pool) - { e.End.Pool->ReleaseQuery(e.End.Index); - } } _queries.Clear(); } @@ -208,7 +201,6 @@ bool GPUTimerQueryVulkan::HasResult() return false; if (_hasResult) return true; - return TryGetResult(); } @@ -216,7 +208,6 @@ float GPUTimerQueryVulkan::GetResult() { if (_hasResult) return _timeDelta; - TryGetResult(); return _timeDelta; } From 1328e869a94d03e46d1d87f100499bb6bd4c3361 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 00:54:17 +0200 Subject: [PATCH 198/292] Fix crash in D3D12 when constant buffer was binded but not updated before the draw --- .../GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 970e55ce5..45d3e3777 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -459,8 +459,10 @@ void GPUContextDX12::flushCBs() const auto cb = _cbHandles[i]; if (cb) { - ASSERT(cb->GPUAddress != 0); - _commandList->SetGraphicsRootConstantBufferView(DX12_ROOT_SIGNATURE_CB + i, cb->GPUAddress); + if (cb->GPUAddress != 0) + _commandList->SetGraphicsRootConstantBufferView(DX12_ROOT_SIGNATURE_CB + i, cb->GPUAddress); + else + _cbGraphicsDirtyFlag = true; // CB was binded but 
not yet assigned so stay in dirty state } } } @@ -472,8 +474,10 @@ void GPUContextDX12::flushCBs() const auto cb = _cbHandles[i]; if (cb) { - ASSERT(cb->GPUAddress != 0); - _commandList->SetComputeRootConstantBufferView(DX12_ROOT_SIGNATURE_CB + i, cb->GPUAddress); + if (cb->GPUAddress != 0) + _commandList->SetComputeRootConstantBufferView(DX12_ROOT_SIGNATURE_CB + i, cb->GPUAddress); + else + _cbComputeDirtyFlag = true; // CB was binded but not yet assigned so stay in dirty state } } } From 9486466abf43e930aabf44682e3df32e139252e7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 00:54:49 +0200 Subject: [PATCH 199/292] Fix D3D11 to properly issue instanced draws even if instance count is `1` --- Source/Engine/Graphics/GPUContext.h | 2 +- .../GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index 5542e63ec..b3bd6ddcc 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -485,7 +485,7 @@ public: /// /// A value added to each index before reading a vertex from the vertex buffer. /// The vertices count. 
- API_FUNCTION() FORCE_INLINE void Draw(uint32 startVertex, uint32 verticesCount) + API_FUNCTION() FORCE_INLINE void Draw(int32 startVertex, uint32 verticesCount) { DrawInstanced(verticesCount, 1, 0, startVertex); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 5e2bc5a84..b00f2a687 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -487,20 +487,14 @@ void GPUContextDX11::ResolveMultisample(GPUTexture* sourceMultisampleTexture, GP void GPUContextDX11::DrawInstanced(uint32 verticesCount, uint32 instanceCount, int32 startInstance, int32 startVertex) { onDrawCall(); - if (instanceCount > 1) - _context->DrawInstanced(verticesCount, instanceCount, startVertex, startInstance); - else - _context->Draw(verticesCount, startVertex); + _context->DrawInstanced(verticesCount, instanceCount, startVertex, startInstance); RENDER_STAT_DRAW_CALL(verticesCount * instanceCount, verticesCount * instanceCount / 3); } void GPUContextDX11::DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) { onDrawCall(); - if (instanceCount > 1) - _context->DrawIndexedInstanced(indicesCount, instanceCount, startIndex, startVertex, startInstance); - else - _context->DrawIndexed(indicesCount, startIndex, startVertex); + _context->DrawIndexedInstanced(indicesCount, instanceCount, startIndex, startVertex, startInstance); RENDER_STAT_DRAW_CALL(0, indicesCount / 3 * instanceCount); } From fe0711c3e2ccea23660c71247ccf71575dcc29b3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 00:55:41 +0200 Subject: [PATCH 200/292] Fix D3D11 to properly flush CB/SRVs when bindings new shaders after CB/SRV was set --- .../GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git 
a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index b00f2a687..15f7d1ce1 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -583,6 +583,7 @@ void GPUContextDX11::SetState(GPUPipelineState* state) } // Per pipeline stage state caching + bool shaderEnabled = false; if (CurrentDepthStencilState != depthStencilState) { CurrentDepthStencilState = depthStencilState; @@ -600,6 +601,7 @@ void GPUContextDX11::SetState(GPUPipelineState* state) } if (CurrentVS != vs) { + shaderEnabled |= CurrentVS == nullptr; #if DX11_CLEAR_SR_ON_STAGE_DISABLE if (CurrentVS && !vs) { @@ -613,6 +615,7 @@ void GPUContextDX11::SetState(GPUPipelineState* state) #if GPU_ALLOW_TESSELLATION_SHADERS if (CurrentHS != hs) { + shaderEnabled |= CurrentHS == nullptr; #if DX11_CLEAR_SR_ON_STAGE_DISABLE if (CurrentHS && !hs) { @@ -624,6 +627,7 @@ void GPUContextDX11::SetState(GPUPipelineState* state) } if (CurrentDS != ds) { + shaderEnabled |= CurrentDS == nullptr; #if DX11_CLEAR_SR_ON_STAGE_DISABLE if (CurrentDS && !ds) { @@ -637,6 +641,7 @@ void GPUContextDX11::SetState(GPUPipelineState* state) #if GPU_ALLOW_GEOMETRY_SHADERS if (CurrentGS != gs) { + shaderEnabled |= CurrentGS == nullptr; #if DX11_CLEAR_SR_ON_STAGE_DISABLE if (CurrentGS && !gs) { @@ -649,6 +654,7 @@ void GPUContextDX11::SetState(GPUPipelineState* state) #endif if (CurrentPS != ps) { + shaderEnabled |= CurrentPS == nullptr; #if DX11_CLEAR_SR_ON_STAGE_DISABLE if (CurrentPS && !ps) { @@ -663,6 +669,13 @@ void GPUContextDX11::SetState(GPUPipelineState* state) CurrentPrimitiveTopology = primitiveTopology; _context->IASetPrimitiveTopology(primitiveTopology); } + if (shaderEnabled) + { + // Fix bug when binding constant buffer or texture, then binding PSO with tess and the drawing (data binded before tess shader is active was missing) + // TODO: use per-shader dirty flags + 
_cbDirtyFlag = true; + _srMaskDirtyGraphics = MAX_uint32; + } RENDER_STAT_PS_STATE_CHANGE(); } From c8b5ac6c29740318c260fe5cabd35f9e6ec89b9b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 00:57:58 +0200 Subject: [PATCH 201/292] Update materials --- Content/Editor/Camera/M_Camera.flax | 4 ++-- Content/Editor/CubeTexturePreviewMaterial.flax | 4 ++-- Content/Editor/DebugMaterials/DDGIDebugProbes.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Decal.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Particle.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Surface.flax | 4 ++-- .../Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Terrain.flax | 4 ++-- Content/Editor/DefaultFontMaterial.flax | 4 ++-- Content/Editor/Gizmo/FoliageBrushMaterial.flax | 4 ++-- Content/Editor/Gizmo/Material.flax | 4 ++-- Content/Editor/Gizmo/MaterialWire.flax | 4 ++-- Content/Editor/Gizmo/SelectionOutlineMaterial.flax | 2 +- Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax | 4 ++-- Content/Editor/Highlight Material.flax | 4 ++-- Content/Editor/Icons/IconsMaterial.flax | 4 ++-- Content/Editor/IesProfilePreviewMaterial.flax | 2 +- Content/Editor/Particles/Particle Material Color.flax | 2 +- Content/Editor/Particles/Smoke Material.flax | 2 +- Content/Editor/SpriteMaterial.flax | 4 ++-- Content/Editor/Terrain/Circle Brush Material.flax | 4 ++-- Content/Editor/Terrain/Highlight Terrain Material.flax | 4 ++-- Content/Editor/TexturePreviewMaterial.flax | 2 +- Content/Editor/Wires Debug Material.flax | 4 ++-- Content/Engine/DefaultDeformableMaterial.flax | 4 ++-- Content/Engine/DefaultMaterial.flax | 4 ++-- Content/Engine/DefaultRadialMenu.flax | 2 +- Content/Engine/DefaultTerrainMaterial.flax | 4 ++-- Content/Engine/SingleColorMaterial.flax | 4 ++-- Content/Engine/SkyboxMaterial.flax | 4 ++-- 30 files changed, 52 insertions(+), 52 deletions(-) diff --git a/Content/Editor/Camera/M_Camera.flax 
b/Content/Editor/Camera/M_Camera.flax index 0411c1615..ffcd0ac50 100644 --- a/Content/Editor/Camera/M_Camera.flax +++ b/Content/Editor/Camera/M_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2a2d03b7e3bbafed896311cf6fabfe2ce301671860b33cdafc4dcd47fddfcbf -size 30521 +oid sha256:73319b1efc4f16eb9129ab8f832fc358dc25cc5ea8b8c681bbed005d46c31ed9 +size 28071 diff --git a/Content/Editor/CubeTexturePreviewMaterial.flax b/Content/Editor/CubeTexturePreviewMaterial.flax index c5cda3152..0353c5519 100644 --- a/Content/Editor/CubeTexturePreviewMaterial.flax +++ b/Content/Editor/CubeTexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80a2b852e2d85fa098e1e402669254b11430acb3ad4a235633582c6ba2ac0914 -size 32236 +oid sha256:8198710e63bdb513d71822c9e0c1ffce510e80c08ea6e30e8b27d1d7e3a657cd +size 29786 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index 68d922a96..ce98f5e3b 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c5d036a454fce8eaad8bb114f588f5566677a2a14a2091061e9c3825a18215d -size 41469 +oid sha256:ab5c59aa3f1ce5d74e6069cc03ec177c1051a9d0529d21f3b6254ad82c19763f +size 38982 diff --git a/Content/Editor/DebugMaterials/SingleColor/Decal.flax b/Content/Editor/DebugMaterials/SingleColor/Decal.flax index bff316f5f..017d89c88 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Decal.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Decal.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:603d34b26c593725c4a4d65ee06c04c1d8be6bc39bd6d0912f44430ee98452e8 +oid sha256:a353de21ffdc515b0317ae89c09dc1733623f0841496d82f2aaecc7cdd8294b0 size 7489 diff --git a/Content/Editor/DebugMaterials/SingleColor/Particle.flax 
b/Content/Editor/DebugMaterials/SingleColor/Particle.flax index e8f0bd906..727c18176 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Particle.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Particle.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73e7b3bb45947431e87ea816a7919d994ffe249628648e4c3c8f5fde2618ee84 +oid sha256:f6ab245b683eee907b7fb9d18d1418bc553c2893859235720e98dd555e35a8f4 size 31681 diff --git a/Content/Editor/DebugMaterials/SingleColor/Surface.flax b/Content/Editor/DebugMaterials/SingleColor/Surface.flax index 434843e5b..ad6a58454 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Surface.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Surface.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ece28c7bee7ab96745f4d80e9f1c1d35739357025959f6a4d2ff642ccca61d8e -size 30417 +oid sha256:1e7424801de3c72adc081eaf9f3b6056a621ba6dc0d3fad7e60efa3ab8a729fb +size 27930 diff --git a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax index 8494e5756..a991d614a 100644 --- a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax +++ b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62b952bfb3f68fb966b1ade28536ee9cfc55fd8189a388fa1437466b0c4578d5 -size 32134 +oid sha256:6a82d0bb9748423c60a279ad65f68e350fc85654def84618c4e5ea18955ba239 +size 29668 diff --git a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax index 44dc7be84..650fa5c6b 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a73341ea4465e90bf196969a5396ecb12e9001c5d6fabc2c7720deeb5a1f1137 -size 20826 +oid 
sha256:e16bb9ebcf09bf35a9a8bcd59cd55b7bc0bfc7c190aad727684eb4fc6d7e082e +size 21324 diff --git a/Content/Editor/DefaultFontMaterial.flax b/Content/Editor/DefaultFontMaterial.flax index dd7dd4187..d98226904 100644 --- a/Content/Editor/DefaultFontMaterial.flax +++ b/Content/Editor/DefaultFontMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64a5880d4a39025682a5be5b24890d70c2adb6f0a7cb91c1f605003cef7b826c -size 30596 +oid sha256:8524b92333b4d803951e35ac02ad953b9af26b4a99bd6b5c05fe1092861ab4a6 +size 28109 diff --git a/Content/Editor/Gizmo/FoliageBrushMaterial.flax b/Content/Editor/Gizmo/FoliageBrushMaterial.flax index c7b992e69..da8b864af 100644 --- a/Content/Editor/Gizmo/FoliageBrushMaterial.flax +++ b/Content/Editor/Gizmo/FoliageBrushMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7bde5eabf7c13d54ca3728c41aab08e77d47b06c5a8e855c7c1b1b7f6ef1649 -size 38161 +oid sha256:a1220702a7347c60df190dbf5cba1266e0b50803e602292bbf1cd6383ca14539 +size 35675 diff --git a/Content/Editor/Gizmo/Material.flax b/Content/Editor/Gizmo/Material.flax index d7dc5f050..ebff31379 100644 --- a/Content/Editor/Gizmo/Material.flax +++ b/Content/Editor/Gizmo/Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f940b3bb183ac25b4dfc896086f4cd5f840659aa0caff8c90d0ebf6695656a1 -size 32738 +oid sha256:4237bb3c785375d550988a15fb4fcfce47a0936ee45b4a2e8e0f8cebbad90eec +size 30252 diff --git a/Content/Editor/Gizmo/MaterialWire.flax b/Content/Editor/Gizmo/MaterialWire.flax index d459fefb7..2dc49b1fe 100644 --- a/Content/Editor/Gizmo/MaterialWire.flax +++ b/Content/Editor/Gizmo/MaterialWire.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:729f0d8845e8fb3feac4cb85ba0eded1da5dbe8b9727d69f94f06a8f7181399b -size 31876 +oid sha256:cace2c7eb81be70b756d747fa6a3ef4778df2da50d27a26fdd7aa9884b7aec7b +size 29390 diff --git a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax 
b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax index f376d17ee..fb133f924 100644 --- a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax +++ b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c431e061d06bba5145820f9d39b0570901c4e169d6d520266f710d0013c3d3f +oid sha256:babd06a770546e7bf8e2f8fd8cce829984e81f1b00751770a554ae121415d0e8 size 16166 diff --git a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax index 210f95d9c..e9759f762 100644 --- a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax +++ b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ff9de800c9936d91b929d702e132f84260f1f4d852927745b46f5b6470b484f -size 31530 +oid sha256:d3e98b80ebaf6b19acd8a82fca616a73ae9f62847e2f0111f3f94c9a3a3de28b +size 29080 diff --git a/Content/Editor/Highlight Material.flax b/Content/Editor/Highlight Material.flax index e42a57a5e..867846f32 100644 --- a/Content/Editor/Highlight Material.flax +++ b/Content/Editor/Highlight Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebee2e0554c42bcef0c2736baafc41779b9ad396fc5328375e3bb7ae9ea99c9a -size 30523 +oid sha256:da905b7c1c53974be7cecb432a3dca87624235200e06fe9d98670026734cd13d +size 28045 diff --git a/Content/Editor/Icons/IconsMaterial.flax b/Content/Editor/Icons/IconsMaterial.flax index 119d8e67e..244bb178d 100644 --- a/Content/Editor/Icons/IconsMaterial.flax +++ b/Content/Editor/Icons/IconsMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b87295cd8c906adcb0640aafeeea5aa5fbf6055fbd0ab22507a12d1dc064575 -size 30451 +oid sha256:7a0eddb7654731d9c229f650be3daf6c0d856c741ee1d1c2e191ccfd073fd87a +size 27973 diff --git a/Content/Editor/IesProfilePreviewMaterial.flax b/Content/Editor/IesProfilePreviewMaterial.flax index ef0b72c07..8a064bf9a 100644 --- 
a/Content/Editor/IesProfilePreviewMaterial.flax +++ b/Content/Editor/IesProfilePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6886c10b0e81d55e2c355bb67fc5454d9528ffe3e5b104aa58607e89708f31ba +oid sha256:36406c6c38796d81189c34eaa467bcbd8de109afb2da3a9b9083ec773b1c913c size 18205 diff --git a/Content/Editor/Particles/Particle Material Color.flax b/Content/Editor/Particles/Particle Material Color.flax index bdd0fa4cb..e1ecb1220 100644 --- a/Content/Editor/Particles/Particle Material Color.flax +++ b/Content/Editor/Particles/Particle Material Color.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e99bffeca23ab3273c284cd2202ffaa65b9dd73c60b22d7e57ad0807cd64b01 +oid sha256:c0d83ec4a3402680e7a683089c077c991d0ede3a85e563cee967ef94ec62f395 size 29912 diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index de45c178e..7d2a48b32 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa271d248a69f9541679267ca058f7ffcb2d0f136e210e0db41b0e793c3af0b6 +oid sha256:4317109e6aee7efef7f02ef0740c2676e69a028b877afb50dee7ef850b499db2 size 38680 diff --git a/Content/Editor/SpriteMaterial.flax b/Content/Editor/SpriteMaterial.flax index c4dd0be82..76ff2a46e 100644 --- a/Content/Editor/SpriteMaterial.flax +++ b/Content/Editor/SpriteMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69e13c6df67c185083c69367977ba37b274eaef9d08d3202f036a01d313b670f -size 31609 +oid sha256:c75b277a210d514b4b4c7c1533d01ea21ae49961402d72d53cbf3272abdf38d6 +size 29159 diff --git a/Content/Editor/Terrain/Circle Brush Material.flax b/Content/Editor/Terrain/Circle Brush Material.flax index bcaa5b08e..4f77e39cc 100644 --- a/Content/Editor/Terrain/Circle Brush Material.flax +++ b/Content/Editor/Terrain/Circle Brush Material.flax @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83364e5d0f20485779c538e80c0e14a3acdbc5f8ef8dd48378353729b8a72cb4 -size 27498 +oid sha256:d1eee4a66a26e6259a62f9a6c8e3fb00fb4ebe4741c28a38785be4e500aaed3d +size 27986 diff --git a/Content/Editor/Terrain/Highlight Terrain Material.flax b/Content/Editor/Terrain/Highlight Terrain Material.flax index 58225f4df..32d82d931 100644 --- a/Content/Editor/Terrain/Highlight Terrain Material.flax +++ b/Content/Editor/Terrain/Highlight Terrain Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93e4b8b33d344ebd56762e6ccb75eb66a95635caeabd4465f53a273de7becebd -size 20879 +oid sha256:222bf0a136e25607e51b2d6878221dd8de17d8d0cf6b02fc3bb46f54a964addf +size 21367 diff --git a/Content/Editor/TexturePreviewMaterial.flax b/Content/Editor/TexturePreviewMaterial.flax index 8f2a99bba..4ef5dafdd 100644 --- a/Content/Editor/TexturePreviewMaterial.flax +++ b/Content/Editor/TexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79e0a71beb547e03deb7ef2081e3f25d7ab10109ef64cff5882e1f6240603f17 +oid sha256:ccadf2b1a50715b2001715293f9de9c4f4c6507c958dc81cfcbde507649ecf2e size 10570 diff --git a/Content/Editor/Wires Debug Material.flax b/Content/Editor/Wires Debug Material.flax index e733d25be..b32c43553 100644 --- a/Content/Editor/Wires Debug Material.flax +++ b/Content/Editor/Wires Debug Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d05de5d7645b414807673b006a93805192c07a50a50a6be365fe522ea4f69304 -size 30523 +oid sha256:93da8fef9074ed5ce7574164835ca5644c1290bb561f8310d317fe3b34dca60f +size 28045 diff --git a/Content/Engine/DefaultDeformableMaterial.flax b/Content/Engine/DefaultDeformableMaterial.flax index 056de43fe..148feb26d 100644 --- a/Content/Engine/DefaultDeformableMaterial.flax +++ b/Content/Engine/DefaultDeformableMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:a6598cfc4b823c593c4d7bb803a0919e3bd1d0da39f251e17e099afc67686573 -size 18524 +oid sha256:a69290ca55369fe2e4019907d99a3c2fbfbfaa21d6b4e9725ba2ecba2082c5c4 +size 18514 diff --git a/Content/Engine/DefaultMaterial.flax b/Content/Engine/DefaultMaterial.flax index 8e998c38b..653e625aa 100644 --- a/Content/Engine/DefaultMaterial.flax +++ b/Content/Engine/DefaultMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3dde40a2494bc7175939d9c4edc2c55ecfdc9142739482cd4303ac7f9c67c798 -size 32442 +oid sha256:186cb88fe6b666b1918a1569fc3269eb4675172e70b348ce210e9911df4342fa +size 29992 diff --git a/Content/Engine/DefaultRadialMenu.flax b/Content/Engine/DefaultRadialMenu.flax index 1159e4719..ee571f342 100644 --- a/Content/Engine/DefaultRadialMenu.flax +++ b/Content/Engine/DefaultRadialMenu.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7c517175501d1b41143a9d2d81950bf0d91d0556cfebca5784e7ef2c4ba94cf +oid sha256:814157ca47fb0084c1268113c857aa10eab682ad3d8e0dbc730dd95bb0fb6a9e size 20340 diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index 29aa8424e..3b68da4c1 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9979bb1d7fdd67cf124f01fc0de7eb28666cc04ff5925e9580bc130a8a7adb65 -size 22963 +oid sha256:bcba019392a7f28f7613db7d6b6553e1a4de0ff8463a79da16c8824eb7949055 +size 23451 diff --git a/Content/Engine/SingleColorMaterial.flax b/Content/Engine/SingleColorMaterial.flax index 193639a39..4c7d36165 100644 --- a/Content/Engine/SingleColorMaterial.flax +++ b/Content/Engine/SingleColorMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97fa28df30dfa46eb10b78459aaef08b36d9ae170e16f3aa70f71e3529074c2b -size 30618 +oid sha256:dee79e836a1379622db5829481af66298fdbee63314329d4ab87e36c02128c0b +size 28131 diff --git 
a/Content/Engine/SkyboxMaterial.flax b/Content/Engine/SkyboxMaterial.flax index 401a4ed74..774ee5fb8 100644 --- a/Content/Engine/SkyboxMaterial.flax +++ b/Content/Engine/SkyboxMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03b39e366563b7d26d76c18bf48ae414597a8e8c9258d146c0588a753f65bc93 -size 31816 +oid sha256:ac058955e94e54f8ad165db9d6322066165bf98380652df37682a027b783a9fd +size 29329 From 6fbf4a6aac9afe19d23269096b6b3560fd614817 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Jul 2024 16:07:09 +0200 Subject: [PATCH 202/292] Optimize draw calls sorting and objects buffer building to be async --- Source/Engine/Core/Collections/Sorting.h | 4 +- Source/Engine/Renderer/RenderList.cpp | 32 +++++----- Source/Engine/Renderer/Renderer.cpp | 76 ++++++++++++++++++------ 3 files changed, 76 insertions(+), 36 deletions(-) diff --git a/Source/Engine/Core/Collections/Sorting.h b/Source/Engine/Core/Collections/Sorting.h index 171c855a3..b6c2694a1 100644 --- a/Source/Engine/Core/Collections/Sorting.h +++ b/Source/Engine/Core/Collections/Sorting.h @@ -325,8 +325,8 @@ public: Platform::Free(tmp); } - template - FORCE_INLINE static void MergeSort(Array& data, Array* tmp = nullptr) + template + FORCE_INLINE static void MergeSort(Array& data, Array* tmp = nullptr) { if (tmp) tmp->Resize(data.Count()); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index e96a7a9a9..1f45e7f78 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -26,12 +26,7 @@ static_assert(sizeof(ShaderObjectData) == sizeof(Float4) * ARRAY_COUNT(ShaderObj namespace { - // Cached data for the draw calls sorting - Array SortingKeys[2]; - Array SortingIndices; - Array SortingBatches; Array FreeRenderList; - Array> MemPool; CriticalSection MemPoolLocker; } @@ -199,12 +194,15 @@ void RendererAllocation::Free(void* ptr, uintptr size) RenderList* RenderList::GetFromPool() { + 
MemPoolLocker.Lock(); if (FreeRenderList.HasItems()) { const auto result = FreeRenderList.Last(); FreeRenderList.RemoveLast(); + MemPoolLocker.Unlock(); return result; } + MemPoolLocker.Unlock(); return New(); } @@ -213,10 +211,12 @@ void RenderList::ReturnToPool(RenderList* cache) { if (!cache) return; + cache->Clear(); + MemPoolLocker.Lock(); ASSERT(!FreeRenderList.Contains(cache)); FreeRenderList.Add(cache); - cache->Clear(); + MemPoolLocker.Unlock(); } void RenderList::CleanupCache() @@ -224,13 +224,12 @@ void RenderList::CleanupCache() // Don't call it during rendering (data may be already in use) ASSERT(GPUDevice::Instance == nullptr || GPUDevice::Instance->CurrentTask == nullptr); - SortingKeys[0].Resize(0); - SortingKeys[1].Resize(0); - SortingIndices.Resize(0); + MemPoolLocker.Lock(); FreeRenderList.ClearDelete(); for (auto& e : MemPool) Platform::Free(e.First); MemPool.Clear(); + MemPoolLocker.Unlock(); } bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const @@ -648,12 +647,12 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD const int32 listSize = list.Indices.Count(); ZoneValue(listSize); - // Peek shared memory -#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize) - PREPARE_CACHE(SortingKeys[0]); - PREPARE_CACHE(SortingKeys[1]); - PREPARE_CACHE(SortingIndices); -#undef PREPARE_CACHE + // Use shared memory from renderer allocator + Array SortingKeys[2]; + Array SortingIndices; + SortingKeys[0].Resize(listSize); + SortingKeys[1].Resize(listSize); + SortingIndices.Resize(listSize); uint64* sortedKeys = SortingKeys[0].Get(); // Setup sort keys @@ -726,7 +725,8 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD if (stable) { // Sort draw calls batches by depth - Sorting::MergeSort(list.Batches, &SortingBatches); + Array sortingBatches; + Sorting::MergeSort(list.Batches, &sortingBatches); } } diff --git a/Source/Engine/Renderer/Renderer.cpp 
b/Source/Engine/Renderer/Renderer.cpp index d4e5b2590..13921e079 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -425,29 +425,69 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont #endif } - // Sort draw calls + // Process draw calls (sorting, objects buffer building) { - PROFILE_CPU_NAMED("Sort Draw Calls"); - // TODO: run all of these functions in async via jobs - for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) - renderContextBatch.Contexts[i].List->BuildObjectsBuffer(); - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBuffer); - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBufferNoDecals); - renderContext.List->SortDrawCalls(renderContext, true, DrawCallsListType::Forward); - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::Distortion); - if (setup.UseMotionVectors) - renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::MotionVectors); - for (int32 i = 1; i < renderContextBatch.Contexts.Count(); i++) + PROFILE_CPU_NAMED("Process Draw Calls"); + + // Utility that handles async jobs for a specific rendering routines in async + struct DrawCallsProcessor { - auto& shadowContext = renderContextBatch.Contexts.Get()[i]; - shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); - shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth); - } + RenderContextBatch& RenderContextBatch; + Pair MainContextSorting[5] = + { + // Draw List + Reverse Distance sorting + ToPair(DrawCallsListType::GBuffer, false), + ToPair(DrawCallsListType::GBufferNoDecals, false), + ToPair(DrawCallsListType::Forward, true), + ToPair(DrawCallsListType::Distortion, false), + ToPair(DrawCallsListType::MotionVectors, false), + }; + + void BuildObjectsBufferJob(int32 
index) + { + RenderContextBatch.Contexts[index].List->BuildObjectsBuffer(); + } + + void SortDrawCallsJob(int32 index) + { + RenderContext& renderContext = RenderContextBatch.GetMainContext(); + if (index < ARRAY_COUNT(MainContextSorting)) + { + // Main context sorting + RenderSetup& setup = renderContext.List->Setup; + auto sorting = MainContextSorting[index]; + if (sorting.First == DrawCallsListType::MotionVectors && !setup.UseMotionVectors) + return; + renderContext.List->SortDrawCalls(renderContext, sorting.Second, sorting.First); + } + else + { + // Shadow context sorting + auto& shadowContext = RenderContextBatch.Contexts[index - ARRAY_COUNT(MainContextSorting)]; + shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); + shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth); + } + } + } processor = { renderContextBatch }; + + // Dispatch async jobs + Function func; + func.Bind(&processor); + const int64 buildObjectsBufferJob = JobSystem::Dispatch(func, renderContextBatch.Contexts.Count()); + func.Bind(&processor); + const int64 sortDrawCallsJob = JobSystem::Dispatch(func, ARRAY_COUNT(DrawCallsProcessor::MainContextSorting) + renderContextBatch.Contexts.Count()); + + // Upload objects buffers to the GPU + JobSystem::Wait(buildObjectsBufferJob); { PROFILE_CPU_NAMED("FlushObjectsBuffer"); - for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) - renderContextBatch.Contexts[i].List->ObjectBuffer.Flush(context); + for (auto& e : renderContextBatch.Contexts) + e.List->ObjectBuffer.Flush(context); } + + // Wait for async jobs to finish + // TODO: use per-pass wait labels (eg. 
don't wait for shadow pass draws sorting until ShadowPass needs it) + JobSystem::Wait(sortDrawCallsJob); } // Get the light accumulation buffer From 00a9c48fecb58cd12f43c8b2c28d33d52f3f17a7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Jul 2024 08:51:58 +0200 Subject: [PATCH 203/292] Optimize probes count multiplication in DDGI shader --- .../Renderer/GI/DynamicDiffuseGlobalIllumination.cpp | 4 +++- Source/Shaders/GI/DDGI.shader | 7 +++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index f82123a46..6b92607ea 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -46,7 +46,8 @@ GPU_CB_STRUCT(Data0 { GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; ShaderGBufferData GBuffer; - Float2 Padding0; + float Padding0; + uint32 ProbesCount; float ResetBlend; float TemporalTime; Int4 ProbeScrollClears[4]; @@ -475,6 +476,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont data.DDGI = ddgiData.Result.Constants; data.GlobalSDF = bindingDataSDF.Constants; data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants; + data.ProbesCount = data.DDGI.ProbesCounts[0] * data.DDGI.ProbesCounts[1] * data.DDGI.ProbesCounts[2]; data.ResetBlend = clear ? 
1.0f : 0.0f; for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 92df2c043..4eb71a7ae 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -30,7 +30,8 @@ DDGIData DDGI; GlobalSDFData GlobalSDF; GlobalSurfaceAtlasData GlobalSurfaceAtlas; GBufferData GBuffer; -float2 Padding0; +float Padding0; +uint ProbesCount; float ResetBlend; float TemporalTime; int4 ProbeScrollClears[4]; @@ -86,8 +87,7 @@ META_CS(true, FEATURE_LEVEL_SM5) void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) { uint probeIndex = DispatchThreadId.x; - uint probesCount = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z; - if (probeIndex >= probesCount) + if (probeIndex >= ProbesCount) return; uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); @@ -271,7 +271,6 @@ META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1, 1, 1)] void CS_UpdateProbesInitArgs() { - uint probesCount = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z; uint activeProbesCount = ActiveProbes.Load(0); uint arg = 0; for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT) From 030a66c0915eeceede3f3cfc4e7e8daf92ef3b8a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Jul 2024 12:41:29 +0200 Subject: [PATCH 204/292] Add shader cache invalidation when using debug shader option via cmd line in Editor --- Flax.flaxproj | 2 +- .../Graphics/Shaders/Cache/ShaderCacheManager.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index edb7c200c..defb13876 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -4,7 +4,7 @@ "Major": 1, "Minor": 9, "Revision": 0, - "Build": 6602 + "Build": 6603 }, "Company": "Flax", "Copyright": "Copyright (c) 2012-2024 Wojciech Figat. 
All rights reserved.", diff --git a/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp b/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp index 2af300f06..d882e5db7 100644 --- a/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp +++ b/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp @@ -7,6 +7,9 @@ #include "Engine/Core/Log.h" #include "Engine/Engine/EngineService.h" #include "Engine/Engine/Globals.h" +#if USE_EDITOR +#include "Engine/Engine/CommandLine.h" +#endif #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Graphics/Materials/MaterialShader.h" #include "Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h" @@ -184,9 +187,15 @@ bool ShaderCacheManagerService::Init() int32 ShaderCacheVersion = -1; int32 MaterialGraphVersion = -1; int32 ParticleGraphVersion = -1; + bool ShaderDebug; }; CacheVersion cacheVersion; const String cacheVerFile = rootDir / TEXT("CacheVersion"); +#if USE_EDITOR + const bool shaderDebug = CommandLine::Options.ShaderDebug; +#else + const bool shaderDebug = false; +#endif if (FileSystem::FileExists(cacheVerFile)) { if (File::ReadAllBytes(cacheVerFile, (byte*)&cacheVersion, sizeof(cacheVersion))) @@ -199,6 +208,7 @@ bool ShaderCacheManagerService::Init() || cacheVersion.ShaderCacheVersion != GPU_SHADER_CACHE_VERSION || cacheVersion.MaterialGraphVersion != MATERIAL_GRAPH_VERSION || cacheVersion.ParticleGraphVersion != PARTICLE_GPU_GRAPH_VERSION + || cacheVersion.ShaderDebug != shaderDebug ) { LOG(Warning, "Shaders cache database is invalid. 
Performing reset."); @@ -216,6 +226,7 @@ bool ShaderCacheManagerService::Init() cacheVersion.ShaderCacheVersion = GPU_SHADER_CACHE_VERSION; cacheVersion.MaterialGraphVersion = MATERIAL_GRAPH_VERSION; cacheVersion.ParticleGraphVersion = PARTICLE_GPU_GRAPH_VERSION; + cacheVersion.ShaderDebug = shaderDebug; if (File::WriteAllBytes(cacheVerFile, (byte*)&cacheVersion, sizeof(cacheVersion))) { LOG(Error, "Failed to create the shaders cache database version file."); From b3d77ab9eb75b7100d6ec00608e08f4b08bcfa0d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Jul 2024 13:22:19 +0200 Subject: [PATCH 205/292] Add shaders profiling console command --- Source/Engine/Engine/CommandLine.cpp | 1 + Source/Engine/Engine/CommandLine.h | 5 +++++ Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp | 4 ++++ Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp | 4 ++++ 4 files changed, 14 insertions(+) diff --git a/Source/Engine/Engine/CommandLine.cpp b/Source/Engine/Engine/CommandLine.cpp index c1d1a8540..744060288 100644 --- a/Source/Engine/Engine/CommandLine.cpp +++ b/Source/Engine/Engine/CommandLine.cpp @@ -156,6 +156,7 @@ bool CommandLine::Parse(const Char* cmdLine) PARSE_ARG_SWITCH("-build ", Build); PARSE_BOOL_SWITCH("-skipcompile ", SkipCompile); PARSE_BOOL_SWITCH("-shaderdebug ", ShaderDebug); + PARSE_BOOL_SWITCH("-shaderprofile ", ShaderProfile); PARSE_ARG_OPT_SWITCH("-play ", Play); #endif diff --git a/Source/Engine/Engine/CommandLine.h b/Source/Engine/Engine/CommandLine.h index 039349770..aac3000e9 100644 --- a/Source/Engine/Engine/CommandLine.h +++ b/Source/Engine/Engine/CommandLine.h @@ -169,6 +169,11 @@ public: /// Nullable ShaderDebug; + /// + /// -shaderprofile (enables debugging data generation for shaders but leaves shader compiler optimizations active for performance profiling) + /// + Nullable ShaderProfile; + /// /// -play !guid! 
( Scene to play, can be empty to use default ) /// diff --git a/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp b/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp index 8a88e2d25..6f91ff4be 100644 --- a/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp +++ b/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp @@ -254,6 +254,10 @@ bool ShaderAssetBase::LoadShaderCache(ShaderCacheResult& result) options.GenerateDebugData = true; options.NoOptimize = true; } + else if (CommandLine::Options.ShaderProfile) + { + options.GenerateDebugData = true; + } auto& platformDefine = options.Macros.AddOne(); #if PLATFORM_WINDOWS platformDefine.Name = "PLATFORM_WINDOWS"; diff --git a/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp b/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp index d882e5db7..b8adce351 100644 --- a/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp +++ b/Source/Engine/Graphics/Shaders/Cache/ShaderCacheManager.cpp @@ -188,11 +188,13 @@ bool ShaderCacheManagerService::Init() int32 MaterialGraphVersion = -1; int32 ParticleGraphVersion = -1; bool ShaderDebug; + bool ShaderProfile; }; CacheVersion cacheVersion; const String cacheVerFile = rootDir / TEXT("CacheVersion"); #if USE_EDITOR const bool shaderDebug = CommandLine::Options.ShaderDebug; + const bool shaderProfile = CommandLine::Options.ShaderProfile; #else const bool shaderDebug = false; #endif @@ -209,6 +211,7 @@ bool ShaderCacheManagerService::Init() || cacheVersion.MaterialGraphVersion != MATERIAL_GRAPH_VERSION || cacheVersion.ParticleGraphVersion != PARTICLE_GPU_GRAPH_VERSION || cacheVersion.ShaderDebug != shaderDebug + || cacheVersion.ShaderProfile != shaderProfile ) { LOG(Warning, "Shaders cache database is invalid. 
Performing reset."); @@ -227,6 +230,7 @@ bool ShaderCacheManagerService::Init() cacheVersion.MaterialGraphVersion = MATERIAL_GRAPH_VERSION; cacheVersion.ParticleGraphVersion = PARTICLE_GPU_GRAPH_VERSION; cacheVersion.ShaderDebug = shaderDebug; + cacheVersion.ShaderProfile = shaderProfile; if (File::WriteAllBytes(cacheVerFile, (byte*)&cacheVersion, sizeof(cacheVersion))) { LOG(Error, "Failed to create the shaders cache database version file."); From b8100e941769ee215bd2d355e9ca4c24fc24e51d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Jul 2024 18:29:42 +0200 Subject: [PATCH 206/292] Fix missing GPU events when using shaders profiling/debugging or graphics dev tools are enabled --- Source/Engine/Graphics/Graphics.cpp | 8 +++++--- Source/Engine/Profiler/ProfilerGPU.cpp | 16 +++++++++------- Source/Engine/Profiler/ProfilerGPU.h | 7 ++++++- Source/Engine/Profiler/ProfilingTools.cpp | 1 + Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp | 2 ++ 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 9535f3ecf..057cc0229 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -188,11 +188,13 @@ bool GraphicsService::Init() ); // Initialize - if (device->IsDebugToolAttached) + if (device->IsDebugToolAttached || + CommandLine::Options.ShaderProfile || + CommandLine::Options.ShaderDebug) { #if COMPILE_WITH_PROFILER - // Auto-enable GPU profiler - ProfilerGPU::Enabled = true; + // Auto-enable GPU events + ProfilerGPU::EventsEnabled = true; #endif } if (device->LoadContent()) diff --git a/Source/Engine/Profiler/ProfilerGPU.cpp b/Source/Engine/Profiler/ProfilerGPU.cpp index 1f5777fd2..6235ed669 100644 --- a/Source/Engine/Profiler/ProfilerGPU.cpp +++ b/Source/Engine/Profiler/ProfilerGPU.cpp @@ -15,6 +15,7 @@ int32 ProfilerGPU::_depth = 0; Array ProfilerGPU::_timerQueriesPool; Array ProfilerGPU::_timerQueriesFree; bool ProfilerGPU::Enabled = false; 
+bool ProfilerGPU::EventsEnabled = false; int32 ProfilerGPU::CurrentBuffer = 0; ProfilerGPU::EventBuffer ProfilerGPU::Buffers[PROFILER_GPU_EVENTS_FRAMES]; @@ -95,11 +96,12 @@ GPUTimerQuery* ProfilerGPU::GetTimerQuery() int32 ProfilerGPU::BeginEvent(const Char* name) { +#if GPU_ALLOW_PROFILE_EVENTS + if (EventsEnabled) + GPUDevice::Instance->GetMainContext()->EventBegin(name); +#endif if (!Enabled) return -1; -#if GPU_ALLOW_PROFILE_EVENTS - GPUDevice::Instance->GetMainContext()->EventBegin(name); -#endif Event e; e.Name = name; @@ -115,6 +117,10 @@ int32 ProfilerGPU::BeginEvent(const Char* name) void ProfilerGPU::EndEvent(int32 index) { +#if GPU_ALLOW_PROFILE_EVENTS + if (EventsEnabled) + GPUDevice::Instance->GetMainContext()->EventEnd(); +#endif if (index == -1) return; _depth--; @@ -123,10 +129,6 @@ void ProfilerGPU::EndEvent(int32 index) auto e = buffer.Get(index); e->Stats.Mix(RenderStatsData::Counter); e->Timer->End(); - -#if GPU_ALLOW_PROFILE_EVENTS - GPUDevice::Instance->GetMainContext()->EventEnd(); -#endif } void ProfilerGPU::BeginFrame() diff --git a/Source/Engine/Profiler/ProfilerGPU.h b/Source/Engine/Profiler/ProfilerGPU.h index 99fce3599..c316abb15 100644 --- a/Source/Engine/Profiler/ProfilerGPU.h +++ b/Source/Engine/Profiler/ProfilerGPU.h @@ -134,13 +134,18 @@ public: /// API_FIELD() static bool Enabled; + /// + /// True if GPU events are enabled (see GPUContext::EventBegin), otherwise false. Cannot be changed during rendering. + /// + API_FIELD() static bool EventsEnabled; + /// /// The current frame buffer to collect events. /// static int32 CurrentBuffer; /// - /// The events buffers (one per frame). + /// The event buffers (one per frame). 
/// static EventBuffer Buffers[PROFILER_GPU_EVENTS_FRAMES]; diff --git a/Source/Engine/Profiler/ProfilingTools.cpp b/Source/Engine/Profiler/ProfilingTools.cpp index 58f1c1ffc..8b39cba02 100644 --- a/Source/Engine/Profiler/ProfilingTools.cpp +++ b/Source/Engine/Profiler/ProfilingTools.cpp @@ -224,6 +224,7 @@ void ProfilingTools::SetEnabled(bool enabled) { ProfilerCPU::Enabled = enabled; ProfilerGPU::Enabled = enabled; + ProfilerGPU::EventsEnabled = enabled; NetworkInternal::EnableProfiling = enabled; } diff --git a/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp b/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp index c6b0cd056..4220c0be8 100644 --- a/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp +++ b/Source/Engine/ShadowsOfMordor/Builder.Jobs.cpp @@ -344,6 +344,7 @@ void ShadowsOfMordor::Builder::onJobRender(GPUContext* context) #if COMPILE_WITH_PROFILER auto gpuProfilerEnabled = ProfilerGPU::Enabled; ProfilerGPU::Enabled = false; + ProfilerGPU::EventsEnabled = false; #endif // Render hemispheres @@ -432,6 +433,7 @@ void ShadowsOfMordor::Builder::onJobRender(GPUContext* context) } #if COMPILE_WITH_PROFILER ProfilerGPU::Enabled = gpuProfilerEnabled; + ProfilerGPU::EventsEnabled = gpuProfilerEnabled; #endif // Report progress From 230c57cca4825d234d5d3d5f60a694855a95b563 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Jul 2024 18:30:11 +0200 Subject: [PATCH 207/292] Add `WinPixEventRuntime` for D3D12 to provide GPU profiler event names --- .../GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs b/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs index b8dda9068..d9cba37ec 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs @@ -12,7 +12,7 @@ public class GraphicsDeviceDX12 : 
GraphicsDeviceBaseModule /// /// Enables using PIX events to instrument game labeling regions of CPU or GPU work and marking important occurrences. /// - public bool UseWinPixEventRuntime = false; + public static bool EnableWinPixEventRuntime = true; /// public override void Setup(BuildOptions options) @@ -34,7 +34,7 @@ public class GraphicsDeviceDX12 : GraphicsDeviceBaseModule break; } - if (UseWinPixEventRuntime) + if (EnableWinPixEventRuntime && options.Configuration != TargetConfiguration.Release) { options.PrivateDefinitions.Add("USE_PIX"); options.PrivateIncludePaths.Add(Path.Combine(Globals.EngineRoot, "Source/ThirdParty/WinPixEventRuntime")); From bee39dda584044529b641bd792b776deb9fef1ae Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 6 Jul 2024 12:57:19 +0200 Subject: [PATCH 208/292] Fix commandline options in build --- Source/Engine/Engine/CommandLine.cpp | 7 +++---- Source/Engine/Engine/CommandLine.h | 12 ++++++------ Source/Engine/Graphics/Graphics.cpp | 11 ++++++++--- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/Source/Engine/Engine/CommandLine.cpp b/Source/Engine/Engine/CommandLine.cpp index 744060288..e2de230ef 100644 --- a/Source/Engine/Engine/CommandLine.cpp +++ b/Source/Engine/Engine/CommandLine.cpp @@ -145,9 +145,7 @@ bool CommandLine::Parse(const Char* cmdLine) PARSE_BOOL_SWITCH("-monolog ", MonoLog); PARSE_BOOL_SWITCH("-mute ", Mute); PARSE_BOOL_SWITCH("-lowdpi ", LowDPI); - #if USE_EDITOR - PARSE_BOOL_SWITCH("-clearcache ", ClearCache); PARSE_BOOL_SWITCH("-clearcooker ", ClearCookerCache); PARSE_ARG_SWITCH("-project ", Project); @@ -156,9 +154,10 @@ bool CommandLine::Parse(const Char* cmdLine) PARSE_ARG_SWITCH("-build ", Build); PARSE_BOOL_SWITCH("-skipcompile ", SkipCompile); PARSE_BOOL_SWITCH("-shaderdebug ", ShaderDebug); - PARSE_BOOL_SWITCH("-shaderprofile ", ShaderProfile); PARSE_ARG_OPT_SWITCH("-play ", Play); - +#endif +#if USE_EDITOR || !BUILD_RELEASE + PARSE_BOOL_SWITCH("-shaderprofile ", ShaderProfile); 
#endif return false; diff --git a/Source/Engine/Engine/CommandLine.h b/Source/Engine/Engine/CommandLine.h index aac3000e9..347e9bebd 100644 --- a/Source/Engine/Engine/CommandLine.h +++ b/Source/Engine/Engine/CommandLine.h @@ -128,7 +128,6 @@ public: Nullable LowDPI; #if USE_EDITOR - /// /// -project !path! (Startup project path) /// @@ -169,16 +168,17 @@ public: /// Nullable ShaderDebug; - /// - /// -shaderprofile (enables debugging data generation for shaders but leaves shader compiler optimizations active for performance profiling) - /// - Nullable ShaderProfile; - /// /// -play !guid! ( Scene to play, can be empty to use default ) /// Nullable Play; +#endif +#if USE_EDITOR || !BUILD_RELEASE + /// + /// -shaderprofile (enables debugging data generation for shaders but leaves shader compiler optimizations active for performance profiling) + /// + Nullable ShaderProfile; #endif }; diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 057cc0229..020d0ad0e 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -188,9 +188,14 @@ bool GraphicsService::Init() ); // Initialize - if (device->IsDebugToolAttached || - CommandLine::Options.ShaderProfile || - CommandLine::Options.ShaderDebug) + if (device->IsDebugToolAttached +#if USE_EDITOR || !BUILD_RELEASE + || CommandLine::Options.ShaderProfile +#endif +#if USE_EDITOR + || CommandLine::Options.ShaderDebug +#endif + ) { #if COMPILE_WITH_PROFILER // Auto-enable GPU events From ae2b3d361e0012b9b28a01ccc4abb9b7d314cb39 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 8 Jul 2024 12:56:31 +0200 Subject: [PATCH 209/292] Fix shader warning --- Source/Shaders/GI/DDGI.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index d2649f805..03979035b 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -154,7 +154,7 @@ float3 
SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T biasedWorldPosition = worldPosition + surfaceBias; // Calculate cascade blending weight (use input bias to smooth transition) - float cascadeBlendSmooth = frac(max(distance(data.ViewPos, worldPosition) - probesExtent, 0) / probesSpacing) * 0.1f; + float cascadeBlendSmooth = frac(max(distance(data.ViewPos, worldPosition) - probesExtent.x, 0) / probesSpacing) * 0.1f; float3 cascadeBlendPoint = worldPosition - probesOrigin - cascadeBlendSmooth * probesSpacing; float fadeDistance = probesSpacing * DDGI_CASCADE_BLEND_SIZE; float cascadeWeight = saturate(Min3(probesExtent - abs(cascadeBlendPoint)) / fadeDistance); From 082a5819cdb6bd99e48327508c40c13b4ec64833 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 8 Jul 2024 16:40:59 +0200 Subject: [PATCH 210/292] Optimize GlobalSurfaceAtlasPass by ignoring GBuffer2 properties (diffuse-only surfaces) --- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 21 +++++-------------- .../Renderer/GI/GlobalSurfaceAtlasPass.h | 3 +-- Source/Shaders/GI/GlobalSurfaceAtlas.shader | 15 ++++++------- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 421baf7b1..dd9f3e31f 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -141,7 +141,6 @@ public: GPUTexture* AtlasEmissive = nullptr; GPUTexture* AtlasGBuffer0 = nullptr; GPUTexture* AtlasGBuffer1 = nullptr; - GPUTexture* AtlasGBuffer2 = nullptr; GPUTexture* AtlasLighting = nullptr; GPUBuffer* ChunksBuffer = nullptr; GPUBuffer* CulledObjectsBuffer = nullptr; @@ -195,7 +194,6 @@ public: RenderTargetPool::Release(AtlasEmissive); RenderTargetPool::Release(AtlasGBuffer0); RenderTargetPool::Release(AtlasGBuffer1); - RenderTargetPool::Release(AtlasGBuffer2); RenderTargetPool::Release(AtlasLighting); ClearObjects(); } @@ -719,7 +717,6 @@ bool 
GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co INIT_ATLAS_TEXTURE(AtlasEmissive, PixelFormat::R11G11B10_Float); INIT_ATLAS_TEXTURE(AtlasGBuffer0, GBUFFER0_FORMAT); INIT_ATLAS_TEXTURE(AtlasGBuffer1, GBUFFER1_FORMAT); - INIT_ATLAS_TEXTURE(AtlasGBuffer2, GBUFFER2_FORMAT); INIT_ATLAS_TEXTURE(AtlasLighting, PixelFormat::R11G11B10_Float); desc.Flags = GPUTextureFlags::DepthStencil | GPUTextureFlags::ShaderResource; INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm); @@ -792,12 +789,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co renderContextTiles.View.Prepare(renderContextTiles); GPUTextureView* depthBuffer = surfaceAtlasData.AtlasDepth->View(); - GPUTextureView* targetBuffers[4] = + GPUTextureView* targetBuffers[3] = { surfaceAtlasData.AtlasEmissive->View(), surfaceAtlasData.AtlasGBuffer0->View(), surfaceAtlasData.AtlasGBuffer1->View(), - surfaceAtlasData.AtlasGBuffer2->View(), }; context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers))); { @@ -809,7 +805,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->Clear(targetBuffers[0], Color::Transparent); context->Clear(targetBuffers[1], Color::Transparent); context->Clear(targetBuffers[2], Color::Transparent); - context->Clear(targetBuffers[3], Color(1, 0, 0, 0)); } else { @@ -1060,8 +1055,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co result.Atlas[0] = surfaceAtlasData.AtlasDepth; result.Atlas[1] = surfaceAtlasData.AtlasGBuffer0; result.Atlas[2] = surfaceAtlasData.AtlasGBuffer1; - result.Atlas[3] = surfaceAtlasData.AtlasGBuffer2; - result.Atlas[4] = surfaceAtlasData.AtlasLighting; + result.Atlas[3] = surfaceAtlasData.AtlasLighting; result.Chunks = surfaceAtlasData.ChunksBuffer; result.CulledObjects = surfaceAtlasData.CulledObjectsBuffer; result.Objects = surfaceAtlasData.ObjectsBuffer.GetBuffer(); @@ -1075,7 +1069,7 @@ bool 
GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->SetRenderTarget(surfaceAtlasData.AtlasLighting->View()); context->BindSR(0, surfaceAtlasData.AtlasGBuffer0->View()); context->BindSR(1, surfaceAtlasData.AtlasGBuffer1->View()); - context->BindSR(2, surfaceAtlasData.AtlasGBuffer2->View()); + context->UnBindSR(2); context->BindSR(3, surfaceAtlasData.AtlasDepth->View()); context->BindSR(4, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View()); context->BindSR(5, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr); @@ -1407,7 +1401,7 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex // Full screen - direct light context->BindSR(5, bindingData.AtlasLighting->View()); context->SetViewport(outputSize.X, outputSize.Y); - context->SetScissor(Rectangle(0, 0, outputSizeTwoThird.X, outputSize.Y)); + context->SetScissor(Rectangle(0, 0, outputSize.X, outputSize.Y)); context->DrawFullscreenTriangle(); // Color Grading and Post-Processing to improve readability in bright/dark scenes @@ -1440,14 +1434,9 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex context->SetViewportAndScissors(Viewport(outputSizeTwoThird.X, 0, outputSizeThird.X, outputSizeThird.Y)); context->DrawFullscreenTriangle(); - // Bottom middle - normals + // Bottom right - normals context->SetState(_psDebug0); context->BindSR(5, bindingData.AtlasGBuffer1->View()); - context->SetViewportAndScissors(Viewport(outputSizeTwoThird.X, outputSizeThird.Y, outputSizeThird.X, outputSizeThird.Y)); - context->DrawFullscreenTriangle(); - - // Bottom right - roughness/metalness/ao - context->BindSR(5, bindingData.AtlasGBuffer2->View()); context->SetViewportAndScissors(Viewport(outputSizeTwoThird.X, outputSizeTwoThird.Y, outputSizeThird.X, outputSizeThird.Y)); context->DrawFullscreenTriangle(); } diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h 
index 89ce6875c..ea2bc7863 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h @@ -30,10 +30,9 @@ public: GPUTexture* AtlasDepth; GPUTexture* AtlasGBuffer0; GPUTexture* AtlasGBuffer1; - GPUTexture* AtlasGBuffer2; GPUTexture* AtlasLighting; }; - GPUTexture* Atlas[5]; + GPUTexture* Atlas[4]; }; GPUBuffer* Chunks; GPUBuffer* CulledObjects; diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index 23ebddda8..a81383dcf 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -107,8 +107,14 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target float2 atlasUV = input.TileUV * tile.AtlasRectUV.zw + tile.AtlasRectUV.xy; // Load GBuffer sample from atlas - GBufferData gBufferData = (GBufferData)0; - GBufferSample gBuffer = SampleGBuffer(gBufferData, atlasUV); + float4 gBuffer0 = SAMPLE_RT(GBuffer0, atlasUV); + float4 gBuffer1 = SAMPLE_RT(GBuffer1, atlasUV); + GBufferSample gBuffer = (GBufferSample)0; + gBuffer.Normal = DecodeNormal(gBuffer1.rgb); + gBuffer.ShadingModel = (int)(gBuffer1.a * 3.999); + gBuffer.Color = gBuffer0.rgb; + gBuffer.Roughness = 1.0f; + gBuffer.AO = gBuffer0.a; BRANCH if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT) { @@ -119,11 +125,6 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target // Reconstruct world-space position manually (from uv+depth within a tile) float tileDepth = SampleZ(atlasUV); - //float tileNear = -GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET; - //float tileFar = tile.ViewBoundsSize.z + 2 * GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET; - //gBufferData.ViewInfo.zw = float2(tileFar / (tileFar - tileNear), (-tileFar * tileNear) / (tileFar - tileNear) / tileFar); - //gBufferData.ViewInfo.zw = float2(1, 0); - //float tileLinearDepth = LinearizeZ(gBufferData, tileDepth); float3 tileSpacePos = float3(input.TileUV.x - 0.5f, 0.5f - input.TileUV.y, tileDepth); 
float3 gBufferTilePos = tileSpacePos * tile.ViewBoundsSize; float4x4 tileLocalToWorld = Inverse(tile.WorldToLocal); From d5dd8e7ecf5b7c4e935cdd94389fa215e8ce98ed Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 8 Jul 2024 16:43:26 +0200 Subject: [PATCH 211/292] Optimize GlobalSurfaceAtlasPass tiles by sorting by size before inserting them into atlas --- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index dd9f3e31f..7386fb077 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -8,6 +8,7 @@ #include "../ShadowsPass.h" #include "Engine/Core/Math/Matrix3x3.h" #include "Engine/Core/Math/OrientedBoundingBox.h" +#include "Engine/Core/Collections/Sorting.h" #include "Engine/Core/Config/GraphicsSettings.h" #include "Engine/Engine/Engine.h" #include "Engine/Engine/Units.h" @@ -81,6 +82,11 @@ struct GlobalSurfaceAtlasNewTile void* ActorObject; uint16 TileIndex; uint16 TileResolution; + + bool operator<(const GlobalSurfaceAtlasNewTile& other) const + { + return TileResolution > other.TileResolution; + } }; struct GlobalSurfaceAtlasTile : RectPackNode @@ -350,6 +356,11 @@ public: } AsyncNewObjects.Clear(); + // Sort tiles by size to reduce atlas fragmentation issue by inserting larger tiles first + Array sortingTiles; + sortingTiles.Resize(AsyncNewTiles.Count()); + Sorting::MergeSort(AsyncNewTiles.Get(), AsyncNewTiles.Count(), sortingTiles.Get()); + for (auto& newTile : AsyncNewTiles) { auto& object = Objects[newTile.ActorObject]; From ffddbb455f36c65af0bd3a7f9662fd636265340f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 9 Jul 2024 11:57:41 +0200 Subject: [PATCH 212/292] Add Scripting events to C++ api similar to C# ones --- Source/Engine/Scripting/Scripting.cpp | 10 +++++++-- Source/Engine/Scripting/Scripting.h | 
30 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index 67806a2ee..e5e5d1543 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -156,6 +156,12 @@ Action Scripting::ScriptsLoaded; Action Scripting::ScriptsUnload; Action Scripting::ScriptsReloading; Action Scripting::ScriptsReloaded; +Action Scripting::Update; +Action Scripting::LateUpdate; +Action Scripting::FixedUpdate; +Action Scripting::LateFixedUpdate; +Action Scripting::Draw; +Action Scripting::Exit; ThreadLocal Scripting::ObjectsLookupIdMapping; ScriptingService ScriptingServiceInstance; @@ -205,9 +211,9 @@ bool ScriptingService::Init() } #if COMPILE_WITHOUT_CSHARP -#define INVOKE_EVENT(name) +#define INVOKE_EVENT(name) Scripting::name(); #else -#define INVOKE_EVENT(name) \ +#define INVOKE_EVENT(name) Scripting::name(); \ if (!_isEngineAssemblyLoaded) return; \ if (_method_##name == nullptr) \ { \ diff --git a/Source/Engine/Scripting/Scripting.h b/Source/Engine/Scripting/Scripting.h index dad5a02fb..1a616ed79 100644 --- a/Source/Engine/Scripting/Scripting.h +++ b/Source/Engine/Scripting/Scripting.h @@ -44,6 +44,36 @@ public: /// static Delegate<> ScriptsReloaded; +public: + /// + /// Occurs on scripting update. + /// + static Delegate<> Update; + + /// + /// Occurs on scripting late update. + /// + static Delegate<> LateUpdate; + + /// + /// Occurs on scripting fixed update. + /// + static Delegate<> FixedUpdate; + + /// + /// Occurs on scripting late fixed update. + /// + static Delegate<> LateFixedUpdate; + + /// + /// Occurs on scripting draw update. Called during frame rendering and can be used to invoke custom rendering with GPUDevice. + /// + static Delegate<> Draw; + + /// + /// Occurs when scripting engine is disposing. Engine is during closing and some services may be unavailable (eg. loading scenes). 
This may be called after the engine fatal error event. + /// + static Delegate<> Exit; public: /// From 70912e1d564d91ab742ba29b9c1e0a61483f3630 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 9 Jul 2024 11:57:59 +0200 Subject: [PATCH 213/292] Add support for using InputAction editor on string variable --- .../CustomEditors/Editors/InputEditor.cs | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/Source/Editor/CustomEditors/Editors/InputEditor.cs b/Source/Editor/CustomEditors/Editors/InputEditor.cs index 5662aea0c..f73fe19f8 100644 --- a/Source/Editor/CustomEditors/Editors/InputEditor.cs +++ b/Source/Editor/CustomEditors/Editors/InputEditor.cs @@ -33,11 +33,25 @@ namespace FlaxEditor.CustomEditors.Editors names.Add(mapping.Name); } _comboBox.Items = names; - if (Values[0] is InputEvent inputEvent && names.Contains(inputEvent.Name)) + var prev = GetValue(); + if (prev is InputEvent inputEvent && names.Contains(inputEvent.Name)) _comboBox.SelectedItem = inputEvent.Name; + else if (prev is string name && names.Contains(name)) + _comboBox.SelectedItem = name; _comboBox.SelectedIndexChanged += OnSelectedIndexChanged; } + private object GetValue() + { + if (Values[0] is InputEvent inputEvent) + return inputEvent; + if (Values[0] is string str) + return str; + if (Values.Type.Type == typeof(string)) + return string.Empty; + return null; + } + private void OnSetupContextMenu(PropertyNameLabel label, ContextMenu menu, CustomEditor linkedEditor) { var button = menu.AddButton("Set to null"); @@ -46,7 +60,16 @@ namespace FlaxEditor.CustomEditors.Editors private void OnSelectedIndexChanged(ComboBox comboBox) { - SetValue(comboBox.SelectedItem == null ? 
null : new InputEvent(comboBox.SelectedItem)); + object value = null; + if (comboBox.SelectedItem != null) + { + var prev = GetValue(); + if (prev is InputEvent) + value = new InputEvent(comboBox.SelectedItem); + else if (prev is string) + value = comboBox.SelectedItem; + } + SetValue(value); } /// @@ -59,8 +82,11 @@ namespace FlaxEditor.CustomEditors.Editors } else { - if (Values[0] is InputEvent inputEvent && _comboBox.Items.Contains(inputEvent.Name)) + var prev = GetValue(); + if (prev is InputEvent inputEvent && _comboBox.Items.Contains(inputEvent.Name)) _comboBox.SelectedItem = inputEvent.Name; + else if (prev is string name && _comboBox.Items.Contains(name)) + _comboBox.SelectedItem = name; else _comboBox.SelectedItem = null; } From 6b06f1dbcf6c40f1e313a0c6718566692a9407e9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 10 Jul 2024 13:22:25 +0200 Subject: [PATCH 214/292] Optimize Global SDF to use 8-bit storage (50% less memory usage) --- .../Graphics/Materials/MaterialShader.h | 2 +- .../Renderer/GlobalSignDistanceFieldPass.cpp | 70 +++++---- .../Renderer/GlobalSignDistanceFieldPass.h | 2 + .../Engine/Visject/ShaderGraphUtilities.cpp | 2 +- Source/Shaders/GI/DDGI.shader | 8 +- Source/Shaders/GI/GlobalSurfaceAtlas.shader | 8 +- Source/Shaders/GlobalSignDistanceField.hlsl | 138 ++++++++++-------- Source/Shaders/GlobalSignDistanceField.shader | 37 +++-- Source/Shaders/SSR.shader | 4 +- 9 files changed, 151 insertions(+), 120 deletions(-) diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 3bea7a2ec..4b3f296a5 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials shader version. 
/// -#define MATERIAL_GRAPH_VERSION 167 +#define MATERIAL_GRAPH_VERSION 168 class Material; class GPUShader; diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 8e176f849..cf2b26d44 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -21,7 +21,7 @@ #include "Engine/Threading/JobSystem.h" // Some of those constants must match in shader -#define GLOBAL_SDF_FORMAT PixelFormat::R16_Float +#define GLOBAL_SDF_FORMAT PixelFormat::R8_SNorm #define GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT 28 // The maximum amount of models to rasterize at once as a batch into Global SDF. #define GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT 2 // The maximum amount of heightfields to store in a single chunk. #define GLOBAL_SDF_RASTERIZE_GROUP_SIZE 8 @@ -71,10 +71,13 @@ GPU_CB_STRUCT(ModelsRasterizeData { int32 CascadeMipResolution; int32 CascadeMipFactor; uint32 Objects[GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT]; - uint32 GenerateMipTexResolution; - uint32 GenerateMipCoordScale; - uint32 GenerateMipTexOffsetX; - uint32 GenerateMipMipOffsetX; + Float2 Padding10; + float MipMaxDistanceLoad; + float MipMaxDistanceStore; + uint32 MipTexResolution; + uint32 MipCoordScale; + uint32 MipTexOffsetX; + uint32 MipMipOffsetX; }); struct RasterizeChunk @@ -133,9 +136,11 @@ struct CascadeData bool Dirty; int32 Index; float ChunkSize; - float MaxDistance; + float MaxDistanceTex; + float MaxDistanceMip; Float3 Position; float VoxelSize; + float Extent; BoundingBox Bounds; BoundingBox CullingBounds; BoundingBox RasterizeBounds; @@ -315,14 +320,14 @@ public: cascade.Dirty = !useCache || RenderTools::ShouldUpdateCascade(FrameIndex, cascadeIndex, cascadesCount, maxCascadeUpdatesPerFrame, updateEveryFrame); if (!cascade.Dirty) continue; - const float cascadeDistance = distanceExtent * CascadesDistanceScales[cascadeIndex]; - const float cascadeMaxDistance = 
cascadeDistance * 2; - const float cascadeVoxelSize = cascadeMaxDistance / (float)resolution; + const float cascadeExtent = distanceExtent * CascadesDistanceScales[cascadeIndex]; + const float cascadeSize = cascadeExtent * 2; + const float cascadeVoxelSize = cascadeSize / (float)resolution; const float cascadeChunkSize = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_MIP_FACTOR == 0, "Adjust chunk size to match the mip factor scale."); const Float3 center = Float3::Floor(viewPosition / cascadeChunkSize) * cascadeChunkSize; //const Float3 center = Float3::Zero; - BoundingBox cascadeBounds(center - cascadeDistance, center + cascadeDistance); + BoundingBox cascadeBounds(center - cascadeExtent, center + cascadeExtent); // Clear cascade before rasterization cascade.Chunks.Clear(); @@ -342,8 +347,12 @@ public: // Setup cascade info cascade.Position = center; cascade.VoxelSize = cascadeVoxelSize; - cascade.ChunkSize = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; - cascade.MaxDistance = cascadeMaxDistance; + cascade.Extent = cascadeExtent; + cascade.ChunkSize = cascadeChunkSize; + cascade.MaxDistanceTex = cascadeChunkSize * 1.5f; // Encodes SDF distance to [-maxDst; +maxDst] to be packed as normalized value, limits the max SDF trace step distance + cascade.MaxDistanceMip = cascade.MaxDistanceTex * 2.0f; // Encode mip distance with less but covers larger area for faster jumps during tracing + cascade.MaxDistanceTex = Math::Min(cascade.MaxDistanceTex, cascadeSize); + cascade.MaxDistanceMip = Math::Min(cascade.MaxDistanceMip, cascadeSize); cascade.Bounds = cascadeBounds; cascade.RasterizeBounds = cascadeBounds; cascade.RasterizeBounds.Minimum += 0.1f; // Adjust to prevent overflowing chunk keys (cascade bounds are used for clamping object bounds) @@ -814,7 +823,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex ModelsRasterizeData data; data.CascadeCoordToPosMul 
= (Float3)cascade.Bounds.GetSize() / (float)resolution; data.CascadeCoordToPosAdd = (Float3)cascade.Bounds.Minimum + cascade.VoxelSize * 0.5f; - data.MaxDistance = cascade.MaxDistance; + data.MaxDistance = cascade.MaxDistanceTex; data.CascadeResolution = resolution; data.CascadeMipResolution = resolutionMip; data.CascadeIndex = cascadeIndex; @@ -986,17 +995,20 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex GPUTextureView* tmpMipView = tmpMip->ViewVolume(); // Tex -> Mip - data.GenerateMipTexResolution = data.CascadeResolution; - data.GenerateMipCoordScale = data.CascadeMipFactor; - data.GenerateMipTexOffsetX = data.CascadeIndex * data.CascadeResolution; - data.GenerateMipMipOffsetX = data.CascadeIndex * data.CascadeMipResolution; + data.MipMaxDistanceLoad = cascade.MaxDistanceTex; // Decode tex distance within chunk (more precision, for detailed tracing nearby geometry) + data.MipMaxDistanceStore = cascade.MaxDistanceMip; // Encode mip distance within whole volume (less precision, for fast jumps over empty spaces) + data.MipTexResolution = data.CascadeResolution; + data.MipCoordScale = data.CascadeMipFactor; + data.MipTexOffsetX = data.CascadeIndex * data.CascadeResolution; + data.MipMipOffsetX = data.CascadeIndex * data.CascadeMipResolution; context->UpdateCB(_cb1, &data); context->BindSR(0, textureView); context->BindUA(0, textureMipView); context->Dispatch(_csGenerateMip, mipDispatchGroups, mipDispatchGroups, mipDispatchGroups); - data.GenerateMipTexResolution = data.CascadeMipResolution; - data.GenerateMipCoordScale = 1; + data.MipTexResolution = data.CascadeMipResolution; + data.MipCoordScale = 1; + data.MipMaxDistanceLoad = data.MipMaxDistanceStore; for (int32 i = 1; i < floodFillIterations; i++) { context->ResetUA(); @@ -1005,16 +1017,16 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex // Mip -> Tmp context->BindSR(0, textureMipView); context->BindUA(0, tmpMipView); - 
data.GenerateMipTexOffsetX = data.CascadeIndex * data.CascadeMipResolution; - data.GenerateMipMipOffsetX = 0; + data.MipTexOffsetX = data.CascadeIndex * data.CascadeMipResolution; + data.MipMipOffsetX = 0; } else { // Tmp -> Mip context->BindSR(0, tmpMipView); context->BindUA(0, textureMipView); - data.GenerateMipTexOffsetX = 0; - data.GenerateMipMipOffsetX = data.CascadeIndex * data.CascadeMipResolution; + data.MipTexOffsetX = 0; + data.MipMipOffsetX = data.CascadeIndex * data.CascadeMipResolution; } context->UpdateCB(_cb1, &data); context->Dispatch(_csGenerateMip, mipDispatchGroups, mipDispatchGroups, mipDispatchGroups); @@ -1038,17 +1050,17 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { auto& cascade = sdfData.Cascades[cascadeIndex]; - const float cascadeDistance = distanceExtent * sdfData.CascadesDistanceScales[cascadeIndex]; - const float cascadeMaxDistance = cascadeDistance * 2; - const float cascadeVoxelSize = cascadeMaxDistance / (float)resolution; - const Float3 center = cascade.Position; - result.Constants.CascadePosDistance[cascadeIndex] = Vector4(center, cascadeDistance); - result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = cascadeVoxelSize; + const float cascadeExtent = distanceExtent * sdfData.CascadesDistanceScales[cascadeIndex]; + result.Constants.CascadePosDistance[cascadeIndex] = Vector4(cascade.Position, cascadeExtent); + result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = cascade.VoxelSize; + result.Constants.CascadeMaxDistance.Raw[cascadeIndex] = cascade.MaxDistanceTex; + result.Constants.CascadeMaxDistanceMip.Raw[cascadeIndex] = cascade.MaxDistanceMip; } for (int32 cascadeIndex = cascadesCount; cascadeIndex < 4; cascadeIndex++) { result.Constants.CascadePosDistance[cascadeIndex] = result.Constants.CascadePosDistance[cascadesCount - 1]; result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = 
result.Constants.CascadeVoxelSize.Raw[cascadesCount - 1]; + result.Constants.CascadeMaxDistance.Raw[cascadeIndex] = result.Constants.CascadeMaxDistance.Raw[cascadesCount - 1]; } result.Constants.Resolution = (float)resolution; result.Constants.CascadesCount = cascadesCount; diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h index 104aae790..52a3708b8 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.h @@ -15,6 +15,8 @@ public: GPU_CB_STRUCT(ConstantsData { Float4 CascadePosDistance[4]; Float4 CascadeVoxelSize; + Float4 CascadeMaxDistance; + Float4 CascadeMaxDistanceMip; Float2 Padding; uint32 CascadesCount; float Resolution; diff --git a/Source/Engine/Visject/ShaderGraphUtilities.cpp b/Source/Engine/Visject/ShaderGraphUtilities.cpp index 89e25d9d6..57d8f55ae 100644 --- a/Source/Engine/Visject/ShaderGraphUtilities.cpp +++ b/Source/Engine/Visject/ShaderGraphUtilities.cpp @@ -170,7 +170,7 @@ const Char* ShaderGraphUtilities::GenerateShaderResources(TextWriterUnicode& wri format = TEXT("Texture3D {0} : register(t{1});"); break; case MaterialParameterType::GlobalSDF: - format = TEXT("Texture3D {0}_Tex : register(t{1});\nTexture3D {0}_Mip : register(t{2});"); + format = TEXT("Texture3D {0}_Tex : register(t{1});\nTexture3D {0}_Mip : register(t{2});"); zeroOffset = false; registers = 2; break; diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 4eb71a7ae..9344600b5 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -73,8 +73,8 @@ uint GetProbeRaysCount(DDGIData data, uint probeState) RWTexture2D RWProbesData : register(u0); RWByteAddressBuffer RWActiveProbes : register(u1); -Texture3D GlobalSDFTex : register(t0); -Texture3D GlobalSDFMip : register(t1); +Texture3D GlobalSDFTex : register(t0); +Texture3D GlobalSDFMip : register(t1); float3 Remap(float3 value, float3 fromMin, 
float3 fromMax, float3 toMin, float3 toMax) { @@ -288,8 +288,8 @@ void CS_UpdateProbesInitArgs() RWTexture2D RWProbesTrace : register(u0); -Texture3D GlobalSDFTex : register(t0); -Texture3D GlobalSDFMip : register(t1); +Texture3D GlobalSDFTex : register(t0); +Texture3D GlobalSDFMip : register(t1); ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2); ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t3); Buffer GlobalSurfaceAtlasObjects : register(t4); diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index a81383dcf..2a762f5be 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -91,8 +91,8 @@ Texture2D ProbesData : register(t5); Texture2D ProbesDistance : register(t6); Texture2D ProbesIrradiance : register(t7); #else -Texture3D GlobalSDFTex : register(t5); -Texture3D GlobalSDFMip : register(t6); +Texture3D GlobalSDFTex : register(t5); +Texture3D GlobalSDFMip : register(t6); #endif // Pixel shader for Global Surface Atlas shading @@ -289,8 +289,8 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID) #ifdef _PS_Debug -Texture3D GlobalSDFTex : register(t0); -Texture3D GlobalSDFMip : register(t1); +Texture3D GlobalSDFTex : register(t0); +Texture3D GlobalSDFMip : register(t1); ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2); ByteAddressBuffer GlobalSurfaceAtlasCulledObjects : register(t3); Buffer GlobalSurfaceAtlasObjects : register(t4); diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index cafb02869..95f8e6f5b 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -3,8 +3,10 @@ #include "./Flax/Common.hlsl" #include "./Flax/Collisions.hlsl" +// This must match C++ #define GLOBAL_SDF_RASTERIZE_CHUNK_SIZE 32 #define GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN 4 +#define GLOBAL_SDF_RASTERIZE_MIP_FACTOR 4 #define 
GLOBAL_SDF_MIP_FLOODS 5 #define GLOBAL_SDF_WORLD_SIZE 60000.0f #define GLOBAL_SDF_MIN_VALID 0.9f @@ -15,6 +17,8 @@ struct GlobalSDFData { float4 CascadePosDistance[4]; float4 CascadeVoxelSize; + float4 CascadeMaxDistanceTex; + float4 CascadeMaxDistanceMip; float2 Padding; uint CascadesCount; float Resolution; @@ -61,13 +65,13 @@ struct GlobalSDFHit } }; -void GetGlobalSDFCascadeUV(const GlobalSDFData data, uint cascade, float3 worldPosition, out float cascadeMaxDistance, out float3 cascadeUV, out float3 textureUV) +void GetGlobalSDFCascadeUV(const GlobalSDFData data, uint cascade, float3 worldPosition, out float cascadeSize, out float3 cascadeUV, out float3 textureUV) { float4 cascadePosDistance = data.CascadePosDistance[cascade]; float3 posInCascade = worldPosition - cascadePosDistance.xyz; - cascadeMaxDistance = cascadePosDistance.w * 2; - cascadeUV = saturate(posInCascade / cascadeMaxDistance + 0.5f); - textureUV = float3(((float)cascade + cascadeUV.x) / (float)data.CascadesCount, cascadeUV.y, cascadeUV.z); // cascades are placed next to each other on X axis + cascadeSize = cascadePosDistance.w * 2; + cascadeUV = saturate(posInCascade / cascadeSize + 0.5f); + textureUV = float3(((float)cascade + cascadeUV.x) / (float)data.CascadesCount, cascadeUV.y, cascadeUV.z); // Cascades are placed next to each other on X axis } // Gets the Global SDF cascade index for the given world location. 
@@ -75,9 +79,9 @@ uint GetGlobalSDFCascade(const GlobalSDFData data, float3 worldPosition) { for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeMaxDistance; + float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); if (all(cascadeUV > 0) && all(cascadeUV < 1)) return cascade; } @@ -85,33 +89,35 @@ uint GetGlobalSDFCascade(const GlobalSDFData data, float3 worldPosition) } // Samples the Global SDF cascade and returns the distance to the closest surface (in world units) at the given world location. -float SampleGlobalSDFCascade(const GlobalSDFData data, Texture3D tex, float3 worldPosition, uint cascade) +float SampleGlobalSDFCascade(const GlobalSDFData data, Texture3D tex, float3 worldPosition, uint cascade) { float distance = GLOBAL_SDF_WORLD_SIZE; - float cascadeMaxDistance; + float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); - float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) - distance = cascadeDistance * cascadeMaxDistance; + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); + if (distanceTex < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) + distance = distanceTex * maxDistanceTex; return distance; } // Samples the Global SDF and returns the distance to the closest surface (in world units) at the given world location. 
-float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, float3 worldPosition) +float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, float3 worldPosition) { float distance = data.CascadePosDistance[3].w * 2.0f; if (distance <= 0.0f) return GLOBAL_SDF_WORLD_SIZE; for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeMaxDistance; + float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); - float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); + if (distanceTex < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) { - distance = cascadeDistance * cascadeMaxDistance; + distance = distanceTex * maxDistanceTex; break; } } @@ -119,25 +125,24 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, float3 wor } // Samples the Global SDF and returns the distance to the closest surface (in world units) at the given world location. 
-float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition) +float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition) { float distance = data.CascadePosDistance[3].w * 2.0f; if (distance <= 0.0f) return GLOBAL_SDF_WORLD_SIZE; - float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1) - float chunkMarginDistance = GLOBAL_SDF_CHUNK_MARGIN_SCALE * (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeMaxDistance; + float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); - float cascadeDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < chunkSizeDistance && all(cascadeUV > 0) && all(cascadeUV < 1)) + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + float distanceMip = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); + if (distanceMip < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) { - float cascadeDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistanceTex < chunkMarginDistance) - cascadeDistance = cascadeDistanceTex; - distance = cascadeDistance * cascadeMaxDistance; + distance = distanceMip * cascadeSize; + float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); + if (distanceTex < GLOBAL_SDF_MIN_VALID) + distance = distanceTex * maxDistanceTex; break; } } @@ -145,7 +150,7 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D< } // Samples the Global SDF and returns the gradient vector (derivative) at the given world location. Normalize it to get normal vector. 
-float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, float3 worldPosition, out float distance) +float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, float3 worldPosition, out float distance) { float3 gradient = float3(0, 0.00001f, 0); distance = GLOBAL_SDF_WORLD_SIZE; @@ -153,11 +158,11 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, f return gradient; for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeMaxDistance; + float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); - float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); + if (distanceTex < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) { float texelOffset = 1.0f / data.Resolution; float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; @@ -166,8 +171,8 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, f float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; - gradient = float3(xp - xn, yp - yn, zp - zn) * cascadeMaxDistance; - distance = cascadeDistance * cascadeMaxDistance; + gradient = float3(xp - xn, yp - yn, zp - zn) * cascadeSize; + distance = distanceTex * cascadeSize; break; } } @@ -175,25 +180,29 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D 
tex, f } // Samples the Global SDF and returns the gradient vector (derivative) at the given world location. Normalize it to get normal vector. -float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition, out float distance) +float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition, out float distance) { float3 gradient = float3(0, 0.00001f, 0); distance = GLOBAL_SDF_WORLD_SIZE; if (data.CascadePosDistance[3].w <= 0.0f) return gradient; - float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1) - float chunkMarginDistance = GLOBAL_SDF_CHUNK_MARGIN_SCALE * (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeMaxDistance; + float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV); - float cascadeDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistance < chunkSizeDistance && all(cascadeUV > 0) && all(cascadeUV < 1)) + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + float voxelSize = data.CascadeVoxelSize[cascade]; + float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); + float maxDistanceMip = data.CascadeMaxDistanceMip[cascade]; + float distanceMip = mip.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceMip; + if (distanceMip < chunkSize && all(cascadeUV > 0) && all(cascadeUV < 1)) { - float cascadeDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (cascadeDistanceTex < chunkMarginDistance) - cascadeDistance = cascadeDistanceTex; + distance = distanceMip; + float 
maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceTex; + if (distanceTex < chunkMargin) + distance = distanceTex; float texelOffset = 1.0f / data.Resolution; float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x; @@ -201,8 +210,7 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, T float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; - gradient = float3(xp - xn, yp - yn, zp - zn) * cascadeMaxDistance; - distance = cascadeDistance * cascadeMaxDistance; + gradient = float3(xp - xn, yp - yn, zp - zn) * maxDistanceTex; break; } } @@ -211,12 +219,10 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, T // Ray traces the Global SDF. // cascadeTraceStartBias - scales the trace start position offset (along the trace direction) by cascade voxel size (reduces artifacts on far cascades). Use it for shadow rays to prevent self-occlusion when tracing from object surface that looses quality in far cascades. 
-GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D mip, const GlobalSDFTrace trace, float cascadeTraceStartBias = 0.0f) +GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D mip, const GlobalSDFTrace trace, float cascadeTraceStartBias = 0.0f) { GlobalSDFHit hit = (GlobalSDFHit)0; hit.HitTime = -1.0f; - float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1) - float chunkMarginDistance = GLOBAL_SDF_CHUNK_MARGIN_SCALE * (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1) float nextIntersectionStart = trace.MinDistance; float traceMaxDistance = min(trace.MaxDistance, data.CascadePosDistance[3].w * 2); float3 traceEndPosition = trace.WorldPosition + trace.WorldDirection * traceMaxDistance; @@ -246,6 +252,10 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T // Walk over the cascade SDF uint step = 0; float stepTime = intersections.x; + float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); + float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; + float maxDistanceMip = data.CascadeMaxDistanceMip[cascade]; LOOP for (; step < 250 && stepTime < intersections.y && hit.HitTime < 0.0f; step++) { @@ -253,28 +263,30 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T float stepScale = trace.StepScale; // Sample SDF - float cascadeMaxDistance; + float stepDistance, cascadeSize, voxelSizeScale = (float)GLOBAL_SDF_RASTERIZE_MIP_FACTOR; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeMaxDistance, cascadeUV, textureUV); - float stepDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (stepDistance < chunkSizeDistance) + GetGlobalSDFCascadeUV(data, cascade, stepPosition, 
cascadeSize, cascadeUV, textureUV); + float distanceMip = mip.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceMip; + if (distanceMip < chunkSize) { - float stepDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (stepDistanceTex < chunkMarginDistance) + stepDistance = distanceMip; + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceTex; + if (distanceTex < chunkMargin) { - stepDistance = stepDistanceTex; + stepDistance = distanceTex; + voxelSizeScale = 1.0f; stepScale *= 0.63f; // Perform smaller steps nearby geometry } } else { - // Assume no SDF nearby so perform a jump - stepDistance = chunkSizeDistance; + // Assume no SDF nearby so perform a jump tto the next chunk + stepDistance = chunkSize; + voxelSizeScale = 1.0f; } - stepDistance *= cascadeMaxDistance; // Detect surface hit - float minSurfaceThickness = voxelExtent * saturate(stepTime / voxelSize); + float minSurfaceThickness = voxelSizeScale * voxelExtent * saturate(stepTime / voxelSize); if (stepDistance < minSurfaceThickness) { // Surface hit @@ -308,5 +320,5 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex, T float GetGlobalSurfaceAtlasThreshold(const GlobalSDFData data, const GlobalSDFHit hit) { // Scale the threshold based on the hit cascade (less precision) - return data.CascadeVoxelSize[hit.HitCascade] * 1.1f; + return data.CascadeVoxelSize[hit.HitCascade] * 1.17f; } diff --git a/Source/Shaders/GlobalSignDistanceField.shader b/Source/Shaders/GlobalSignDistanceField.shader index d92f98acf..b0a058cd3 100644 --- a/Source/Shaders/GlobalSignDistanceField.shader +++ b/Source/Shaders/GlobalSignDistanceField.shader @@ -43,10 +43,13 @@ float CascadeVoxelSize; int CascadeMipResolution; int CascadeMipFactor; uint4 Objects[GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT / 4]; -uint GenerateMipTexResolution; -uint GenerateMipCoordScale; -uint GenerateMipTexOffsetX; -uint GenerateMipMipOffsetX; +float2 Padding20; +float 
MipMaxDistanceLoad; +float MipMaxDistanceStore; +uint MipTexResolution; +uint MipCoordScale; +uint MipTexOffsetX; +uint MipMipOffsetX; META_CB_END float CombineDistanceToSDF(float sdf, float distanceToSDF) @@ -71,7 +74,7 @@ float CombineSDF(float oldSdf, float newSdf) #if defined(_CS_RasterizeModel) || defined(_CS_RasterizeHeightfield) -RWTexture3D GlobalSDFTex : register(u0); +RWTexture3D GlobalSDFTex : register(u0); StructuredBuffer ObjectsBuffer : register(t0); #endif @@ -213,7 +216,7 @@ void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID) #if defined(_CS_ClearChunk) -RWTexture3D GlobalSDFTex : register(u0); +RWTexture3D GlobalSDFTex : register(u0); // Compute shader for clearing Global SDF chunk META_CS(true, FEATURE_LEVEL_SM5) @@ -229,19 +232,21 @@ void CS_ClearChunk(uint3 DispatchThreadId : SV_DispatchThreadID) #if defined(_CS_GenerateMip) -RWTexture3D GlobalSDFMip : register(u0); -Texture3D GlobalSDFTex : register(t0); +RWTexture3D GlobalSDFMip : register(u0); +Texture3D GlobalSDFTex : register(t0); float SampleSDF(uint3 voxelCoordMip, int3 offset) { // Sample SDF - voxelCoordMip = (uint3)clamp((int3)(voxelCoordMip * GenerateMipCoordScale) + offset, int3(0, 0, 0), (int3)(GenerateMipTexResolution - 1)); - voxelCoordMip.x += GenerateMipTexOffsetX; + voxelCoordMip = (uint3)clamp((int3)(voxelCoordMip * MipCoordScale) + offset, int3(0, 0, 0), (int3)(MipTexResolution - 1)); + voxelCoordMip.x += MipTexOffsetX; float result = GlobalSDFTex[voxelCoordMip].r; + if (result >= GLOBAL_SDF_MIN_VALID) + return MipMaxDistanceStore; // No valid distance so use the limit + result *= MipMaxDistanceLoad; // Decode normalized distance to world-units // Extend by distance to the sampled texel location - float distanceInWorldUnits = length((float3)offset) * (MaxDistance / (float)GenerateMipTexResolution); - float distanceToVoxel = distanceInWorldUnits / MaxDistance; + float distanceToVoxel = length((float3)offset) * CascadeVoxelSize * ((float)CascadeResolution 
/ (float)MipTexResolution); result = CombineDistanceToSDF(result, distanceToVoxel); return result; @@ -263,16 +268,16 @@ void CS_GenerateMip(uint3 DispatchThreadId : SV_DispatchThreadID) minDistance = min(minDistance, SampleSDF(voxelCoordMip, int3(0, -1, 0))); minDistance = min(minDistance, SampleSDF(voxelCoordMip, int3(0, 0, -1))); - voxelCoordMip.x += GenerateMipMipOffsetX; - GlobalSDFMip[voxelCoordMip] = minDistance; + voxelCoordMip.x += MipMipOffsetX; + GlobalSDFMip[voxelCoordMip] = clamp(minDistance / MipMaxDistanceStore, -1, 1); } #endif #ifdef _PS_Debug -Texture3D GlobalSDFTex : register(t0); -Texture3D GlobalSDFMip : register(t1); +Texture3D GlobalSDFTex : register(t0); +Texture3D GlobalSDFMip : register(t1); // Pixel shader for Global SDF debug drawing META_PS(true, FEATURE_LEVEL_SM5) diff --git a/Source/Shaders/SSR.shader b/Source/Shaders/SSR.shader index a813bdf36..a3ee880fe 100644 --- a/Source/Shaders/SSR.shader +++ b/Source/Shaders/SSR.shader @@ -51,8 +51,8 @@ Texture2D Texture0 : register(t4); Texture2D Texture1 : register(t5); Texture2D Texture2 : register(t6); #if USE_GLOBAL_SURFACE_ATLAS -Texture3D GlobalSDFTex : register(t7); -Texture3D GlobalSDFMip : register(t8); +Texture3D GlobalSDFTex : register(t7); +Texture3D GlobalSDFMip : register(t8); ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t9); ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t10); Buffer GlobalSurfaceAtlasObjects : register(t11); From d5214090337a81e13b7d896c252ac13be706dcb0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 10 Jul 2024 13:58:01 +0200 Subject: [PATCH 215/292] Missing change for particles from 6b06f1dbcf6c40f1e313a0c6718566692a9407e9 --- Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h index 25d9c853f..461850d58 100644 --- 
a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h +++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h @@ -5,7 +5,7 @@ /// /// Current GPU particles emitter shader version. /// -#define PARTICLE_GPU_GRAPH_VERSION 10 +#define PARTICLE_GPU_GRAPH_VERSION 11 #if COMPILE_WITH_PARTICLE_GPU_GRAPH From dbda31d570acc73a23ff16748ba7ace2d7c9334d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 10 Jul 2024 15:34:38 +0200 Subject: [PATCH 216/292] Add `timeBeginPeriod(1)` on `Win32` platforms to improve timer precision --- Source/Engine/Platform/Win32/Win32Platform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Engine/Platform/Win32/Win32Platform.cpp b/Source/Engine/Platform/Win32/Win32Platform.cpp index d46706621..25559d411 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.cpp +++ b/Source/Engine/Platform/Win32/Win32Platform.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #pragma comment(lib, "Iphlpapi.lib") static_assert(sizeof(int32) == sizeof(long), "Invalid long size for Interlocked and Atomic operations in Win32Platform."); @@ -66,6 +67,7 @@ bool Win32Platform::Init() return true; // Init timing + timeBeginPeriod(1); LARGE_INTEGER frequency; const auto freqResult = QueryPerformanceFrequency(&frequency); ASSERT(freqResult && frequency.QuadPart > 0); @@ -220,6 +222,7 @@ bool Win32Platform::Init() void Win32Platform::Exit() { WSACleanup(); + timeEndPeriod(1); } void Win32Platform::MemoryBarrier() From 352913ba14ca56fc952ff0a1385c9027e9b002c1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 11 Jul 2024 14:48:24 +0200 Subject: [PATCH 217/292] Add random per-probe rotation for rays tracing in DDGI --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 9 ++++---- .../GI/DynamicDiffuseGlobalIllumination.h | 1 - .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 2 +- .../Renderer/GlobalSignDistanceFieldPass.cpp | 2 +- Source/Shaders/GI/DDGI.hlsl | 1 - Source/Shaders/GI/DDGI.shader | 23 ++++++++++++++----- 
Source/Shaders/Math.hlsl | 3 +++ Source/Shaders/Noise.hlsl | 18 +++++++++++++++ Source/Shaders/Quaternion.hlsl | 5 ++++ 9 files changed, 50 insertions(+), 14 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 6b92607ea..27ec7146d 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -46,6 +46,7 @@ GPU_CB_STRUCT(Data0 { GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; ShaderGBufferData GBuffer; + Float4 RaysRotation; float Padding0; uint32 ProbesCount; float ResetBlend; @@ -379,7 +380,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)); INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs"); #undef INIT_BUFFER - LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCountTotal); + LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024)); clear = true; } #if COMPILE_WITH_DEV_ENV @@ -465,14 +466,15 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont ddgiData.Result.ProbesDistance = ddgiData.ProbesDistance->View(); ddgiData.Result.ProbesIrradiance = ddgiData.ProbesIrradiance->View(); + Data0 data; + // Compute random rotation matrix for probe rays orientation (randomized every frame) Matrix3x3 raysRotationMatrix; CalculateVolumeRandomRotation(raysRotationMatrix); - Quaternion& raysRotation = *(Quaternion*)&ddgiData.Result.Constants.RaysRotation; + 
Quaternion& raysRotation = *(Quaternion*)&data.RaysRotation; Quaternion::RotationMatrix(raysRotationMatrix, raysRotation); raysRotation.Conjugate(); - Data0 data; data.DDGI = ddgiData.Result.Constants; data.GlobalSDF = bindingDataSDF.Constants; data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants; @@ -638,7 +640,6 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, auto& ddgiData = *renderBuffers->GetCustomBuffer(TEXT("DDGI")); if (render && ddgiData.LastFrameUsed == Engine::FrameCount) render = false; - PROFILE_GPU_CPU("Dynamic Diffuse Global Illumination"); if (render) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index 3931b2777..28b18f39c 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -22,7 +22,6 @@ public: float ProbeHistoryWeight; float RayMaxDistance; float IndirectLightingIntensity; - Float4 RaysRotation; Float3 ViewPos; uint32 RaysCount; Float3 FallbackIrradiance; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 7386fb077..65a5ba7d6 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -742,7 +742,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co return true; memUsage += surfaceAtlasData.ChunksBuffer->GetMemoryUsage(); } - LOG(Info, "Global Surface Atlas resolution: {0}, memory usage: {1} MB", resolution, memUsage / 1024 / 1024); + LOG(Info, "Global Surface Atlas resolution: {0}, memory usage: {1} MB", resolution, memUsage / (1024 * 1024)); } for (SceneRendering* scene : renderContext.List->Scenes) surfaceAtlasData.ListenSceneRendering(scene); diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp 
b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index cf2b26d44..173604069 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -764,7 +764,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } } uint64 memoryUsage = sdfData.Texture->GetMemoryUsage() + sdfData.TextureMip->GetMemoryUsage(); - LOG(Info, "Global SDF memory usage: {0} MB", memoryUsage / 1024 / 1024); + LOG(Info, "Global SDF memory usage: {0} MB", memoryUsage / (1024 * 1024)); } if (sdfData.Origin != renderContext.View.Origin) { diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 03979035b..fff009022 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -32,7 +32,6 @@ struct DDGIData float ProbeHistoryWeight; float RayMaxDistance; float IndirectLightingIntensity; - float4 RaysRotation; float3 ViewPos; uint RaysCount; float3 FallbackIrradiance; diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 9344600b5..c80407d54 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -11,6 +11,7 @@ #include "./Flax/Common.hlsl" #include "./Flax/Math.hlsl" +#include "./Flax/Noise.hlsl" #include "./Flax/Quaternion.hlsl" #include "./Flax/GlobalSignDistanceField.hlsl" #include "./Flax/GI/GlobalSurfaceAtlas.hlsl" @@ -30,6 +31,7 @@ DDGIData DDGI; GlobalSDFData GlobalSDF; GlobalSurfaceAtlasData GlobalSurfaceAtlas; GBufferData GBuffer; +float4 RaysRotation; float Padding0; uint ProbesCount; float ResetBlend; @@ -55,10 +57,19 @@ float3 GetSphericalFibonacci(float sampleIndex, float samplesCount) } // Calculates a random normalized ray direction (based on the ray index and the current probes rotation phrase) -float3 GetProbeRayDirection(DDGIData data, uint rayIndex) +float3 GetProbeRayDirection(DDGIData data, uint rayIndex, uint raysCount, uint probeIndex, uint3 probeCoords) { - float3 direction = 
GetSphericalFibonacci((float)rayIndex, (float)data.RaysCount); - return normalize(QuaternionRotate(data.RaysRotation, direction)); + float4 rotation = RaysRotation; + + // Randomize rotation per-probe (otherwise all probes are in sync) + float3 probePos = (float3)probeCoords / (float3)data.ProbesCounts; + float3 randomAxis = normalize(Mod289(probePos)); + float randomAngle = (float)probeIndex / (float)ProbesCount * (2.0f * PI); + rotation = QuaternionMultiply(rotation, QuaternionFromAxisAngle(randomAxis, randomAngle)); + + // Random rotation per-ray - relative to the per-frame rays rotation + float3 direction = GetSphericalFibonacci((float)rayIndex, (float)raysCount); + return normalize(QuaternionRotate(rotation, direction)); } // Calculates amount of rays to allocate for a probe @@ -299,7 +310,7 @@ Texture2D ProbesData : register(t7); TextureCube Skybox : register(t8); ByteAddressBuffer ActiveProbes : register(t9); -// Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas. +// Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas (1 ray per-thread). 
META_CS(true, FEATURE_LEVEL_SM5) META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=96) META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=128) @@ -320,7 +331,7 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount) return; // Skip disabled probes or if current thread's ray is unused float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords); - float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex); + float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); // Trace ray with Global SDF GlobalSDFTrace trace; @@ -428,7 +439,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); #endif - CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex); + CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); } } GroupMemoryBarrierWithGroupSync(); diff --git a/Source/Shaders/Math.hlsl b/Source/Shaders/Math.hlsl index 2eebbee47..cd91e09d0 100644 --- a/Source/Shaders/Math.hlsl +++ b/Source/Shaders/Math.hlsl @@ -3,6 +3,9 @@ #ifndef __MATH__ #define __MATH__ +#define RadiansToDegrees (180.0f / PI) +#define DegreesToRadians (PI / 180.0f) + uint NextPow2(uint value) { uint mask = (1 << firstbithigh(value)) - 1; diff --git a/Source/Shaders/Noise.hlsl b/Source/Shaders/Noise.hlsl index 7297145d4..8ba32c34c 100644 --- a/Source/Shaders/Noise.hlsl +++ b/Source/Shaders/Noise.hlsl @@ -71,6 +71,24 @@ float2 rand2dTo2d(float2 value) ); } +float rand3dTo1d(float3 value, float3 dotDir = float3(12.9898, 78.233, 37.719)) +{ + // https://www.ronja-tutorials.com/post/024-white-noise/ + float3 smallValue = sin(value); + float random = dot(smallValue, dotDir); + return frac(sin(random) * 
143758.5453); +} + +float3 rand3dTo3d(float3 value) +{ + // https://www.ronja-tutorials.com/post/024-white-noise/ + return float3( + rand3dTo1d(value, float3(12.989, 78.233, 37.719)), + rand3dTo1d(value, float3(39.346, 11.135, 83.155)), + rand3dTo1d(value, float3(73.156, 52.235, 09.151)) + ); +} + // Classic Perlin noise float PerlinNoise(float2 p) { diff --git a/Source/Shaders/Quaternion.hlsl b/Source/Shaders/Quaternion.hlsl index 204fec811..d40511db2 100644 --- a/Source/Shaders/Quaternion.hlsl +++ b/Source/Shaders/Quaternion.hlsl @@ -15,4 +15,9 @@ float3 QuaternionRotate(float4 q, float3 v) return (v * (q.w * q.w - b2) + b * (dot(v, b) * 2.f) + cross(b, v) * (q.w * 2.f)); } +float4 QuaternionFromAxisAngle(float3 axis, float angle) +{ + return float4(axis * sin(angle * 0.5f), cos(angle * 0.5f)); +} + #endif From e83097d1142daa8dca6c37c1f5458414645e85bd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 12 Jul 2024 17:04:23 +0200 Subject: [PATCH 218/292] Fix missing memory allocator for Vulkan Memory Allocator --- Source/Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h b/Source/Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h index 887e5f89d..f9f753cb5 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h +++ b/Source/Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h @@ -37,6 +37,8 @@ #define VMA_MIN(v1, v2) (Math::Min((v1), (v2))) #define VMA_MAX(v1, v2) (Math::Max((v1), (v2))) #define VMA_SWAP(v1, v2) (::Swap((v1), (v2))) +#define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) Platform::Allocate(uint64(size), uint64(alignment)) +#define VMA_SYSTEM_ALIGNED_FREE(ptr) Platform::Free(ptr) #define VMA_NULLABLE #define VMA_NOT_NULL #include From b80101411f34ae3a8922d545692ddb352f3f70c6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 12 Jul 2024 17:05:07 +0200 Subject: [PATCH 219/292] Optimize Vulkan Memory Allocator to not use 
mutex as it's synced by engine to safely access resources --- Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 9b6d4ba2c..7a6c560ab 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1845,6 +1845,7 @@ bool GPUDeviceVulkan::Init() #endif #undef INIT_FUNC VmaAllocatorCreateInfo allocatorInfo = {}; + allocatorInfo.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; allocatorInfo.vulkanApiVersion = VULKAN_API_VERSION; allocatorInfo.physicalDevice = gpu; allocatorInfo.instance = Instance; From e8b0419ccff413a7e2fc991cd1e013ebdddb0802 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 12 Jul 2024 17:14:02 +0200 Subject: [PATCH 220/292] Update assets --- Content/Editor/Camera/M_Camera.flax | 2 +- Content/Editor/CubeTexturePreviewMaterial.flax | 2 +- Content/Editor/DebugMaterials/DDGIDebugProbes.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Decal.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Particle.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Surface.flax | 4 ++-- .../Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Terrain.flax | 4 ++-- Content/Editor/DefaultFontMaterial.flax | 4 ++-- Content/Editor/Gizmo/FoliageBrushMaterial.flax | 2 +- Content/Editor/Gizmo/Material.flax | 2 +- Content/Editor/Gizmo/MaterialWire.flax | 2 +- Content/Editor/Gizmo/SelectionOutlineMaterial.flax | 2 +- Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax | 2 +- Content/Editor/Highlight Material.flax | 2 +- Content/Editor/Icons/IconsMaterial.flax | 2 +- Content/Editor/IesProfilePreviewMaterial.flax | 2 +- Content/Editor/Particles/Particle Material Color.flax | 4 ++-- Content/Editor/Particles/Smoke Material.flax | 4 ++-- 
Content/Editor/Particles/Smoke.flax | 2 +- Content/Editor/Particles/Sparks.flax | 2 +- Content/Editor/SpriteMaterial.flax | 2 +- Content/Editor/Terrain/Circle Brush Material.flax | 2 +- Content/Editor/Terrain/Highlight Terrain Material.flax | 2 +- Content/Editor/TexturePreviewMaterial.flax | 2 +- Content/Editor/Wires Debug Material.flax | 2 +- Content/Engine/DefaultDeformableMaterial.flax | 2 +- Content/Engine/DefaultMaterial.flax | 2 +- Content/Engine/DefaultRadialMenu.flax | 2 +- Content/Engine/DefaultTerrainMaterial.flax | 2 +- Content/Engine/SingleColorMaterial.flax | 4 ++-- Content/Engine/SkyboxMaterial.flax | 4 ++-- Content/Shaders/GI/DDGI.flax | 4 ++-- Content/Shaders/GI/GlobalSurfaceAtlas.flax | 4 ++-- Content/Shaders/GlobalSignDistanceField.flax | 4 ++-- Content/Shaders/SSR.flax | 4 ++-- 36 files changed, 50 insertions(+), 50 deletions(-) diff --git a/Content/Editor/Camera/M_Camera.flax b/Content/Editor/Camera/M_Camera.flax index ffcd0ac50..e49e91a7e 100644 --- a/Content/Editor/Camera/M_Camera.flax +++ b/Content/Editor/Camera/M_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73319b1efc4f16eb9129ab8f832fc358dc25cc5ea8b8c681bbed005d46c31ed9 +oid sha256:d2d30d16769e4159f2514fdd4914d9376b388905e5c3fd796693c5dfe8b94542 size 28071 diff --git a/Content/Editor/CubeTexturePreviewMaterial.flax b/Content/Editor/CubeTexturePreviewMaterial.flax index 0353c5519..d1f740849 100644 --- a/Content/Editor/CubeTexturePreviewMaterial.flax +++ b/Content/Editor/CubeTexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8198710e63bdb513d71822c9e0c1ffce510e80c08ea6e30e8b27d1d7e3a657cd +oid sha256:90892654cb922466dcb29a9dc8a395bf31b7c3aed5c85872d8cec04d2a9bd3f8 size 29786 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index ce98f5e3b..b5d7769f9 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ 
b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab5c59aa3f1ce5d74e6069cc03ec177c1051a9d0529d21f3b6254ad82c19763f -size 38982 +oid sha256:b7a6c1ffe6855457f53a86155f4331d2d269a223c16dbd1b16f44e5a5d185561 +size 39019 diff --git a/Content/Editor/DebugMaterials/SingleColor/Decal.flax b/Content/Editor/DebugMaterials/SingleColor/Decal.flax index 017d89c88..559e60182 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Decal.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Decal.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a353de21ffdc515b0317ae89c09dc1733623f0841496d82f2aaecc7cdd8294b0 +oid sha256:ccf3359fde4c572e9e0c56af2b30c00bc1f15466df87ab3f20a6680c8bfe2f69 size 7489 diff --git a/Content/Editor/DebugMaterials/SingleColor/Particle.flax b/Content/Editor/DebugMaterials/SingleColor/Particle.flax index 727c18176..b3ecb5c4d 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Particle.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Particle.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6ab245b683eee907b7fb9d18d1418bc553c2893859235720e98dd555e35a8f4 -size 31681 +oid sha256:be34f63503926fcbf354ee89cc38e6a4b2003d90b07ffa538dd13243d9561176 +size 31664 diff --git a/Content/Editor/DebugMaterials/SingleColor/Surface.flax b/Content/Editor/DebugMaterials/SingleColor/Surface.flax index ad6a58454..216a9ab49 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Surface.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Surface.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e7424801de3c72adc081eaf9f3b6056a621ba6dc0d3fad7e60efa3ab8a729fb -size 27930 +oid sha256:9ce8b9f2d3fccb2c7d2c48943953820f3ee2f89e84fd7b2631c4504d880372ef +size 27967 diff --git a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax index a991d614a..8f68e8d88 
100644 --- a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax +++ b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a82d0bb9748423c60a279ad65f68e350fc85654def84618c4e5ea18955ba239 -size 29668 +oid sha256:33bb400d3d9fa3da0c098207204b820da8e42512fb9cea966d8eaf5533fa07f2 +size 29648 diff --git a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax index 650fa5c6b..932bfd595 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e16bb9ebcf09bf35a9a8bcd59cd55b7bc0bfc7c190aad727684eb4fc6d7e082e -size 21324 +oid sha256:82b8dd134ea46c5dab554de9a5913e8642fa94979b5ff45c08167bd6de91365d +size 21314 diff --git a/Content/Editor/DefaultFontMaterial.flax b/Content/Editor/DefaultFontMaterial.flax index d98226904..e6de70e78 100644 --- a/Content/Editor/DefaultFontMaterial.flax +++ b/Content/Editor/DefaultFontMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8524b92333b4d803951e35ac02ad953b9af26b4a99bd6b5c05fe1092861ab4a6 -size 28109 +oid sha256:f50d6604365ddf117f9dee2168f9d412ccff9c78ba04a943c24d710306770600 +size 28146 diff --git a/Content/Editor/Gizmo/FoliageBrushMaterial.flax b/Content/Editor/Gizmo/FoliageBrushMaterial.flax index da8b864af..16cb82140 100644 --- a/Content/Editor/Gizmo/FoliageBrushMaterial.flax +++ b/Content/Editor/Gizmo/FoliageBrushMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1220702a7347c60df190dbf5cba1266e0b50803e602292bbf1cd6383ca14539 +oid sha256:ffde790bdd39a3931c5154aa5588752a2cbc8f916b7c7bab490bb174427b5bf2 size 35675 diff --git a/Content/Editor/Gizmo/Material.flax b/Content/Editor/Gizmo/Material.flax index ebff31379..830bf4e46 100644 --- a/Content/Editor/Gizmo/Material.flax +++ 
b/Content/Editor/Gizmo/Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4237bb3c785375d550988a15fb4fcfce47a0936ee45b4a2e8e0f8cebbad90eec +oid sha256:22265e90fac50f82a7f719ecf249bf6ac3cc612dd77fd929515347ba76b07456 size 30252 diff --git a/Content/Editor/Gizmo/MaterialWire.flax b/Content/Editor/Gizmo/MaterialWire.flax index 2dc49b1fe..b49737868 100644 --- a/Content/Editor/Gizmo/MaterialWire.flax +++ b/Content/Editor/Gizmo/MaterialWire.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cace2c7eb81be70b756d747fa6a3ef4778df2da50d27a26fdd7aa9884b7aec7b +oid sha256:de718dbc549d8777bac2c66f018c4780770bce5558701e7db81ef0b3d3ebaee1 size 29390 diff --git a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax index fb133f924..2cd9ae1a8 100644 --- a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax +++ b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:babd06a770546e7bf8e2f8fd8cce829984e81f1b00751770a554ae121415d0e8 +oid sha256:f60b3eb8f6d717b4a09588449acd8cb2b23fbe3ff3dd9c8664048ab385b50db1 size 16166 diff --git a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax index e9759f762..61679cd0e 100644 --- a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax +++ b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3e98b80ebaf6b19acd8a82fca616a73ae9f62847e2f0111f3f94c9a3a3de28b +oid sha256:22a87367d53ded86ecc91579229b66a8c70bce3e9f94f89c963a6874a251cf38 size 29080 diff --git a/Content/Editor/Highlight Material.flax b/Content/Editor/Highlight Material.flax index 867846f32..038c25b2b 100644 --- a/Content/Editor/Highlight Material.flax +++ b/Content/Editor/Highlight Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:da905b7c1c53974be7cecb432a3dca87624235200e06fe9d98670026734cd13d +oid sha256:491932ca0051c107c459a8b4ebcbbef4d635dc00aac361085be7615da02f8961 size 28045 diff --git a/Content/Editor/Icons/IconsMaterial.flax b/Content/Editor/Icons/IconsMaterial.flax index 244bb178d..e57e7ec56 100644 --- a/Content/Editor/Icons/IconsMaterial.flax +++ b/Content/Editor/Icons/IconsMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a0eddb7654731d9c229f650be3daf6c0d856c741ee1d1c2e191ccfd073fd87a +oid sha256:323125b078ad9429c2696ec5e1407dfecca1164744e0633d429d8d2420978625 size 27973 diff --git a/Content/Editor/IesProfilePreviewMaterial.flax b/Content/Editor/IesProfilePreviewMaterial.flax index 8a064bf9a..db4029597 100644 --- a/Content/Editor/IesProfilePreviewMaterial.flax +++ b/Content/Editor/IesProfilePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36406c6c38796d81189c34eaa467bcbd8de109afb2da3a9b9083ec773b1c913c +oid sha256:83e4ae5c290b1ab5490e73266489cbed02b22aa2c00b31a7bfad4bac3f0f37db size 18205 diff --git a/Content/Editor/Particles/Particle Material Color.flax b/Content/Editor/Particles/Particle Material Color.flax index e1ecb1220..7bfa49c05 100644 --- a/Content/Editor/Particles/Particle Material Color.flax +++ b/Content/Editor/Particles/Particle Material Color.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0d83ec4a3402680e7a683089c077c991d0ede3a85e563cee967ef94ec62f395 -size 29912 +oid sha256:84baddd25a044b1ca4079784d50a48fafbb119f95321d2e0ca7f650fd7ef8899 +size 29903 diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index 7d2a48b32..4700cd7ac 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4317109e6aee7efef7f02ef0740c2676e69a028b877afb50dee7ef850b499db2 -size 38680 +oid 
sha256:e98dd48bd4eae56eba17d3866102c7416227617b6d98ae8963f6567334dd3c58 +size 38663 diff --git a/Content/Editor/Particles/Smoke.flax b/Content/Editor/Particles/Smoke.flax index a682db6ee..68c0607db 100644 --- a/Content/Editor/Particles/Smoke.flax +++ b/Content/Editor/Particles/Smoke.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc72b3152b85137a22b7dc72209ece02e8c2fff6674ddd395eaa4a4b089a51da +oid sha256:78b70b627e87f520fccf610a9f20f68e72af68022e5093e6cbbb2fdf6d0d886d size 14662 diff --git a/Content/Editor/Particles/Sparks.flax b/Content/Editor/Particles/Sparks.flax index 227d9e381..d962ef9d5 100644 --- a/Content/Editor/Particles/Sparks.flax +++ b/Content/Editor/Particles/Sparks.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d38b4ed6a68e0c327e7d6dda2f47c6665611c4ae9c7f8b1ba6148eb26abb205f +oid sha256:e5cd0be41d80f829f23ab67ad0d82f2023b9ee81f0f2119769034c7b57047ad1 size 13650 diff --git a/Content/Editor/SpriteMaterial.flax b/Content/Editor/SpriteMaterial.flax index 76ff2a46e..ed5efae88 100644 --- a/Content/Editor/SpriteMaterial.flax +++ b/Content/Editor/SpriteMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c75b277a210d514b4b4c7c1533d01ea21ae49961402d72d53cbf3272abdf38d6 +oid sha256:9c45f25462dbc839ec36b2cc889f30d4b000eb948a7fcf8ea8387d6e9e3df463 size 29159 diff --git a/Content/Editor/Terrain/Circle Brush Material.flax b/Content/Editor/Terrain/Circle Brush Material.flax index 4f77e39cc..0853dacd5 100644 --- a/Content/Editor/Terrain/Circle Brush Material.flax +++ b/Content/Editor/Terrain/Circle Brush Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1eee4a66a26e6259a62f9a6c8e3fb00fb4ebe4741c28a38785be4e500aaed3d +oid sha256:3421653f72b9fa3d77ded95f9b81a261cbf7bde386430c80e8f970289e466b13 size 27986 diff --git a/Content/Editor/Terrain/Highlight Terrain Material.flax b/Content/Editor/Terrain/Highlight Terrain Material.flax index 
32d82d931..7937f3831 100644 --- a/Content/Editor/Terrain/Highlight Terrain Material.flax +++ b/Content/Editor/Terrain/Highlight Terrain Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:222bf0a136e25607e51b2d6878221dd8de17d8d0cf6b02fc3bb46f54a964addf +oid sha256:7063c30e4ed481ec64ca97ada1d41e8c5c33bc2394f0dedd39abde64682755ba size 21367 diff --git a/Content/Editor/TexturePreviewMaterial.flax b/Content/Editor/TexturePreviewMaterial.flax index 4ef5dafdd..ec0c1fb12 100644 --- a/Content/Editor/TexturePreviewMaterial.flax +++ b/Content/Editor/TexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ccadf2b1a50715b2001715293f9de9c4f4c6507c958dc81cfcbde507649ecf2e +oid sha256:49871fd76ba4cdf9ce3f886c3c543b881d564aa8e6bea048857dfcfc58204f2a size 10570 diff --git a/Content/Editor/Wires Debug Material.flax b/Content/Editor/Wires Debug Material.flax index b32c43553..2cd18bf3f 100644 --- a/Content/Editor/Wires Debug Material.flax +++ b/Content/Editor/Wires Debug Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93da8fef9074ed5ce7574164835ca5644c1290bb561f8310d317fe3b34dca60f +oid sha256:8612dbf449c251c37c4d37e44cc85363360e9333c7eeeef2dbf81079cc33aa6c size 28045 diff --git a/Content/Engine/DefaultDeformableMaterial.flax b/Content/Engine/DefaultDeformableMaterial.flax index 148feb26d..ec235a1b0 100644 --- a/Content/Engine/DefaultDeformableMaterial.flax +++ b/Content/Engine/DefaultDeformableMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a69290ca55369fe2e4019907d99a3c2fbfbfaa21d6b4e9725ba2ecba2082c5c4 +oid sha256:2055ede0f07b8d9ea45f5bb97649d76b24c2089ad3d423cdbc4be5d81c2dc7c7 size 18514 diff --git a/Content/Engine/DefaultMaterial.flax b/Content/Engine/DefaultMaterial.flax index 653e625aa..1e0e75755 100644 --- a/Content/Engine/DefaultMaterial.flax +++ b/Content/Engine/DefaultMaterial.flax @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:186cb88fe6b666b1918a1569fc3269eb4675172e70b348ce210e9911df4342fa +oid sha256:97b5fde5ad1202826e64b1fdedc6f44fa8677c499674218d76bec7789891610f size 29992 diff --git a/Content/Engine/DefaultRadialMenu.flax b/Content/Engine/DefaultRadialMenu.flax index ee571f342..d6f680162 100644 --- a/Content/Engine/DefaultRadialMenu.flax +++ b/Content/Engine/DefaultRadialMenu.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:814157ca47fb0084c1268113c857aa10eab682ad3d8e0dbc730dd95bb0fb6a9e +oid sha256:f3bd840512ae1daaacf26843ce47a8bfe289541df61ca11a05bd082de9395828 size 20340 diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index 3b68da4c1..10395bca4 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bcba019392a7f28f7613db7d6b6553e1a4de0ff8463a79da16c8824eb7949055 +oid sha256:2f878f6542975aa2fa239bdc3cae061751d8a16908069ceb29b57efbd20256a9 size 23451 diff --git a/Content/Engine/SingleColorMaterial.flax b/Content/Engine/SingleColorMaterial.flax index 4c7d36165..0af9c8bd6 100644 --- a/Content/Engine/SingleColorMaterial.flax +++ b/Content/Engine/SingleColorMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dee79e836a1379622db5829481af66298fdbee63314329d4ab87e36c02128c0b -size 28131 +oid sha256:1f89a10ecbe30a949385392b3729b82a18991a6f82ea0e396df22ca13f58c300 +size 28168 diff --git a/Content/Engine/SkyboxMaterial.flax b/Content/Engine/SkyboxMaterial.flax index 774ee5fb8..a3fb977b1 100644 --- a/Content/Engine/SkyboxMaterial.flax +++ b/Content/Engine/SkyboxMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac058955e94e54f8ad165db9d6322066165bf98380652df37682a027b783a9fd -size 29329 +oid sha256:4342fbcf46a15467209cff4d85a14eb955d9ae5954b58c18fa5ecdb5d571e05b +size 29366 diff 
--git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax index 8e7bff88e..b45314a3c 100644 --- a/Content/Shaders/GI/DDGI.flax +++ b/Content/Shaders/GI/DDGI.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29915a5b5df2982298aa5c5fd585086774ff70eb98a8e6d8e521d3ba261e9a45 -size 25864 +oid sha256:dc1178697d8a11745194914ecec7d6c073e4a4efa7373bc976fa1f4135e17668 +size 27288 diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax index 7963c2a71..e0ba99179 100644 --- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax +++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5bccb119c58a4fcec267e452bdf6026b7e14531ffcf60680026ce964945457cb -size 12461 +oid sha256:090ee68f80c28a0eb800d4e23da3ace1c154d649e0501717c5bb87655c9a9669 +size 12337 diff --git a/Content/Shaders/GlobalSignDistanceField.flax b/Content/Shaders/GlobalSignDistanceField.flax index 5e694f134..1a1afcb0b 100644 --- a/Content/Shaders/GlobalSignDistanceField.flax +++ b/Content/Shaders/GlobalSignDistanceField.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed1f8075002df1e142c9d2dc84e0931bc5dd33e5db13a2a8e0282e13ca716bcf -size 13061 +oid sha256:3ec583aa8eafdc0f5969560e51584751059ca821fe33c4025f12d481a9c6c69f +size 13265 diff --git a/Content/Shaders/SSR.flax b/Content/Shaders/SSR.flax index b3c34e95e..e60bd83b0 100644 --- a/Content/Shaders/SSR.flax +++ b/Content/Shaders/SSR.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c42203227cf1070143a4cd1a414cc3ae3625577c7d0dfd7d204f99bf8bc4542 -size 11142 +oid sha256:d75cdf10a4e4f142a8704514dac715b9c410952cdeb083313d4c20ba0ca0caa2 +size 11154 From 74dfa30556022db65eb1d8f8038ae4e47b95feb1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 12 Jul 2024 17:14:51 +0200 Subject: [PATCH 221/292] Fix crash when drawing particle effect where one of the emitter assets is not yet loaded 
--- Source/Engine/Particles/Particles.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 4b869bf42..559c916d8 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -931,6 +931,8 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe if (!buffer || (buffer->Mode == ParticlesSimulationMode::CPU && buffer->CPU.Count == 0)) continue; auto emitter = buffer->Emitter; + if (!emitter || !emitter->IsLoaded()) + continue; buffer->Emitter->GraphExecutorCPU.Draw(buffer->Emitter, effect, emitterData, renderContext, worlds[(int32)emitter->SimulationSpace]); } @@ -949,6 +951,8 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe if (!buffer) continue; auto emitter = buffer->Emitter; + if (!emitter || !emitter->IsLoaded()) + continue; drawCall.World = worlds[(int32)emitter->SimulationSpace]; drawCall.WorldDeterminantSign = worldDeterminantSigns[(int32)emitter->SimulationSpace]; From 13cf3deb3f7b7ec31e18c517717d4e7887b444ca Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 12 Jul 2024 18:42:19 +0200 Subject: [PATCH 222/292] Revert b80101411f34ae3a8922d545692ddb352f3f70c6 --- Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 7a6c560ab..9b6d4ba2c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1845,7 +1845,6 @@ bool GPUDeviceVulkan::Init() #endif #undef INIT_FUNC VmaAllocatorCreateInfo allocatorInfo = {}; - allocatorInfo.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; allocatorInfo.vulkanApiVersion = VULKAN_API_VERSION; allocatorInfo.physicalDevice = gpu; allocatorInfo.instance = Instance; From 
8ba33d9d10c5066190c9c8ee2319ec4fd1739316 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 18 Jul 2024 00:17:15 +0200 Subject: [PATCH 223/292] Fix typo in doc comment --- Source/Engine/Graphics/GPUContext.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index b3bd6ddcc..52ff5ef5d 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -197,14 +197,14 @@ public: API_FUNCTION() virtual void ClearUA(GPUBuffer* buf, const Float4& value) = 0; /// - /// Clears an unordered access buffer with a unsigned value. + /// Clears an unordered access buffer with an unsigned value. /// /// The buffer to clear. /// The clear value. virtual void ClearUA(GPUBuffer* buf, const uint32 value[4]) = 0; /// - /// Clears an unordered access texture with a unsigned value. + /// Clears an unordered access texture with an unsigned value. /// /// The texture to clear. /// The clear value. 
From ee02aa394aa0fce0eb00af8d0030f853cfcd0df5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 18 Jul 2024 00:17:33 +0200 Subject: [PATCH 224/292] Fix potential error in new shadows atlas rendering --- Source/Engine/Renderer/ShadowsPass.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 382c0be55..4cd25a17c 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -619,6 +619,10 @@ void ShadowsPass::SetupRenderContext(RenderContext& renderContext, RenderContext void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLightData& light, ShadowAtlasLight& atlasLight) { + // Initialize frame-data + atlasLight.ContextIndex = 0; + atlasLight.ContextCount = 0; + // Copy light properties atlasLight.Sharpness = light.ShadowsSharpness; atlasLight.Fade = light.ShadowsStrength; @@ -1354,7 +1358,7 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) for (auto& e : shadows.Lights) { ShadowAtlasLight& atlasLight = e.Value; - if (atlasLight.StaticState != ShadowAtlasLight::UpdateStaticShadow || !atlasLight.HasStaticShadowContext) + if (atlasLight.StaticState != ShadowAtlasLight::UpdateStaticShadow || !atlasLight.HasStaticShadowContext || atlasLight.ContextCount == 0) continue; int32 contextIndex = 0; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) @@ -1413,6 +1417,8 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) for (auto& e : shadows.Lights) { ShadowAtlasLight& atlasLight = e.Value; + if (atlasLight.ContextCount == 0) + continue; int32 contextIndex = 0; for (int32 tileIndex = 0; tileIndex < atlasLight.TilesCount; tileIndex++) { From aeff147b6ded0860b80b7e80696ae4d1f650611c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 18 Jul 2024 08:38:23 +0200 Subject: 
[PATCH 225/292] Optimize and refactor DDGI to use linear `attention` per-probe to dynamically control ray count and blend speed Probes will use lower ray count when behind the camera or not correlated with the view direction or far from geometry. Probes nearby camera or with high instability in irradiance will maintain higher ray count. Probes that use less rays will have slower blending to reduce artifacts. Added probe instability, attention and stats debugging for devs. --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 101 +++++++- Source/Shaders/GI/DDGI.hlsl | 29 ++- Source/Shaders/GI/DDGI.shader | 231 ++++++++++++++---- 3 files changed, 310 insertions(+), 51 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 27ec7146d..2844b6482 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -40,6 +40,19 @@ #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 +#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count +#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging + +#if DDGI_DEBUG_STATS +#include "Engine/Core/Collections/SamplesBuffer.h" +#define DDGI_DEBUG_STATS_FRAMES 60 + +struct StatsData +{ + uint32 RaysCount; + uint32 ProbesCount; +}; +#endif GPU_CB_STRUCT(Data0 { DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; @@ -52,11 +65,13 @@ GPU_CB_STRUCT(Data0 { float ResetBlend; float TemporalTime; Int4 ProbeScrollClears[4]; + Float3 ViewDir; + float Padding1; }); GPU_CB_STRUCT(Data1 { // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD - Float2 Padding1; + Float2 
Padding2; uint32 CascadeIndex; uint32 ProbeIndexOffset; }); @@ -84,11 +99,21 @@ public: int32 ProbesCountTotal = 0; Int3 ProbeCounts = Int3::Zero; GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance) - GPUTexture* ProbesData = nullptr; // Probes data: (RGB: world-space offset, A: state/data) + GPUTexture* ProbesData = nullptr; // Probes data: (RGB: probe-space offset, A: state/data) GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color) GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2) GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0 GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend) +#if DDGI_DEBUG_STATS + GPUBuffer* StatsWrite = nullptr; + GPUBuffer* StatsRead = nullptr; + SamplesBuffer StatsProbes; + SamplesBuffer StatsRays; + uint32 StatsFrames = 0; +#endif +#if DDGI_DEBUG_INSTABILITY + GPUTexture* ProbesInstability = nullptr; +#endif DynamicDiffuseGlobalIlluminationPass::BindingData Result; FORCE_INLINE void Release() @@ -99,6 +124,16 @@ public: RenderTargetPool::Release(ProbesDistance); SAFE_DELETE_GPU_RESOURCE(ActiveProbes); SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs); +#if DDGI_DEBUG_STATS + SAFE_DELETE_GPU_RESOURCE(StatsWrite); + SAFE_DELETE_GPU_RESOURCE(StatsRead); + StatsProbes.Clear(); + StatsRays.Clear(); + StatsFrames = 0; +#endif +#if DDGI_DEBUG_INSTABILITY + RenderTargetPool::Release(ProbesInstability); +#endif } ~DDGICustomBuffer() @@ -373,12 +408,21 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY); INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), 
probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); +#if DDGI_DEBUG_INSTABILITY + INIT_TEXTURE(ProbesInstability, PixelFormat::R16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); +#endif #undef INIT_TEXTURE #define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage(); GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess); INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes"); desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)); INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs"); +#if DDGI_DEBUG_STATS + desc2 = GPUBufferDescription::Raw(sizeof(StatsData), GPUBufferFlags::UnorderedAccess); + INIT_BUFFER(StatsWrite, "DDGI.StatsWrite"); + desc2 = desc2.ToStagingReadback(); + INIT_BUFFER(StatsRead, "DDGI.StatsRead"); +#endif #undef INIT_BUFFER LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024)); clear = true; @@ -393,6 +437,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->ClearUA(ddgiData.ProbesData, Float4::Zero); context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero); context->ClearUA(ddgiData.ProbesDistance, Float4::Zero); +#if DDGI_DEBUG_INSTABILITY + context->ClearUA(ddgiData.ProbesInstability, Float4::Zero); +#endif } 
ddgiData.LastFrameUsed = Engine::FrameCount; @@ -486,6 +533,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0); } data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f; + data.ViewDir = renderContext.View.Direction; GBufferPass::SetInputs(renderContext.View, data.GBuffer); context->UpdateCB(_cb0, &data); context->BindCB(0, _cb0); @@ -496,6 +544,10 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont PROFILE_GPU_CPU_NAMED("Probes Update"); bool anyDirty = false; uint32 threadGroupsX, threadGroupsY; +#if DDGI_DEBUG_STATS + uint32 zero[4] = {}; + context->ClearUA(ddgiData.StatsWrite, zero); +#endif for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { if (cascadeSkipUpdate[cascadeIndex]) @@ -556,6 +608,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->BindSR(8, skybox); context->BindSR(9, ddgiData.ActiveProbes->View()); context->BindUA(0, ddgiData.ProbesTrace->View()); +#if DDGI_DEBUG_STATS + context->BindUA(1, ddgiData.StatsWrite->View()); +#endif context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg); context->ResetUA(); context->ResetSR(); @@ -564,21 +619,55 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes irradiance and distance textures (one thread-group per probe) { PROFILE_GPU_CPU_NAMED("Update Probes"); + + // Distance context->BindSR(0, ddgiData.Result.ProbesData); context->BindSR(1, ddgiData.ProbesTrace->View()); context->BindSR(2, ddgiData.ActiveProbes->View()); - context->BindUA(0, ddgiData.Result.ProbesIrradiance); - context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg); context->BindUA(0, ddgiData.Result.ProbesDistance); 
context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg); context->ResetUA(); context->ResetSR(); + + // Irradiance + context->BindSR(1, ddgiData.ProbesTrace->View()); + context->BindSR(2, ddgiData.ActiveProbes->View()); + context->BindUA(0, ddgiData.Result.ProbesIrradiance); + context->BindUA(1, ddgiData.Result.ProbesData); +#if DDGI_DEBUG_INSTABILITY + context->BindUA(2, ddgiData.ProbesInstability->View()); +#endif + context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg); + context->ResetUA(); + context->ResetSR(); } arg += sizeof(GPUDispatchIndirectArgs); } } +#if DDGI_DEBUG_STATS + // Update stats + { + StatsData stats; + if (void* mapped = ddgiData.StatsRead->Map(GPUResourceMapMode::Read)) + { + Platform::MemoryCopy(&stats, mapped, sizeof(stats)); + ddgiData.StatsRead->Unmap(); + ddgiData.StatsProbes.Add(stats.ProbesCount); + ddgiData.StatsRays.Add(stats.RaysCount); + } + context->CopyBuffer(ddgiData.StatsRead, ddgiData.StatsWrite, sizeof(stats)); + if (++ddgiData.StatsFrames >= DDGI_DEBUG_STATS_FRAMES) + { + ddgiData.StatsFrames = 0; + stats.ProbesCount = ddgiData.StatsProbes.Average(); + stats.RaysCount = ddgiData.StatsRays.Average(); + LOG(Info, "DDGI active probes: {}, traced rays: {} per frame, rays per probe: {}", stats.ProbesCount, stats.RaysCount, stats.ProbesCount > 0 ? 
stats.RaysCount / stats.ProbesCount : 0); + } + } +#endif + // Update probes border pixels if (anyDirty) { @@ -718,7 +807,11 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, { // Pass DDGI data to the material _debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData)); +#if DDGI_DEBUG_INSTABILITY + _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesInstability)); +#else _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance)); +#endif _debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance)); auto cb = _debugMaterial->GetShader()->GetCB(3); if (cb) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index fff009022..330a20420 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -16,6 +16,8 @@ #define DDGI_PROBE_STATE_INACTIVE 0 #define DDGI_PROBE_STATE_ACTIVATED 1 #define DDGI_PROBE_STATE_ACTIVE 2 +#define DDGI_PROBE_ATTENTION_MIN 0.02f // Minimum probe attention value that still makes it active. +#define DDGI_PROBE_ATTENTION_MAX 0.98f // Maximum probe attention value that still makes it active (but not activated which is 1.0f). 
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens @@ -99,15 +101,36 @@ float4 LoadDDGIProbeData(DDGIData data, Texture2D probesData, uint } // Encodes probe probe data -float4 EncodeDDGIProbeData(float3 probeOffset, uint probeState) +float4 EncodeDDGIProbeData(float3 offset, uint state, float attention) { - return float4(probeOffset, (float)probeState * (1.0f / 8.0f)); + // [0;1] -> [-1;1] + attention = saturate(attention) * 2.0f - 1.0f; + if (state == DDGI_PROBE_STATE_INACTIVE) + attention = -1.0f; + else if (state == DDGI_PROBE_STATE_ACTIVATED) + attention = 1.0f; + return float4(offset, attention); +} + +// Decodes probe attention value from the encoded state +float DecodeDDGIProbeAttention(float4 probeData) +{ + // [-1;1] -> [0;1] + if (probeData.w <= -1.0f) + return 0.0f; + if (probeData.w >= 1.0f) + return 1.0f; + return probeData.w * 0.5f + 0.5f; } // Decodes probe state from the encoded state uint DecodeDDGIProbeState(float4 probeData) { - return (uint)(probeData.w * 8.0f); + if (probeData.w <= -1.0f) + return DDGI_PROBE_STATE_INACTIVE; + if (probeData.w >= 1.0f) + return DDGI_PROBE_STATE_ACTIVATED; + return DDGI_PROBE_STATE_ACTIVE; } // Decodes probe world-space position (XYZ) from the encoded state diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index c80407d54..59e74bbb9 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -20,11 +20,14 @@ // This must match C++ #define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending #define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller) +#define 
DDGI_TRACE_RAYS_MIN 16 // Minimum amount of rays to shoot for sleepy probes #define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area +#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count +#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging META_CB_BEGIN(0, Data0) DDGIData DDGI; @@ -37,10 +40,12 @@ uint ProbesCount; float ResetBlend; float TemporalTime; int4 ProbeScrollClears[4]; +float3 ViewDir; +float Padding1; META_CB_END META_CB_BEGIN(1, Data1) -float2 Padding1; +float2 Padding2; uint CascadeIndex; uint ProbeIndexOffset; META_CB_END @@ -73,10 +78,11 @@ float3 GetProbeRayDirection(DDGIData data, uint rayIndex, uint raysCount, uint p } // Calculates amount of rays to allocate for a probe -uint GetProbeRaysCount(DDGIData data, uint probeState) +uint GetProbeRaysCount(DDGIData data, float probeAttention) { - // TODO: implement variable ray count based on probe location relative to the view frustum (use probe state for storage) - return data.RaysCount; + //return data.RaysCount; + probeAttention = saturate((probeAttention - DDGI_PROBE_ATTENTION_MIN) / (DDGI_PROBE_ATTENTION_MAX - DDGI_PROBE_ATTENTION_MIN)); + return DDGI_TRACE_RAYS_MIN + (uint)max(probeAttention * (float)(data.RaysCount - DDGI_TRACE_RAYS_MIN), 0.0f); } #ifdef _CS_Classify @@ -118,7 +124,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) if (prevCascadeWeight > 0.1f) { // Disable probe - RWProbesData[probeDataCoords] = 
EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE); + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); return; } } @@ -140,11 +146,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Load probe state and position float4 probeData = RWProbesData[probeDataCoords]; + float probeAttention = DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); uint probeStateOld = probeState; float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE) + { probeOffset = float3(0, 0, 0); // Clear offset for a new probe + probeAttention = 1.0f; // Wake-up + } float3 probeOffsetOld = probeOffset; float3 probePosition = probeBasePosition + probeOffset; @@ -166,11 +176,24 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable it probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; } else { - // Relocate only if probe location is not good enough + // Apply distance/view heuristics to probe attention probeState = DDGI_PROBE_STATE_ACTIVE; + float3 viewToProbe = probePosition - GBuffer.ViewPos; + float distanceToProbe = length(viewToProbe); + viewToProbe /= distanceToProbe; + float probeViewDot = dot(viewToProbe, ViewDir); + probeAttention *= lerp(0.1f, 1.0f, saturate(probeViewDot)); // Reduce quality for probes behind the camera (or away from view dir) + probeAttention *= lerp(1.0f, 0.5f, saturate(sdfDst / voxelLimit)); // Reduce quality for probes far away from geometry + probeAttention += (1.0f - saturate(distanceToProbe / 1000.0f)) * 1.2f; // Boost quality for probes nearby view + //probeAttention = 0.0f; // Debug test lowest ray count + //probeAttention = 1.0f; // Debug test highest ray count + probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX); + + // Relocate only if 
probe location is not good enough if (sdf <= voxelLimit) { #if DDGI_PROBE_RELOCATE_ITERATIVE @@ -222,6 +245,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable probe that is too close to the geometry probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; } else { @@ -232,6 +256,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable probe probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; #endif } } @@ -254,12 +279,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) } #endif if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE) + { probeState = DDGI_PROBE_STATE_ACTIVATED; + probeAttention = 1.0f; + } } // Save probe state probeOffset /= probesSpacing; // Move offset back to [-1;1] space - RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState); + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState, probeAttention); // Collect active probes if (probeState != DDGI_PROBE_STATE_INACTIVE) @@ -282,7 +310,7 @@ META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1, 1, 1)] void CS_UpdateProbesInitArgs() { - uint activeProbesCount = ActiveProbes.Load(0); + uint activeProbesCount = ActiveProbes.Load(0); // Counter at 0 uint arg = 0; for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT) { @@ -298,6 +326,9 @@ void CS_UpdateProbesInitArgs() #ifdef _CS_TraceRays RWTexture2D RWProbesTrace : register(u0); +#if DDGI_DEBUG_STATS +RWByteAddressBuffer RWStats : register(u1); +#endif Texture3D GlobalSDFTex : register(t0); Texture3D GlobalSDFMip : register(t1); @@ -326,12 +357,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Load current probe state and position float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); + float probeAttention = 
DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); - uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); + uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention); if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount) return; // Skip disabled probes or if current thread's ray is unused float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords); float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); + // TODO: implement ray-guiding based on the probe irradiance (prioritize directions with high luminance) // Trace ray with Global SDF GlobalSDFTrace trace; @@ -370,6 +403,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Write into probes trace results RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance; + +#if DDGI_DEBUG_STATS + // Update stats + uint tmp; + RWStats.InterlockedAdd(0, 1, tmp); + if (rayIndex == 0) + RWStats.InterlockedAdd(4, 1, tmp); +#endif } #endif @@ -380,6 +421,44 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Update irradiance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT]; +groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION]; + +// Source: https://github.com/turanszkij/WickedEngine +#define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4) +static const uint4 BorderOffsets[BorderOffsetsSize] = { + uint4(6, 1, 1, 0), + uint4(5, 1, 2, 0), + uint4(4, 1, 3, 0), + uint4(3, 1, 4, 0), + uint4(2, 1, 5, 0), + uint4(1, 1, 6, 0), + + uint4(6, 6, 1, 7), + uint4(5, 6, 2, 7), + uint4(4, 6, 3, 7), + uint4(3, 6, 4, 7), + uint4(2, 6, 5, 7), + uint4(1, 6, 6, 7), + + uint4(1, 1, 0, 6), + uint4(1, 2, 0, 5), + uint4(1, 3, 0, 4), + uint4(1, 4, 0, 3), + uint4(1, 5, 0, 2), + uint4(1, 6, 0, 1), + + uint4(6, 1, 7, 6), + uint4(6, 2, 7, 5), + 
uint4(6, 3, 7, 4), + uint4(6, 4, 7, 3), + uint4(6, 5, 7, 2), + uint4(6, 6, 7, 1), + + uint4(1, 1, 7, 7), + uint4(6, 1, 0, 7), + uint4(1, 6, 7, 0), + uint4(6, 6, 0, 0), +}; #else // Update distance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE @@ -389,7 +468,14 @@ groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT]; groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT]; RWTexture2D RWOutput : register(u0); +#if DDGI_PROBE_UPDATE_MODE == 0 +RWTexture2D RWProbesData : register(u1); +#if DDGI_DEBUG_INSTABILITY +RWTexture2D RWOutputInstability : register(u2); +#endif +#else Texture2D ProbesData : register(t0); +#endif Texture2D ProbesTrace : register(t1); ByteAddressBuffer ActiveProbes : register(t2); @@ -407,13 +493,16 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); - // Skip disabled probes - bool skip = false; + // Load probe data +#if DDGI_PROBE_UPDATE_MODE == 0 + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); + float4 probeData = RWProbesData[probeDataCoords]; +#else float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); +#endif + float probeAttention = DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); - uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); - if (probeState == DDGI_PROBE_STATE_INACTIVE) - skip = true; + uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention); #if DDGI_PROBE_UPDATE_MODE == 0 uint backfacesCount = 0; @@ -423,30 +512,23 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ float distanceLimit = probesSpacing * 1.5f; #endif - BRANCH - if (!skip) + // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread) + uint raysCount = 
(uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION))); + uint raysStart = GroupIndex * raysCount; + raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart; + for (uint i = 0; i < raysCount; i++) { - // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread) - uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION))); - uint raysStart = GroupIndex * raysCount; - raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart; - for (uint i = 0; i < raysCount; i++) - { - uint rayIndex = raysStart + i; + uint rayIndex = raysStart + i; #if DDGI_PROBE_UPDATE_MODE == 0 - CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)]; + CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)]; #else - float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; - CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); + float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; + CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); #endif - CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); - } + CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); } GroupMemoryBarrierWithGroupSync(); - if (skip) - return; probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); - uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy; // Calculate octahedral projection for probe (unwraps spherical projection into a square) float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION); @@ -495,30 +577,52 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ result.rgb *= 
1.0f / (2.0f * max(result.a, epsilon)); // Load current probe value + uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy; float3 previous = RWOutput[outputCoords].rgb; - bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED; - if (ResetBlend || wasActivated) - previous = float3(0, 0, 0); + bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED || ResetBlend; + if (wasActivated) + previous = result.rgb; + +#if DDGI_PROBE_UPDATE_MODE == 0 + // Calculate instability of the irradiance + float previousLuma = Luminance(previous.rgb); + float resultLuma = Luminance(result.rgb); + float instability = abs(previousLuma - resultLuma) / previousLuma; // Percentage change in luminance of irradiance + instability = max(instability, Max3(abs(result.rgb - previous) / previous)); // Percentage of color delta change of irradiance + //instability *= saturate(result.a); // Reduce instability in areas with a small ray-coverage + //instability = pow(instability, 1.2f); // Increase contrast + instability *= 2.0f; // Make it stronger on scene changes + //instability = saturate(instability); + OutputInstability[GroupIndex] = instability; +#if DDGI_DEBUG_INSTABILITY + RWOutputInstability[outputCoords] = instability; + //RWOutputInstability[outputCoords] = probeAttention; // Debug test probe attention visualization +#endif +#endif // Blend current value with the previous probe data - float historyWeight = DDGI.ProbeHistoryWeight; - //historyWeight = 1.0f; - //historyWeight = 0.0f; - if (ResetBlend || wasActivated) - historyWeight = 0.0f; + float historyWeightFast = DDGI.ProbeHistoryWeight; + float historyWeightSlow = 0.97f; #if DDGI_PROBE_UPDATE_MODE == 0 - result *= DDGI.IndirectLightingIntensity; -#if DDGI_SRGB_BLENDING - result.rgb = pow(result.rgb, 1.0f / DDGI.IrradianceGamma); -#endif float3 irradianceDelta = result.rgb - previous; float irradianceDeltaMax = Max3(abs(irradianceDelta)); float 
irradianceDeltaLen = length(irradianceDelta); if (irradianceDeltaMax > 0.5f) { // Reduce history weight after significant lighting change - historyWeight = historyWeight * 0.5f; + historyWeightFast *= 0.5f; } +#endif + float historyWeight = lerp(historyWeightSlow, historyWeightFast, probeAttention * probeAttention * probeAttention); + //historyWeight = 1.0f; // Debug full-blend + //historyWeight = 0.0f; // Debug no-blend + if (wasActivated) + historyWeight = 0.0f; +#if DDGI_PROBE_UPDATE_MODE == 0 + result *= DDGI.IndirectLightingIntensity; +#if DDGI_SRGB_BLENDING + result.rgb = pow(max(result.rgb, 0), 1.0f / DDGI.IrradianceGamma); +#endif if (irradianceDeltaLen > 2.0f) { // Reduce flickering during rapid brightness changes @@ -530,6 +634,45 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ #endif RWOutput[outputCoords] = result; + +#if DDGI_PROBE_UPDATE_MODE == 0 + // The first thread updates the probe attention based on the instability of all texels + GroupMemoryBarrierWithGroupSync(); + BRANCH + if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE) + { + // Calculate instability statistics for a whole probe + float instabilityAvg = 0; + for (uint i = 0; i < DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION; i++) + instabilityAvg += OutputInstability[i]; + instabilityAvg *= 1.0f / float(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION); + instabilityAvg = saturate(instabilityAvg); + instability = instabilityAvg; + + // Calculate probe attention + float taregAttention = lerp(0.5f, DDGI_PROBE_ATTENTION_MAX, instability); // Use some base level + if (taregAttention >= probeAttention) + probeAttention = taregAttention; // Quick jump up + else + probeAttention = lerp(probeAttention, taregAttention, 0.2f); // Slow blend down + if (probeState == DDGI_PROBE_STATE_ACTIVATED) + probeAttention = DDGI_PROBE_ATTENTION_MAX; + + // Update probe data for the next frame + probeState = DDGI_PROBE_STATE_ACTIVE; + RWProbesData[probeDataCoords] = 
EncodeDDGIProbeData(probeData.xyz, probeState, probeAttention); + } + +#if DDGI_DEBUG_INSTABILITY + // Copy border pixels + uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); + for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION) + { + uint4 borderOffsets = BorderOffsets[borderIndex]; + RWOutputInstability[baseCoords + borderOffsets.zw] = RWOutputInstability[baseCoords + borderOffsets.xy]; + } +#endif +#endif } // Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering) From 03898a064aa1b9750026c155e7256940f655544d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 18 Jul 2024 19:51:01 +0200 Subject: [PATCH 226/292] Optimize DDGI probes border pixels to be copied within probe update, rather than via separate dispatch --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 37 ---- .../GI/DynamicDiffuseGlobalIllumination.h | 4 - Source/Shaders/GI/DDGI.shader | 158 ++++++++++-------- 3 files changed, 85 insertions(+), 114 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 2844b6482..d74b03672 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -219,10 +219,6 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources() _csTraceRays[3] = shader->GetCS("CS_TraceRays", 3); _csUpdateProbesIrradiance = shader->GetCS("CS_UpdateProbes", 0); _csUpdateProbesDistance = shader->GetCS("CS_UpdateProbes", 1); - _csUpdateBordersIrradianceRow = shader->GetCS("CS_UpdateBorders", 0); - _csUpdateBordersIrradianceCollumn = shader->GetCS("CS_UpdateBorders", 1); - _csUpdateBordersDistanceRow = shader->GetCS("CS_UpdateBorders", 2); - _csUpdateBordersDistanceCollumn = 
shader->GetCS("CS_UpdateBorders", 3); auto device = GPUDevice::Instance; auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; if (!_psIndirectLighting) @@ -250,10 +246,6 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj) _csTraceRays[3] = nullptr; _csUpdateProbesIrradiance = nullptr; _csUpdateProbesDistance = nullptr; - _csUpdateBordersIrradianceRow = nullptr; - _csUpdateBordersIrradianceCollumn = nullptr; - _csUpdateBordersDistanceRow = nullptr; - _csUpdateBordersDistanceCollumn = nullptr; SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting); invalidateResources(); } @@ -542,7 +534,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes { PROFILE_GPU_CPU_NAMED("Probes Update"); - bool anyDirty = false; uint32 threadGroupsX, threadGroupsY; #if DDGI_DEBUG_STATS uint32 zero[4] = {}; @@ -552,7 +543,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont { if (cascadeSkipUpdate[cascadeIndex]) continue; - anyDirty = true; // Classify probes (activation/deactivation and relocation) { @@ -667,33 +657,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont } } #endif - - // Update probes border pixels - if (anyDirty) - { - PROFILE_GPU_CPU_NAMED("Update Borders"); - - // Irradiance - context->BindUA(0, ddgiData.Result.ProbesIrradiance); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - 
context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1); - - // Distance - context->BindUA(0, ddgiData.Result.ProbesDistance); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1); - - context->ResetUA(); - context->ResetSR(); - } } return false; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index 28b18f39c..c56604255 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -47,10 +47,6 @@ private: GPUShaderProgramCS* _csTraceRays[4]; GPUShaderProgramCS* _csUpdateProbesIrradiance; GPUShaderProgramCS* _csUpdateProbesDistance; - GPUShaderProgramCS* _csUpdateBordersIrradianceRow; - GPUShaderProgramCS* _csUpdateBordersIrradianceCollumn; - GPUShaderProgramCS* _csUpdateBordersDistanceRow; - GPUShaderProgramCS* _csUpdateBordersDistanceCollumn; GPUPipelineState* _psIndirectLighting; #if USE_EDITOR AssetReference _debugModel; diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 59e74bbb9..73cac7c26 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -415,16 +415,22 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) #endif -#if defined(_CS_UpdateProbes) || defined(_CS_UpdateBorders) +#if defined(_CS_UpdateProbes) #if 
DDGI_PROBE_UPDATE_MODE == 0 // Update irradiance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT]; groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION]; +#else +// Update distance +#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE +groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT]; +#endif // Source: https://github.com/turanszkij/WickedEngine #define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4) +#if DDGI_PROBE_RESOLUTION == 6 static const uint4 BorderOffsets[BorderOffsetsSize] = { uint4(6, 1, 1, 0), uint4(5, 1, 2, 0), @@ -457,12 +463,77 @@ static const uint4 BorderOffsets[BorderOffsetsSize] = { uint4(1, 1, 7, 7), uint4(6, 1, 0, 7), uint4(1, 6, 7, 0), - uint4(6, 6, 0, 0), + uint4(6, 6, 0, 0) +}; +#elif DDGI_PROBE_RESOLUTION == 14 +static const uint4 BorderOffsets[BorderOffsetsSize] = { + uint4(14, 1, 1, 0), + uint4(13, 1, 2, 0), + uint4(12, 1, 3, 0), + uint4(11, 1, 4, 0), + uint4(10, 1, 5, 0), + uint4(9, 1, 6, 0), + uint4(8, 1, 7, 0), + uint4(7, 1, 8, 0), + uint4(6, 1, 9, 0), + uint4(5, 1, 10, 0), + uint4(4, 1, 11, 0), + uint4(3, 1, 12, 0), + uint4(2, 1, 13, 0), + uint4(1, 1, 14, 0), + + uint4(14, 14, 1, 15), + uint4(13, 14, 2, 15), + uint4(12, 14, 3, 15), + uint4(11, 14, 4, 15), + uint4(10, 14, 5, 15), + uint4(9, 14, 6, 15), + uint4(8, 14, 7, 15), + uint4(7, 14, 8, 15), + uint4(6, 14, 9, 15), + uint4(5, 14, 10, 15), + uint4(4, 14, 11, 15), + uint4(3, 14, 12, 15), + uint4(2, 14, 13, 15), + uint4(1, 14, 14, 15), + + uint4(1, 14, 0, 1), + uint4(1, 13, 0, 2), + uint4(1, 12, 0, 3), + uint4(1, 11, 0, 4), + uint4(1, 10, 0, 5), + uint4(1, 9, 0, 6), + uint4(1, 8, 0, 7), + uint4(1, 7, 0, 8), + uint4(1, 6, 0, 9), + uint4(1, 5, 0, 10), + uint4(1, 4, 0, 11), + uint4(1, 3, 0, 12), + uint4(1, 2, 0, 13), + uint4(1, 1, 0, 14), + + uint4(14, 14, 15, 1), + uint4(14, 13, 15, 2), + uint4(14, 12, 15, 3), + uint4(14, 11, 15, 4), + uint4(14, 10, 
15, 5), + uint4(14, 9, 15, 6), + uint4(14, 8, 15, 7), + uint4(14, 7, 15, 8), + uint4(14, 6, 15, 9), + uint4(14, 5, 15, 10), + uint4(14, 4, 15, 11), + uint4(14, 3, 15, 12), + uint4(14, 2, 15, 13), + uint4(14, 1, 15, 14), + + uint4(14, 14, 0, 0), + uint4(1, 14, 15, 0), + uint4(14, 1, 0, 15), + uint4(1, 1, 15, 15) }; #else -// Update distance -#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE -groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT]; +#error "Unsupported probe size for border values copy." #endif groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT]; @@ -635,9 +706,11 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ RWOutput[outputCoords] = result; + GroupMemoryBarrierWithGroupSync(); + uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); + #if DDGI_PROBE_UPDATE_MODE == 0 // The first thread updates the probe attention based on the instability of all texels - GroupMemoryBarrierWithGroupSync(); BRANCH if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE) { @@ -665,7 +738,6 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ #if DDGI_DEBUG_INSTABILITY // Copy border pixels - uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION) { uint4 borderOffsets = BorderOffsets[borderIndex]; @@ -673,73 +745,13 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ } #endif #endif -} -// Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering) -META_CS(true, FEATURE_LEVEL_SM5) -META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=1) -META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=0) 
-META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=1) -META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=0) -[numthreads(DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, 1)] -void CS_UpdateBorders(uint3 DispatchThreadId : SV_DispatchThreadID) -{ -#define COPY_PIXEL RWOutput[threadCoordinates] = RWOutput[copyCoordinates] -#define COPY_PIXEL_DEBUG RWOutput[threadCoordinates] = float4(5, 0, 0, 1) - - uint probeSideLength = DDGI_PROBE_RESOLUTION + 2; - uint probeSideLengthMinusOne = probeSideLength - 1; - uint2 copyCoordinates = uint2(0, 0); - uint2 threadCoordinates = DispatchThreadId.xy; -#if BORDER_ROW - threadCoordinates.y *= probeSideLength; - uint corner = DispatchThreadId.x % probeSideLength; -#else - threadCoordinates.x *= probeSideLength; - uint corner = threadCoordinates.y % probeSideLength; -#endif - if (corner == 0 || corner == probeSideLengthMinusOne) - { -#if !BORDER_ROW - // Left corner - copyCoordinates.x = threadCoordinates.x + DDGI_PROBE_RESOLUTION; - copyCoordinates.y = threadCoordinates.y - sign((int)corner - 1) * DDGI_PROBE_RESOLUTION; - COPY_PIXEL; - - // Right corner - threadCoordinates.x += probeSideLengthMinusOne; - copyCoordinates.x = threadCoordinates.x - DDGI_PROBE_RESOLUTION; - COPY_PIXEL; -#endif - return; - } - -#if BORDER_ROW - // Top row - uint probeStart = uint(threadCoordinates.x / probeSideLength) * probeSideLength; - uint offset = probeSideLengthMinusOne - (threadCoordinates.x % probeSideLength); - copyCoordinates = uint2(probeStart + offset, threadCoordinates.y + 1); -#else - // Left column - uint probeStart = uint(threadCoordinates.y / probeSideLength) * probeSideLength; - uint offset = probeSideLengthMinusOne - (threadCoordinates.y % probeSideLength); - copyCoordinates = uint2(threadCoordinates.x + 1, probeStart + offset); -#endif - COPY_PIXEL; - -#if BORDER_ROW - // Bottom row - threadCoordinates.y += probeSideLengthMinusOne; - copyCoordinates = uint2(probeStart + offset, 
threadCoordinates.y - 1); -#else - // Right column - threadCoordinates.x += probeSideLengthMinusOne; - copyCoordinates = uint2(threadCoordinates.x - 1, probeStart + offset); -#endif - COPY_PIXEL; - -#undef COPY_PIXEL -#undef COPY_PIXEL_DEBUG + // Copy border pixels + for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION) + { + uint4 borderOffsets = BorderOffsets[borderIndex]; + RWOutput[baseCoords + borderOffsets.zw] = RWOutput[baseCoords + borderOffsets.xy]; + } } #endif From 3aa8e675514c41aa7897bae46d6da90c4b509f05 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 18 Jul 2024 23:51:03 +0200 Subject: [PATCH 227/292] Add option for smooth DDGI cascades blending --- Source/Engine/Core/Config/GraphicsSettings.h | 6 + Source/Engine/Graphics/Graphics.cpp | 4 +- Source/Engine/Graphics/Graphics.h | 5 + .../GI/DynamicDiffuseGlobalIllumination.cpp | 20 ++- .../GI/DynamicDiffuseGlobalIllumination.h | 2 +- Source/Shaders/GI/DDGI.hlsl | 133 +++++++++++------- Source/Shaders/GI/DDGI.shader | 2 + 7 files changed, 115 insertions(+), 57 deletions(-) diff --git a/Source/Engine/Core/Config/GraphicsSettings.h b/Source/Engine/Core/Config/GraphicsSettings.h index 9fef78f74..26be6534e 100644 --- a/Source/Engine/Core/Config/GraphicsSettings.h +++ b/Source/Engine/Core/Config/GraphicsSettings.h @@ -116,6 +116,12 @@ public: API_FIELD(Attributes="EditorOrder(2120), Limit(50, 1000), EditorDisplay(\"Global Illumination\")") float GIProbesSpacing = 100; + /// + /// Enables cascades splits blending for Global Illumination. + /// + API_FIELD(Attributes="EditorOrder(2125), DefaultValue(false), EditorDisplay(\"Global Illumination\", \"GI Cascades Blending\")") + bool GICascadesBlending = false; + /// /// The Global Surface Atlas resolution. Adjust it if atlas `flickers` due to overflow (eg. to 4096). 
/// diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 020d0ad0e..d60e3d8e8 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -21,8 +21,9 @@ Quality Graphics::ShadowMapsQuality = Quality::Medium; bool Graphics::AllowCSMBlending = false; Quality Graphics::GlobalSDFQuality = Quality::High; Quality Graphics::GIQuality = Quality::High; +bool Graphics::GICascadesBlending = false; PostProcessSettings Graphics::PostProcessSettings; -bool Graphics::SpreadWorkload = true; +bool Graphics::SpreadWorkload = false; #if GRAPHICS_API_NULL extern GPUDevice* CreateGPUDeviceNull(); @@ -69,6 +70,7 @@ void GraphicsSettings::Apply() Graphics::ShadowMapsQuality = ShadowMapsQuality; Graphics::GlobalSDFQuality = GlobalSDFQuality; Graphics::GIQuality = GIQuality; + Graphics::GICascadesBlending = GICascadesBlending; Graphics::PostProcessSettings = ::PostProcessSettings(); Graphics::PostProcessSettings.BlendWith(PostProcessSettings, 1.0f); #if !USE_EDITOR // OptionsModule handles fallback fonts in Editor diff --git a/Source/Engine/Graphics/Graphics.h b/Source/Engine/Graphics/Graphics.h index b7f1fbcd7..a08d78c10 100644 --- a/Source/Engine/Graphics/Graphics.h +++ b/Source/Engine/Graphics/Graphics.h @@ -64,6 +64,11 @@ public: /// API_FIELD() static Quality GIQuality; + /// + /// Enables cascades splits blending for Global Illumination. + /// + API_FIELD() static bool GICascadesBlending; + /// /// The default Post Process settings. Can be overriden by PostFxVolume on a level locally, per camera or for a whole map. 
/// diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index d74b03672..eb2c9fd33 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -221,12 +221,16 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources() _csUpdateProbesDistance = shader->GetCS("CS_UpdateProbes", 1); auto device = GPUDevice::Instance; auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; - if (!_psIndirectLighting) + if (!_psIndirectLighting[0]) { - _psIndirectLighting = device->CreatePipelineState(); + _psIndirectLighting[0] = device->CreatePipelineState(); + _psIndirectLighting[1] = device->CreatePipelineState(); psDesc.PS = shader->GetPS("PS_IndirectLighting"); psDesc.BlendMode = BlendingMode::Add; - if (_psIndirectLighting->Init(psDesc)) + if (_psIndirectLighting[0]->Init(psDesc)) + return true; + psDesc.PS = shader->GetPS("PS_IndirectLighting", 1); + if (_psIndirectLighting[1]->Init(psDesc)) return true; } @@ -246,7 +250,8 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj) _csTraceRays[3] = nullptr; _csUpdateProbesIrradiance = nullptr; _csUpdateProbesDistance = nullptr; - SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting); + SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting[0]); + SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting[1]); invalidateResources(); } @@ -260,7 +265,8 @@ void DynamicDiffuseGlobalIlluminationPass::Dispose() _cb0 = nullptr; _cb1 = nullptr; _shader = nullptr; - SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting); + SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting[0]); + SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting[1]); #if USE_EDITOR _debugModel = nullptr; _debugMaterial = nullptr; @@ -534,7 +540,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes { PROFILE_GPU_CPU_NAMED("Probes Update"); - uint32 threadGroupsX, 
threadGroupsY; + uint32 threadGroupsX; #if DDGI_DEBUG_STATS uint32 zero[4] = {}; context->ClearUA(ddgiData.StatsWrite, zero); @@ -732,7 +738,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, context->BindSR(6, ddgiData.Result.ProbesIrradiance); context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y); context->SetRenderTarget(lightBuffer); - context->SetState(_psIndirectLighting); + context->SetState(_psIndirectLighting[Graphics::GICascadesBlending ? 1 : 0]); context->DrawFullscreenTriangle(); } diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index c56604255..94560aa13 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -47,7 +47,7 @@ private: GPUShaderProgramCS* _csTraceRays[4]; GPUShaderProgramCS* _csUpdateProbesIrradiance; GPUShaderProgramCS* _csUpdateProbesDistance; - GPUPipelineState* _psIndirectLighting; + GPUPipelineState* _psIndirectLighting[2] = {}; #if USE_EDITOR AssetReference _debugModel; AssetReference _debugMaterial; diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 330a20420..8b87ffea0 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -21,6 +21,9 @@ #define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens +#ifndef DDGI_CASCADE_BLEND_SMOOTH +#define DDGI_CASCADE_BLEND_SMOOTH 0 // Enables smooth cascade blending, otherwise dithering will be used +#endif #define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending 
is done in linear space // DDGI data for a constant buffer @@ -154,37 +157,8 @@ float2 GetDDGIProbeUV(DDGIData data, uint cascadeIndex, uint probeIndex, float2 return uv; } -// Samples DDGI probes volume at the given world-space position and returns the irradiance. -// bias - scales the bias vector to the initial sample point to reduce self-shading artifacts -// dither - randomized per-pixel value in range 0-1, used to smooth dithering for cascades blending -float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = 0.2f, float dither = 0.0f) +float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, uint cascadeIndex, float3 probesOrigin, float3 probesExtent, float probesSpacing, float3 biasedWorldPosition) { - // Select the highest cascade that contains the sample location - uint cascadeIndex = 0; - float probesSpacing = 0; - float3 probesOrigin = (float3)0, probesExtent = (float3)0, biasedWorldPosition = (float3)0; - float3 viewDir = normalize(data.ViewPos - worldPosition); - for (; cascadeIndex < data.CascadesCount; cascadeIndex++) - { - // Get cascade data - probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; - probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; - probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); - - // Bias the world-space position to reduce artifacts - float3 surfaceBias = (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); - biasedWorldPosition = worldPosition + surfaceBias; - - // Calculate cascade blending weight (use input bias to smooth transition) - float cascadeBlendSmooth = frac(max(distance(data.ViewPos, worldPosition) - probesExtent.x, 0) / probesSpacing) * 0.1f; - float3 cascadeBlendPoint = 
worldPosition - probesOrigin - cascadeBlendSmooth * probesSpacing; - float fadeDistance = probesSpacing * DDGI_CASCADE_BLEND_SIZE; - float cascadeWeight = saturate(Min3(probesExtent - abs(cascadeBlendPoint)) / fadeDistance); - if (cascadeWeight > dither) - break; - } - if (cascadeIndex == data.CascadesCount) - return data.FallbackIrradiance; uint3 probeCoordsEnd = data.ProbesCounts - uint3(1, 1, 1); uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), probeCoordsEnd); @@ -208,25 +182,26 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T { // Search nearby probes to find any nearby GI sample for (int searchDistance = 1; searchDistance < 3 && probeState == DDGI_PROBE_STATE_INACTIVE; searchDistance++) - for (uint searchAxis = 0; searchAxis < 3; searchAxis++) - { - int searchAxisDir = probeCoordsOffset[searchAxis] ? 1 : -1; - int3 searchCoordsOffset = SearchAxisMasks[searchAxis] * searchAxisDir * searchDistance; - uint3 searchCoords = clamp((int3)probeCoords + searchCoordsOffset, int3(0, 0, 0), (int3)probeCoordsEnd); - uint searchIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, searchCoords); - float4 searchData = LoadDDGIProbeData(data, probesData, cascadeIndex, searchIndex); - uint searchState = DecodeDDGIProbeState(searchData); - if (searchState != DDGI_PROBE_STATE_INACTIVE) + for (uint searchAxis = 0; searchAxis < 3; searchAxis++) { - // Use nearby probe as a fallback (visibility test might ignore it but with smooth gradient) - probeCoords = searchCoords; - probeIndex = searchIndex; - probeData = searchData; - probeState = searchState; - break; + int searchAxisDir = probeCoordsOffset[searchAxis] ? 
1 : -1; + int3 searchCoordsOffset = SearchAxisMasks[searchAxis] * searchAxisDir * searchDistance; + uint3 searchCoords = clamp((int3)probeCoords + searchCoordsOffset, int3(0, 0, 0), (int3)probeCoordsEnd); + uint searchIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, searchCoords); + float4 searchData = LoadDDGIProbeData(data, probesData, cascadeIndex, searchIndex); + uint searchState = DecodeDDGIProbeState(searchData); + if (searchState != DDGI_PROBE_STATE_INACTIVE) + { + // Use nearby probe as a fallback (visibility test might ignore it but with smooth gradient) + probeCoords = searchCoords; + probeIndex = searchIndex; + probeData = searchData; + probeState = searchState; + break; + } } - } - if (probeState == DDGI_PROBE_STATE_INACTIVE) continue; + if (probeState == DDGI_PROBE_STATE_INACTIVE) + continue; } float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * probesSpacing); float3 probePosition = probeBasePosition + probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing @@ -257,7 +232,8 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Adjust weight curve to inject a small portion of light const float minWeightThreshold = 0.2f; - if (weight < minWeightThreshold) weight *= Square(weight) / Square(minWeightThreshold); + if (weight < minWeightThreshold) + weight *= Square(weight) / Square(minWeightThreshold); // Calculate trilinear weights based on the distance to each probe to smoothly transition between grid of 8 probes float3 trilinear = lerp(1.0f - biasAlpha, biasAlpha, (float3)probeCoordsOffset); @@ -301,3 +277,64 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T } return irradiance.rgb; } + +float3 GetDDGISurfaceBias(float3 viewDir, float probesSpacing, float3 worldNormal, float bias) +{ + // Bias the world-space position to reduce artifacts + return (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); +} + +// Samples DDGI probes volume 
at the given world-space position and returns the irradiance. +// bias - scales the bias vector to the initial sample point to reduce self-shading artifacts +// dither - randomized per-pixel value in range 0-1, used to smooth dithering for cascades blending +float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = 0.2f, float dither = 0.0f) +{ + // Select the highest cascade that contains the sample location + uint cascadeIndex = 0; + float probesSpacing = 0, cascadeWeight = 0; + float3 probesOrigin = (float3)0, probesExtent = (float3)0, biasedWorldPosition = (float3)0; + float3 viewDir = normalize(data.ViewPos - worldPosition); +#if DDGI_CASCADE_BLEND_SMOOTH + dither = 0.0f; +#endif + for (; cascadeIndex < data.CascadesCount; cascadeIndex++) + { + // Get cascade data + probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; + probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; + probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); + biasedWorldPosition = worldPosition + GetDDGISurfaceBias(viewDir, probesSpacing, worldNormal, bias); + + // Calculate cascade blending weight (use input bias to smooth transition) + float cascadeBlendSmooth = frac(max(distance(data.ViewPos, worldPosition) - probesExtent.x, 0) / probesSpacing) * 0.1f; + float3 cascadeBlendPoint = worldPosition - probesOrigin - cascadeBlendSmooth * probesSpacing; + float fadeDistance = probesSpacing * DDGI_CASCADE_BLEND_SIZE; +#if DDGI_CASCADE_BLEND_SMOOTH + fadeDistance *= 2.0f; // Make it even smoother when using linear blending +#endif + cascadeWeight = saturate(Min3(probesExtent - abs(cascadeBlendPoint)) / fadeDistance); + if (cascadeWeight > dither) + break; + } + if (cascadeIndex == data.CascadesCount) + return data.FallbackIrradiance; + + // Sample cascade + float3 result = 
SampleDDGIIrradianceCascade(data, probesData, probesDistance, probesIrradiance, worldPosition, worldNormal, cascadeIndex, probesOrigin, probesExtent, probesSpacing, biasedWorldPosition); + +#if DDGI_CASCADE_BLEND_SMOOTH + // Blend with the next cascade + cascadeIndex++; + if (cascadeIndex < data.CascadesCount && cascadeWeight < 0.99f) + { + probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; + probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; + probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); + biasedWorldPosition = worldPosition + GetDDGISurfaceBias(viewDir, probesSpacing, worldNormal, bias); + float3 resultNext = SampleDDGIIrradianceCascade(data, probesData, probesDistance, probesIrradiance, worldPosition, worldNormal, cascadeIndex, probesOrigin, probesExtent, probesSpacing, biasedWorldPosition); + result = lerp(resultNext, result, cascadeWeight); + } +#endif + + return result; +} diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 73cac7c26..89d0fdea3 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -768,6 +768,8 @@ Texture2D ProbesIrradiance : register(t6); // Pixel shader for drawing indirect lighting in fullscreen META_PS(true, FEATURE_LEVEL_SM5) +META_PERMUTATION_1(DDGI_CASCADE_BLEND_SMOOTH=0) +META_PERMUTATION_1(DDGI_CASCADE_BLEND_SMOOTH=1) void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0) { output = 0; From 3945e1416bdf1ccf6c1d3881fb9690f301295242 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 00:27:30 +0200 Subject: [PATCH 228/292] Add improved Global SDF sampling at empty areas --- Source/Shaders/GlobalSignDistanceField.hlsl | 61 +++++++++++---------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 95f8e6f5b..ab5d37372 100644 --- 
a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -65,11 +65,11 @@ struct GlobalSDFHit } }; -void GetGlobalSDFCascadeUV(const GlobalSDFData data, uint cascade, float3 worldPosition, out float cascadeSize, out float3 cascadeUV, out float3 textureUV) +void GetGlobalSDFCascadeUV(const GlobalSDFData data, uint cascade, float3 worldPosition, out float3 cascadeUV, out float3 textureUV) { float4 cascadePosDistance = data.CascadePosDistance[cascade]; float3 posInCascade = worldPosition - cascadePosDistance.xyz; - cascadeSize = cascadePosDistance.w * 2; + float cascadeSize = cascadePosDistance.w * 2; cascadeUV = saturate(posInCascade / cascadeSize + 0.5f); textureUV = float3(((float)cascade + cascadeUV.x) / (float)data.CascadesCount, cascadeUV.y, cascadeUV.z); // Cascades are placed next to each other on X axis } @@ -79,9 +79,8 @@ uint GetGlobalSDFCascade(const GlobalSDFData data, float3 worldPosition) { for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); if (all(cascadeUV > 0) && all(cascadeUV < 1)) return cascade; } @@ -92,13 +91,14 @@ uint GetGlobalSDFCascade(const GlobalSDFData data, float3 worldPosition) float SampleGlobalSDFCascade(const GlobalSDFData data, Texture3D tex, float3 worldPosition, uint cascade) { float distance = GLOBAL_SDF_WORLD_SIZE; - float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); + float voxelSize = data.CascadeVoxelSize[cascade]; + float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; - float distanceTex = 
tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (distanceTex < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) - distance = distanceTex * maxDistanceTex; + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceTex; + if (distanceTex < chunkMargin && all(cascadeUV > 0) && all(cascadeUV < 1)) + distance = distanceTex; return distance; } @@ -110,12 +110,13 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, floa return GLOBAL_SDF_WORLD_SIZE; for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); + float voxelSize = data.CascadeVoxelSize[cascade]; + float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (distanceTex < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) + if (distanceTex < chunkMargin && all(cascadeUV > 0) && all(cascadeUV < 1)) { distance = distanceTex * maxDistanceTex; break; @@ -132,17 +133,20 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Text return GLOBAL_SDF_WORLD_SIZE; for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); + float voxelSize = data.CascadeVoxelSize[cascade]; + float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); + float maxDistanceMip = data.CascadeMaxDistanceMip[cascade]; float distanceMip = 
mip.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (distanceMip < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) + if (distanceMip < chunkSize && all(cascadeUV > 0) && all(cascadeUV < 1)) { - distance = distanceMip * cascadeSize; + distance = distanceMip * maxDistanceMip; float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; - float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (distanceTex < GLOBAL_SDF_MIN_VALID) - distance = distanceTex * maxDistanceTex; + float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceTex; + if (distanceTex < chunkMargin) + distance = distanceTex; break; } } @@ -158,11 +162,13 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D return gradient; for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); + float voxelSize = data.CascadeVoxelSize[cascade]; + float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); + float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0); - if (distanceTex < GLOBAL_SDF_MIN_VALID && all(cascadeUV > 0) && all(cascadeUV < 1)) + if (distanceTex < chunkMargin && all(cascadeUV > 0) && all(cascadeUV < 1)) { float texelOffset = 1.0f / data.Resolution; float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; @@ -171,8 +177,8 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; float zn 
= tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; - gradient = float3(xp - xn, yp - yn, zp - zn) * cascadeSize; - distance = distanceTex * cascadeSize; + gradient = float3(xp - xn, yp - yn, zp - zn) * maxDistanceTex; + distance = distanceTex * maxDistanceTex; break; } } @@ -188,9 +194,8 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D return gradient; for (uint cascade = 0; cascade < data.CascadesCount; cascade++) { - float cascadeSize; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeSize, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); float voxelSize = data.CascadeVoxelSize[cascade]; float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); @@ -263,9 +268,9 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D float stepScale = trace.StepScale; // Sample SDF - float stepDistance, cascadeSize, voxelSizeScale = (float)GLOBAL_SDF_RASTERIZE_MIP_FACTOR; + float stepDistance, voxelSizeScale = (float)GLOBAL_SDF_RASTERIZE_MIP_FACTOR; float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeSize, cascadeUV, textureUV); + GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeUV, textureUV); float distanceMip = mip.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceMip; if (distanceMip < chunkSize) { From 53ca33f301d8945fa7d87057ed7eedf35745cfe5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 00:30:06 +0200 Subject: [PATCH 229/292] Add option to sample Global SDF from higher cascade --- Source/Editor/Surface/Archetypes/Textures.cs | 6 +++++- .../GPU/ParticleEmitterGraph.GPU.Textures.cpp | 19 ++++++++++++++++++- .../MaterialGenerator.Textures.cpp | 6 ++++-- Source/Shaders/GlobalSignDistanceField.hlsl | 10 ++++++---- 4 files 
changed, 33 insertions(+), 8 deletions(-) diff --git a/Source/Editor/Surface/Archetypes/Textures.cs b/Source/Editor/Surface/Archetypes/Textures.cs index 00c0a806d..3398b0988 100644 --- a/Source/Editor/Surface/Archetypes/Textures.cs +++ b/Source/Editor/Surface/Archetypes/Textures.cs @@ -368,11 +368,13 @@ namespace FlaxEditor.Surface.Archetypes Title = "Sample Global SDF", Description = "Samples the Global SDF to get the distance to the closest surface (in world-space). Requires models SDF to be generated and checking `Enable Global SDF` in Graphics Settings.", Flags = NodeFlags.MaterialGraph | NodeFlags.ParticleEmitterGraph, - Size = new Float2(200, 20), + Size = new Float2(200, 40), + DefaultValues = new object[] { 0 }, Elements = new[] { NodeElementArchetype.Factory.Output(0, "Distance", typeof(float), 0), NodeElementArchetype.Factory.Input(0, "World Position", true, typeof(Float3), 1), + NodeElementArchetype.Factory.Input(1, "Start Cascade", true, typeof(int), 2, 0), } }, new NodeArchetype @@ -382,11 +384,13 @@ namespace FlaxEditor.Surface.Archetypes Description = "Samples the Global SDF to get the gradient and distance to the closest surface (in world-space). Normalize gradient to get SDF surface normal vector. 
Requires models SDF to be generated and checking `Enable Global SDF` in Graphics Settings.", Flags = NodeFlags.MaterialGraph | NodeFlags.ParticleEmitterGraph, Size = new Float2(260, 40), + DefaultValues = new object[] { 0 }, Elements = new[] { NodeElementArchetype.Factory.Output(0, "Gradient", typeof(Float3), 0), NodeElementArchetype.Factory.Output(1, "Distance", typeof(float), 2), NodeElementArchetype.Factory.Input(0, "World Position", true, typeof(Float3), 1), + NodeElementArchetype.Factory.Input(1, "Start Cascade", true, typeof(int), 2, 0), } }, new NodeArchetype diff --git a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp index bd8777f64..f05b09dac 100644 --- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp +++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp @@ -311,10 +311,27 @@ void ParticleEmitterGPUGenerator::ProcessGroupTextures(Box* box, Node* node, Val { auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {1})"), param.ShaderName, worldPosition.Value), node); + Value startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); + value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {0}_Mip, {1}, {2})"), param.ShaderName, worldPosition.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); break; } + // Sample Global SDF Gradient + case 15: + { + auto gradientBox = node->GetBox(0); + auto distanceBox = node->GetBox(2); + auto param = findOrAddGlobalSDF(); + Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); + Value 
startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); + auto distance = writeLocal(VariantType::Float, node); + auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2}, {3})"), param.ShaderName, worldPosition.Value, distance.Value, startCascade.Value), node); + _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); + gradientBox->Cache = gradient; + distanceBox->Cache = distance; + value = box == gradientBox ? gradient : distance; + break; + } default: break; } diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index d7b3bbb23..539900886 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -665,7 +665,8 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) { auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {0}_Mip, {1})"), param.ShaderName, worldPosition.Value), node); + Value startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); + value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {0}_Mip, {1}, {2})"), param.ShaderName, worldPosition.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); break; } @@ -676,8 +677,9 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) auto distanceBox = node->GetBox(2); auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); + 
Value startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); auto distance = writeLocal(VariantType::Float, node); - auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2})"), param.ShaderName, worldPosition.Value, distance.Value), node); + auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2}, {3})"), param.ShaderName, worldPosition.Value, distance.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); gradientBox->Cache = gradient; distanceBox->Cache = distance; diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index ab5d37372..577b9ebca 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -126,12 +126,13 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, floa } // Samples the Global SDF and returns the distance to the closest surface (in world units) at the given world location. -float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition) +float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition, uint startCascade = 0) { float distance = data.CascadePosDistance[3].w * 2.0f; if (distance <= 0.0f) return GLOBAL_SDF_WORLD_SIZE; - for (uint cascade = 0; cascade < data.CascadesCount; cascade++) + startCascade = min(startCascade, data.CascadesCount - 1); + for (uint cascade = startCascade; cascade < data.CascadesCount; cascade++) { float3 cascadeUV, textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); @@ -186,13 +187,14 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D } // Samples the Global SDF and returns the gradient vector (derivative) at the given world location. Normalize it to get normal vector. 
-float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition, out float distance) +float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, Texture3D mip, float3 worldPosition, out float distance, uint startCascade = 0) { float3 gradient = float3(0, 0.00001f, 0); distance = GLOBAL_SDF_WORLD_SIZE; if (data.CascadePosDistance[3].w <= 0.0f) return gradient; - for (uint cascade = 0; cascade < data.CascadesCount; cascade++) + startCascade = min(startCascade, data.CascadesCount - 1); + for (uint cascade = startCascade; cascade < data.CascadesCount; cascade++) { float3 cascadeUV, textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); From 9518ce1d0ac1d6df30eb7f8c2c6f387618876361 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 00:30:21 +0200 Subject: [PATCH 230/292] Fix physics error when disabling terrain --- Source/Engine/Terrain/Terrain.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Source/Engine/Terrain/Terrain.cpp b/Source/Engine/Terrain/Terrain.cpp index 66ecbd377..53881e210 100644 --- a/Source/Engine/Terrain/Terrain.cpp +++ b/Source/Engine/Terrain/Terrain.cpp @@ -831,6 +831,15 @@ void Terrain::OnDisable() #if TERRAIN_USE_PHYSICS_DEBUG GetSceneRendering()->RemovePhysicsDebug(this); #endif + void* scene = GetPhysicsScene()->GetPhysicsScene(); + for (int32 i = 0; i < _patches.Count(); i++) + { + auto patch = _patches[i]; + if (patch->_physicsActor) + { + PhysicsBackend::RemoveSceneActor(scene, patch->_physicsActor); + } + } // Base Actor::OnDisable(); From fe33f09f1d5f6e1092d5c965f07433ff10368955 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 00:32:14 +0200 Subject: [PATCH 231/292] Update shader --- Content/Shaders/GI/DDGI.flax | 4 ++-- Source/Engine/Renderer/ShadowsPass.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax index 
b45314a3c..34663c561 100644 --- a/Content/Shaders/GI/DDGI.flax +++ b/Content/Shaders/GI/DDGI.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc1178697d8a11745194914ecec7d6c073e4a4efa7373bc976fa1f4135e17668 -size 27288 +oid sha256:12a8d1c9df11e370b205c0ea2fc9d9dd77f94c90e6b771658badb3a9327fab8d +size 33095 diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 4cd25a17c..4776bc193 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -856,7 +856,6 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render } // Init shadow data - atlasLight.ContextIndex = renderContextBatch.Contexts.Count(); if (atlasLight.ContextCount == 0) return; renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); From 64bd762f44a445117a8c4243ace73392511de668 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 00:35:36 +0200 Subject: [PATCH 232/292] Fix compilation --- Source/Editor/GUI/AssetPicker.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Source/Editor/GUI/AssetPicker.cs b/Source/Editor/GUI/AssetPicker.cs index 1edb5ec58..69f644827 100644 --- a/Source/Editor/GUI/AssetPicker.cs +++ b/Source/Editor/GUI/AssetPicker.cs @@ -7,6 +7,7 @@ using FlaxEditor.GUI.Drag; using FlaxEditor.Scripting; using FlaxEngine; using FlaxEngine.GUI; +using FlaxEngine.Utilities; namespace FlaxEditor.GUI { @@ -149,7 +150,7 @@ namespace FlaxEditor.GUI TextAlignment.Center); Render2D.DrawText( style.FontSmall, - $"{TypeUtils.GetTypeDisplayName(Validator.AssetType.Type)}", + $"{Validator.AssetType.Type.GetTypeDisplayName()}", new Rectangle(button1Rect.Right + 2, ButtonsSize + 2, sizeForTextLeft, ButtonsSize), style.ForegroundGrey, TextAlignment.Near, @@ -178,7 +179,7 @@ namespace FlaxEditor.GUI TextAlignment.Center); Render2D.DrawText( style.FontSmall, - $"{TypeUtils.GetTypeDisplayName(Validator.AssetType.Type)}", + 
$"{Validator.AssetType.Type.GetTypeDisplayName()}", new Rectangle(button1Rect.Right + 2, ButtonsSize + 2, sizeForTextLeft, ButtonsSize), style.ForegroundGrey, TextAlignment.Near, @@ -202,7 +203,7 @@ namespace FlaxEditor.GUI TextAlignment.Center); Render2D.DrawText( style.FontSmall, - $"{TypeUtils.GetTypeDisplayName(Validator.AssetType.Type)}", + $"{Validator.AssetType.Type.GetTypeDisplayName()}", new Rectangle(button1Rect.Right + 2, ButtonsSize + 2, sizeForTextLeft, ButtonsSize), style.ForegroundGrey, TextAlignment.Near, From 6e60a988a0031b70124231a6d2660b5a74e96d6e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 00:54:05 +0200 Subject: [PATCH 233/292] Fix node archetype --- Source/Editor/Surface/Archetypes/Textures.cs | 2 +- .../Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp | 2 +- .../Tools/MaterialGenerator/MaterialGenerator.Textures.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Editor/Surface/Archetypes/Textures.cs b/Source/Editor/Surface/Archetypes/Textures.cs index 3398b0988..aca0a8f7a 100644 --- a/Source/Editor/Surface/Archetypes/Textures.cs +++ b/Source/Editor/Surface/Archetypes/Textures.cs @@ -390,7 +390,7 @@ namespace FlaxEditor.Surface.Archetypes NodeElementArchetype.Factory.Output(0, "Gradient", typeof(Float3), 0), NodeElementArchetype.Factory.Output(1, "Distance", typeof(float), 2), NodeElementArchetype.Factory.Input(0, "World Position", true, typeof(Float3), 1), - NodeElementArchetype.Factory.Input(1, "Start Cascade", true, typeof(int), 2, 0), + NodeElementArchetype.Factory.Input(1, "Start Cascade", true, typeof(int), 3, 0), } }, new NodeArchetype diff --git a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp index f05b09dac..01c7017a9 100644 --- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp +++ 
b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp @@ -323,7 +323,7 @@ void ParticleEmitterGPUGenerator::ProcessGroupTextures(Box* box, Node* node, Val auto distanceBox = node->GetBox(2); auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - Value startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); + Value startCascade = tryGetValue(node->GetBox(3), 0, Value::Zero).Cast(VariantType::Uint); auto distance = writeLocal(VariantType::Float, node); auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2}, {3})"), param.ShaderName, worldPosition.Value, distance.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index 539900886..08ef842a4 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -677,7 +677,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) auto distanceBox = node->GetBox(2); auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - Value startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); + Value startCascade = tryGetValue(node->GetBox(3), 0, Value::Zero).Cast(VariantType::Uint); auto distance = writeLocal(VariantType::Float, node); auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2}, {3})"), param.ShaderName, worldPosition.Value, distance.Value, startCascade.Value), 
node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); From 3f9989ea579b6a7dfa87cefb26bb812fae2f0224 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 13:37:09 +0200 Subject: [PATCH 234/292] Add utility `Packing.hlsl` shader for colors and vectors packing --- Source/Shaders/Packing.hlsl | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 Source/Shaders/Packing.hlsl diff --git a/Source/Shaders/Packing.hlsl b/Source/Shaders/Packing.hlsl new file mode 100644 index 000000000..55f8d4dc3 --- /dev/null +++ b/Source/Shaders/Packing.hlsl @@ -0,0 +1,68 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +#ifndef __PACKING__ +#define __PACKING__ + +// https://github.com/turanszkij/WickedEngine/blob/4a6171fdd584fcf9d41045e6dd2a13c03fa2e4d9/WickedEngine/shaders/globals.hlsli#L1103-L1118 +inline uint PackUnitVector(in float3 value) +{ + uint result = 0; + result |= (uint)((value.x * 0.5 + 0.5) * 255.0) << 0u; + result |= (uint)((value.y * 0.5 + 0.5) * 255.0) << 8u; + result |= (uint)((value.z * 0.5 + 0.5) * 255.0) << 16u; + return result; +} +inline float3 UnpackUnitVector(in uint value) +{ + float3 result; + result.x = (float)((value >> 0u) & 0xFF) / 255.0 * 2 - 1; + result.y = (float)((value >> 8u) & 0xFF) / 255.0 * 2 - 1; + result.z = (float)((value >> 16u) & 0xFF) / 255.0 * 2 - 1; + return result; +} + +// https://github.com/turanszkij/WickedEngine/blob/4a6171fdd584fcf9d41045e6dd2a13c03fa2e4d9/WickedEngine/shaders/PixelPacking_RGBE.hlsli +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. 
+// Developed by Minigraph +// Author: James Stanard +// RGBE, aka R9G9B9E5_SHAREDEXP, is an unsigned float HDR pixel format where red, green, +// and blue all share the same exponent. The color channels store a 9-bit value ranging +// from [0/512, 511/512] which multiplies by 2^Exp and Exp ranges from [-15, 16]. +// Floating point specials are not encoded. +uint PackRGBE(float3 rgb) +{ + // To determine the shared exponent, we must clamp the channels to an expressible range + const float kMaxVal = asfloat(0x477F8000); // 1.FF x 2^+15 + const float kMinVal = asfloat(0x37800000); // 1.00 x 2^-16 + + // Non-negative and <= kMaxVal + rgb = clamp(rgb, 0, kMaxVal); + + // From the maximum channel we will determine the exponent. We clamp to a min value + // so that the exponent is within the valid 5-bit range. + float MaxChannel = max(max(kMinVal, rgb.r), max(rgb.g, rgb.b)); + + // 'Bias' has to have the biggest exponent plus 15 (and nothing in the mantissa). When + // added to the three channels, it shifts the explicit '1' and the 8 most significant + // mantissa bits into the low 9 bits. IEEE rules of float addition will round rather + // than truncate the discarded bits. Channels with smaller natural exponents will be + // shifted further to the right (discarding more bits). 
+ float Bias = asfloat((asuint(MaxChannel) + 0x07804000) & 0x7F800000); + + // Shift bits into the right places + uint3 RGB = asuint(rgb + Bias); + uint E = (asuint(Bias) << 4) + 0x10000000; + return E | RGB.b << 18 | RGB.g << 9 | (RGB.r & 0x1FF); +} +float3 UnpackRGBE(uint p) +{ + float3 rgb = uint3(p, p >> 9, p >> 18) & 0x1FF; + return ldexp(rgb, (int)(p >> 27) - 24); +} + +#endif From 3296337f40d1dfe217f2baf1b4a398aa9e309d07 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 14:26:19 +0200 Subject: [PATCH 235/292] Add `SetStablePowerState` for D3D12 when doing shaders profiling --- .../Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index 9f024136a..a33cd8194 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -423,9 +423,12 @@ bool GPUDeviceDX12::Init() } } -#if BUILD_DEBUG && false +#if !BUILD_RELEASE // Prevent the GPU from overclocking or under-clocking to get consistent timings - _device->SetStablePowerState(TRUE); + if (CommandLine::Options.ShaderProfile) + { + _device->SetStablePowerState(TRUE); + } #endif // Setup resources From b14ac354bbf76c7028e12e88d36af3c0bb9c7329 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 19 Jul 2024 14:27:50 +0200 Subject: [PATCH 236/292] Optimize `DynamicTypedBuffer` and `DynamicStructuredBuffer` to use GPU-memory for commonly used single-frame uploads --- Source/Engine/Graphics/DynamicBuffer.cpp | 103 +++++++++++------------ Source/Engine/Graphics/DynamicBuffer.h | 32 ++++--- 2 files changed, 62 insertions(+), 73 deletions(-) diff --git a/Source/Engine/Graphics/DynamicBuffer.cpp b/Source/Engine/Graphics/DynamicBuffer.cpp index 54388fe85..9c5c06fae 100644 --- a/Source/Engine/Graphics/DynamicBuffer.cpp +++ 
b/Source/Engine/Graphics/DynamicBuffer.cpp @@ -22,69 +22,43 @@ DynamicBuffer::~DynamicBuffer() SAFE_DELETE_GPU_RESOURCE(_buffer); } -void DynamicBuffer::Flush() -{ - // Check if has sth to flush - const uint32 size = Data.Count(); - if (size > 0) - { - // Check if has no buffer - if (_buffer == nullptr) - _buffer = GPUDevice::Instance->CreateBuffer(_name); - - // Check if need to resize buffer - if (_buffer->GetSize() < size) - { - const uint32 numElements = Math::AlignUp(static_cast((size / _stride) * 1.3f), 32); - GPUBufferDescription desc; - InitDesc(desc, numElements); - if (_buffer->Init(desc)) - { - LOG(Fatal, "Cannot setup dynamic buffer '{0}'! Size: {1}", _name, Utilities::BytesToText(size)); - return; - } - } - - // Upload data to the buffer - if (GPUDevice::Instance->IsRendering()) - { - RenderContext::GPULocker.Lock(); - GPUDevice::Instance->GetMainContext()->UpdateBuffer(_buffer, Data.Get(), size); - RenderContext::GPULocker.Unlock(); - } - else - { - _buffer->SetData(Data.Get(), size); - } - } -} - void DynamicBuffer::Flush(GPUContext* context) { - // Check if has sth to flush const uint32 size = Data.Count(); - if (size > 0) + if (size == 0) + return; + + // Lazy-resize buffer + if (_buffer == nullptr) + _buffer = GPUDevice::Instance->CreateBuffer(_name); + if (_buffer->GetSize() < size) { - // Check if has no buffer - if (_buffer == nullptr) - _buffer = GPUDevice::Instance->CreateBuffer(_name); - - // Check if need to resize buffer - if (_buffer->GetSize() < size) + const int32 numElements = Math::AlignUp(static_cast((size / _stride) * 1.3f), 32); + GPUBufferDescription desc; + InitDesc(desc, numElements); + desc.Usage = SingleFrame ? GPUResourceUsage::Default : GPUResourceUsage::Dynamic; + if (_buffer->Init(desc)) { - const uint32 numElements = Math::AlignUp(static_cast((size / _stride) * 1.3f), 32); - GPUBufferDescription desc; - InitDesc(desc, numElements); - if (_buffer->Init(desc)) - { - LOG(Fatal, "Cannot setup dynamic buffer '{0}'! 
Size: {1}", _name, Utilities::BytesToText(size)); - return; - } + LOG(Fatal, "Cannot setup dynamic buffer '{0}'! Size: {1}", _name, Utilities::BytesToText(size)); + return; } + } - // Upload data to the buffer + // Upload data to the buffer + if (context) + { context->UpdateBuffer(_buffer, Data.Get(), size); } + else if (GPUDevice::Instance->IsRendering()) + { + RenderContext::GPULocker.Lock(); + GPUDevice::Instance->GetMainContext()->UpdateBuffer(_buffer, Data.Get(), size); + RenderContext::GPULocker.Unlock(); + } + else + { + _buffer->SetData(Data.Get(), size); + } } void DynamicBuffer::Dispose() @@ -93,10 +67,26 @@ void DynamicBuffer::Dispose() Data.Resize(0); } +void DynamicVertexBuffer::InitDesc(GPUBufferDescription& desc, int32 numElements) +{ + desc = GPUBufferDescription::Vertex(_stride, numElements, GPUResourceUsage::Dynamic); +} + +void DynamicIndexBuffer::InitDesc(GPUBufferDescription& desc, int32 numElements) +{ + desc = GPUBufferDescription::Index(_stride, numElements, GPUResourceUsage::Dynamic); +} + +DynamicStructuredBuffer::DynamicStructuredBuffer(uint32 initialCapacity, uint32 stride, bool isUnorderedAccess, const String& name) + : DynamicBuffer(initialCapacity, stride, name) + , _isUnorderedAccess(isUnorderedAccess) +{ + SingleFrame = true; // The most common use-case is just for a single upload of data prepared by CPU +} + void DynamicStructuredBuffer::InitDesc(GPUBufferDescription& desc, int32 numElements) { desc = GPUBufferDescription::Structured(numElements, _stride, _isUnorderedAccess); - desc.Usage = GPUResourceUsage::Dynamic; } DynamicTypedBuffer::DynamicTypedBuffer(uint32 initialCapacity, PixelFormat format, bool isUnorderedAccess, const String& name) @@ -104,6 +94,7 @@ DynamicTypedBuffer::DynamicTypedBuffer(uint32 initialCapacity, PixelFormat forma , _format(format) , _isUnorderedAccess(isUnorderedAccess) { + SingleFrame = true; // The most common use-case is just for a single upload of data prepared by CPU } void 
DynamicTypedBuffer::InitDesc(GPUBufferDescription& desc, int32 numElements) @@ -111,5 +102,5 @@ void DynamicTypedBuffer::InitDesc(GPUBufferDescription& desc, int32 numElements) auto bufferFlags = GPUBufferFlags::ShaderResource; if (_isUnorderedAccess) bufferFlags |= GPUBufferFlags::UnorderedAccess; - desc = GPUBufferDescription::Buffer(numElements * _stride, bufferFlags, _format, nullptr, _stride, GPUResourceUsage::Dynamic); + desc = GPUBufferDescription::Buffer(numElements * _stride, bufferFlags, _format, nullptr, _stride); } diff --git a/Source/Engine/Graphics/DynamicBuffer.h b/Source/Engine/Graphics/DynamicBuffer.h index 9fccb8b3a..d859301e0 100644 --- a/Source/Engine/Graphics/DynamicBuffer.h +++ b/Source/Engine/Graphics/DynamicBuffer.h @@ -6,7 +6,7 @@ #include "GPUBuffer.h" /// -/// Dynamic GPU buffer that allows to update and use GPU data (index/vertex/other) during single frame (supports dynamic resizing) +/// Dynamic GPU buffer that allows to update and use GPU data (index/vertex/other) during single frame (supports dynamic resizing). /// class FLAXENGINE_API DynamicBuffer { @@ -32,13 +32,18 @@ public: virtual ~DynamicBuffer(); public: + /// + /// True if buffer will be used once per-frame, otherwise it should support uploading data multiple times per-frame. If true 'GPUResourceUsage::Dynamic' will be used, otherwise 'GPUResourceUsage::Default'. + /// + bool SingleFrame = false; + /// /// The data container (raw bytes storage). /// Array Data; /// - /// Gets buffer (may be null since it's using 'late init' feature) + /// Gets buffer (can be null due to 'late init' feature). /// FORCE_INLINE GPUBuffer* GetBuffer() const { @@ -70,7 +75,7 @@ public: /// Amount of data to write (in bytes) FORCE_INLINE void Write(const void* bytes, int32 size) { - Data.Add((byte*)bytes, size); + Data.Add((const byte*)bytes, size); } /// @@ -97,7 +102,10 @@ public: /// /// Unlock buffer and flush data with a buffer (it will be ready for an immediate draw). 
/// - void Flush(); + void Flush() + { + Flush(nullptr); + } /// /// Unlock buffer and flush data with a buffer (it will be ready for a during next frame draw). @@ -133,10 +141,7 @@ public: protected: // [DynamicBuffer] - void InitDesc(GPUBufferDescription& desc, int32 numElements) override - { - desc = GPUBufferDescription::Vertex(_stride, numElements, GPUResourceUsage::Dynamic); - } + void InitDesc(GPUBufferDescription& desc, int32 numElements) override; }; /// @@ -158,10 +163,7 @@ public: protected: // [DynamicBuffer] - void InitDesc(GPUBufferDescription& desc, int32 numElements) override - { - desc = GPUBufferDescription::Index(_stride, numElements, GPUResourceUsage::Dynamic); - } + void InitDesc(GPUBufferDescription& desc, int32 numElements) override; }; /// @@ -180,11 +182,7 @@ public: /// Stride in bytes. /// True if unordered access usage. /// Buffer name. - DynamicStructuredBuffer(uint32 initialCapacity, uint32 stride, bool isUnorderedAccess = false, const String& name = String::Empty) - : DynamicBuffer(initialCapacity, stride, name) - , _isUnorderedAccess(isUnorderedAccess) - { - } + DynamicStructuredBuffer(uint32 initialCapacity, uint32 stride, bool isUnorderedAccess = false, const String& name = String::Empty); protected: // [DynamicBuffer] From 4976a2ef6a7e4c878088530ccd519906bce94187 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 20 Jul 2024 22:42:14 +0200 Subject: [PATCH 237/292] Change `DynamicBuffer` usage feature from b14ac354bbf76c7028e12e88d36af3c0bb9c7329 --- Source/Engine/Graphics/DynamicBuffer.cpp | 8 ++++---- Source/Engine/Graphics/DynamicBuffer.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Graphics/DynamicBuffer.cpp b/Source/Engine/Graphics/DynamicBuffer.cpp index 9c5c06fae..8414d0a57 100644 --- a/Source/Engine/Graphics/DynamicBuffer.cpp +++ b/Source/Engine/Graphics/DynamicBuffer.cpp @@ -31,12 +31,12 @@ void DynamicBuffer::Flush(GPUContext* context) // Lazy-resize buffer if (_buffer == 
nullptr) _buffer = GPUDevice::Instance->CreateBuffer(_name); - if (_buffer->GetSize() < size) + if (_buffer->GetSize() < size || _buffer->GetDescription().Usage != Usage) { const int32 numElements = Math::AlignUp(static_cast((size / _stride) * 1.3f), 32); GPUBufferDescription desc; InitDesc(desc, numElements); - desc.Usage = SingleFrame ? GPUResourceUsage::Default : GPUResourceUsage::Dynamic; + desc.Usage = Usage; if (_buffer->Init(desc)) { LOG(Fatal, "Cannot setup dynamic buffer '{0}'! Size: {1}", _name, Utilities::BytesToText(size)); @@ -81,7 +81,7 @@ DynamicStructuredBuffer::DynamicStructuredBuffer(uint32 initialCapacity, uint32 : DynamicBuffer(initialCapacity, stride, name) , _isUnorderedAccess(isUnorderedAccess) { - SingleFrame = true; // The most common use-case is just for a single upload of data prepared by CPU + Usage = GPUResourceUsage::Default; // The most common use-case is just for a single upload of data prepared by CPU } void DynamicStructuredBuffer::InitDesc(GPUBufferDescription& desc, int32 numElements) @@ -94,7 +94,7 @@ DynamicTypedBuffer::DynamicTypedBuffer(uint32 initialCapacity, PixelFormat forma , _format(format) , _isUnorderedAccess(isUnorderedAccess) { - SingleFrame = true; // The most common use-case is just for a single upload of data prepared by CPU + Usage = GPUResourceUsage::Default; // The most common use-case is just for a single upload of data prepared by CPU } void DynamicTypedBuffer::InitDesc(GPUBufferDescription& desc, int32 numElements) diff --git a/Source/Engine/Graphics/DynamicBuffer.h b/Source/Engine/Graphics/DynamicBuffer.h index d859301e0..e00ffacbd 100644 --- a/Source/Engine/Graphics/DynamicBuffer.h +++ b/Source/Engine/Graphics/DynamicBuffer.h @@ -33,9 +33,9 @@ public: public: /// - /// True if buffer will be used once per-frame, otherwise it should support uploading data multiple times per-frame. If true 'GPUResourceUsage::Dynamic' will be used, otherwise 'GPUResourceUsage::Default'. + /// GPU usage of the resource. 
Use Dynamic for resources that can be updated multiple timers per-frame. /// - bool SingleFrame = false; + GPUResourceUsage Usage = GPUResourceUsage::Dynamic; /// /// The data container (raw bytes storage). From 6f3d1cdd0a634d9329cb09ef245dca32b9049385 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 11:46:20 +0200 Subject: [PATCH 238/292] Optimize Global Surface Atlas objects culling with shared thread group pre-cull --- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 86 +++++++++--------- Source/Shaders/GI/GlobalSurfaceAtlas.shader | 91 ++++++++++--------- 2 files changed, 95 insertions(+), 82 deletions(-) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 65a5ba7d6..34b745e59 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -151,6 +151,7 @@ public: GPUBuffer* ChunksBuffer = nullptr; GPUBuffer* CulledObjectsBuffer = nullptr; DynamicTypedBuffer ObjectsBuffer; + DynamicTypedBuffer ObjectsListBuffer; int32 CulledObjectsCounterIndex = -1; GlobalSurfaceAtlasPass::BindingData Result; RectPackAtlas Atlas; @@ -179,6 +180,7 @@ public: GlobalSurfaceAtlasCustomBuffer() : ObjectsBuffer(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer")) + , ObjectsListBuffer(0, PixelFormat::R32_UInt, false, TEXT("GlobalSurfaceAtlas.ObjectsListBuffer")) { } @@ -403,6 +405,8 @@ public: PROFILE_CPU_NAMED("Write Objects"); DirtyObjectsBuffer.Clear(); ObjectsBuffer.Clear(); + ObjectsListBuffer.Clear(); + ObjectsListBuffer.Data.EnsureCapacity(Objects.Count() * sizeof(uint32)); for (auto& e : Objects) { auto& object = e.Value; @@ -421,6 +425,7 @@ public: // Write to objects buffer (this must match unpacking logic in HLSL) uint32 objectAddress = ObjectsBuffer.Data.Count() / sizeof(Float4); + 
ObjectsListBuffer.Write(objectAddress); auto* objectData = ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); objectData[0] = Float4(object.Position, object.Radius); objectData[1] = Float4::Zero; @@ -912,6 +917,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { PROFILE_GPU_CPU_NAMED("Update Objects"); surfaceAtlasData.ObjectsBuffer.Flush(context); + surfaceAtlasData.ObjectsListBuffer.Flush(context); } // Init constants @@ -924,7 +930,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co bool notReady = false; // Cull objects into chunks (for faster Atlas sampling) - if (surfaceAtlasData.Objects.Count() != 0) + if (surfaceAtlasData.Objects.Count() != 0 && surfaceAtlasData.ChunksBuffer) { // Each chunk (ChunksBuffer) contains uint with address of the culled objects data start in CulledObjectsBuffer. // If chunk has address=0 then it's unused/empty. @@ -935,55 +941,52 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co uint32 objectsBufferCapacity = (uint32)((float)surfaceAtlasData.Objects.Count() * 1.3f); // Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity - if (surfaceAtlasData.ChunksBuffer) + if (!_culledObjectsSizeBuffer) { - if (!_culledObjectsSizeBuffer) + Platform::MemoryClear(_culledObjectsSizeFrames, sizeof(_culledObjectsSizeFrames)); + _culledObjectsSizeBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.CulledObjectsSizeBuffer")); + const GPUBufferDescription desc = GPUBufferDescription::Buffer(ARRAY_COUNT(_culledObjectsSizeFrames) * sizeof(uint32), GPUBufferFlags::None, PixelFormat::R32_UInt, _culledObjectsSizeFrames, sizeof(uint32), GPUResourceUsage::StagingReadback); + if (_culledObjectsSizeBuffer->Init(desc)) + return true; + } + if (surfaceAtlasData.CulledObjectsCounterIndex != -1) + { + // Get the last counter value (accept staging 
readback delay or not available data yet) + notReady = true; + auto data = (uint32*)_culledObjectsSizeBuffer->Map(GPUResourceMapMode::Read); + if (data) { - Platform::MemoryClear(_culledObjectsSizeFrames, sizeof(_culledObjectsSizeFrames)); - _culledObjectsSizeBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.CulledObjectsSizeBuffer")); - const GPUBufferDescription desc = GPUBufferDescription::Buffer(ARRAY_COUNT(_culledObjectsSizeFrames) * sizeof(uint32), GPUBufferFlags::None, PixelFormat::R32_UInt, _culledObjectsSizeFrames, sizeof(uint32), GPUResourceUsage::StagingReadback); - if (_culledObjectsSizeBuffer->Init(desc)) - return true; - } - if (surfaceAtlasData.CulledObjectsCounterIndex != -1) - { - // Get the last counter value (accept staging readback delay or not available data yet) - notReady = true; - auto data = (uint32*)_culledObjectsSizeBuffer->Map(GPUResourceMapMode::Read); - if (data) + uint32 counter = data[surfaceAtlasData.CulledObjectsCounterIndex]; + if (counter > 0) { - uint32 counter = data[surfaceAtlasData.CulledObjectsCounterIndex]; - if (counter > 0) - { - objectsBufferCapacity = counter; - notReady = false; - } - _culledObjectsSizeBuffer->Unmap(); - } - - // Allow to be ready if the buffer was already used - if (notReady && surfaceAtlasData.CulledObjectsBuffer && surfaceAtlasData.CulledObjectsBuffer->IsAllocated()) + objectsBufferCapacity = counter; notReady = false; + } + _culledObjectsSizeBuffer->Unmap(); } - if (surfaceAtlasData.CulledObjectsCounterIndex == -1) + + // Allow to be ready if the buffer was already used + if (notReady && surfaceAtlasData.CulledObjectsBuffer && surfaceAtlasData.CulledObjectsBuffer->IsAllocated()) + notReady = false; + } + if (surfaceAtlasData.CulledObjectsCounterIndex == -1) + { + // Find a free timer slot + notReady = true; + for (int32 i = 0; i < ARRAY_COUNT(_culledObjectsSizeFrames); i++) { - // Find a free timer slot - notReady = true; - for (int32 i = 0; i < 
ARRAY_COUNT(_culledObjectsSizeFrames); i++) + if (currentFrame - _culledObjectsSizeFrames[i] > GPU_ASYNC_LATENCY) { - if (currentFrame - _culledObjectsSizeFrames[i] > GPU_ASYNC_LATENCY) - { - surfaceAtlasData.CulledObjectsCounterIndex = i; - break; - } + surfaceAtlasData.CulledObjectsCounterIndex = i; + break; } } - if (surfaceAtlasData.CulledObjectsCounterIndex != -1 && surfaceAtlasData.CulledObjectsBuffer) - { - // Copy current counter value - _culledObjectsSizeFrames[surfaceAtlasData.CulledObjectsCounterIndex] = currentFrame; - context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.CulledObjectsBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0); - } + } + if (surfaceAtlasData.CulledObjectsCounterIndex != -1 && surfaceAtlasData.CulledObjectsBuffer) + { + // Copy current counter value + _culledObjectsSizeFrames[surfaceAtlasData.CulledObjectsCounterIndex] = currentFrame; + context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.CulledObjectsBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0); } // Calculate optimal capacity for the objects buffer @@ -1024,6 +1027,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co static_assert(GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION % GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE == 0, "Invalid chunks resolution/groups setting."); const int32 chunkDispatchGroups = GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION / GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE; context->BindSR(0, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View()); + context->BindSR(1, surfaceAtlasData.ObjectsListBuffer.GetBuffer()->View()); context->BindUA(0, surfaceAtlasData.ChunksBuffer->View()); context->BindUA(1, surfaceAtlasData.CulledObjectsBuffer->View()); context->Dispatch(_csCullObjects, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index 
2a762f5be..f8066549f 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -201,41 +201,67 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0); RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1); Buffer GlobalSurfaceAtlasObjects : register(t0); +Buffer GlobalSurfaceAtlasObjectsList : register(t1); -#define GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE 32 // Amount of objects to cache locally per-thread for culling +#define GLOBAL_SURFACE_ATLAS_SHARED_CULL_SIZE 255 // Limit of objects that can be culled for a whole group of 4x4x4 threads (64 chunks) + +groupshared uint SharedCulledObjectsCount; +groupshared uint SharedCulledObjects[GLOBAL_SURFACE_ATLAS_SHARED_CULL_SIZE]; // Compute shader for culling objects into chunks META_CS(true, FEATURE_LEVEL_SM5) [numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)] -void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID) +void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID) { uint3 chunkCoord = DispatchThreadId; uint chunkAddress = (chunkCoord.z * (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION) + chunkCoord.y * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION + chunkCoord.x) * 4; float3 chunkMin = GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * GlobalSurfaceAtlas.ChunkSize; - float3 chunkMax = chunkMin + GlobalSurfaceAtlas.ChunkSize; + float3 chunkMax = chunkMin + GlobalSurfaceAtlas.ChunkSize.xxx; + uint groupIndex = (GroupThreadId.z * GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE + GroupThreadId.y) * GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE + GroupThreadId.x; + float3 groupMin = GlobalSurfaceAtlas.ViewPos + (GroupId * GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE - 
(GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * GlobalSurfaceAtlas.ChunkSize; + float3 groupMax = groupMin + (GlobalSurfaceAtlas.ChunkSize * GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE).xxx; - // Count objects in this chunk - uint objectAddress = 0, objectsCount = 0; - // TODO: pre-cull objects within a thread group - uint localCulledObjects[GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE]; + // Clear shared memory + if (groupIndex == 0) + { + SharedCulledObjectsCount = 0; + } + GroupMemoryBarrierWithGroupSync(); + + // Shared culling of all objects by all threads for a whole group LOOP - for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++) + for (uint objectIndex = groupIndex; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex += GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE * GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE * GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE) { + uint objectAddress = GlobalSurfaceAtlasObjectsList.Load(objectIndex); float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress); - uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress); - if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) + if (BoxIntersectsSphere(groupMin, groupMax, objectBounds.xyz, objectBounds.w)) { - localCulledObjects[objectsCount % GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE] = objectAddress; - objectsCount++; + uint sharedIndex; + InterlockedAdd(SharedCulledObjectsCount, 1, sharedIndex); + if (sharedIndex < GLOBAL_SURFACE_ATLAS_SHARED_CULL_SIZE) + SharedCulledObjects[sharedIndex] = objectAddress; } - objectAddress += objectSize; } - if (objectsCount == 0) + GroupMemoryBarrierWithGroupSync(); + + // Cull objects from the shared buffer against active thread's chunk + uint objectsCount = 0; + LOOP + for (uint i = 0; i < SharedCulledObjectsCount; i++) { - // Empty chunk - RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0); - return; + uint objectAddress = 
SharedCulledObjects[i]; + float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress); + if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) + { + objectsCount++; + } } + if (objectsCount == 0) + { + // Empty chunk + RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0); + return; + } // Allocate object data size in the buffer uint objectsStart; @@ -254,34 +280,17 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID) // Write objects count before actual objects indices RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectsCount); - // Copy objects data in this chunk - if (objectsCount <= GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE) - { - // Reuse locally cached objects - LOOP - for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++) + // Copy objects data in this chunk (cull from the shared buffer) + LOOP + for (uint i = 0; i < SharedCulledObjectsCount; i++) + { + uint objectAddress = SharedCulledObjects[i]; + float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress); + if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) { - objectAddress = localCulledObjects[objectIndex]; objectsStart++; RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress); } - } - else - { - // Brute-force culling - objectAddress = 0; - LOOP - for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++) - { - float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress); - uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress); - if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) - { - objectsStart++; - RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress); - } - objectAddress += objectSize; - } } } From e9144ff8345f2049238ba58256833af2a51a0a10 Mon Sep 17 00:00:00 
2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 12:21:50 +0200 Subject: [PATCH 239/292] Fix blending between DDGI cascades on the end of the range --- Source/Shaders/GI/DDGI.hlsl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 8b87ffea0..79ef1bea7 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -322,9 +322,10 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Sample cascade float3 result = SampleDDGIIrradianceCascade(data, probesData, probesDistance, probesIrradiance, worldPosition, worldNormal, cascadeIndex, probesOrigin, probesExtent, probesSpacing, biasedWorldPosition); -#if DDGI_CASCADE_BLEND_SMOOTH - // Blend with the next cascade + // Blend with the next cascade (or fallback irradiance outside the volume) cascadeIndex++; +#if DDGI_CASCADE_BLEND_SMOOTH + result *= cascadeWeight; if (cascadeIndex < data.CascadesCount && cascadeWeight < 0.99f) { probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; @@ -332,7 +333,16 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); biasedWorldPosition = worldPosition + GetDDGISurfaceBias(viewDir, probesSpacing, worldNormal, bias); float3 resultNext = SampleDDGIIrradianceCascade(data, probesData, probesDistance, probesIrradiance, worldPosition, worldNormal, cascadeIndex, probesOrigin, probesExtent, probesSpacing, biasedWorldPosition); - result = lerp(resultNext, result, cascadeWeight); + result += resultNext * (1 - cascadeWeight); + } + else + { + result += data.FallbackIrradiance * (1 - cascadeWeight); + } +#else + if (cascadeIndex == data.CascadesCount) + { + result += data.FallbackIrradiance * (1 - cascadeWeight); } #endif From b2e228c090b3f7a05c655955fd7e416375cd07c8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 13:43:53 +0200 Subject: [PATCH 240/292] Fix shadows 
rendering in reflection probes --- Source/Engine/Graphics/RenderBuffers.cpp | 6 ++--- Source/Engine/Graphics/RenderBuffers.h | 14 ++++++------ Source/Engine/Renderer/ProbesRenderer.cpp | 1 + Source/Engine/Renderer/ShadowsPass.cpp | 27 ++++++++++++++++------- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/Source/Engine/Graphics/RenderBuffers.cpp b/Source/Engine/Graphics/RenderBuffers.cpp index f95f8ec74..e1a12bab8 100644 --- a/Source/Engine/Graphics/RenderBuffers.cpp +++ b/Source/Engine/Graphics/RenderBuffers.cpp @@ -126,10 +126,10 @@ void RenderBuffers::SetUseAlpha(bool value) _useAlpha = value; } -const RenderBuffers::CustomBuffer* RenderBuffers::FindCustomBuffer(const StringView& name) const +const RenderBuffers::CustomBuffer* RenderBuffers::FindCustomBuffer(const StringView& name, bool withLinked) const { - if (LinkedCustomBuffers) - return LinkedCustomBuffers->FindCustomBuffer(name); + if (LinkedCustomBuffers && withLinked) + return LinkedCustomBuffers->FindCustomBuffer(name, withLinked); for (const CustomBuffer* e : CustomBuffers) { if (e->Name == name) diff --git a/Source/Engine/Graphics/RenderBuffers.h b/Source/Engine/Graphics/RenderBuffers.h index 2cb78cc07..1b6ef6dcb 100644 --- a/Source/Engine/Graphics/RenderBuffers.h +++ b/Source/Engine/Graphics/RenderBuffers.h @@ -167,20 +167,20 @@ public: /// API_PROPERTY() void SetUseAlpha(bool value); - const CustomBuffer* FindCustomBuffer(const StringView& name) const; + const CustomBuffer* FindCustomBuffer(const StringView& name, bool withLinked = true) const; template - const T* FindCustomBuffer(const StringView& name) const + const T* FindCustomBuffer(const StringView& name, bool withLinked = true) const { - return (const T*)FindCustomBuffer(name); + return (const T*)FindCustomBuffer(name, withLinked); } template - T* GetCustomBuffer(const StringView& name) + T* GetCustomBuffer(const StringView& name, bool withLinked = true) { - if (LinkedCustomBuffers) - return 
LinkedCustomBuffers->GetCustomBuffer(name); - CustomBuffer* result = (CustomBuffer*)FindCustomBuffer(name); + if (LinkedCustomBuffers && withLinked) + return LinkedCustomBuffers->GetCustomBuffer(name, withLinked); + CustomBuffer* result = (CustomBuffer*)FindCustomBuffer(name, withLinked); if (!result) { result = New(); diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index ec11deeb4..54e1f6991 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -492,6 +492,7 @@ void ProbesRenderer::OnRender(RenderTask* task, GPUContext* context) // Render frame Renderer::Render(_task); context->ClearState(); + _task->CameraCut(); // Copy frame to cube face { diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 4776bc193..68f26d05e 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -1043,8 +1043,6 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& // Early out and skip shadows setup if no lights is actively casting shadows // RenderBuffers will automatically free any old ShadowsCustomBuffer after a few frames if we don't update LastFrameUsed - if (_shadowMapFormat == PixelFormat::Unknown || checkIfSkipPass() || EnumHasNoneFlags(renderContext.View.Flags, ViewFlags::Shadows)) - return; Array shadowedLights; for (auto& light : renderContext.List->DirectionalLights) { @@ -1061,12 +1059,25 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& if (light.CanRenderShadow(renderContext.View)) shadowedLights.Add(&light); } - if (shadowedLights.IsEmpty()) + const auto currentFrame = Engine::FrameCount; + if (_shadowMapFormat == PixelFormat::Unknown || + EnumHasNoneFlags(renderContext.View.Flags, ViewFlags::Shadows) || + checkIfSkipPass() || + shadowedLights.IsEmpty()) + { + // Invalidate any existing custom buffer that could have been 
used by the same task (eg. when rendering 6 sides of env probe) + if (auto* old = (ShadowsCustomBuffer*)renderContext.Buffers->FindCustomBuffer(TEXT("Shadows"), false)) + { + if (old->LastFrameUsed == currentFrame) + old->LastFrameUsed = 0; + } return; + } // Initialize shadow atlas - auto& shadows = *renderContext.Buffers->GetCustomBuffer(TEXT("Shadows")); - const auto currentFrame = Engine::FrameCount; + auto& shadows = *renderContext.Buffers->GetCustomBuffer(TEXT("Shadows"), false); + if (shadows.LastFrameUsed == currentFrame) + shadows.Reset(); shadows.LastFrameUsed = currentFrame; shadows.MaxShadowsQuality = Math::Clamp(Math::Min((int32)Graphics::ShadowsQuality, (int32)renderContext.View.MaxShadowsQuality), 0, (int32)Quality::MAX - 1); shadows.EnableStaticShadows = !renderContext.View.IsOfflinePass && !renderContext.View.IsSingleFrame; @@ -1337,7 +1348,7 @@ RETRY_ATLAS_SETUP: void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch) { const RenderContext& renderContext = renderContextBatch.GetMainContext(); - const ShadowsCustomBuffer* shadowsPtr = renderContext.Buffers->FindCustomBuffer(TEXT("Shadows")); + const ShadowsCustomBuffer* shadowsPtr = renderContext.Buffers->FindCustomBuffer(TEXT("Shadows"), false); if (shadowsPtr == nullptr || shadowsPtr->Lights.IsEmpty() || shadowsPtr->LastFrameUsed != Engine::FrameCount) return; PROFILE_GPU_CPU("ShadowMaps"); @@ -1488,7 +1499,7 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende PROFILE_GPU_CPU("Shadow"); GPUContext* context = GPUDevice::Instance->GetMainContext(); RenderContext& renderContext = renderContextBatch.GetMainContext(); - const ShadowsCustomBuffer& shadows = *renderContext.Buffers->FindCustomBuffer(TEXT("Shadows")); + const ShadowsCustomBuffer& shadows = *renderContext.Buffers->FindCustomBuffer(TEXT("Shadows"), false); ASSERT(shadows.LastFrameUsed == Engine::FrameCount); auto& view = renderContext.View; auto shader = _shader->GetShader(); @@ -1559,7 
+1570,7 @@ void ShadowsPass::RenderShadowMask(RenderContextBatch& renderContextBatch, Rende void ShadowsPass::GetShadowAtlas(const RenderBuffers* renderBuffers, GPUTexture*& shadowMapAtlas, GPUBufferView*& shadowsBuffer) { - const ShadowsCustomBuffer* shadowsPtr = renderBuffers->FindCustomBuffer(TEXT("Shadows")); + const ShadowsCustomBuffer* shadowsPtr = renderBuffers->FindCustomBuffer(TEXT("Shadows"), false); if (shadowsPtr && shadowsPtr->ShadowMapAtlas && shadowsPtr->LastFrameUsed == Engine::FrameCount) { shadowMapAtlas = shadowsPtr->ShadowMapAtlas; From aa57db03bbc6aabdd2024ceb5ad462c306315361 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 14:09:02 +0200 Subject: [PATCH 241/292] Revert debug change --- Source/Engine/Graphics/Graphics.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index d60e3d8e8..489299714 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -23,7 +23,7 @@ Quality Graphics::GlobalSDFQuality = Quality::High; Quality Graphics::GIQuality = Quality::High; bool Graphics::GICascadesBlending = false; PostProcessSettings Graphics::PostProcessSettings; -bool Graphics::SpreadWorkload = false; +bool Graphics::SpreadWorkload = true; #if GRAPHICS_API_NULL extern GPUDevice* CreateGPUDeviceNull(); From 4715492f0cb179308da2ac705c14d957ecc68457 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 15:50:16 +0200 Subject: [PATCH 242/292] Update shaders --- Content/Shaders/GI/DDGI.flax | 4 ++-- Content/Shaders/GI/GlobalSurfaceAtlas.flax | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax index 34663c561..72e02aebb 100644 --- a/Content/Shaders/GI/DDGI.flax +++ b/Content/Shaders/GI/DDGI.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:12a8d1c9df11e370b205c0ea2fc9d9dd77f94c90e6b771658badb3a9327fab8d -size 33095 +oid sha256:571c028a427297ae250a600539291ffb5e830c81cd6b4998f2c713108fa2fb27 +size 32878 diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax index e0ba99179..184f9f0d2 100644 --- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax +++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:090ee68f80c28a0eb800d4e23da3ace1c154d649e0501717c5bb87655c9a9669 -size 12337 +oid sha256:92cb1700c5bff4146d16c277112d741606c6292e50ed52b0ed27108da8976b00 +size 13194 From 305bf653cf7f131a4e1d4600b8a01064564c6b5a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 16:39:37 +0200 Subject: [PATCH 243/292] Add todo comments for future improvements to DDGI --- .../Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp | 2 ++ Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index eb2c9fd33..1169d7ff6 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -404,6 +404,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont desc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess; INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, Math::Min(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT)); INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY); + // TODO: add BC6H compression to probes data (https://github.com/knarkowicz/GPURealTimeBC6H) INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * 
(DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); #if DDGI_DEBUG_INSTABILITY @@ -580,6 +581,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes in batches so ProbesTrace texture can be smaller uint32 arg = 0; + // TODO: use rays allocator to dispatch raytracing in packets (eg. 8 threads in a group instead of hardcoded limit) for (int32 probesOffset = 0; probesOffset < probesCountCascade; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT) { Data1 data; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 34b745e59..cfffe8d93 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -729,6 +729,10 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co auto desc = GPUTextureDescription::New2D(resolution, resolution, PixelFormat::Unknown); uint64 memUsage = 0; // TODO: try using BC4/BC5/BC7 block compression for Surface Atlas (eg. 
for Tiles material properties) + // TODO: pre-multiply AO into Color of surface so AtlasGBuffer0 can be RGB only (useful for block compression) + // TODO: Emissive into Diffuse and compress via BC6H (then remove AtlasEmissive) - if len(Color) > 1 then pixel emits light, otherwise it's normal + // TODO: store Normals in tile local-space (xy only, z can be reconstructed), then block compress, if ShadingModel==SHADING_MODEL_UNLIT then write empty normal + // TODO: pack depth in 0-255 range and block compress (should be enough quality for per-object tile trace) #define INIT_ATLAS_TEXTURE(texture, format) desc.Format = format; surfaceAtlasData.texture = RenderTargetPool::Get(desc); if (!surfaceAtlasData.texture) return true; memUsage += surfaceAtlasData.texture->GetMemoryUsage(); RENDER_TARGET_POOL_SET_NAME(surfaceAtlasData.texture, "GlobalSurfaceAtlas." #texture); INIT_ATLAS_TEXTURE(AtlasEmissive, PixelFormat::R11G11B10_Float); INIT_ATLAS_TEXTURE(AtlasGBuffer0, GBUFFER0_FORMAT); From faf7a28cf263168193001fa5795cf76d68a171d6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 16:48:46 +0200 Subject: [PATCH 244/292] Fix compilation after merge #2714 --- Source/Engine/Audio/AudioSource.h | 2 +- Source/Engine/Content/Asset.h | 3 +-- Source/Engine/Utilities/RectPack.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Audio/AudioSource.h b/Source/Engine/Audio/AudioSource.h index d55761cb7..f292c9f25 100644 --- a/Source/Engine/Audio/AudioSource.h +++ b/Source/Engine/Audio/AudioSource.h @@ -273,7 +273,7 @@ public: /// Determines whether this audio source started playing audio via audio backend. After audio play it may wait for audio clip data to be loaded or streamed. 
/// [Deprecated in v1.9] /// - API_PROPERTY() DEPRECATED FORCE_INLINE bool IsActuallyPlayingSth() const + API_PROPERTY() DEPRECATED("Use IsActuallyPlaying instead.") FORCE_INLINE bool IsActuallyPlayingSth() const { return _isActuallyPlayingSth; } diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index 125abdda2..de79a640b 100644 --- a/Source/Engine/Content/Asset.h +++ b/Source/Engine/Content/Asset.h @@ -184,8 +184,7 @@ public: /// The output collection of the asset ids referenced by this asset. /// The output list of file paths referenced by this asset. Files might come from project Content folder (relative path is preserved in cooked game), or external location (copied into Content root folder of cooked game). virtual void GetReferences(Array& assets, Array& files) const; - - + /// /// Gets the asset references. Supported only in Editor. /// [Deprecated in v1.9] diff --git a/Source/Engine/Utilities/RectPack.h b/Source/Engine/Utilities/RectPack.h index a187e4c3b..8217effe4 100644 --- a/Source/Engine/Utilities/RectPack.h +++ b/Source/Engine/Utilities/RectPack.h @@ -221,7 +221,7 @@ public: /// /// Implementation based on https://blackpawn.com/texts/lightmaps/default.html. template -struct DEPRECATED RectPack +struct DEPRECATED("Use RectPackNode and RectPackAtlas instead.") RectPack { // Left and Right slots allow to easily move around the atlas like in a tree structure. 
NodeType* Left; From 34d4904b2eab60bbc3fe36da317010296a93ef45 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 23:28:48 +0200 Subject: [PATCH 245/292] Update deps binaries #2624 --- Source/Editor/Cooker/Steps/DeployDataStep.cpp | 4 ++ .../Binaries/ThirdParty/ARM64/libfreetype.a | 4 +- .../Binaries/ThirdParty/ARM64/libopenal.a | 4 +- .../Binaries/ThirdParty/x64/DirectXMesh.lib | 4 +- .../Binaries/ThirdParty/x64/DirectXMesh.pdb | 4 +- .../Binaries/ThirdParty/x64/DirectXTex.lib | 4 +- .../Binaries/ThirdParty/x64/DirectXTex.pdb | 4 +- .../ThirdParty/x64/GenericCodeGen.lib | 4 +- .../Windows/Binaries/ThirdParty/x64/HLSL.lib | 4 +- .../ThirdParty/x64/MachineIndependent.lib | 4 +- .../Binaries/ThirdParty/x64/NvCloth_x64.lib | 4 +- .../Binaries/ThirdParty/x64/NvCloth_x64.pdb | 4 +- .../Binaries/ThirdParty/x64/OGLCompiler.lib | 4 +- .../Binaries/ThirdParty/x64/OSDependent.lib | 4 +- .../Binaries/ThirdParty/x64/OpenAL32.dll | 4 +- .../Binaries/ThirdParty/x64/OpenAL32.lib | 4 +- .../ThirdParty/x64/SPIRV-Tools-opt.lib | 4 +- .../Binaries/ThirdParty/x64/SPIRV-Tools.lib | 4 +- .../Windows/Binaries/ThirdParty/x64/SPIRV.lib | 4 +- .../Binaries/ThirdParty/x64/UVAtlas.lib | 4 +- .../Binaries/ThirdParty/x64/UVAtlas.pdb | 4 +- .../ThirdParty/x64/assimp-vc140-md.dll | 2 +- .../ThirdParty/x64/assimp-vc140-md.lib | 2 +- .../Binaries/ThirdParty/x64/astcenc.lib | 4 +- .../ThirdParty/x64/d3dcompiler_47.dll | 4 +- .../ThirdParty/x64/d3dcompiler_47.lib | 3 ++ .../Binaries/ThirdParty/x64/dxcompiler.dll | 4 +- .../Binaries/ThirdParty/x64/dxcompiler.lib | 4 +- .../Windows/Binaries/ThirdParty/x64/dxil.dll | 4 +- .../Binaries/ThirdParty/x64/freetype.lib | 4 +- .../Binaries/ThirdParty/x64/freetype.pdb | 4 +- .../Binaries/ThirdParty/x64/glslang.lib | 4 +- .../Binaries/ThirdParty/x64/libcurl.lib | 4 +- .../Binaries/ThirdParty/x64/libogg_static.lib | 4 +- .../ThirdParty/x64/libvorbis_static.lib | 4 +- .../ThirdParty/x64/libvorbisfile_static.lib | 4 +- 
Source/ThirdParty/freetype/config/ftoption.h | 2 +- Source/ThirdParty/freetype/config/ftstdlib.h | 2 +- .../Flax.Build/Deps/Dependencies/Assimp.cs | 1 + .../Deps/Dependencies/DirectXMesh.cs | 1 + .../Dependencies/DirectXShaderCompiler.cs | 1 + .../Deps/Dependencies/DirectXTex.cs | 2 +- .../Deps/Dependencies/NewtonsoftJson.cs | 3 +- .../Flax.Build/Deps/Dependencies/NvCloth.cs | 1 + .../Flax.Build/Deps/Dependencies/OpenAL.cs | 1 + .../Flax.Build/Deps/Dependencies/PhysX.cs | 1 + .../Flax.Build/Deps/Dependencies/UVAtlas.cs | 1 + .../Flax.Build/Deps/Dependencies/astc.cs | 1 + .../Flax.Build/Deps/Dependencies/curl.cs | 1 + .../Flax.Build/Deps/Dependencies/dbghelp.cs | 1 + .../Flax.Build/Deps/Dependencies/freetype.cs | 48 +++++-------------- .../Flax.Build/Deps/Dependencies/glslang.cs | 1 + .../Flax.Build/Deps/Dependencies/nethost.cs | 1 + .../Flax.Build/Deps/Dependencies/vorbis.cs | 40 +++++----------- Source/Tools/Flax.Build/Deps/Dependency.cs | 9 ++++ 55 files changed, 122 insertions(+), 135 deletions(-) create mode 100644 Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.lib diff --git a/Source/Editor/Cooker/Steps/DeployDataStep.cpp b/Source/Editor/Cooker/Steps/DeployDataStep.cpp index 551002e0b..fefe48b97 100644 --- a/Source/Editor/Cooker/Steps/DeployDataStep.cpp +++ b/Source/Editor/Cooker/Steps/DeployDataStep.cpp @@ -160,6 +160,9 @@ bool DeployDataStep::Perform(CookingData& data) } else { +#if 1 + failed |= EditorUtilities::CopyDirectoryIfNewer(dstDotnet / TEXT("host/fxr") / version, srcDotnet / TEXT("host/fxr") / version, true); +#else // TODO: hostfxr for target platform should be copied from nuget package location: microsoft.netcore.app.runtime.//runtimes//native/hostfxr.dll String dstHostfxr = dstDotnet / TEXT("host/fxr") / version; if (!FileSystem::DirectoryExists(dstHostfxr)) @@ -174,6 +177,7 @@ bool DeployDataStep::Perform(CookingData& data) failed |= FileSystem::CopyFile(dstHostfxr / TEXT("hostfxr.dylib"), depsRoot / TEXT("ThirdParty") / 
archName / TEXT("hostfxr.dylib")); else failed |= true; +#endif failed |= EditorUtilities::CopyDirectoryIfNewer(dstDotnet / TEXT("shared/Microsoft.NETCore.App") / version, srcDotnet / TEXT("shared/Microsoft.NETCore.App") / version, true); } if (failed) diff --git a/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libfreetype.a b/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libfreetype.a index 523e33f6b..a4a71e18e 100644 --- a/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libfreetype.a +++ b/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libfreetype.a @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8492361a717bebf5103846b6cc0e4aaf4f1f701c7571d6faa02d4322f97fe7b3 -size 8414046 +oid sha256:0e136a228056fd02a9074bd5bbdd09cf98c53977713cb3531971b4ea412d87a2 +size 7548846 diff --git a/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libopenal.a b/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libopenal.a index dcf8e8790..5c5eebbb9 100644 --- a/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libopenal.a +++ b/Source/Platforms/Android/Binaries/ThirdParty/ARM64/libopenal.a @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78414594b65e5a99c7b691ffc11995491513cf6c65baaf038191de3c1ec0efc5 -size 5546306 +oid sha256:7179291d2efd8d3c8c8cd494a2c65e5b506e52929b962223f0af9faf2ba09382 +size 20142390 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.lib index fc3085c6d..cfac926fc 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e56897bc6206db416ee867f8ee530ac0945bb023b2553d3ff53c6e29b2e59971 -size 2125520 +oid sha256:dd60a9aa3ffb9fb7f80594f2e44f4cc82f72c4b31bf48ce6c228536c5d2e8221 +size 2463574 diff --git 
a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.pdb b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.pdb index 17acb8c41..21ba0d376 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.pdb +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXMesh.pdb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2061568c9fd443cc83e8125a2a2134bc3a5c75a4c306de8a447e599154fa6ffc -size 2453504 +oid sha256:607ccf48338273271bce83c8560c1d4f0e4c5a133a29c9bb4b679ad367017363 +size 3010560 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.lib index 069e6e78c..eed0c3ef5 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ece9e97efd754246256676994eed54f54e920a25584d00fb9a3fa37c1f263e7 -size 3256698 +oid sha256:7aca62bb2e5c0fd736b99178939e6097f2572ba77b3b4494036e7df7eb2f6cd5 +size 3420224 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.pdb b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.pdb index 6e277d305..ed317b55e 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.pdb +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/DirectXTex.pdb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ba34f8e37d2fab479cacb62b0b7969de89a19e777919e754a78868402907b34 -size 2142208 +oid sha256:d133185ff22be11206a8c87f21b8a1c249def8c820154b550709aa6abbf430b3 +size 2920448 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/GenericCodeGen.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/GenericCodeGen.lib index 9efb68a21..f5a6f683e 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/GenericCodeGen.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/GenericCodeGen.lib 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e71d3e62e5e212ca116559f6a16b290e056f2efe2aa4839061d37f2c5b2073f3 -size 53068 +oid sha256:578911b0b288ca4bdb0a387f7a3e5847d58d6e408841cad29b60658ddf721ffb +size 57944 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/HLSL.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/HLSL.lib index 5b530e98c..e5ca8d968 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/HLSL.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/HLSL.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:deb861040077875fa04d346debe3846b076c12c9b7269bdff684bdff5e465ab5 -size 710 +oid sha256:fa8fbafa1c7884457fee0e70f293d359606fa52419360919cd7d3ace46f34399 +size 844 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/MachineIndependent.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/MachineIndependent.lib index aa2780779..b1cdfdcfe 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/MachineIndependent.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/MachineIndependent.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e95332894a2625a2426375632f53ad97981d66d3e780bd4d48c53c1a2b6e728f -size 15422382 +oid sha256:77721d52cfa53dd120dc977f24bbed3f2af122f9b6493eb969bca624430f7731 +size 10053834 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.lib index d5ddd1770..4369c70f9 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e92cff7b740448dcb882892a8ff1e7c360cccad07c8333af9c7232e9266482ba -size 2425458 +oid sha256:81baf778c669c5bbb6c7a861e94addca0c7430be38001edc6a489852d244fc74 +size 2624120 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.pdb 
b/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.pdb index e3ec6d317..c7332dd43 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.pdb +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/NvCloth_x64.pdb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f322c45592d9b58fe8ec9d75c2c9fbb523e752be768fcbc9f867ab9eb12f527f -size 520192 +oid sha256:70b17ba95d592f97843ac5b63f8be08025ef684c4ae10f9f4c1564d165a170ee +size 512000 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OGLCompiler.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OGLCompiler.lib index b47d7a75d..2879e6e42 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OGLCompiler.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OGLCompiler.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4dcd1ddbdae26bef5dd7c5427276848ad8491ca490acc824c0d3ca3281b93a8f -size 14066 +oid sha256:a8992e2ce158003fb41d9eab42636deec54e403cccc2ab17ae70109b5b4c976c +size 14444 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OSDependent.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OSDependent.lib index 763792a5e..f9c7c3af6 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OSDependent.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OSDependent.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da8541f83f563835308f1b6f68d0ae514d55bb9c5ddad9af573387e6829274fa -size 6644 +oid sha256:b041bafda9771de692cefc42220e69ca4ec06ac315cdb85f0d92296b5514d71e +size 6560 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.dll b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.dll index bfcf466aa..7ef160291 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.dll +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.dll @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:4e2d830d2a3aa85bf17033aa50745e7c1a8bb11ae350fe14fedd2a1292a6e0e8 -size 2212352 +oid sha256:d4bc89abf5a72c9c3dede29d1cbe5ced923d99e99ba82b76c7b25e940e6f4f25 +size 1111552 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.lib index cfdf86a25..3f70d273b 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/OpenAL32.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:868b0298d15dfd4864c12400ffd1b8e530166ab3194e11c4532e1383c7c7c816 -size 98040 +oid sha256:e7636bd763b099f84d2015fa89635d27d4298b2c8c38ce0335b545ae9e2b4db4 +size 37562 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools-opt.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools-opt.lib index b8a697dfe..a86c4aaaf 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools-opt.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools-opt.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3f6cc77f160e73a2d6052b021d632e0d7ba90e3f2d3fb0b6d7d199bbab45baa -size 68688710 +oid sha256:76cee84936f7acb7499b8ceed8dee11bc6710aea5e4715ff8165c104985bb621 +size 34007514 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools.lib index ede2192e2..f3bf0faa7 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV-Tools.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de981934f631e878883f7370e83ba6fd62bce897fe2954b975a5752355d75af7 -size 16890152 +oid sha256:296d39dea72b89b718dc2a04eb8ab2341542f1e8a52bc8c0359107d0f6209e98 +size 10363024 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV.lib 
index 8ad3c9f0b..f7c6309af 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/SPIRV.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f8b42075ad11e2a1cb66669651f1bdf3ba5ad8ef2ba20e286c286ef6352d09b -size 4767820 +oid sha256:d3eab7b189fdb4335525a578585504e68661e1287b4222d4c12de441878621b9 +size 2933362 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.lib index f778d533b..4a1d233f2 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e41b5c122652ffb6d27323fb8415c18140d010056f2993a8b5f854bbb45940d -size 6030408 +oid sha256:e5e142c625149d81d229dc3e740bdaa509aded329c1e4541a17fd761c2c6f2e0 +size 6200304 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.pdb b/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.pdb index 6be334bbc..d06db83f4 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.pdb +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/UVAtlas.pdb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a9287a7dcd3a43e50f03672d9748dbad4eac4e3c06ff6bde37e29b3fec00776 -size 2183168 +oid sha256:0a1c7296511baeec0b9842d7c427461e8f183bcef1f8038029184117f053a2ae +size 2248704 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.dll b/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.dll index 22b96ca9b..0fa3e1ad4 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.dll +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.dll @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b7a3131c44b92dcfd4e5b5ce9dbdda056730fcfff2221d1db1f12594d6f5536 +oid 
sha256:8cdddc7c402d11e4336f5a8ee8c7a958076d86f20232246f574d1d37ff1e04de size 2191872 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.lib index 42822d2bd..82446c4d2 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/assimp-vc140-md.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:839ac268010c3b41b24a2511c36372cb4249d7171a5119e2f54b84a49ad56ba5 +oid sha256:664cfbb4c622b15fd8de13fc359eb16bb6fe92d473ab8bcd384444103684f15c size 364390 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/astcenc.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/astcenc.lib index d89395539..435dc8244 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/astcenc.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/astcenc.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38500ac222bd4132dd81ea03016462add2b0c8188b7cb7101223d19bf209cd01 -size 536548 +oid sha256:69f7a54d9331e184153cfce171d29e046e43590cb9374f5e1f370e977f61e551 +size 538106 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.dll b/Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.dll index 56e413237..0b80ba584 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.dll +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.dll @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1b50f7075e2ee47c8d184e38862a83b880a90fd2ff00da7bb1eb61e24e00f5d -size 4467904 +oid sha256:352c4ce7151fe9bc37dd9ceddb958a5ee75851c57df961f6381ec552affba198 +size 4916856 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.lib new file mode 100644 index 000000000..95e6bc166 --- /dev/null +++ 
b/Source/Platforms/Windows/Binaries/ThirdParty/x64/d3dcompiler_47.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be5917a82c8dc43cad798f7a6c11b1dd885eec8ee9675057804ec367f322e7ad +size 8314 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.dll b/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.dll index 9947f63a9..4e56df12a 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.dll +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.dll @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0849eef97571a3170e20e3eb697f3467d92f98f627386ed9dbfed23a146528b -size 20067744 +oid sha256:7f504ba5f5173c50ba4ce2771c3d32618f886e1c990960ba706f58872224541b +size 14722664 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.lib index 4a2878f4d..c77fc6661 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxcompiler.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a7281c5a0091eb1a7f6e543cd4eb57547c0a17c53ce2d5cc82ed17ac195a3e0 -size 2002 +oid sha256:17dfb5b37d5aec03943083c04899b0815556d7f10f020fa8f9f9061c971dc010 +size 2086 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxil.dll b/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxil.dll index 01d720b18..ec6f74a5b 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxil.dll +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/dxil.dll @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:caaeba17de17643fc081f805942702bd62b5859dcbdca0358ace499085606e12 -size 1526184 +oid sha256:6c37738cd2fb4d659b0f49dead8311ae75c93b8c6602b991c00e070f7be20bc1 +size 1508472 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.lib 
b/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.lib index 08535f7a6..84f5f6e99 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a24a1323cef66bdb59239e9d1c73b8e502440be5faf29688b817921c1f265600 -size 3944178 +oid sha256:fdc41f7323658fcd3077c951b500749623ba6ee492576c67e6ab836da7f7a6ec +size 4603860 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.pdb b/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.pdb index 3ed62607c..8093eb6a4 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.pdb +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/freetype.pdb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f9cf7608a0ec97fa19018c7e0874c292a68b82bde4a22745b8c9139a6699b22 -size 471040 +oid sha256:ca629434ded3ee2b7de460f6e084f37cb5840b5b086507160ed2fbeb35e71a97 +size 528384 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/glslang.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/glslang.lib index f25c07bbc..102542d3f 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/glslang.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/glslang.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e99cb9070bfebd7485e318e272ff250d38dbcb654340e28885c8dafc55c464d -size 164520 +oid sha256:2fce38e2990b7b8e69cd769b920f7ab34e91a3ab8e447c47e53a3d69e5533d32 +size 166612 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libcurl.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libcurl.lib index 76455890e..eebbaecdb 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libcurl.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libcurl.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:237b485c9ff8bf6b2958d8d8e228d07af6949f6675675be61f45aae28a39e1d5 -size 3766516 +oid sha256:807440d52892d2eaed6179330899572ac57f54f0b68dfa6449191a682f16f025 +size 1748384 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libogg_static.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libogg_static.lib index 139c0d9ce..77b0ff1f9 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libogg_static.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libogg_static.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d20bd882a7f2ea32089bebaac06f7969caa6d050fcd4079807811bf18e6a07e -size 42214 +oid sha256:67a7d3c72c5235416601496666ac0c1143fb94169346bb8f236ce027fa753b81 +size 50420 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbis_static.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbis_static.lib index 448ad4be7..3b968861f 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbis_static.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbis_static.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ccc266baecbcf38dca35df4c8e71eacb0cfb1fd2fefdfc5e73ae172981c1b80 -size 996624 +oid sha256:a31f2c6cbc7bbd032cde6aab897fc6e89ae7925c719fff390ea051954d3d70c4 +size 1063506 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbisfile_static.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbisfile_static.lib index 28c18a099..44fbdc45c 100644 --- a/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbisfile_static.lib +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/libvorbisfile_static.lib @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:945bf05bc9c01747763505c740b3cce3d52e31319d9f27c1d89d527764b59b38 -size 68374 +oid sha256:3c15a39c5985202ed9acc9a020788502113cf84c6e2b2dde09defae2d40abc0e +size 64650 diff --git a/Source/ThirdParty/freetype/config/ftoption.h 
b/Source/ThirdParty/freetype/config/ftoption.h index 1976b33af..2c74a1680 100644 --- a/Source/ThirdParty/freetype/config/ftoption.h +++ b/Source/ThirdParty/freetype/config/ftoption.h @@ -109,7 +109,7 @@ FT_BEGIN_HEADER * ``` * */ -#define FT_CONFIG_OPTION_ENVIRONMENT_PROPERTIES + /************************************************************************** diff --git a/Source/ThirdParty/freetype/config/ftstdlib.h b/Source/ThirdParty/freetype/config/ftstdlib.h index f65148a90..a3aac36d8 100644 --- a/Source/ThirdParty/freetype/config/ftstdlib.h +++ b/Source/ThirdParty/freetype/config/ftstdlib.h @@ -153,7 +153,7 @@ #define ft_strtol strtol -#define ft_getenv getenv +char* ft_getenv(const char* n); /************************************************************************** diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs index 678d50c62..3cc1bacf0 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs @@ -91,6 +91,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs index f7c19c437..c47881efe 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs @@ -46,6 +46,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs index df7d49a81..028496ef6 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs +++ 
b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs @@ -36,6 +36,7 @@ namespace Flax.Deps.Dependencies { foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs index 384cb25e8..c407cfbe8 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs @@ -22,7 +22,6 @@ namespace Flax.Deps.Dependencies return new[] { TargetPlatform.Windows, - TargetPlatform.UWP, TargetPlatform.XboxOne, TargetPlatform.XboxScarlett, }; @@ -48,6 +47,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs b/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs index 048fa9ea1..36d33e755 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs @@ -22,7 +22,6 @@ namespace Flax.Deps.Dependencies return new[] { TargetPlatform.Windows, - TargetPlatform.UWP, TargetPlatform.Linux, TargetPlatform.XboxOne, TargetPlatform.XboxScarlett, @@ -60,6 +59,7 @@ namespace Flax.Deps.Dependencies Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, buildPlatform); foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: @@ -84,6 +84,7 @@ namespace Flax.Deps.Dependencies Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, buildPlatform); foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.UWP: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs b/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs index 
80852fb61..7be41423f 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs @@ -61,6 +61,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs index eeb87586e..b6b5fa83d 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs @@ -54,6 +54,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs index f59fac426..9abd4121c 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs @@ -384,6 +384,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs b/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs index 1f8f046f0..9a32b979c 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs @@ -47,6 +47,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs b/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs index dcf24c3f3..c7e5b443b 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs @@ -45,6 +45,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) 
{ + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs index 8492cbae7..37d1351a4 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs @@ -69,6 +69,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs b/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs index 318c31239..35f8545ee 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs @@ -35,6 +35,7 @@ namespace Flax.Deps.Dependencies { foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs index 4d0d0857e..03918ae97 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs @@ -26,7 +26,6 @@ namespace Flax.Deps.Dependencies return new[] { TargetPlatform.Windows, - TargetPlatform.UWP, TargetPlatform.XboxOne, TargetPlatform.PS4, TargetPlatform.PS5, @@ -50,14 +49,6 @@ namespace Flax.Deps.Dependencies } } - private void PatchWindowsTargetPlatformVersion(string vcxprojPath, string vcxprojContents, string windowsTargetPlatformVersion, string platformToolset) - { - // Fix the MSVC project settings for Windows - var contents = vcxprojContents.Replace("$(DefaultPlatformToolset)", string.Format("{0}", platformToolset)); - contents = contents.Replace("", string.Format("{0}", windowsTargetPlatformVersion)); - File.WriteAllText(vcxprojPath, contents); - } - /// public override void Build(BuildOptions options) { @@ -92,9 +83,18 @@ 
namespace Flax.Deps.Dependencies var libraryFileName = "libfreetype.a"; vcxprojContents = vcxprojContents.Replace("MultiThreaded", "MultiThreadedDLL"); vcxprojContents = vcxprojContents.Replace("MultiThreadedDebug", "MultiThreadedDebugDLL"); + vcxprojContents = vcxprojContents.Replace("<v142", "v143"); + Utilities.ReplaceInFile(Path.Combine(root, "include", "freetype", "config", "ftoption.h"), "#define FT_CONFIG_OPTION_ENVIRONMENT_PROPERTIES", ""); + var msvcProps = new Dictionary + { + { "WindowsTargetPlatformVersion", "10.0" }, + { "PlatformToolset", "v143" }, + //{ "RuntimeLibrary", "MultiThreadedDLL" } + }; foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: @@ -105,31 +105,13 @@ namespace Flax.Deps.Dependencies // Build for Windows foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) { - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, architecture.ToString(), - new Dictionary() { - { "WindowsTargetPlatformVersion", "10.0" }, - { "PlatformToolset", "v143" }, - //{ "RuntimeLibrary", "MultiThreadedDLL" } - }); + Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, architecture.ToString(), msvcProps); var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var filename in binariesToCopyMsvc) Utilities.FileCopy(Path.Combine(root, "objs", architecture.ToString(), configurationMsvc, filename), Path.Combine(depsFolder, filename)); } break; } - case TargetPlatform.UWP: - { - // Fix the MSVC project settings for UWP - PatchWindowsTargetPlatformVersion(vcxprojPath, vcxprojContents, "10.0.17763.0", "v141"); - - // Build for UWP x64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var filename in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "objs", "x64", 
configurationMsvc, filename), Path.Combine(depsFolder, filename)); - - break; - } case TargetPlatform.Linux: { var envVars = new Dictionary @@ -198,11 +180,8 @@ namespace Flax.Deps.Dependencies } case TargetPlatform.XboxOne: { - // Fix the MSVC project settings for Xbox One - PatchWindowsTargetPlatformVersion(vcxprojPath, vcxprojContents, "10.0.19041.0", "v142"); - // Build for Xbox One x64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64"); + Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64", msvcProps); var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); foreach (var filename in binariesToCopyMsvc) Utilities.FileCopy(Path.Combine(root, "objs", "x64", configurationMsvc, filename), Path.Combine(depsFolder, filename)); @@ -211,11 +190,8 @@ namespace Flax.Deps.Dependencies } case TargetPlatform.XboxScarlett: { - // Fix the MSVC project settings for Xbox Scarlett - PatchWindowsTargetPlatformVersion(vcxprojPath, vcxprojContents, "10.0.19041.0", "v142"); - // Build for Xbox Scarlett - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64"); + Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64", msvcProps); var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); foreach (var filename in binariesToCopyMsvc) Utilities.FileCopy(Path.Combine(root, "objs", "x64", configurationMsvc, filename), Path.Combine(depsFolder, filename)); diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs b/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs index 813e0f016..bcf217169 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs @@ -56,6 +56,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git 
a/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs b/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs index 090edf4be..b6df212bc 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs @@ -324,6 +324,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); var platformData = Path.Combine(GetBinariesFolder(options, platform), "Data", "nethost"); if (Directory.Exists(platformData)) Utilities.DirectoryCopy(platformData, root, true, true); diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs index dbf5d6bc6..1fcd5bba5 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs @@ -26,7 +26,6 @@ namespace Flax.Deps.Dependencies return new[] { TargetPlatform.Windows, - TargetPlatform.UWP, TargetPlatform.XboxOne, TargetPlatform.PS4, TargetPlatform.PS5, @@ -74,15 +73,18 @@ namespace Flax.Deps.Dependencies new Binary("libvorbis_static.lib", "libvorbis"), new Binary("libvorbisfile_static.lib", "libvorbisfile"), }; + private (string, string)[] vorbisBinariesToCopyWindowsCmake = { ("vorbis.lib", "libvorbis_static.lib"), ("vorbisfile.lib", "libvorbisfile_static.lib"), }; + private Binary[] oggBinariesToCopyWindows = { new Binary("libogg_static.lib", "ogg"), }; + private (string, string)[] oggBinariesToCopyWindowsCmake = { ("ogg.lib", "libogg_static.lib"), @@ -198,27 +200,6 @@ namespace Flax.Deps.Dependencies binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; } - case TargetPlatform.UWP: - { - buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); - vcxprojPaths = vcxprojPathsWindows; - PatchWindowsTargetPlatformVersion("10.0.17763.0", "v141"); - switch (architecture) - { - case TargetArchitecture.x86: - 
buildPlatform = "Win32"; - break; - case TargetArchitecture.x64: - buildPlatform = "x64"; - break; - case TargetArchitecture.ARM: - buildPlatform = "ARM"; - break; - default: throw new InvalidArchitectureException(architecture); - } - binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); - break; - } case TargetPlatform.PS4: { buildDir = Path.Combine(rootMsvcLib, "PS4"); @@ -234,7 +215,7 @@ namespace Flax.Deps.Dependencies Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "vorbis"), buildDir, true, true); Utilities.FileCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg", "ogg", "config_types.h"), - Path.Combine(root, "..", "ogg", "include", "ogg", "config_types.h")); + Path.Combine(root, "libogg", "include", "ogg", "config_types.h")); binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; } @@ -255,7 +236,7 @@ namespace Flax.Deps.Dependencies buildDir, true, true); Utilities.FileCopy( Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg", "ogg", "config_types.h"), - Path.Combine(root, "..", "ogg", "include", "ogg", "config_types.h")); + Path.Combine(root, "libogg", "include", "ogg", "config_types.h")); binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; } @@ -263,14 +244,14 @@ namespace Flax.Deps.Dependencies buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); vcxprojPaths = vcxprojPathsWindows; buildPlatform = "x64"; - PatchWindowsTargetPlatformVersion("10.0.19041.0", "v142"); + PatchWindowsTargetPlatformVersion("10.0", "v143"); binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, 
configurationMsvc)))); break; case TargetPlatform.XboxScarlett: buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); vcxprojPaths = vcxprojPathsWindows; buildPlatform = "x64"; - PatchWindowsTargetPlatformVersion("10.0.19041.0", "v142"); + PatchWindowsTargetPlatformVersion("10.0", "v143"); binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); break; default: throw new InvalidPlatformException(platform); @@ -307,11 +288,11 @@ namespace Flax.Deps.Dependencies case TargetPlatform.Linux: ext = ".a"; break; - default: - throw new InvalidPlatformException(platform); + default: throw new InvalidPlatformException(platform); } - List<(string, string)> binariesToCopy = new List<(string, string)>(); + var binariesToCopy = new List<(string, string)>(); + // Build ogg { var solutionPath = Path.Combine(oggBuildDir, "ogg.sln"); @@ -356,6 +337,7 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { + BuildStarted(platform); switch (platform) { case TargetPlatform.Windows: diff --git a/Source/Tools/Flax.Build/Deps/Dependency.cs b/Source/Tools/Flax.Build/Deps/Dependency.cs index 5f515d33c..848f2a2a9 100644 --- a/Source/Tools/Flax.Build/Deps/Dependency.cs +++ b/Source/Tools/Flax.Build/Deps/Dependency.cs @@ -62,6 +62,15 @@ namespace Flax.Deps /// The options. public abstract void Build(BuildOptions options); + /// + /// Logs build process start. + /// + /// Target platform. + protected void BuildStarted(TargetPlatform platform) + { + Log.Info($"Building {GetType().Name} for {platform}"); + } + /// /// Gets the dependency third-party packages binaries folder. 
/// From 51f30958cd04aa678116b73253b5d4d7ee0cd29c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 24 Jul 2024 23:29:38 +0200 Subject: [PATCH 246/292] Revert "Add `timeBeginPeriod(1)` on `Win32` platforms to improve timer precision" This reverts commit dbda31d570acc73a23ff16748ba7ace2d7c9334d. --- Source/Engine/Platform/Win32/Win32Platform.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/Source/Engine/Platform/Win32/Win32Platform.cpp b/Source/Engine/Platform/Win32/Win32Platform.cpp index bbc8c4ba1..a9aa784c2 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.cpp +++ b/Source/Engine/Platform/Win32/Win32Platform.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #pragma comment(lib, "Iphlpapi.lib") static_assert(sizeof(int32) == sizeof(long), "Invalid long size for Interlocked and Atomic operations in Win32Platform."); @@ -67,7 +66,6 @@ bool Win32Platform::Init() return true; // Init timing - timeBeginPeriod(1); LARGE_INTEGER frequency; const auto freqResult = QueryPerformanceFrequency(&frequency); ASSERT(freqResult && frequency.QuadPart > 0); @@ -226,7 +224,6 @@ bool Win32Platform::Init() void Win32Platform::Exit() { WSACleanup(); - timeEndPeriod(1); } void Win32Platform::MemoryBarrier() From f21b259376069add6034ca44d6bd761192ff746a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 08:34:51 +0200 Subject: [PATCH 247/292] Various fixes after merge with Windows ARM pr --- .../Cooker/Platform/GDK/GDKPlatformTools.cpp | 2 +- .../DirectX/DX12/GraphicsDeviceDX12.Build.cs | 2 +- Source/Engine/Video/MF/VideoBackendMF.cpp | 2 +- Source/Shaders/Noise.hlsl | 2 +- Source/ThirdParty/ogg/config_types.h | 26 +++++++++++++++++++ .../Flax.Build/Platforms/GDK/GDKToolchain.cs | 3 ++- 6 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 Source/ThirdParty/ogg/config_types.h diff --git a/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp b/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp index b68bc1444..a78a514b4 
100644 --- a/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp +++ b/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp @@ -146,7 +146,7 @@ bool GDKPlatformTools::OnPostProcess(CookingData& data, GDKPlatformSettings* pla sb.Append(TEXT(" \n")); sb.AppendFormat(TEXT(" ProductName); sb.AppendFormat(TEXT(" PublisherDisplayName=\"{0}\"\n"), platformSettings->PublisherDisplayName.HasChars() ? platformSettings->PublisherDisplayName : gameSettings->CompanyName); - sb.AppendFormat(TEXT(" BackgroundColor=\"#{0}\"\n"), platformSettings->BackgroundColor.ToHexString()); + sb.AppendFormat(TEXT(" BackgroundColor=\"#{0}\"\n"), platformSettings->BackgroundColor.ToHexString().Left(6)); sb.AppendFormat(TEXT(" ForegroundText=\"{0}\"\n"), platformSettings->ForegroundText); sb.Append(TEXT(" Square150x150Logo=\"Assets\\Square150x150Logo.png\"\n")); sb.Append(TEXT(" Square480x480Logo=\"Assets\\Square480x480Logo.png\"\n")); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs b/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs index d9cba37ec..e27b65c35 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GraphicsDeviceDX12.Build.cs @@ -34,7 +34,7 @@ public class GraphicsDeviceDX12 : GraphicsDeviceBaseModule break; } - if (EnableWinPixEventRuntime && options.Configuration != TargetConfiguration.Release) + if (EnableWinPixEventRuntime && options.Configuration != TargetConfiguration.Release && options.Platform.Target == TargetPlatform.Windows) { options.PrivateDefinitions.Add("USE_PIX"); options.PrivateIncludePaths.Add(Path.Combine(Globals.EngineRoot, "Source/ThirdParty/WinPixEventRuntime")); diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 08676e6d8..727a94158 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -16,7 +16,7 @@ // Fix compilation 
for Windows 8.1 on the latest Windows SDK typedef enum _MFVideoSphericalFormat { } MFVideoSphericalFormat; #endif -#ifndef MF_SOURCE_READER_CURRENT_TYPE_INDEX +#if !defined(MF_SOURCE_READER_CURRENT_TYPE_INDEX) && !defined(PLATFORM_GDK) // Fix compilation for Windows 7 on the latest Windows SDK #define MF_SOURCE_READER_CURRENT_TYPE_INDEX 0xFFFFFFFF #endif diff --git a/Source/Shaders/Noise.hlsl b/Source/Shaders/Noise.hlsl index 8ba32c34c..59c810d78 100644 --- a/Source/Shaders/Noise.hlsl +++ b/Source/Shaders/Noise.hlsl @@ -85,7 +85,7 @@ float3 rand3dTo3d(float3 value) return float3( rand3dTo1d(value, float3(12.989, 78.233, 37.719)), rand3dTo1d(value, float3(39.346, 11.135, 83.155)), - rand3dTo1d(value, float3(73.156, 52.235, 09.151)) + rand3dTo1d(value, float3(73.156, 52.235, 9.151)) ); } diff --git a/Source/ThirdParty/ogg/config_types.h b/Source/ThirdParty/ogg/config_types.h new file mode 100644 index 000000000..f586c26a0 --- /dev/null +++ b/Source/ThirdParty/ogg/config_types.h @@ -0,0 +1,26 @@ +#ifndef __CONFIG_TYPES_H__ +#define __CONFIG_TYPES_H__ + +/* these are filled in by configure */ +#define INCLUDE_INTTYPES_H 1 +#define INCLUDE_STDINT_H 1 +#define INCLUDE_SYS_TYPES_H 1 + +#if INCLUDE_INTTYPES_H +# include +#endif +#if INCLUDE_STDINT_H +# include +#endif +#if INCLUDE_SYS_TYPES_H +# include +#endif + +typedef int16_t ogg_int16_t; +typedef uint16_t ogg_uint16_t; +typedef int32_t ogg_int32_t; +typedef uint32_t ogg_uint32_t; +typedef int64_t ogg_int64_t; +typedef uint64_t ogg_uint64_t; + +#endif diff --git a/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs b/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs index c8e0dff11..d8f9cf9dc 100644 --- a/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/GDK/GDKToolchain.cs @@ -55,7 +55,8 @@ namespace Flax.Build.Platforms var paths = Directory.GetDirectories(redistToolsPath, name.Substring(0, 2) + "*"); if (paths.Length == 0) throw new Exception($"Failed to find MSVC 
redistribute binaries for toolset '{Toolset}' inside folder '{toolsPath}'"); - redistToolsPath = Path.Combine(paths[0], "x64", "Microsoft.VC" + (int)Toolset + ".CRT"); + var crtToolset = Toolset > WindowsPlatformToolset.v143 ? WindowsPlatformToolset.v143 : Toolset; + redistToolsPath = Path.Combine(paths[0], "x64", "Microsoft.VC" + (int)crtToolset + ".CRT"); redistToolsPath = Utilities.RemovePathRelativeParts(redistToolsPath); options.DependencyFiles.Add(Path.Combine(redistToolsPath, "concrt140.dll")); options.DependencyFiles.Add(Path.Combine(redistToolsPath, "msvcp140.dll")); From 52b00644c67fd636b04a315681d9f2b98173612c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 5 Aug 2024 20:33:46 +0200 Subject: [PATCH 248/292] Update after merge with master --- Content/Editor/Camera/M_Camera.flax | 2 +- Content/Editor/CubeTexturePreviewMaterial.flax | 2 +- Content/Editor/DebugMaterials/DDGIDebugProbes.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Decal.flax | 2 +- Content/Editor/DebugMaterials/SingleColor/Particle.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Surface.flax | 2 +- .../Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax | 4 ++-- Content/Editor/DebugMaterials/SingleColor/Terrain.flax | 2 +- Content/Editor/DefaultFontMaterial.flax | 2 +- Content/Editor/Gizmo/FoliageBrushMaterial.flax | 4 ++-- Content/Editor/Gizmo/Material.flax | 4 ++-- Content/Editor/Gizmo/MaterialWire.flax | 4 ++-- Content/Editor/Gizmo/SelectionOutlineMaterial.flax | 2 +- Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax | 2 +- Content/Editor/Highlight Material.flax | 4 ++-- Content/Editor/Icons/IconsMaterial.flax | 4 ++-- Content/Editor/IesProfilePreviewMaterial.flax | 2 +- Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl | 4 ++-- Content/Editor/Particles/Particle Material Color.flax | 4 ++-- Content/Editor/Particles/Smoke Material.flax | 4 ++-- Content/Editor/SpriteMaterial.flax | 2 +- Content/Editor/Terrain/Circle Brush Material.flax | 2 +- 
Content/Editor/Terrain/Highlight Terrain Material.flax | 2 +- Content/Editor/TexturePreviewMaterial.flax | 2 +- Content/Editor/Wires Debug Material.flax | 4 ++-- Content/Engine/DefaultDeformableMaterial.flax | 2 +- Content/Engine/DefaultMaterial.flax | 2 +- Content/Engine/DefaultRadialMenu.flax | 2 +- Content/Engine/DefaultTerrainMaterial.flax | 2 +- Content/Engine/SingleColorMaterial.flax | 2 +- Content/Engine/SkyboxMaterial.flax | 2 +- 31 files changed, 42 insertions(+), 42 deletions(-) diff --git a/Content/Editor/Camera/M_Camera.flax b/Content/Editor/Camera/M_Camera.flax index e49e91a7e..81921e0a9 100644 --- a/Content/Editor/Camera/M_Camera.flax +++ b/Content/Editor/Camera/M_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2d30d16769e4159f2514fdd4914d9376b388905e5c3fd796693c5dfe8b94542 +oid sha256:2d2e306ad841a731dd9beced0fd653ff9649403e30545d5e31eed9b3f575513d size 28071 diff --git a/Content/Editor/CubeTexturePreviewMaterial.flax b/Content/Editor/CubeTexturePreviewMaterial.flax index d1f740849..b1dd13160 100644 --- a/Content/Editor/CubeTexturePreviewMaterial.flax +++ b/Content/Editor/CubeTexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90892654cb922466dcb29a9dc8a395bf31b7c3aed5c85872d8cec04d2a9bd3f8 +oid sha256:9618fe8e4e7673a8fe9c51393c00ff8028fd48c5433b2982d8368832f919dbbb size 29786 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index b5d7769f9..a1b73ca44 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7a6c1ffe6855457f53a86155f4331d2d269a223c16dbd1b16f44e5a5d185561 +oid sha256:2016b2a5f524d0c5610539108b925c784c7d56eb18b050f78a0805150a90a5b2 size 39019 diff --git a/Content/Editor/DebugMaterials/SingleColor/Decal.flax 
b/Content/Editor/DebugMaterials/SingleColor/Decal.flax index 559e60182..04300ddb5 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Decal.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Decal.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ccf3359fde4c572e9e0c56af2b30c00bc1f15466df87ab3f20a6680c8bfe2f69 +oid sha256:ef3e97169279f1484bdb28d913587058d526172457878f86f8c54634c4c1b1cd size 7489 diff --git a/Content/Editor/DebugMaterials/SingleColor/Particle.flax b/Content/Editor/DebugMaterials/SingleColor/Particle.flax index b3ecb5c4d..99eb3da18 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Particle.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Particle.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be34f63503926fcbf354ee89cc38e6a4b2003d90b07ffa538dd13243d9561176 -size 31664 +oid sha256:af65802e9b6437c3b3d1f0cbf0c320fde5d9774acea14bdf374a8b0dd28d5610 +size 32168 diff --git a/Content/Editor/DebugMaterials/SingleColor/Surface.flax b/Content/Editor/DebugMaterials/SingleColor/Surface.flax index 216a9ab49..569144f73 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Surface.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Surface.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ce8b9f2d3fccb2c7d2c48943953820f3ee2f89e84fd7b2631c4504d880372ef +oid sha256:e26ff8a13ba0613915fa52ede6620a922e97d8e1f1bed9fae23b446178801929 size 27967 diff --git a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax index 8f68e8d88..04eb52e4c 100644 --- a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax +++ b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33bb400d3d9fa3da0c098207204b820da8e42512fb9cea966d8eaf5533fa07f2 -size 29648 +oid sha256:74078179b7b49c50718135028852415829c2202a2f45e057407f889c0e638047 
+size 30152 diff --git a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax index 932bfd595..2625a8a6b 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82b8dd134ea46c5dab554de9a5913e8642fa94979b5ff45c08167bd6de91365d +oid sha256:07d8d36958fc592217a7999b6864bb9885e0a27d718d36d9216c16701fa124bc size 21314 diff --git a/Content/Editor/DefaultFontMaterial.flax b/Content/Editor/DefaultFontMaterial.flax index e6de70e78..3c9304b8e 100644 --- a/Content/Editor/DefaultFontMaterial.flax +++ b/Content/Editor/DefaultFontMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f50d6604365ddf117f9dee2168f9d412ccff9c78ba04a943c24d710306770600 +oid sha256:83a576dd776ba57c7d8ee5947438328acde6fab63442787d8411ee5377db0920 size 28146 diff --git a/Content/Editor/Gizmo/FoliageBrushMaterial.flax b/Content/Editor/Gizmo/FoliageBrushMaterial.flax index 16cb82140..bcc7272d5 100644 --- a/Content/Editor/Gizmo/FoliageBrushMaterial.flax +++ b/Content/Editor/Gizmo/FoliageBrushMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ffde790bdd39a3931c5154aa5588752a2cbc8f916b7c7bab490bb174427b5bf2 -size 35675 +oid sha256:59006b776f59d1445758fd5e04892583d85506bf6b7312d15613d79975886932 +size 36179 diff --git a/Content/Editor/Gizmo/Material.flax b/Content/Editor/Gizmo/Material.flax index 830bf4e46..87e7c078d 100644 --- a/Content/Editor/Gizmo/Material.flax +++ b/Content/Editor/Gizmo/Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22265e90fac50f82a7f719ecf249bf6ac3cc612dd77fd929515347ba76b07456 -size 30252 +oid sha256:ace1f3a9b078208da9a8742e2a7de74b10509abb3dcf12ddc70e4da918bbb756 +size 30756 diff --git a/Content/Editor/Gizmo/MaterialWire.flax b/Content/Editor/Gizmo/MaterialWire.flax index 
b49737868..0f93a951d 100644 --- a/Content/Editor/Gizmo/MaterialWire.flax +++ b/Content/Editor/Gizmo/MaterialWire.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de718dbc549d8777bac2c66f018c4780770bce5558701e7db81ef0b3d3ebaee1 -size 29390 +oid sha256:7fa1233fa8f24ad42190b127cb4fc9aeb5832e188e27151f95a88614881463ee +size 29894 diff --git a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax index 2cd9ae1a8..5cde31ea7 100644 --- a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax +++ b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f60b3eb8f6d717b4a09588449acd8cb2b23fbe3ff3dd9c8664048ab385b50db1 +oid sha256:c53438a299dbf8d0eced6a47477470e0263ccbe40acbaa62a5f684eea0e0f112 size 16166 diff --git a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax index 61679cd0e..d7e89b477 100644 --- a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax +++ b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22a87367d53ded86ecc91579229b66a8c70bce3e9f94f89c963a6874a251cf38 +oid sha256:9c8a8eda823e4cf72cc8889a4f448d1d25c0564131ade142c552e3dba0dc36a8 size 29080 diff --git a/Content/Editor/Highlight Material.flax b/Content/Editor/Highlight Material.flax index 038c25b2b..6a8c10a52 100644 --- a/Content/Editor/Highlight Material.flax +++ b/Content/Editor/Highlight Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:491932ca0051c107c459a8b4ebcbbef4d635dc00aac361085be7615da02f8961 -size 28045 +oid sha256:0da04f087f99fdb90a3bb4baac3400761048953d659964d108513bfa4303084a +size 28549 diff --git a/Content/Editor/Icons/IconsMaterial.flax b/Content/Editor/Icons/IconsMaterial.flax index e57e7ec56..d5d64147e 100644 --- a/Content/Editor/Icons/IconsMaterial.flax +++ 
b/Content/Editor/Icons/IconsMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:323125b078ad9429c2696ec5e1407dfecca1164744e0633d429d8d2420978625 -size 27973 +oid sha256:6733e421934f43bb8149e78b3be2b28fd14920e0580ea6333aee698cd3bbc303 +size 28477 diff --git a/Content/Editor/IesProfilePreviewMaterial.flax b/Content/Editor/IesProfilePreviewMaterial.flax index db4029597..fc0d20df9 100644 --- a/Content/Editor/IesProfilePreviewMaterial.flax +++ b/Content/Editor/IesProfilePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83e4ae5c290b1ab5490e73266489cbed02b22aa2c00b31a7bfad4bac3f0f37db +oid sha256:9c473ea456efdce9c3e916bd95c6c286a13646e413aa17f1569f7bd51c811358 size 18205 diff --git a/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl b/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl index 34201f546..193d845fc 100644 --- a/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl +++ b/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl @@ -9,8 +9,8 @@ GlobalSDFData GlobalSDF; GlobalSurfaceAtlasData GlobalSurfaceAtlas; @3// SDF Reflections: Resources -Texture3D GlobalSDFTex : register(t__SRV__); -Texture3D GlobalSDFMip : register(t__SRV__); +Texture3D GlobalSDFTex : register(t__SRV__); +Texture3D GlobalSDFMip : register(t__SRV__); ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t__SRV__); ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t__SRV__); Buffer GlobalSurfaceAtlasObjects : register(t__SRV__); diff --git a/Content/Editor/Particles/Particle Material Color.flax b/Content/Editor/Particles/Particle Material Color.flax index 7bfa49c05..45b6918dd 100644 --- a/Content/Editor/Particles/Particle Material Color.flax +++ b/Content/Editor/Particles/Particle Material Color.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84baddd25a044b1ca4079784d50a48fafbb119f95321d2e0ca7f650fd7ef8899 -size 29903 +oid 
sha256:c76a3d52b95cd64d95f3db343b05aec2a66c59f0317fb3967c50240db8af0e22 +size 30407 diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index 4700cd7ac..c32380913 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e98dd48bd4eae56eba17d3866102c7416227617b6d98ae8963f6567334dd3c58 -size 38663 +oid sha256:94d4ec85842448736c087594f30df7a5f6fe33a16c2cc59781f36adebdbc0b9a +size 39167 diff --git a/Content/Editor/SpriteMaterial.flax b/Content/Editor/SpriteMaterial.flax index ed5efae88..df5d7fd21 100644 --- a/Content/Editor/SpriteMaterial.flax +++ b/Content/Editor/SpriteMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c45f25462dbc839ec36b2cc889f30d4b000eb948a7fcf8ea8387d6e9e3df463 +oid sha256:6e76b14ab9f0e53b0d50e24b44a7d3406f156bcdca64f154f1c1d92f8efaa6cf size 29159 diff --git a/Content/Editor/Terrain/Circle Brush Material.flax b/Content/Editor/Terrain/Circle Brush Material.flax index 0853dacd5..6ed469392 100644 --- a/Content/Editor/Terrain/Circle Brush Material.flax +++ b/Content/Editor/Terrain/Circle Brush Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3421653f72b9fa3d77ded95f9b81a261cbf7bde386430c80e8f970289e466b13 +oid sha256:3399c6639c78aed32767e27cdf3c8d5866a51bab8867f3ebceb1ffe5b886debe size 27986 diff --git a/Content/Editor/Terrain/Highlight Terrain Material.flax b/Content/Editor/Terrain/Highlight Terrain Material.flax index 7937f3831..592a0c1f5 100644 --- a/Content/Editor/Terrain/Highlight Terrain Material.flax +++ b/Content/Editor/Terrain/Highlight Terrain Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7063c30e4ed481ec64ca97ada1d41e8c5c33bc2394f0dedd39abde64682755ba +oid sha256:578169c0df3168d3984677e5682f29e9694ebc284229a217ae43dfd275734e58 size 21367 diff --git 
a/Content/Editor/TexturePreviewMaterial.flax b/Content/Editor/TexturePreviewMaterial.flax index ec0c1fb12..5523b8376 100644 --- a/Content/Editor/TexturePreviewMaterial.flax +++ b/Content/Editor/TexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49871fd76ba4cdf9ce3f886c3c543b881d564aa8e6bea048857dfcfc58204f2a +oid sha256:91c76d393572fbcd535461d3815f1e0e475007792a7536f20dc5fa50ecb3c179 size 10570 diff --git a/Content/Editor/Wires Debug Material.flax b/Content/Editor/Wires Debug Material.flax index 2cd18bf3f..aa5ddd243 100644 --- a/Content/Editor/Wires Debug Material.flax +++ b/Content/Editor/Wires Debug Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8612dbf449c251c37c4d37e44cc85363360e9333c7eeeef2dbf81079cc33aa6c -size 28045 +oid sha256:97c617375c8ca7ef511487b3d060e1f2a628c27388e23ad8f6fa862425d2a5b9 +size 28549 diff --git a/Content/Engine/DefaultDeformableMaterial.flax b/Content/Engine/DefaultDeformableMaterial.flax index ec235a1b0..ba318b106 100644 --- a/Content/Engine/DefaultDeformableMaterial.flax +++ b/Content/Engine/DefaultDeformableMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2055ede0f07b8d9ea45f5bb97649d76b24c2089ad3d423cdbc4be5d81c2dc7c7 +oid sha256:bb66093810d808a26084f552b5a5da5daa0d3a57f7ea41d7e202e4b4d09129c4 size 18514 diff --git a/Content/Engine/DefaultMaterial.flax b/Content/Engine/DefaultMaterial.flax index 1e0e75755..6038d06be 100644 --- a/Content/Engine/DefaultMaterial.flax +++ b/Content/Engine/DefaultMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97b5fde5ad1202826e64b1fdedc6f44fa8677c499674218d76bec7789891610f +oid sha256:3207b95298f954ea187cce08b28db57ade1863f523626c10c3fd0c60ac8af42c size 29992 diff --git a/Content/Engine/DefaultRadialMenu.flax b/Content/Engine/DefaultRadialMenu.flax index d6f680162..582ef5cfe 100644 --- a/Content/Engine/DefaultRadialMenu.flax +++ 
b/Content/Engine/DefaultRadialMenu.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3bd840512ae1daaacf26843ce47a8bfe289541df61ca11a05bd082de9395828 +oid sha256:29db27b56a03e53a04b10239d722ae69369d80d6acd7acfa090af833f7a2c584 size 20340 diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index 10395bca4..96bf55cda 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f878f6542975aa2fa239bdc3cae061751d8a16908069ceb29b57efbd20256a9 +oid sha256:1bc64b044a3f7999e020717efd4ef14c5cf6bd2a251ef94b754bec1e3cc5a3da size 23451 diff --git a/Content/Engine/SingleColorMaterial.flax b/Content/Engine/SingleColorMaterial.flax index 0af9c8bd6..56a190cd4 100644 --- a/Content/Engine/SingleColorMaterial.flax +++ b/Content/Engine/SingleColorMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f89a10ecbe30a949385392b3729b82a18991a6f82ea0e396df22ca13f58c300 +oid sha256:31eba5266d5b1354caea2d4d29528661a5d8b50fd38a3fb70c36fb83b2033c9e size 28168 diff --git a/Content/Engine/SkyboxMaterial.flax b/Content/Engine/SkyboxMaterial.flax index a3fb977b1..9a4d091f2 100644 --- a/Content/Engine/SkyboxMaterial.flax +++ b/Content/Engine/SkyboxMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4342fbcf46a15467209cff4d85a14eb955d9ae5954b58c18fa5ecdb5d571e05b +oid sha256:031971460da7490fdb2645a580003a9fbb99a403f10759285ccc36fc8b6a1c88 size 29366 From ffb760d8f3766d7303240c3b4645fad5bacb63d8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 5 Aug 2024 22:43:00 +0200 Subject: [PATCH 249/292] Fix GPU particles issues with Global SDF far away from scene geometry --- ...rticleEmitterGraph.GPU.ParticleModules.cpp | 16 ++++----- Source/Shaders/GlobalSignDistanceField.hlsl | 36 +++++++++++++------ 2 files changed, 33 insertions(+), 19 deletions(-) 
diff --git a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp index 04dbbbc32..bbf4ccdf7 100644 --- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp +++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp @@ -626,8 +626,8 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node) " // Position (Global SDF)\n" " float3 wsPos = {2};\n" " float dist;\n" - " float3 dir = -normalize(SampleGlobalSDFGradient({1}, {1}_Tex, wsPos, dist));\n" - " {0} += dir * dist;\n" + " float3 dir = -normalize(SampleGlobalSDFGradient({1}, {1}_Tex, {1}_Mip, wsPos, dist));\n" + " {0} += dist < GLOBAL_SDF_WORLD_SIZE ? dir * dist : float3(0, 0, 0);\n" " }}\n" ), position.Value, param.ShaderName, wsPos); break; @@ -892,7 +892,7 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node) " {{\n" " // Conform to Global SDF\n" " float dist;\n" - " float3 dir = normalize(SampleGlobalSDFGradient({3}, {3}_Tex, {0}, dist));\n" + " float3 dir = normalize(SampleGlobalSDFGradient({3}, {3}_Tex, {3}_Mip, {0}, dist));\n" " if (dist > 0) dir *= -1;\n" " float distToSurface = abs(dist);\n" " float spdNormal = dot(dir, {1});\n" @@ -900,7 +900,7 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node) " float tgtSpeed = {4} * ratio;\n" " float deltaSpeed = tgtSpeed - spdNormal;\n" " float3 deltaVelocity = dir * (sign(deltaSpeed) * min(abs(deltaSpeed), DeltaTime * lerp({7}, {5}, ratio)) / max({2}, PARTICLE_THRESHOLD));\n" - " {1} += deltaVelocity;\n" + " {1} += dist < GLOBAL_SDF_WORLD_SIZE ? 
deltaVelocity : 0.0f;\n" " }}\n" ), position.Value, velocity.Value, mass.Value, param.ShaderName, attractionSpeed.Value, attractionForce.Value, stickDistance.Value, stickForce.Value); break; @@ -917,11 +917,11 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node) " // Collision (Global SDF)\n" " float3 nextPos = {0} + {1} * DeltaTime;\n" " nextPos = mul(float4(nextPos, 1), WorldMatrix).xyz;\n" - " float dist = SampleGlobalSDF({10}, {10}_Tex, nextPos);\n" + " float dist = SampleGlobalSDF({10}, {10}_Tex, {10}_Mip, nextPos);\n" " if (dist < {5})\n" " {{\n" " {0} = mul(float4({0}, 1), WorldMatrix).xyz;\n" - " float3 n = normalize(SampleGlobalSDFGradient({10}, {10}_Tex, {0}, dist));\n" + " float3 n = normalize(SampleGlobalSDFGradient({10}, {10}_Tex, {10}_Mip, {0}, dist));\n" " {0} += n * -dist;\n" " {0} = mul(float4({0}, 1), InvWorldMatrix).xyz;\n" COLLISION_LOGIC() @@ -931,10 +931,10 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node) " {{\n" " // Collision (Global SDF)\n" " float3 nextPos = {0} + {1} * DeltaTime;\n" - " float dist = SampleGlobalSDF({10}, {10}_Tex, nextPos);\n" + " float dist = SampleGlobalSDF({10}, {10}_Tex, {10}_Mip, nextPos);\n" " if (dist < {5})\n" " {{\n" - " float3 n = normalize(SampleGlobalSDFGradient({10}, {10}_Tex, {0}, dist));\n" + " float3 n = normalize(SampleGlobalSDFGradient({10}, {10}_Tex, {10}_Mip, {0}, dist));\n" " {0} += n * -dist;\n" COLLISION_LOGIC() " }}\n" diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 577b9ebca..b850109c1 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -155,13 +155,14 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Text } // Samples the Global SDF and returns the gradient vector (derivative) at the given world location. Normalize it to get normal vector. 
-float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, float3 worldPosition, out float distance) +float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D tex, float3 worldPosition, out float distance, uint startCascade = 0) { float3 gradient = float3(0, 0.00001f, 0); distance = GLOBAL_SDF_WORLD_SIZE; if (data.CascadePosDistance[3].w <= 0.0f) return gradient; - for (uint cascade = 0; cascade < data.CascadesCount; cascade++) + startCascade = min(startCascade, data.CascadesCount - 1); + for (uint cascade = startCascade; cascade < data.CascadesCount; cascade++) { float3 cascadeUV, textureUV; GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); @@ -205,19 +206,32 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D float distanceMip = mip.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceMip; if (distanceMip < chunkSize && all(cascadeUV > 0) && all(cascadeUV < 1)) { - distance = distanceMip; float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; float distanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0) * maxDistanceTex; if (distanceTex < chunkMargin) + { distance = distanceTex; - float texelOffset = 1.0f / data.Resolution; - float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; - float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x; - float yp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x; - float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; - float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; - float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; - gradient = float3(xp - xn, yp - yn, zp - zn) * maxDistanceTex; + float 
texelOffset = 1.0f / data.Resolution; + float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; + float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x; + float yp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x; + float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; + float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; + float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; + gradient = float3(xp - xn, yp - yn, zp - zn) * maxDistanceTex; + } + else + { + distance = distanceMip; + float texelOffset = (float)GLOBAL_SDF_RASTERIZE_MIP_FACTOR / data.Resolution; + float xp = mip.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; + float xn = mip.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x; + float yp = mip.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x; + float yn = mip.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; + float zp = mip.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; + float zn = mip.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; + gradient = float3(xp - xn, yp - yn, zp - zn) * maxDistanceMip; + } break; } } From 15d6f9861c8c926f3a4b677d146e9e5c16f1a589 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 7 Aug 2024 17:14:06 +0200 Subject: [PATCH 250/292] Fix shadow atlas resizing bug --- Source/Engine/Renderer/ShadowsPass.cpp | 7 ++++--- Source/Engine/Utilities/RectPack.h | 12 ++++++++++++ 2 files 
changed, 16 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 68f26d05e..55293a1df 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -341,10 +341,8 @@ public: void Reset() { Lights.Clear(); - StaticAtlasPixelsUsed = 0; - StaticAtlas.Clear(); ClearDynamic(); - ViewOrigin = Vector3::Zero; + ClearStatic(); } void InitStaticAtlas() @@ -1102,6 +1100,8 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& if (shadows.Resolution != atlasResolution) { shadows.Reset(); + shadows.Atlas.Reset(); + shadows.StaticAtlas.Reset(); auto desc = GPUTextureDescription::New2D(atlasResolution, atlasResolution, _shadowMapFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::DepthStencil); if (shadows.ShadowMapAtlas->Init(desc)) { @@ -1110,6 +1110,7 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& } shadows.ClearShadowMapAtlas = true; shadows.Resolution = atlasResolution; + shadows.ViewOrigin = renderContext.View.Origin; } if (renderContext.View.Origin != shadows.ViewOrigin) { diff --git a/Source/Engine/Utilities/RectPack.h b/Source/Engine/Utilities/RectPack.h index 8217effe4..38a163a3d 100644 --- a/Source/Engine/Utilities/RectPack.h +++ b/Source/Engine/Utilities/RectPack.h @@ -134,6 +134,18 @@ public: Init(Width, Height, BordersPadding); } + /// + /// Clears and resets atlas back to the initial state. + /// + void Reset() + { + Width = 0; + Height = 0; + BordersPadding = 0; + Nodes.Clear(); + FreeNodes.Clear(); + } + /// /// Tries to insert a node into the atlas using rectangle pack algorithm. 
/// From 28ce224caa5fd6556253352584eb564770f10a4c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 7 Aug 2024 17:14:31 +0200 Subject: [PATCH 251/292] Reduce shadow atlas fragmentation to favor bigger tile sizes --- Source/Engine/Renderer/ShadowsPass.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 55293a1df..9b65e9c3e 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -22,7 +22,7 @@ #define SHADOWS_POSITION_ERROR METERS_TO_UNITS(0.1f) #define SHADOWS_ROTATION_ERROR 0.9999f #define SHADOWS_MAX_TILES 6 -#define SHADOWS_MIN_RESOLUTION 16 +#define SHADOWS_MIN_RESOLUTION 32 #define SHADOWS_MAX_STATIC_ATLAS_CAPACITY_TO_DEFRAG 0.7f #define SHADOWS_BASE_LIGHT_RESOLUTION(atlasResolution) atlasResolution / MAX_CSM_CASCADES // Allow to store 4 CSM cascades in a single row in all cases #define NormalOffsetScaleTweak METERS_TO_UNITS(1) @@ -55,11 +55,11 @@ struct ShadowsAtlasRectTile : RectPackNode uint16 QuantizeResolution(float input) { uint16 output = Math::FloorToInt(input); - uint16 alignment = 16; + uint16 alignment = 32; if (output >= 512) - alignment = 64; + alignment = 128; else if (output >= 256) - alignment = 32; + alignment = 64; output = Math::AlignDown(output, alignment); return output; } From 1c02f3d8fe0e07386bbf826e090bd1e6bf52e6ed Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 7 Aug 2024 17:44:47 +0200 Subject: [PATCH 252/292] Add text alignment options to Button --- Source/Engine/UI/GUI/Common/Button.cs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Source/Engine/UI/GUI/Common/Button.cs b/Source/Engine/UI/GUI/Common/Button.cs index ff0b776f9..aa4c182cb 100644 --- a/Source/Engine/UI/GUI/Common/Button.cs +++ b/Source/Engine/UI/GUI/Common/Button.cs @@ -62,6 +62,18 @@ namespace FlaxEngine.GUI [EditorDisplay("Text Style"), EditorOrder(2020)] public Color 
TextColor; + /// + /// Gets or sets the horizontal text alignment within the control bounds. + /// + [EditorDisplay("Text Style"), EditorOrder(2027)] + public TextAlignment HorizontalAlignment { get; set; } = TextAlignment.Center; + + /// + /// Gets or sets the vertical text alignment within the control bounds. + /// + [EditorDisplay("Text Style"), EditorOrder(2028)] + public TextAlignment VerticalAlignment { get; set; } = TextAlignment.Center; + /// /// Gets or sets the brush used for background drawing. /// @@ -262,7 +274,7 @@ namespace FlaxEngine.GUI Render2D.DrawRectangle(clientRect, borderColor, BorderThickness); // Draw text - Render2D.DrawText(_font?.GetFont(), TextMaterial, _text, clientRect, textColor, TextAlignment.Center, TextAlignment.Center); + Render2D.DrawText(_font?.GetFont(), TextMaterial, _text, clientRect, textColor, HorizontalAlignment, VerticalAlignment); } /// From 6e01cca9adcef933017290bed1b008dca28ae87e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 7 Aug 2024 17:46:30 +0200 Subject: [PATCH 253/292] Add `TextFormat`, `SelectedItemChanged`, and text alignment options to Dropdown --- Source/Engine/UI/GUI/Common/Dropdown.cs | 53 +++++++++++++++++++------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/Source/Engine/UI/GUI/Common/Dropdown.cs b/Source/Engine/UI/GUI/Common/Dropdown.cs index 1dbac95ab..f300015f9 100644 --- a/Source/Engine/UI/GUI/Common/Dropdown.cs +++ b/Source/Engine/UI/GUI/Common/Dropdown.cs @@ -207,7 +207,7 @@ namespace FlaxEngine.GUI /// /// Gets or sets the items collection. /// - [EditorOrder(1), Tooltip("The items collection.")] + [EditorOrder(1)] public List Items { get => _items; @@ -220,7 +220,7 @@ namespace FlaxEngine.GUI [HideInEditor, NoSerialize] public string SelectedItem { - get => _selectedIndex != -1 ? _items[_selectedIndex].ToString() : string.Empty; + get => _selectedIndex > -1 && _selectedIndex < _items.Count ? 
_items[_selectedIndex].ToString() : string.Empty; set => SelectedIndex = _items.IndexOf(value); } @@ -230,7 +230,7 @@ namespace FlaxEngine.GUI [HideInEditor, NoSerialize] public LocalizedString SelectedItemLocalized { - get => _selectedIndex != -1 ? _items[_selectedIndex] : LocalizedString.Empty; + get => _selectedIndex > -1 && _selectedIndex < _items.Count ? _items[_selectedIndex] : LocalizedString.Empty; set => SelectedIndex = _items.IndexOf(value); } @@ -253,17 +253,22 @@ namespace FlaxEngine.GUI } /// - /// Gets or sets whether to show all of the items. + /// Gets or sets whether to show all the items in the dropdown. /// - [EditorOrder(3), Tooltip("Whether to show all of the items in the drop down.")] + [EditorOrder(3)] public bool ShowAllItems { get; set; } = true; /// /// Gets or sets the maximum number of items to show at once. Only used if ShowAllItems is false. /// - [EditorOrder(4), VisibleIf(nameof(ShowAllItems), true), Limit(1), Tooltip("The number of items to show in the drop down.")] + [EditorOrder(4), VisibleIf(nameof(ShowAllItems), true), Limit(1)] public int ShowMaxItemsCount { get; set; } = 5; + /// + /// Event fired when selected item gets changed. + /// + public event Action SelectedItemChanged; + /// /// Event fired when selected index gets changed. /// @@ -277,21 +282,39 @@ namespace FlaxEngine.GUI /// /// Gets or sets the font used to draw text. /// - [EditorDisplay("Text Style"), EditorOrder(2021)] + [EditorDisplay("Text Style"), EditorOrder(2020)] public FontReference Font { get; set; } /// /// Gets or sets the custom material used to render the text. It must has domain set to GUI and have a public texture parameter named Font used to sample font atlas texture with font characters data. /// - [EditorDisplay("Text Style"), EditorOrder(2022), Tooltip("Custom material used to render the text. 
It must has domain set to GUI and have a public texture parameter named Font used to sample font atlas texture with font characters data.")] + [EditorDisplay("Text Style"), EditorOrder(2021)] public MaterialBase FontMaterial { get; set; } + /// + /// Gets or sets the custom text format for selected item displaying. Can be used to prefix or/and postfix actual selected value within the dropdown control text where '{0}' is used to insert selected value text. Example: 'Selected: {0}'. Leave empty if unused. + /// + [EditorDisplay("Text Style"), EditorOrder(2022)] + public LocalizedString TextFormat { get; set; } + /// /// Gets or sets the color of the text. /// - [EditorDisplay("Text Style"), EditorOrder(2020), ExpandGroups] + [EditorDisplay("Text Style"), EditorOrder(2023), ExpandGroups] public Color TextColor { get; set; } + /// + /// Gets or sets the horizontal text alignment within the control bounds. + /// + [EditorDisplay("Text Style"), EditorOrder(2027)] + public TextAlignment HorizontalAlignment { get; set; } = TextAlignment.Near; + + /// + /// Gets or sets the vertical text alignment within the control bounds. + /// + [EditorDisplay("Text Style"), EditorOrder(2028)] + public TextAlignment VerticalAlignment { get; set; } = TextAlignment.Center; + /// /// Gets or sets the color of the border. /// @@ -319,7 +342,7 @@ namespace FlaxEngine.GUI /// /// Gets or sets the border color when dropdown is highlighted.
/// - [EditorDisplay("Border Style"), EditorOrder(2011)] + [EditorDisplay("Border Style"), EditorOrder(2013)] public Color BorderColorHighlighted { get; set; } /// @@ -420,6 +443,7 @@ namespace FlaxEngine.GUI protected virtual void OnSelectedIndexChanged() { SelectedIndexChanged?.Invoke(this); + SelectedItemChanged?.Invoke(); } /// @@ -506,7 +530,8 @@ namespace FlaxEngine.GUI Font = Font, TextColor = Color.White * 0.9f, TextColorHighlighted = Color.White, - HorizontalAlignment = TextAlignment.Near, + HorizontalAlignment = HorizontalAlignment, + VerticalAlignment = VerticalAlignment, Text = _items[i], Parent = item, Tag = i, @@ -681,7 +706,11 @@ namespace FlaxEngine.GUI var textRect = new Rectangle(margin, 0, clientRect.Width - boxSize - 2.0f * margin, clientRect.Height); Render2D.PushClip(textRect); var textColor = TextColor; - Render2D.DrawText(Font.GetFont(), FontMaterial, _items[_selectedIndex], textRect, enabled ? textColor : textColor * 0.5f, TextAlignment.Near, TextAlignment.Center); + string text = _items[_selectedIndex]; + string format = TextFormat != null ? TextFormat : null; + if (!string.IsNullOrEmpty(format)) + text = string.Format(format, text); + Render2D.DrawText(Font.GetFont(), FontMaterial, text, textRect, enabled ? textColor : textColor * 0.5f, HorizontalAlignment, VerticalAlignment); Render2D.PopClip(); } From 143d9bcce6f4abe68e3b1e78505727d4485d2507 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 7 Aug 2024 17:50:23 +0200 Subject: [PATCH 254/292] Fix border placement in `Dropdown` control --- Source/Engine/UI/GUI/Common/Dropdown.cs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Source/Engine/UI/GUI/Common/Dropdown.cs b/Source/Engine/UI/GUI/Common/Dropdown.cs index f300015f9..e1dc1fb24 100644 --- a/Source/Engine/UI/GUI/Common/Dropdown.cs +++ b/Source/Engine/UI/GUI/Common/Dropdown.cs @@ -23,7 +23,7 @@ namespace FlaxEngine.GUI /// Occurs when popup lost focus. 
/// public Action LostFocus; - + /// /// The selected control. Used to scroll to the control on popup creation. /// @@ -38,7 +38,7 @@ namespace FlaxEngine.GUI public override void OnEndContainsFocus() { base.OnEndContainsFocus(); - + // Dont lose focus when using panel. Does prevent LostFocus even from being called if clicking inside of the panel. if (MainPanel != null && MainPanel.IsMouseOver && !MainPanel.ContainsFocus) { @@ -489,12 +489,14 @@ namespace FlaxEngine.GUI BorderColor = BorderColorHighlighted, Width = 4.0f, AnchorPreset = AnchorPresets.StretchAll, + Offsets = Margin.Zero, Parent = popup, }; var itemsHeight = 20.0f; var itemsMargin = 20.0f; - // Scale height and margive with text height if needed + + // Scale height and margin with text height if needed var textHeight = Font.GetFont().Height; if (textHeight > itemsHeight) { @@ -509,6 +511,7 @@ namespace FlaxEngine.GUI itemsWidth = Mathf.Max(itemsWidth, itemsMargin + 4 + font.MeasureText(_items[i]).X); } */ + var itemsWidth = Width; var height = container.Margin.Height; @@ -612,14 +615,14 @@ namespace FlaxEngine.GUI { // Find canvas scalar and set as root if it exists. ContainerControl c = Parent; - while(c.Parent != Root && c.Parent != null) + while (c.Parent != Root && c.Parent != null) { c = c.Parent; if (c is CanvasScaler scalar) break; } var root = c is CanvasScaler ? 
c : Root; - + if (_items.Count == 0 || root == null) return; @@ -770,14 +773,14 @@ namespace FlaxEngine.GUI { if (base.OnMouseDoubleClick(location, button)) return true; - + if (_touchDown && button == MouseButton.Left) { _touchDown = false; ShowPopup(); return true; } - + if (button == MouseButton.Left) { _touchDown = true; From 5ae2196a01812201c7a99a9e274beffec5d030db Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 7 Aug 2024 22:44:38 +0200 Subject: [PATCH 255/292] Add support for customizing `Dropdown` popup visuals --- Source/Engine/UI/GUI/Common/Dropdown.cs | 176 ++++++++++++++---------- 1 file changed, 107 insertions(+), 69 deletions(-) diff --git a/Source/Engine/UI/GUI/Common/Dropdown.cs b/Source/Engine/UI/GUI/Common/Dropdown.cs index e1dc1fb24..e23bca199 100644 --- a/Source/Engine/UI/GUI/Common/Dropdown.cs +++ b/Source/Engine/UI/GUI/Common/Dropdown.cs @@ -27,7 +27,7 @@ namespace FlaxEngine.GUI /// /// The selected control. Used to scroll to the control on popup creation. /// - public ContainerControl SelectedControl = null; + public Control SelectedControl = null; /// /// The main panel used to hold the items. 
@@ -460,37 +460,22 @@ namespace FlaxEngine.GUI /// protected virtual DropdownRoot CreatePopup() { - // TODO: support using templates for the items collection container panel + // Create popup + var popup = CreatePopupRoot(); + if (popup == null) + throw new NullReferenceException("Missing popup."); + if (popup.MainPanel == null) + throw new NullReferenceException("Missing popup MainPanel."); + CreatePopupBackground(popup); - var popup = new DropdownRoot(); - - // TODO: support item templates - - var panel = new Panel - { - AnchorPreset = AnchorPresets.StretchAll, - BackgroundColor = BackgroundColor, - ScrollBars = ScrollBars.Vertical, - AutoFocus = true, - Parent = popup, - }; - popup.MainPanel = panel; - - var container = new VerticalPanel + // Create items container + var itemContainer = new VerticalPanel { AnchorPreset = AnchorPresets.StretchAll, BackgroundColor = Color.Transparent, IsScrollable = true, AutoSize = true, - Parent = panel, - }; - var border = new Border - { - BorderColor = BorderColorHighlighted, - Width = 4.0f, - AnchorPreset = AnchorPresets.StretchAll, - Offsets = Margin.Zero, - Parent = popup, + Parent = popup.MainPanel, }; var itemsHeight = 20.0f; @@ -513,69 +498,122 @@ namespace FlaxEngine.GUI */ var itemsWidth = Width; - var height = container.Margin.Height; + var height = itemContainer.Margin.Height; for (int i = 0; i < _items.Count; i++) { - var item = new ContainerControl - { - AutoFocus = false, - Height = itemsHeight, - Width = itemsWidth, - Parent = container, - }; - - var label = new DropdownLabel - { - AutoFocus = true, - X = itemsMargin, - Size = new Float2(itemsWidth - itemsMargin, itemsHeight), - Font = Font, - TextColor = Color.White * 0.9f, - TextColorHighlighted = Color.White, - HorizontalAlignment = HorizontalAlignment, - VerticalAlignment = VerticalAlignment, - Text = _items[i], - Parent = item, - Tag = i, - }; - label.ItemClicked += c => - { - OnItemClicked((int)c.Tag); - DestroyPopup(); - }; + var item = CreatePopupItem(i, 
new Float2(itemsWidth, itemsHeight), itemsMargin); + item.Parent = itemContainer; height += itemsHeight; if (i != 0) - height += container.Spacing; - + height += itemContainer.Spacing; if (_selectedIndex == i) - { - var icon = new Image - { - Brush = CheckedImage, - Size = new Float2(itemsMargin, itemsHeight), - Margin = new Margin(4.0f, 6.0f, 4.0f, 4.0f), - //AnchorPreset = AnchorPresets.VerticalStretchLeft, - Parent = item, - }; popup.SelectedControl = item; - } } if (ShowAllItems || _items.Count < ShowMaxItemsCount) { popup.Size = new Float2(itemsWidth, height); - panel.Size = popup.Size; + popup.MainPanel.Size = popup.Size; } else { - popup.Size = new Float2(itemsWidth, (itemsHeight + container.Spacing) * ShowMaxItemsCount); - panel.Size = popup.Size; + popup.Size = new Float2(itemsWidth, (itemsHeight + itemContainer.Spacing) * ShowMaxItemsCount); + popup.MainPanel.Size = popup.Size; } return popup; } + /// + /// Creates the popup root. Called by default implementation of and allows to customize popup base. + /// + /// Custom popup root control. + protected virtual DropdownRoot CreatePopupRoot() + { + var popup = new DropdownRoot(); + + var panel = new Panel + { + AnchorPreset = AnchorPresets.StretchAll, + BackgroundColor = BackgroundColor, + ScrollBars = ScrollBars.Vertical, + AutoFocus = true, + Parent = popup, + }; + popup.MainPanel = panel; + + return popup; + } + + /// + /// Creates the popup background. Called by default implementation of and allows to customize popup background by adding controls to it. + /// + /// The popup control where background controls can be added. + protected virtual void CreatePopupBackground(DropdownRoot popup) + { + // Default background outline + var border = new Border + { + BorderColor = BorderColorHighlighted, + Width = 4.0f, + AnchorPreset = AnchorPresets.StretchAll, + Offsets = Margin.Zero, + Parent = popup, + }; + } + + /// + /// Creates the popup item. 
Called by default implementation of and allows to customize popup item. + /// + /// The item index. + /// The item control size + /// The item control left-side margin + /// Custom popup item control. + protected virtual Control CreatePopupItem(int i, Float2 size, float margin) + { + // Default item with label + var item = new ContainerControl + { + AutoFocus = false, + Size = size, + }; + var label = new DropdownLabel + { + AutoFocus = true, + X = margin, + Size = new Float2(size.X - margin, size.Y), + Font = Font, + TextColor = Color.White * 0.9f, + TextColorHighlighted = Color.White, + HorizontalAlignment = HorizontalAlignment, + VerticalAlignment = VerticalAlignment, + Text = _items[i], + Parent = item, + Tag = i, + }; + label.ItemClicked += c => + { + OnItemClicked((int)c.Tag); + DestroyPopup(); + }; + + if (_selectedIndex == i) + { + // Add icon to the selected item + var icon = new Image + { + Brush = CheckedImage, + Size = new Float2(margin, size.Y), + Margin = new Margin(4.0f, 6.0f, 4.0f, 4.0f), + //AnchorPreset = AnchorPresets.VerticalStretchLeft, + Parent = item, + }; + } + + return item; + } + /// /// Called when popup menu gets shown. 
/// From 2b9c0509110e9fb7faea136a9483461281aba4a3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 7 Aug 2024 23:46:18 +0200 Subject: [PATCH 256/292] Add `Graphics.ShadowUpdateRate` to control global shadows atlas update rate --- Source/Engine/Graphics/Graphics.cpp | 1 + Source/Engine/Graphics/Graphics.h | 5 +++++ Source/Engine/Renderer/ShadowsPass.cpp | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 489299714..a1640c0b4 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -18,6 +18,7 @@ Quality Graphics::SSAOQuality = Quality::Medium; Quality Graphics::VolumetricFogQuality = Quality::High; Quality Graphics::ShadowsQuality = Quality::Medium; Quality Graphics::ShadowMapsQuality = Quality::Medium; +float Graphics::ShadowUpdateRate = 1.0f; bool Graphics::AllowCSMBlending = false; Quality Graphics::GlobalSDFQuality = Quality::High; Quality Graphics::GIQuality = Quality::High; diff --git a/Source/Engine/Graphics/Graphics.h b/Source/Engine/Graphics/Graphics.h index 59c5bfd1c..4c5e0ad26 100644 --- a/Source/Engine/Graphics/Graphics.h +++ b/Source/Engine/Graphics/Graphics.h @@ -48,6 +48,11 @@ public: /// API_FIELD() static Quality ShadowMapsQuality; + /// + /// The global scale for all shadow maps update rate. Can be used to slow down shadows rendering frequency on lower quality settings or low-end platforms. Default 1. + /// + API_FIELD() static float ShadowUpdateRate; + /// /// Enables cascades splits blending for directional light shadows. 
/// [Deprecated in v1.9] diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 9b65e9c3e..b98dd9999 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -224,7 +224,7 @@ struct ShadowAtlasLight const float shadowsUpdateRate = light.ShadowsUpdateRate; const float shadowsUpdateRateAtDistance = shadowsUpdateRate * light.ShadowsUpdateRateAtDistance; float updateRate = Math::Lerp(shadowsUpdateRate, shadowsUpdateRateAtDistance, Math::Saturate(distanceFromView / Distance)); - // TODO: add global shadows update rate scale to be adjusted per-platform + updateRate *= Graphics::ShadowUpdateRate; freezeUpdate = updateRate <= ZeroTolerance; if (freezeUpdate) return 0.0f; From 788d719f2a066e06eb760cff435e8fb5d0c09a29 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 8 Aug 2024 00:34:02 +0200 Subject: [PATCH 257/292] Add Git cloning plugin project inside Git repo as submodule --- Source/Editor/Windows/PluginsWindow.cs | 33 ++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/Source/Editor/Windows/PluginsWindow.cs b/Source/Editor/Windows/PluginsWindow.cs index 39d2b02a0..25dfb773c 100644 --- a/Source/Editor/Windows/PluginsWindow.cs +++ b/Source/Editor/Windows/PluginsWindow.cs @@ -366,13 +366,13 @@ namespace FlaxEditor.Windows } var clonePath = Path.Combine(Globals.ProjectFolder, "Plugins", pluginName); - if (!Directory.Exists(clonePath)) - Directory.CreateDirectory(clonePath); - else + if (Directory.Exists(clonePath)) { Editor.LogError("Plugin Name is already used. 
Pick a different Name."); return; } + Directory.CreateDirectory(clonePath); + try { // Start git clone @@ -384,7 +384,32 @@ namespace FlaxEditor.Windows LogOutput = true, WaitForEnd = true }; - Platform.CreateProcess(ref settings); + var asSubmodule = Directory.Exists(Path.Combine(Globals.ProjectFolder, ".git")); + if (asSubmodule) + { + // Clone as submodule to the existing repo + settings.Arguments = $"submodule add {gitPath} \"Plugins/{pluginName}\""; + + // Submodule add need the target folder to not exist + Directory.Delete(clonePath); + } + int result = Platform.CreateProcess(ref settings); + if (result != 0) + throw new Exception($"'{settings.FileName} {settings.Arguments}' failed with result {result}"); + + // Ensure that cloned repo exists + var checkPath = Path.Combine(clonePath, ".git"); + if (asSubmodule) + { + if (!File.Exists(checkPath)) + throw new Exception("Failed to clone repo."); + } + else + { + if (!Directory.Exists(checkPath)) + throw new Exception("Failed to clone repo."); + + } } catch (Exception e) { From ca62a6c4bf272800e5dc3cdb52cae27a3e8f8a56 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 8 Aug 2024 14:05:57 +0200 Subject: [PATCH 258/292] Fix directional lights shadows jittering when using TAA --- Source/Engine/Renderer/ShadowsPass.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index b98dd9999..a8cf19542 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -859,6 +859,18 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render renderContextBatch.Contexts.AddDefault(atlasLight.ContextCount); atlasLight.Cache.Set(renderContext.View, light, atlasLight.CascadeSplits); + // Calculate view frustum corners (un-jittered) in view-space + Float3 frustumCorners[8]; + { + BoundingFrustum stableViewFrustum; + Matrix m; + 
Matrix::Multiply(renderContext.View.View, renderContext.View.NonJitteredProjection, m); + stableViewFrustum.SetMatrix(m); + stableViewFrustum.GetCorners(frustumCorners); + } + for (int32 i = 0; i < 8; i++) + Float3::Transform(frustumCorners[i], renderContext.View.View, frustumCorners[i]); + // Create the different view and projection matrices for each split float splitMinRatio = 0; float splitMaxRatio = (minDistance - view.Near) / viewRange; @@ -878,10 +890,9 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render for (int32 j = 0; j < 4; j++) { float overlapWithPrevSplit = 0.1f * (splitMinRatio - oldSplitMinRatio); // CSM blending overlap - const RenderList* mainCache = renderContext.List; - const auto frustumRangeVS = mainCache->FrustumCornersVs[j + 4] - mainCache->FrustumCornersVs[j]; - frustumCornersVs[j] = mainCache->FrustumCornersVs[j] + frustumRangeVS * (splitMinRatio - overlapWithPrevSplit); - frustumCornersVs[j + 4] = mainCache->FrustumCornersVs[j] + frustumRangeVS * splitMaxRatio; + const auto frustumRangeVS = frustumCorners[j + 4] - frustumCorners[j]; + frustumCornersVs[j] = frustumCorners[j] + frustumRangeVS * (splitMinRatio - overlapWithPrevSplit); + frustumCornersVs[j + 4] = frustumCorners[j] + frustumRangeVS * splitMaxRatio; } // Transform the frustum from camera view space to world-space From 1c24f5d3cee1d6649725cee60a04827a39c7adcb Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 8 Aug 2024 15:30:47 +0200 Subject: [PATCH 259/292] Update `meshoptimizer` to version `v0.21` --- Source/ThirdParty/meshoptimizer/LICENSE.md | 2 +- Source/ThirdParty/meshoptimizer/allocator.cpp | 2 +- .../ThirdParty/meshoptimizer/clusterizer.cpp | 92 +++++++++- .../ThirdParty/meshoptimizer/indexcodec.cpp | 8 +- .../ThirdParty/meshoptimizer/meshoptimizer.h | 53 ++++-- .../meshoptimizer/overdrawanalyzer.cpp | 3 +- .../ThirdParty/meshoptimizer/quantization.cpp | 12 +- .../ThirdParty/meshoptimizer/simplifier.cpp | 172 ++++++++++++++---- 
.../ThirdParty/meshoptimizer/stripifier.cpp | 2 +- .../meshoptimizer/vcacheoptimizer.cpp | 13 +- .../ThirdParty/meshoptimizer/vertexcodec.cpp | 2 +- 11 files changed, 281 insertions(+), 80 deletions(-) diff --git a/Source/ThirdParty/meshoptimizer/LICENSE.md b/Source/ThirdParty/meshoptimizer/LICENSE.md index 962ed41ff..ef9f5919f 100644 --- a/Source/ThirdParty/meshoptimizer/LICENSE.md +++ b/Source/ThirdParty/meshoptimizer/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2016-2023 Arseny Kapoulkine +Copyright (c) 2016-2024 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Source/ThirdParty/meshoptimizer/allocator.cpp b/Source/ThirdParty/meshoptimizer/allocator.cpp index 072e8e51a..12eda3872 100644 --- a/Source/ThirdParty/meshoptimizer/allocator.cpp +++ b/Source/ThirdParty/meshoptimizer/allocator.cpp @@ -1,7 +1,7 @@ // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details #include "meshoptimizer.h" -void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*)) +void meshopt_setAllocator(void*(MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t), void(MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*)) { meshopt_Allocator::Storage::allocate = allocate; meshopt_Allocator::Storage::deallocate = deallocate; diff --git a/Source/ThirdParty/meshoptimizer/clusterizer.cpp b/Source/ThirdParty/meshoptimizer/clusterizer.cpp index c4672ad60..52fe5a362 100644 --- a/Source/ThirdParty/meshoptimizer/clusterizer.cpp +++ b/Source/ThirdParty/meshoptimizer/clusterizer.cpp @@ -441,7 +441,7 @@ static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const } // split axis is one where the variance is largest - unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 
1 : 2; + unsigned int axis = (vars[0] >= vars[1] && vars[0] >= vars[2]) ? 0 : (vars[1] >= vars[2] ? 1 : 2); float split = mean[axis]; size_t middle = kdtreePartition(indices, count, points, stride, axis, split); @@ -882,3 +882,93 @@ meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); } + +void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count) +{ + using namespace meshopt; + + assert(triangle_count <= kMeshletMaxTriangles); + assert(vertex_count <= kMeshletMaxVertices); + + unsigned char* indices = meshlet_triangles; + unsigned int* vertices = meshlet_vertices; + + // cache tracks vertex timestamps (corresponding to triangle index! all 3 vertices are added at the same time and never removed) + unsigned char cache[kMeshletMaxVertices]; + memset(cache, 0, vertex_count); + + // note that we start from a value that means all vertices aren't in cache + unsigned char cache_last = 128; + const unsigned char cache_cutoff = 3; // 3 triangles = ~5..9 vertices depending on reuse + + for (size_t i = 0; i < triangle_count; ++i) + { + int next = -1; + int next_match = -1; + + for (size_t j = i; j < triangle_count; ++j) + { + unsigned char a = indices[j * 3 + 0], b = indices[j * 3 + 1], c = indices[j * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + // score each triangle by how many vertices are in cache + // note: the distance is computed using unsigned 8-bit values, so cache timestamp overflow is handled gracefully + int aok = (unsigned char)(cache_last - cache[a]) < cache_cutoff; + int bok = (unsigned char)(cache_last - cache[b]) < cache_cutoff; + int cok = (unsigned char)(cache_last - cache[c]) < cache_cutoff; + + if (aok + bok + cok > next_match) + { + next = (int)j; + next_match = aok + bok + cok; + + // note 
that we could end up with all 3 vertices in the cache, but 2 is enough for ~strip traversal + if (next_match >= 2) + break; + } + } + + assert(next >= 0); + + unsigned char a = indices[next * 3 + 0], b = indices[next * 3 + 1], c = indices[next * 3 + 2]; + + // shift triangles before the next one forward so that we always keep an ordered partition + // note: this could have swapped triangles [i] and [next] but that distorts the order and may skew the output sequence + memmove(indices + (i + 1) * 3, indices + i * 3, (next - i) * 3 * sizeof(unsigned char)); + + indices[i * 3 + 0] = a; + indices[i * 3 + 1] = b; + indices[i * 3 + 2] = c; + + // cache timestamp is the same between all vertices of each triangle to reduce overflow + cache_last++; + cache[a] = cache_last; + cache[b] = cache_last; + cache[c] = cache_last; + } + + // reorder meshlet vertices for access locality assuming index buffer is scanned sequentially + unsigned int order[kMeshletMaxVertices]; + + unsigned char remap[kMeshletMaxVertices]; + memset(remap, -1, vertex_count); + + size_t vertex_offset = 0; + + for (size_t i = 0; i < triangle_count * 3; ++i) + { + unsigned char& r = remap[indices[i]]; + + if (r == 0xff) + { + r = (unsigned char)(vertex_offset); + order[vertex_offset] = vertices[indices[i]]; + vertex_offset++; + } + + indices[i] = r; + } + + assert(vertex_offset <= vertex_count); + memcpy(vertices, order, vertex_offset * sizeof(unsigned int)); +} diff --git a/Source/ThirdParty/meshoptimizer/indexcodec.cpp b/Source/ThirdParty/meshoptimizer/indexcodec.cpp index 4cc2fea63..b30046005 100644 --- a/Source/ThirdParty/meshoptimizer/indexcodec.cpp +++ b/Source/ThirdParty/meshoptimizer/indexcodec.cpp @@ -33,7 +33,7 @@ static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsign { (void)a; - return (b == next) ? 1 : (c == next) ? 2 : 0; + return (b == next) ? 1 : (c == next ? 
2 : 0); } static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset) @@ -217,7 +217,7 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons int fe = fer >> 2; int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); - int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15; + int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next ? (next++, 0) : 15); if (fec == 15 && version >= 1) { @@ -267,8 +267,8 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a int fea = (a == next) ? (next++, 0) : 15; - int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; - int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15; + int feb = (fb >= 0 && fb < 14) ? fb + 1 : (b == next ? (next++, 0) : 15); + int fec = (fc >= 0 && fc < 14) ? fc + 1 : (c == next ? (next++, 0) : 15); // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise unsigned char codeaux = (unsigned char)((feb << 4) | fec); diff --git a/Source/ThirdParty/meshoptimizer/meshoptimizer.h b/Source/ThirdParty/meshoptimizer/meshoptimizer.h index dbafd4e6e..6c8dcd7e8 100644 --- a/Source/ThirdParty/meshoptimizer/meshoptimizer.h +++ b/Source/ThirdParty/meshoptimizer/meshoptimizer.h @@ -1,7 +1,7 @@ /** - * meshoptimizer - version 0.20 + * meshoptimizer - version 0.21 * - * Copyright (C) 2016-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2016-2024, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/meshoptimizer * * This library is distributed under the MIT License. See notice at the end of this file. 
@@ -12,7 +12,7 @@ #include /* Version macro; major * 1000 + minor * 10 + patch */ -#define MESHOPTIMIZER_VERSION 200 /* 0.20 */ +#define MESHOPTIMIZER_VERSION 210 /* 0.21 */ /* If no API is defined, assume default */ #ifndef MESHOPTIMIZER_API @@ -311,12 +311,12 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t cou */ enum meshopt_EncodeExpMode { - /* When encoding exponents, use separate values for each component (maximum quality) */ - meshopt_EncodeExpSeparate, - /* When encoding exponents, use shared value for all components of each vector (better compression) */ - meshopt_EncodeExpSharedVector, - /* When encoding exponents, use shared value for each component of all vectors (best compression) */ - meshopt_EncodeExpSharedComponent, + /* When encoding exponents, use separate values for each component (maximum quality) */ + meshopt_EncodeExpSeparate, + /* When encoding exponents, use shared value for all components of each vector (better compression) */ + meshopt_EncodeExpSharedVector, + /* When encoding exponents, use shared value for each component of all vectors (best compression) */ + meshopt_EncodeExpSharedComponent, }; MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); @@ -328,8 +328,12 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_ */ enum { - /* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */ - meshopt_SimplifyLockBorder = 1 << 0, + /* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */ + meshopt_SimplifyLockBorder = 1 << 0, + /* Improve simplification performance assuming input indices are a sparse subset of the mesh. 
Note that error becomes relative to subset extents. */ + meshopt_SimplifySparse = 1 << 1, + /* Treat error limit and resulting error as absolute instead of relative to mesh extents. */ + meshopt_SimplifyErrorAbsolute = 1 << 2, }; /** @@ -357,9 +361,10 @@ MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsig * vertex_attributes should have attribute_count floats for each vertex * attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position. The recommended weight range is [1e-3..1e-1], assuming attribute data is in [0..1] range. * attribute_count must be <= 16 + * vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; 1 denotes vertices that can't be moved * TODO target_error/result_error currently use combined distance+attribute error; this may change in the future */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error); /** * Experimental: Mesh simplifier (sloppy) @@ -486,13 +491,23 @@ struct meshopt_Meshlet * meshlet_vertices must contain enough space for all meshlets, worst 
case size is equal to max_meshlets * max_vertices * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 * vertex_positions should have float3 position in the first 12 bytes of each vertex - * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512) + * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512; max_triangles must be divisible by 4) * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency */ MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); +/** + * Experimental: Meshlet optimizer + * Reorders meshlet vertices and triangles to maximize locality to improve rasterizer throughput + * + * meshlet_triangles and meshlet_vertices must refer to meshlet triangle and vertex index data; when buildMeshlets* is used, these + * need to be computed from meshlet's vertex_offset and triangle_offset + * triangle_count and vertex_count must not exceed implementation limits (vertex_count <= 255 - not 256!, triangle_count <= 512) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_optimizeMeshlet(unsigned int* 
meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count); + struct meshopt_Bounds { /* bounding sphere, useful for frustum and occlusion culling */ @@ -649,7 +664,7 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const template inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); template -inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); +inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); template inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = NULL); template @@ -956,12 +971,12 @@ inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_co } template -inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const 
float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error) +inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error) { - meshopt_IndexAdapter in(NULL, indices, index_count); - meshopt_IndexAdapter out(destination, NULL, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); - return meshopt_simplifyWithAttributes(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, target_index_count, target_error, options, result_error); + return meshopt_simplifyWithAttributes(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, result_error); } template @@ -1050,7 +1065,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_ #endif /** - * Copyright (c) 2016-2023 Arseny Kapoulkine + * Copyright (c) 2016-2024 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/Source/ThirdParty/meshoptimizer/overdrawanalyzer.cpp b/Source/ThirdParty/meshoptimizer/overdrawanalyzer.cpp index 8b6f25413..31cf6f146 100644 --- a/Source/ThirdParty/meshoptimizer/overdrawanalyzer.cpp +++ 
b/Source/ThirdParty/meshoptimizer/overdrawanalyzer.cpp @@ -53,11 +53,10 @@ static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, f // flip backfacing triangles to simplify rasterization logic if (sign) { - // flipping v2 & v3 preserves depth gradients since they're based on v1 + // flipping v2 & v3 preserves depth gradients since they're based on v1; only v1z is used below float t; t = v2x, v2x = v3x, v3x = t; t = v2y, v2y = v3y, v3y = t; - t = v2z, v2z = v3z, v3z = t; // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below v1z = kViewport - v1z; diff --git a/Source/ThirdParty/meshoptimizer/quantization.cpp b/Source/ThirdParty/meshoptimizer/quantization.cpp index 09a314d60..149835f50 100644 --- a/Source/ThirdParty/meshoptimizer/quantization.cpp +++ b/Source/ThirdParty/meshoptimizer/quantization.cpp @@ -3,9 +3,15 @@ #include +union FloatBits +{ + float f; + unsigned int ui; +}; + unsigned short meshopt_quantizeHalf(float v) { - union { float f; unsigned int ui; } u = {v}; + FloatBits u = {v}; unsigned int ui = u.ui; int s = (ui >> 16) & 0x8000; @@ -30,7 +36,7 @@ float meshopt_quantizeFloat(float v, int N) { assert(N >= 0 && N <= 23); - union { float f; unsigned int ui; } u = {v}; + FloatBits u = {v}; unsigned int ui = u.ui; const int mask = (1 << (23 - N)) - 1; @@ -64,7 +70,7 @@ float meshopt_dequantizeHalf(unsigned short h) // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 r += (em >= (31 << 10)) ? 
(112 << 23) : 0; - union { float f; unsigned int ui; } u; + FloatBits u; u.ui = s | r; return u.f; } diff --git a/Source/ThirdParty/meshoptimizer/simplifier.cpp b/Source/ThirdParty/meshoptimizer/simplifier.cpp index 5ba857007..e59b4afcd 100644 --- a/Source/ThirdParty/meshoptimizer/simplifier.cpp +++ b/Source/ThirdParty/meshoptimizer/simplifier.cpp @@ -111,10 +111,12 @@ struct PositionHasher { const float* vertex_positions; size_t vertex_stride_float; + const unsigned int* sparse_remap; size_t hash(unsigned int index) const { - const unsigned int* key = reinterpret_cast(vertex_positions + index * vertex_stride_float); + unsigned int ri = sparse_remap ? sparse_remap[index] : index; + const unsigned int* key = reinterpret_cast(vertex_positions + ri * vertex_stride_float); // scramble bits to make sure that integer coordinates have entropy in lower bits unsigned int x = key[0] ^ (key[0] >> 17); @@ -127,7 +129,25 @@ struct PositionHasher bool equal(unsigned int lhs, unsigned int rhs) const { - return memcmp(vertex_positions + lhs * vertex_stride_float, vertex_positions + rhs * vertex_stride_float, sizeof(float) * 3) == 0; + unsigned int li = sparse_remap ? sparse_remap[lhs] : lhs; + unsigned int ri = sparse_remap ? 
sparse_remap[rhs] : rhs; + + return memcmp(vertex_positions + li * vertex_stride_float, vertex_positions + ri * vertex_stride_float, sizeof(float) * 3) == 0; + } +}; + +struct RemapHasher +{ + unsigned int* remap; + + size_t hash(unsigned int id) const + { + return id * 0x5bd1e995; + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return remap[lhs] == rhs; } }; @@ -167,9 +187,9 @@ static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, return NULL; } -static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) +static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap, meshopt_Allocator& allocator) { - PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float)}; + PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float), sparse_remap}; size_t table_size = hashBuckets2(vertex_count); unsigned int* table = allocator.allocate(table_size); @@ -205,6 +225,57 @@ static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const f allocator.deallocate(table); } +static unsigned int* buildSparseRemap(unsigned int* indices, size_t index_count, size_t vertex_count, size_t* out_vertex_count, meshopt_Allocator& allocator) +{ + // use a bit set to compute the precise number of unique vertices + unsigned char* filter = allocator.allocate((vertex_count + 7) / 8); + memset(filter, 0, (vertex_count + 7) / 8); + + size_t unique = 0; + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + unique += (filter[index / 8] & (1 << (index % 8))) == 0; + filter[index / 8] |= 1 << (index % 8); + } + + unsigned int* remap = allocator.allocate(unique); + 
size_t offset = 0; + + // temporary map dense => sparse; we allocate it last so that we can deallocate it + size_t revremap_size = hashBuckets2(unique); + unsigned int* revremap = allocator.allocate(revremap_size); + memset(revremap, -1, revremap_size * sizeof(unsigned int)); + + // fill remap, using revremap as a helper, and rewrite indices in the same pass + RemapHasher hasher = {remap}; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + + unsigned int* entry = hashLookup2(revremap, revremap_size, hasher, index, ~0u); + + if (*entry == ~0u) + { + remap[offset] = index; + *entry = unsigned(offset); + offset++; + } + + indices[i] = *entry; + } + + allocator.deallocate(revremap); + + assert(offset == unique); + *out_vertex_count = unique; + + return remap; +} + enum VertexKind { Kind_Manifold, // not on an attribute seam, not on any boundary @@ -252,7 +323,7 @@ static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int return false; } -static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, unsigned int options) +static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_lock, const unsigned int* sparse_remap, unsigned int options) { memset(loop, -1, vertex_count * sizeof(unsigned int)); memset(loopback, -1, vertex_count * sizeof(unsigned int)); @@ -298,7 +369,12 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned { if (remap[i] == i) { - if (wedge[i] == i) + if (vertex_lock && vertex_lock[sparse_remap ? 
sparse_remap[i] : i]) + { + // vertex is explicitly locked + result[i] = Kind_Locked; + } + else if (wedge[i] == i) { // no attribute seam, need to check if it's manifold unsigned int openi = openinc[i], openo = openout[i]; @@ -378,7 +454,7 @@ struct Vector3 float x, y, z; }; -static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap = NULL) { size_t vertex_stride_float = vertex_positions_stride / sizeof(float); @@ -387,7 +463,8 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat for (size_t i = 0; i < vertex_count; ++i) { - const float* v = vertex_positions_data + i * vertex_stride_float; + unsigned int ri = sparse_remap ? sparse_remap[i] : unsigned(i); + const float* v = vertex_positions_data + ri * vertex_stride_float; if (result) { @@ -426,15 +503,17 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat return extent; } -static void rescaleAttributes(float* result, const float* vertex_attributes_data, size_t vertex_count, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count) +static void rescaleAttributes(float* result, const float* vertex_attributes_data, size_t vertex_count, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned int* sparse_remap) { size_t vertex_attributes_stride_float = vertex_attributes_stride / sizeof(float); for (size_t i = 0; i < vertex_count; ++i) { + unsigned int ri = sparse_remap ? 
sparse_remap[i] : unsigned(i); + for (size_t k = 0; k < attribute_count; ++k) { - float a = vertex_attributes_data[i * vertex_attributes_stride_float + k]; + float a = vertex_attributes_data[ri * vertex_attributes_stride_float + k]; result[i * attribute_count + k] = a * attribute_weights[k]; } @@ -580,7 +659,7 @@ static float quadricError(const Quadric& Q, const QuadricGrad* G, size_t attribu } // TODO: weight normalization is breaking attribute error somehow - float s = 1;// Q.w == 0.f ? 0.f : 1.f / Q.w; + float s = 1; // Q.w == 0.f ? 0.f : 1.f / Q.w; return fabsf(r) * s; } @@ -813,7 +892,13 @@ static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c Vector3 nbc = {eb.y * ec.z - eb.z * ec.y, eb.z * ec.x - eb.x * ec.z, eb.x * ec.y - eb.y * ec.x}; Vector3 nbd = {eb.y * ed.z - eb.z * ed.y, eb.z * ed.x - eb.x * ed.z, eb.x * ed.y - eb.y * ed.x}; - return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z <= 0; + float ndp = nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z; + float abc = nbc.x * nbc.x + nbc.y * nbc.y + nbc.z * nbc.z; + float abd = nbd.x * nbd.x + nbd.y * nbd.y + nbd.z * nbd.z; + + // scale is cos(angle); somewhat arbitrarily set to ~75 degrees + // note that the "pure" check is ndp <= 0 (90 degree cutoff) but that allows flipping through a series of close-to-90 collapses + return ndp <= 0.25f * sqrtf(abc * abd); } static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, const unsigned int* collapse_remap, unsigned int i0, unsigned int i1) @@ -1305,7 +1390,7 @@ static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices unsigned int c1 = vertex_cells[i1]; unsigned int c2 = vertex_cells[i2]; - bool single_cell = (c0 == c1) & (c0 == c2); + int single_cell = (c0 == c1) & (c0 == c2); Quadric Q; quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], single_cell ? 
3.f : 1.f); @@ -1325,7 +1410,7 @@ static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices static void fillCellReservoirs(Reservoir* cell_reservoirs, size_t cell_count, const Vector3* vertex_positions, const float* vertex_colors, size_t vertex_colors_stride, size_t vertex_count, const unsigned int* vertex_cells) { - static const float dummy_color[] = { 0.f, 0.f, 0.f }; + static const float dummy_color[] = {0.f, 0.f, 0.f}; size_t vertex_colors_stride_float = vertex_colors_stride / sizeof(float); @@ -1380,7 +1465,7 @@ static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t c static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Reservoir* cell_reservoirs, const Vector3* vertex_positions, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t vertex_count) { - static const float dummy_color[] = { 0.f, 0.f, 0.f }; + static const float dummy_color[] = {0.f, 0.f, 0.f}; size_t vertex_colors_stride_float = vertex_colors_stride / sizeof(float); @@ -1468,7 +1553,7 @@ MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = NULL; MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = NULL; #endif -size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) +size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t 
attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { using namespace meshopt; @@ -1476,7 +1561,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); assert(target_index_count <= index_count); - assert((options & ~(meshopt_SimplifyLockBorder)) == 0); + assert((options & ~(meshopt_SimplifyLockBorder | meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute)) == 0); assert(vertex_attributes_stride >= attribute_count * sizeof(float) && vertex_attributes_stride <= 256); assert(vertex_attributes_stride % sizeof(float) == 0); assert(attribute_count <= kMaxAttributes); @@ -1484,22 +1569,30 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic meshopt_Allocator allocator; unsigned int* result = destination; + if (result != indices) + memcpy(result, indices, index_count * sizeof(unsigned int)); + + // build an index remap and update indices/vertex_count to minimize the subsequent work + // note: as a consequence, errors will be computed relative to the subset extent + unsigned int* sparse_remap = NULL; + if (options & meshopt_SimplifySparse) + sparse_remap = buildSparseRemap(result, index_count, vertex_count, &vertex_count, allocator); // build adjacency information EdgeAdjacency adjacency = {}; prepareEdgeAdjacency(adjacency, index_count, vertex_count, allocator); - updateEdgeAdjacency(adjacency, indices, index_count, vertex_count, NULL); + updateEdgeAdjacency(adjacency, result, index_count, vertex_count, NULL); // build position remap that maps each vertex to the one with identical position unsigned int* remap = allocator.allocate(vertex_count); unsigned int* wedge = allocator.allocate(vertex_count); - buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, 
allocator); + buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap, allocator); // classify vertices; vertex kind determines collapse rules, see kCanCollapse unsigned char* vertex_kind = allocator.allocate(vertex_count); unsigned int* loop = allocator.allocate(vertex_count); unsigned int* loopback = allocator.allocate(vertex_count); - classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge, options); + classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge, vertex_lock, sparse_remap, options); #if TRACE size_t unique_positions = 0; @@ -1517,14 +1610,14 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic #endif Vector3* vertex_positions = allocator.allocate(vertex_count); - rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + float vertex_scale = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap); float* vertex_attributes = NULL; if (attribute_count) { vertex_attributes = allocator.allocate(vertex_count * attribute_count); - rescaleAttributes(vertex_attributes, vertex_attributes_data, vertex_count, vertex_attributes_stride, attribute_weights, attribute_count); + rescaleAttributes(vertex_attributes, vertex_attributes_data, vertex_count, vertex_attributes_stride, attribute_weights, attribute_count, sparse_remap); } Quadric* vertex_quadrics = allocator.allocate(vertex_count); @@ -1542,14 +1635,11 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic memset(attribute_gradients, 0, vertex_count * attribute_count * sizeof(QuadricGrad)); } - fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap); - fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback); + fillFaceQuadrics(vertex_quadrics, result, index_count, 
vertex_positions, remap); + fillEdgeQuadrics(vertex_quadrics, result, index_count, vertex_positions, remap, vertex_kind, loop, loopback); if (attribute_count) - fillAttributeQuadrics(attribute_quadrics, attribute_gradients, indices, index_count, vertex_positions, vertex_attributes, attribute_count, remap); - - if (result != indices) - memcpy(result, indices, index_count * sizeof(unsigned int)); + fillAttributeQuadrics(attribute_quadrics, attribute_gradients, result, index_count, vertex_positions, vertex_attributes, attribute_count, remap); #if TRACE size_t pass_count = 0; @@ -1566,7 +1656,8 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic float result_error = 0; // target_error input is linear; we need to adjust it to match quadricError units - float error_limit = target_error * target_error; + float error_scale = (options & meshopt_SimplifyErrorAbsolute) ? vertex_scale : 1.f; + float error_limit = (target_error * target_error) / (error_scale * error_scale); while (result_count > target_index_count) { @@ -1611,7 +1702,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic } #if TRACE - printf("result: %d triangles, error: %e; total %d passes\n", int(result_count), sqrtf(result_error), int(pass_count)); + printf("result: %d triangles, error: %e; total %d passes\n", int(result_count / 3), sqrtf(result_error), int(pass_count)); #endif #ifndef NDEBUG @@ -1625,21 +1716,26 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int)); #endif + // convert resulting indices back into the dense space of the larger mesh + if (sparse_remap) + for (size_t i = 0; i < result_count; ++i) + result[i] = sparse_remap[result[i]]; + // result_error is quadratic; we need to remap it back to linear if (out_result_error) - *out_result_error = sqrtf(result_error); + *out_result_error = sqrtf(result_error) * 
error_scale; return result_count; } size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { - return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, 0, NULL, 0, target_index_count, target_error, options, out_result_error); + return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, 0, NULL, 0, NULL, target_index_count, target_error, options, out_result_error); } -size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) +size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { - return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, target_index_count, target_error, options, out_result_error); + return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, 
vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, out_result_error); } size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) @@ -1692,14 +1788,14 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind // we clamp the prediction of the grid size to make sure that the search converges int grid_size = next_grid_size; - grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; + grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? max_grid - 1 : grid_size); computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); size_t triangles = countTriangles(vertex_ids, indices, index_count); #if TRACE printf("pass %d (%s): grid size %d, triangles %d, %s\n", - pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses ? "lerp" : "binary"), grid_size, int(triangles), (triangles <= target_index_count / 3) ? "under" : "over"); #endif @@ -1824,14 +1920,14 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos // we clamp the prediction of the grid size to make sure that the search converges int grid_size = next_grid_size; - grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; + grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? 
max_grid - 1 : grid_size); computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count); #if TRACE printf("pass %d (%s): grid size %d, vertices %d, %s\n", - pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses ? "lerp" : "binary"), grid_size, int(vertices), (vertices <= target_vertex_count) ? "under" : "over"); #endif diff --git a/Source/ThirdParty/meshoptimizer/stripifier.cpp b/Source/ThirdParty/meshoptimizer/stripifier.cpp index 8ce17ef3d..d57fb512b 100644 --- a/Source/ThirdParty/meshoptimizer/stripifier.cpp +++ b/Source/ThirdParty/meshoptimizer/stripifier.cpp @@ -18,7 +18,7 @@ static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int for (size_t i = 0; i < buffer_size; ++i) { unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; - unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? vb : vc; + unsigned int v = (va < vb && va < vc) ? va : (vb < vc ? 
vb : vc); if (v < iv) { diff --git a/Source/ThirdParty/meshoptimizer/vcacheoptimizer.cpp b/Source/ThirdParty/meshoptimizer/vcacheoptimizer.cpp index d4b08ba34..e4ecc71d3 100644 --- a/Source/ThirdParty/meshoptimizer/vcacheoptimizer.cpp +++ b/Source/ThirdParty/meshoptimizer/vcacheoptimizer.cpp @@ -195,9 +195,8 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned TriangleAdjacency adjacency = {}; buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); - // live triangle counts - unsigned int* live_triangles = allocator.allocate(vertex_count); - memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + // live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match + unsigned int* live_triangles = adjacency.counts; // emitted flags unsigned char* emitted_flags = allocator.allocate(face_count); @@ -261,20 +260,16 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned unsigned int index = cache[i]; cache_new[cache_write] = index; - cache_write += (index != a && index != b && index != c); + cache_write += (index != a) & (index != b) & (index != c); } unsigned int* cache_temp = cache; cache = cache_new, cache_new = cache_temp; cache_count = cache_write > cache_size ? 
cache_size : cache_write; - // update live triangle counts - live_triangles[a]--; - live_triangles[b]--; - live_triangles[c]--; - // remove emitted triangle from adjacency data // this makes sure that we spend less time traversing these lists on subsequent iterations + // live triangle counts are updated as a byproduct of these adjustments for (size_t k = 0; k < 3; ++k) { unsigned int index = indices[current_triangle * 3 + k]; diff --git a/Source/ThirdParty/meshoptimizer/vertexcodec.cpp b/Source/ThirdParty/meshoptimizer/vertexcodec.cpp index 8ab0662d8..94f7a1adc 100644 --- a/Source/ThirdParty/meshoptimizer/vertexcodec.cpp +++ b/Source/ThirdParty/meshoptimizer/vertexcodec.cpp @@ -245,7 +245,7 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, } } - int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 2 : 3; + int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2 ? 1 : (best_bits == 4 ? 2 : 3)); assert((1 << bitslog2) == best_bits); size_t header_offset = i / kByteGroupSize; From 7ca45e1f54be7c3fb275f1c422722d71714d60a9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 8 Aug 2024 21:25:08 +0200 Subject: [PATCH 260/292] Add clearing render target pool textures to pink during in Debug builds --- Source/Engine/Graphics/RenderTargetPool.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Source/Engine/Graphics/RenderTargetPool.cpp b/Source/Engine/Graphics/RenderTargetPool.cpp index 0d31f4077..c55b37e95 100644 --- a/Source/Engine/Graphics/RenderTargetPool.cpp +++ b/Source/Engine/Graphics/RenderTargetPool.cpp @@ -2,6 +2,9 @@ #include "RenderTargetPool.h" #include "GPUDevice.h" +#if BUILD_DEBUG +#include "GPUContext.h" +#endif #include "Engine/Core/Log.h" #include "Engine/Engine/Engine.h" #include "Engine/Profiler/ProfilerCPU.h" @@ -45,6 +48,13 @@ GPUTexture* RenderTargetPool::Get(const GPUTextureDescription& desc) { PROFILE_CPU(); + // Initialize render targets with pink color in debug builds 
to prevent incorrect data usage (GPU doesn't clear texture upon creation) +#if BUILD_DEBUG + #define RENDER_TARGET_POOL_CLEAR() if (desc.Dimensions == TextureDimensions::Texture && EnumHasAllFlags(desc.Flags, GPUTextureFlags::RenderTarget) && GPUDevice::Instance->IsRendering() && IsInMainThread()) GPUDevice::Instance->GetMainContext()->Clear(e.RT->View(), Color::Pink); +#else + #define RENDER_TARGET_POOL_CLEAR() +#endif + // Find free render target with the same properties const uint32 descHash = GetHash(desc); for (int32 i = 0; i < TemporaryRTs.Count(); i++) @@ -54,6 +64,7 @@ GPUTexture* RenderTargetPool::Get(const GPUTextureDescription& desc) { // Mark as used e.IsOccupied = true; + RENDER_TARGET_POOL_CLEAR(); return e.RT; } } @@ -82,7 +93,9 @@ GPUTexture* RenderTargetPool::Get(const GPUTextureDescription& desc) e.RT = rt; e.DescriptionHash = descHash; TemporaryRTs.Add(e); + RENDER_TARGET_POOL_CLEAR(); +#undef RENDER_TARGET_POOL_CLEAR return rt; } From 143014e9c9f8a2e7efbbcba94e907e70b86545a5 Mon Sep 17 00:00:00 2001 From: envision3d Date: Fri, 9 Aug 2024 00:16:38 -0500 Subject: [PATCH 261/292] fix crash with prefab window open on script reload --- Source/Editor/Viewport/PrefabWindowViewport.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Editor/Viewport/PrefabWindowViewport.cs b/Source/Editor/Viewport/PrefabWindowViewport.cs index f51abb1fc..daf0950f3 100644 --- a/Source/Editor/Viewport/PrefabWindowViewport.cs +++ b/Source/Editor/Viewport/PrefabWindowViewport.cs @@ -627,7 +627,7 @@ namespace FlaxEditor.Viewport // Debug draw all actors in prefab foreach (var child in SceneGraphRoot.ChildNodes) { - if (child is not ActorNode actorNode) + if (child is not ActorNode actorNode || !actorNode.Actor) continue; DebugDraw.DrawActorsTree(actorNode.Actor); } From 43afa76e604421a8b5ca17df3a945cbd53691244 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 9 Aug 2024 18:26:16 +0200 Subject: [PATCH 262/292] Optimize `StaticModel` draw into SDF 
and SurfaceAtlas if SDF texture is unused --- Source/Engine/Level/Actors/StaticModel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Level/Actors/StaticModel.cpp b/Source/Engine/Level/Actors/StaticModel.cpp index c735b2b5f..c6d130357 100644 --- a/Source/Engine/Level/Actors/StaticModel.cpp +++ b/Source/Engine/Level/Actors/StaticModel.cpp @@ -324,13 +324,13 @@ void StaticModel::Draw(RenderContext& renderContext) return; if (renderContext.View.Pass == DrawPass::GlobalSDF) { - if (EnumHasAnyFlags(DrawModes, DrawPass::GlobalSDF)) + if (EnumHasAnyFlags(DrawModes, DrawPass::GlobalSDF) && Model->SDF.Texture) GlobalSignDistanceFieldPass::Instance()->RasterizeModelSDF(this, Model->SDF, _transform, _box); return; } if (renderContext.View.Pass == DrawPass::GlobalSurfaceAtlas) { - if (EnumHasAnyFlags(DrawModes, DrawPass::GlobalSurfaceAtlas)) + if (EnumHasAnyFlags(DrawModes, DrawPass::GlobalSurfaceAtlas) && Model->SDF.Texture) GlobalSurfaceAtlasPass::Instance()->RasterizeActor(this, this, _sphere, _transform, Model->LODs.Last().GetBox()); return; } From 0023014cc2c4288d9e6a833a57ba1593583c8746 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 9 Aug 2024 18:39:47 +0200 Subject: [PATCH 263/292] Add `MaterialInstance.ResetParameters` --- Source/Engine/Content/Assets/MaterialInstance.cpp | 8 ++++++++ Source/Engine/Content/Assets/MaterialInstance.h | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/Source/Engine/Content/Assets/MaterialInstance.cpp b/Source/Engine/Content/Assets/MaterialInstance.cpp index 4afcde7b9..ca9e793b3 100644 --- a/Source/Engine/Content/Assets/MaterialInstance.cpp +++ b/Source/Engine/Content/Assets/MaterialInstance.cpp @@ -291,6 +291,14 @@ void MaterialInstance::SetBaseMaterial(MaterialBase* baseMaterial) } } +void MaterialInstance::ResetParameters() +{ + for (auto& param : Params) + { + param.SetIsOverride(false); + } +} + #if USE_EDITOR bool MaterialInstance::Save(const StringView& path) diff --git 
a/Source/Engine/Content/Assets/MaterialInstance.h b/Source/Engine/Content/Assets/MaterialInstance.h index 3e9f2ec2a..9d87cdf5b 100644 --- a/Source/Engine/Content/Assets/MaterialInstance.h +++ b/Source/Engine/Content/Assets/MaterialInstance.h @@ -28,6 +28,11 @@ public: /// The base material. API_PROPERTY() void SetBaseMaterial(MaterialBase* baseMaterial); + /// + /// Resets all parameters back to the base material (including disabling parameter overrides). + /// + API_FUNCTION() void ResetParameters(); + #if USE_EDITOR /// From 7224494ce6d4585ee5613479c811a6c81534448e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 9 Aug 2024 18:44:47 +0200 Subject: [PATCH 264/292] Add improved material instance setup on model import with base material --- Source/Engine/Tools/ModelTool/ModelTool.cpp | 25 ++++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index 29482c3f6..abf769304 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -1071,6 +1071,7 @@ void TrySetupMaterialParameter(MaterialInstance* instance, Span par if (StringUtils::CompareIgnoreCase(name, param.GetName().Get()) != 0) continue; param.SetValue(value); + param.SetIsOverride(true); return; } } @@ -1490,14 +1491,26 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option if (auto* materialInstance = Content::Load(assetPath)) { materialInstance->SetBaseMaterial(options.InstanceToImportAs); + materialInstance->ResetParameters(); // Customize base material based on imported material (blind guess based on the common names used in materials) - const Char* diffuseColorNames[] = { TEXT("color"), TEXT("col"), TEXT("diffuse"), TEXT("basecolor"), TEXT("base color") }; - TrySetupMaterialParameter(materialInstance, ToSpan(diffuseColorNames, ARRAY_COUNT(diffuseColorNames)), material.Diffuse.Color, 
MaterialParameterType::Color); - const Char* emissiveColorNames[] = { TEXT("emissive"), TEXT("emission"), TEXT("light") }; - TrySetupMaterialParameter(materialInstance, ToSpan(emissiveColorNames, ARRAY_COUNT(emissiveColorNames)), material.Emissive.Color, MaterialParameterType::Color); - const Char* opacityValueNames[] = { TEXT("opacity"), TEXT("alpha") }; - TrySetupMaterialParameter(materialInstance, ToSpan(opacityValueNames, ARRAY_COUNT(opacityValueNames)), material.Opacity.Value, MaterialParameterType::Float); + Texture* tex; +#define TRY_SETUP_TEXTURE_PARAM(component, names, type) if (material.component.TextureIndex != -1 && ((tex = Content::LoadAsync(data.Textures[material.component.TextureIndex].AssetID)))) TrySetupMaterialParameter(materialInstance, ToSpan(names, ARRAY_COUNT(names)), tex, MaterialParameterType::type); + const Char* diffuseNames[] = { TEXT("color"), TEXT("col"), TEXT("diffuse"), TEXT("basecolor"), TEXT("base color"), TEXT("tint") }; + TrySetupMaterialParameter(materialInstance, ToSpan(diffuseNames, ARRAY_COUNT(diffuseNames)), material.Diffuse.Color, MaterialParameterType::Color); + TRY_SETUP_TEXTURE_PARAM(Diffuse, diffuseNames, Texture); + const Char* normalMapNames[] = { TEXT("normals"), TEXT("normalmap"), TEXT("normal map"), TEXT("normal") }; + TRY_SETUP_TEXTURE_PARAM(Normals, normalMapNames, NormalMap); + const Char* emissiveNames[] = { TEXT("emissive"), TEXT("emission"), TEXT("light"), TEXT("glow") }; + TrySetupMaterialParameter(materialInstance, ToSpan(emissiveNames, ARRAY_COUNT(emissiveNames)), material.Emissive.Color, MaterialParameterType::Color); + TRY_SETUP_TEXTURE_PARAM(Emissive, emissiveNames, Texture); + const Char* opacityNames[] = { TEXT("opacity"), TEXT("alpha") }; + TrySetupMaterialParameter(materialInstance, ToSpan(opacityNames, ARRAY_COUNT(opacityNames)), material.Opacity.Value, MaterialParameterType::Float); + TRY_SETUP_TEXTURE_PARAM(Opacity, opacityNames, Texture); + const Char* roughnessNames[] = { TEXT("roughness"), 
TEXT("rough") }; + TrySetupMaterialParameter(materialInstance, ToSpan(roughnessNames, ARRAY_COUNT(roughnessNames)), material.Roughness.Value, MaterialParameterType::Float); + TRY_SETUP_TEXTURE_PARAM(Roughness, roughnessNames, Texture); +#undef TRY_SETUP_TEXTURE_PARAM materialInstance->Save(); } From 97ab8940f050d203acc54adc8cf89f350cc57e40 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 10 Aug 2024 20:45:59 +0200 Subject: [PATCH 265/292] Fix bug with missing baked SDF data on save when asset data chunk memory was freed by GC --- Source/Engine/Content/Assets/Model.cpp | 6 +++++- Source/Engine/Content/Storage/FlaxChunk.h | 7 ++++++- Source/Engine/Content/Storage/FlaxStorage.cpp | 5 +++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Content/Assets/Model.cpp b/Source/Engine/Content/Assets/Model.cpp index 22de66263..3da53ea6b 100644 --- a/Source/Engine/Content/Assets/Model.cpp +++ b/Source/Engine/Content/Assets/Model.cpp @@ -682,7 +682,11 @@ bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, f #if USE_EDITOR // Set asset data if (cacheData) - GetOrCreateChunk(15)->Data.Copy(sdfStream.GetHandle(), sdfStream.GetPosition()); + { + auto chunk = GetOrCreateChunk(15); + chunk->Data.Copy(sdfStream.GetHandle(), sdfStream.GetPosition()); + chunk->Flags |= FlaxChunkFlags::KeepInMemory; // Prevent GC-ing chunk data so it will be properly saved + } #endif return false; diff --git a/Source/Engine/Content/Storage/FlaxChunk.h b/Source/Engine/Content/Storage/FlaxChunk.h index 5281aea28..7aa0daf8e 100644 --- a/Source/Engine/Content/Storage/FlaxChunk.h +++ b/Source/Engine/Content/Storage/FlaxChunk.h @@ -11,7 +11,7 @@ enum class FlaxChunkFlags { /// - /// The none. + /// Nothing. /// None = 0, @@ -19,6 +19,11 @@ enum class FlaxChunkFlags /// Compress chunk data using LZ4 algorithm. /// CompressedLZ4 = 1, + + /// + /// Prevents chunk file data from being unloaded if unused for a certain amount of time. 
Runtime-only flag, not saved with the asset. + /// + KeepInMemory = 2, }; DECLARE_ENUM_OPERATORS(FlaxChunkFlags); diff --git a/Source/Engine/Content/Storage/FlaxStorage.cpp b/Source/Engine/Content/Storage/FlaxStorage.cpp index c3e377110..29bcd5bfc 100644 --- a/Source/Engine/Content/Storage/FlaxStorage.cpp +++ b/Source/Engine/Content/Storage/FlaxStorage.cpp @@ -943,7 +943,8 @@ bool FlaxStorage::Create(WriteStream* stream, const AssetInitData* data, int32 d { FlaxChunk* chunk = chunks[i]; stream->WriteBytes(&chunk->LocationInFile, sizeof(chunk->LocationInFile)); - stream->WriteInt32((int32)chunk->Flags); + FlaxChunkFlags flags = chunk->Flags & ~(FlaxChunkFlags::KeepInMemory); // Skip saving runtime-only flags + stream->WriteInt32((int32)flags); } #if ASSETS_LOADING_EXTRA_VERIFICATION @@ -1410,7 +1411,7 @@ void FlaxStorage::Tick(double time) { auto chunk = _chunks.Get()[i]; const bool wasUsed = (time - chunk->LastAccessTime) < unusedDataChunksLifetime; - if (!wasUsed && chunk->IsLoaded()) + if (!wasUsed && chunk->IsLoaded() && EnumHasNoneFlags(chunk->Flags, FlaxChunkFlags::KeepInMemory)) { chunk->Unload(); } From 8f18d654ee5a1bab67e4198077c887bdbc70f936 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 10 Aug 2024 20:46:33 +0200 Subject: [PATCH 266/292] Add copy/paste for material instance parameters via context menu on `Parameters` group header --- .../CustomEditors/Dedicated/AnimatedModelEditor.cs | 2 +- Source/Editor/CustomEditors/Editors/GenericEditor.cs | 9 +++++++++ Source/Editor/Surface/SurfaceUtils.cs | 5 +++++ Source/Editor/Windows/Assets/MaterialInstanceWindow.cs | 2 +- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Source/Editor/CustomEditors/Dedicated/AnimatedModelEditor.cs b/Source/Editor/CustomEditors/Dedicated/AnimatedModelEditor.cs index 6f1ac91d0..eae7f9024 100644 --- a/Source/Editor/CustomEditors/Dedicated/AnimatedModelEditor.cs +++ b/Source/Editor/CustomEditors/Dedicated/AnimatedModelEditor.cs @@ -20,7 +20,7 @@ namespace 
FlaxEditor.CustomEditors.Dedicated // Show instanced parameters to view/edit at runtime if (Values.IsSingleObject && Editor.Instance.StateMachine.IsPlayMode) { - var group = layout.Group("Parameters"); + var group = SurfaceUtils.InitGraphParametersGroup(layout); group.Panel.Open(false); group.Panel.IndexInParent -= 2; diff --git a/Source/Editor/CustomEditors/Editors/GenericEditor.cs b/Source/Editor/CustomEditors/Editors/GenericEditor.cs index f624600b4..4eb523b74 100644 --- a/Source/Editor/CustomEditors/Editors/GenericEditor.cs +++ b/Source/Editor/CustomEditors/Editors/GenericEditor.cs @@ -533,6 +533,15 @@ namespace FlaxEditor.CustomEditors.Editors _groupsPool.Add(groups); } + internal static GroupElement OnGroup(LayoutElementsContainer layout, string name) + { + // Add new group + var group = layout.Group(name); + group.Panel.Tag = group; + group.Panel.MouseButtonRightClicked += OnGroupPanelMouseButtonRightClicked; + return group; + } + internal static LayoutElementsContainer OnGroup(LayoutElementsContainer layout, EditorDisplayAttribute display) { if (display?.Group != null) diff --git a/Source/Editor/Surface/SurfaceUtils.cs b/Source/Editor/Surface/SurfaceUtils.cs index ad77aa510..e323475a8 100644 --- a/Source/Editor/Surface/SurfaceUtils.cs +++ b/Source/Editor/Surface/SurfaceUtils.cs @@ -100,6 +100,11 @@ namespace FlaxEditor.Surface } } + internal static GroupElement InitGraphParametersGroup(LayoutElementsContainer layout) + { + return CustomEditors.Editors.GenericEditor.OnGroup(layout, "Parameters"); + } + internal static GraphParameterData[] InitGraphParameters(IEnumerable parameters, Material material) { int count = parameters.Count(); diff --git a/Source/Editor/Windows/Assets/MaterialInstanceWindow.cs b/Source/Editor/Windows/Assets/MaterialInstanceWindow.cs index 28a4273b3..480b39ff6 100644 --- a/Source/Editor/Windows/Assets/MaterialInstanceWindow.cs +++ b/Source/Editor/Windows/Assets/MaterialInstanceWindow.cs @@ -261,7 +261,7 @@ namespace 
FlaxEditor.Windows.Assets removeButton.Clicked += OnOverrideAll; } - var parametersGroup = layout.Group("Parameters"); + var parametersGroup = SurfaceUtils.InitGraphParametersGroup(layout); var baseMaterial = materialInstance.BaseMaterial; var material = baseMaterial; if (material) From bcb0200435a6e115265ccfaabade3e3f60015308 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 12 Aug 2024 10:55:27 +0200 Subject: [PATCH 267/292] Fix GPU-based SDF generation to reject negative distance from far away triangles --- Content/Shaders/SDF.flax | 4 ++-- Source/Shaders/SDF.shader | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Content/Shaders/SDF.flax b/Content/Shaders/SDF.flax index 9f9a6a261..ebb5875c6 100644 --- a/Content/Shaders/SDF.flax +++ b/Content/Shaders/SDF.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd485ffce3c1d002621d795968cfda9c68555600157332dde91618d75881207e -size 7903 +oid sha256:462d122e2c49303b45658b7a0498a847ca8b1a4427db0ece8027e7c2946fdf4d +size 8092 diff --git a/Source/Shaders/SDF.shader b/Source/Shaders/SDF.shader index 7976558c7..6da0877f2 100644 --- a/Source/Shaders/SDF.shader +++ b/Source/Shaders/SDF.shader @@ -142,6 +142,8 @@ void CS_RasterizeTriangle(uint3 DispatchThreadId : SV_DispatchThreadID) int voxelIndex = GetVoxelIndex(voxelCoord); float3 voxelPos = GetVoxelPos(voxelCoord); float distance = SignedDistancePointToTriangle(voxelPos, v0, v1, v2); + if (distance < -10.0f) // TODO: find a better way to reject negative distance from degenerate triangles that break SDF shape + distance = abs(distance); InterlockedMin(SDF[voxelIndex], FloatFlip3(distance)); } } From 63bee0c78d2afc25519a0d4cd48990ab4b99ce5b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 12 Aug 2024 15:34:08 +0200 Subject: [PATCH 268/292] Add `IndirectLightingIntensity` to `Sky` for GI intensity control --- Source/Engine/Level/Actors/Sky.cpp | 14 ++++++++++++++ Source/Engine/Level/Actors/Sky.h | 13 ++++++++++--- 
Source/Engine/Level/Actors/Skybox.cpp | 5 +++++ Source/Engine/Level/Actors/Skybox.h | 1 + Source/Engine/Renderer/DrawCall.h | 2 ++ .../GI/DynamicDiffuseGlobalIllumination.cpp | 3 ++- Source/Shaders/GI/DDGI.shader | 4 ++-- 7 files changed, 36 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Level/Actors/Sky.cpp b/Source/Engine/Level/Actors/Sky.cpp index 2e1d46161..1f7a4b33e 100644 --- a/Source/Engine/Level/Actors/Sky.cpp +++ b/Source/Engine/Level/Actors/Sky.cpp @@ -16,6 +16,9 @@ #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Level/Scene/SceneRendering.h" +#if USE_EDITOR +#include "Engine/Renderer/Lightmaps.h" +#endif GPU_CB_STRUCT(Data { Matrix WVP; @@ -66,6 +69,10 @@ void Sky::InitConfig(ShaderAtmosphericFogData& config) const config.AtmosphericFogSunPower = SunPower; config.AtmosphericFogDensityOffset = 0.0f; +#if USE_EDITOR + if (IsRunningRadiancePass) + config.AtmosphericFogSunPower *= IndirectLightingIntensity; +#endif if (SunLight) { @@ -140,6 +147,7 @@ void Sky::Serialize(SerializeStream& stream, const void* otherObj) SERIALIZE_MEMBER(Sun, SunLight); SERIALIZE(SunDiscScale); SERIALIZE(SunPower); + SERIALIZE(IndirectLightingIntensity); } void Sky::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) @@ -150,6 +158,7 @@ void Sky::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) DESERIALIZE_MEMBER(Sun, SunLight); DESERIALIZE(SunDiscScale); DESERIALIZE(SunPower); + DESERIALIZE(IndirectLightingIntensity); } bool Sky::HasContentLoaded() const @@ -204,6 +213,11 @@ bool Sky::IsDynamicSky() const return !IsStatic() || (SunLight && !SunLight->IsStatic()); } +float Sky::GetIndirectLightingIntensity() const +{ + return IndirectLightingIntensity; +} + void Sky::ApplySky(GPUContext* context, RenderContext& renderContext, const Matrix& world) { // Get precomputed cache and bind it to the pipeline diff --git a/Source/Engine/Level/Actors/Sky.h 
b/Source/Engine/Level/Actors/Sky.h index 313843748..bbc58888f 100644 --- a/Source/Engine/Level/Actors/Sky.h +++ b/Source/Engine/Level/Actors/Sky.h @@ -31,21 +31,27 @@ public: /// /// Directional light that is used to simulate the sun. /// - API_FIELD(Attributes="EditorOrder(10), DefaultValue(null), EditorDisplay(\"Sky\")") + API_FIELD(Attributes="EditorOrder(10), EditorDisplay(\"Sky\")") ScriptingObjectReference SunLight; /// /// The sun disc scale. /// - API_FIELD(Attributes="EditorOrder(20), DefaultValue(2.0f), EditorDisplay(\"Sky\"), Limit(0, 100, 0.01f)") + API_FIELD(Attributes="EditorOrder(20), EditorDisplay(\"Sky\"), Limit(0, 100, 0.01f)") float SunDiscScale = 2.0f; /// /// The sun power. /// - API_FIELD(Attributes="EditorOrder(30), DefaultValue(8.0f), EditorDisplay(\"Sky\"), Limit(0, 1000, 0.01f)") + API_FIELD(Attributes="EditorOrder(30), EditorDisplay(\"Sky\"), Limit(0, 1000, 0.01f)") float SunPower = 8.0f; + /// + /// Controls how much sky will contribute indirect lighting. When set to 0, there is no GI from the sky. The default value is 1. 
+ /// + API_FIELD(Attributes="EditorOrder(40), Limit(0, 100, 0.1f), EditorDisplay(\"Sky\")") + float IndirectLightingIntensity = 1.0f; + private: #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) @@ -76,6 +82,7 @@ public: // [ISkyRenderer] bool IsDynamicSky() const override; + float GetIndirectLightingIntensity() const override; void ApplySky(GPUContext* context, RenderContext& renderContext, const Matrix& world) override; protected: diff --git a/Source/Engine/Level/Actors/Skybox.cpp b/Source/Engine/Level/Actors/Skybox.cpp index e59e3a3e4..51b288c42 100644 --- a/Source/Engine/Level/Actors/Skybox.cpp +++ b/Source/Engine/Level/Actors/Skybox.cpp @@ -94,6 +94,11 @@ bool Skybox::IsDynamicSky() const return !IsStatic(); } +float Skybox::GetIndirectLightingIntensity() const +{ + return 1.0f; +} + void Skybox::ApplySky(GPUContext* context, RenderContext& renderContext, const Matrix& world) { // Prepare mock draw call data diff --git a/Source/Engine/Level/Actors/Skybox.h b/Source/Engine/Level/Actors/Skybox.h index 46adef7ca..240c3b62a 100644 --- a/Source/Engine/Level/Actors/Skybox.h +++ b/Source/Engine/Level/Actors/Skybox.h @@ -70,6 +70,7 @@ public: // [ISkyRenderer] bool IsDynamicSky() const override; + float GetIndirectLightingIntensity() const override; void ApplySky(GPUContext* context, RenderContext& renderContext, const Matrix& world) override; protected: diff --git a/Source/Engine/Renderer/DrawCall.h b/Source/Engine/Renderer/DrawCall.h index 448d1361e..a3324e1f3 100644 --- a/Source/Engine/Renderer/DrawCall.h +++ b/Source/Engine/Renderer/DrawCall.h @@ -40,6 +40,8 @@ public: /// virtual bool IsDynamicSky() const = 0; + virtual float GetIndirectLightingIntensity() const = 0; + /// /// Apply sky material/shader state to the GPU pipeline with custom parameters set (render to GBuffer). 
/// diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 1169d7ff6..d615e2515 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -60,7 +60,7 @@ GPU_CB_STRUCT(Data0 { GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; ShaderGBufferData GBuffer; Float4 RaysRotation; - float Padding0; + float SkyboxIntensity; uint32 ProbesCount; float ResetBlend; float TemporalTime; @@ -533,6 +533,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont } data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f; data.ViewDir = renderContext.View.Direction; + data.SkyboxIntensity = renderContext.List->Sky ? renderContext.List->Sky->GetIndirectLightingIntensity() : 1.0f; GBufferPass::SetInputs(renderContext.View, data.GBuffer); context->UpdateCB(_cb0, &data); context->BindCB(0, _cb0); diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 89d0fdea3..7a58b03df 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -35,7 +35,7 @@ GlobalSDFData GlobalSDF; GlobalSurfaceAtlasData GlobalSurfaceAtlas; GBufferData GBuffer; float4 RaysRotation; -float Padding0; +float SkyboxIntensity; uint ProbesCount; float ResetBlend; float TemporalTime; @@ -397,7 +397,7 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) else { // Ray hits sky - radiance.rgb = Skybox.SampleLevel(SamplerLinearClamp, probeRayDirection, 0).rgb; + radiance.rgb = Skybox.SampleLevel(SamplerLinearClamp, probeRayDirection, 0).rgb * SkyboxIntensity; radiance.a = 1e27f; // Sky is the limit } From 824d6701d82df864cead332f5109987ffebf422d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 12 Aug 2024 15:34:39 +0200 Subject: [PATCH 269/292] Add improved default value detection for scripting 
object references in bindings generator --- .../Bindings/BindingsGenerator.CSharp.cs | 15 ++++++++++++++- Source/Tools/Flax.Build/Bindings/ClassInfo.cs | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs index 2b97a7168..7d8ba7d43 100644 --- a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs +++ b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs @@ -102,8 +102,22 @@ namespace Flax.Build.Bindings private static string GenerateCSharpDefaultValueNativeToManaged(BuildData buildData, string value, ApiTypeInfo caller, TypeInfo valueType = null, bool attribute = false, string managedType = null) { + ApiTypeInfo apiType = null; if (string.IsNullOrEmpty(value)) + { + if (attribute && valueType != null && !valueType.IsArray) + { + //if (valueType.Type == "") + //ScriptingObjectReference + apiType = FindApiTypeInfo(buildData, valueType, caller); + + // Object reference + if (apiType != null && apiType.IsScriptingObject) + return "null"; + } + return null; + } // Special case for Engine TEXT macro if (value.StartsWith("TEXT(\"") && value.EndsWith("\")")) @@ -150,7 +164,6 @@ namespace Flax.Build.Bindings value = value.Replace("::", "."); var dot = value.LastIndexOf('.'); - ApiTypeInfo apiType = null; if (dot != -1) { var type = new TypeInfo(value.Substring(0, dot)); diff --git a/Source/Tools/Flax.Build/Bindings/ClassInfo.cs b/Source/Tools/Flax.Build/Bindings/ClassInfo.cs index 021b47989..ace04a2e0 100644 --- a/Source/Tools/Flax.Build/Bindings/ClassInfo.cs +++ b/Source/Tools/Flax.Build/Bindings/ClassInfo.cs @@ -17,6 +17,8 @@ namespace Flax.Build.Bindings "ScriptingObject", "ManagedScriptingObject", "PersistentScriptingObject", + "ScriptingObjectReference", + "AssetReference", "BinaryAsset", "SceneObject", "Asset", From fbd3b9372bfda4c7379d1fd7a19c13483249dc29 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 12 
Aug 2024 15:35:02 +0200 Subject: [PATCH 270/292] Fix crash when reading invalid data from file --- Source/Engine/Content/Storage/FlaxStorage.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Content/Storage/FlaxStorage.cpp b/Source/Engine/Content/Storage/FlaxStorage.cpp index 29bcd5bfc..1af7c03ec 100644 --- a/Source/Engine/Content/Storage/FlaxStorage.cpp +++ b/Source/Engine/Content/Storage/FlaxStorage.cpp @@ -1088,7 +1088,7 @@ bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data) { int32 chunkIndex; stream->ReadInt32(&chunkIndex); - if (chunkIndex >= _chunks.Count()) + if (chunkIndex < -1 || chunkIndex >= _chunks.Count()) { LOG(Warning, "Invalid chunks mapping."); return true; @@ -1145,7 +1145,7 @@ bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data) { int32 chunkIndex; stream->ReadInt32(&chunkIndex); - if (chunkIndex >= _chunks.Count()) + if (chunkIndex < -1 || chunkIndex >= _chunks.Count()) { LOG(Warning, "Invalid chunks mapping."); return true; @@ -1200,7 +1200,7 @@ bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data) { int32 chunkIndex; stream->ReadInt32(&chunkIndex); - if (chunkIndex >= _chunks.Count()) + if (chunkIndex < -1 || chunkIndex >= _chunks.Count()) { LOG(Warning, "Invalid chunks mapping."); return true; From f025f923ae3a7cea3a2842f00adf35a44f64df75 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 12 Aug 2024 15:35:29 +0200 Subject: [PATCH 271/292] Increase limit on Global Surface Atlas objects culling to prevent artifacts in dense scenes --- Source/Shaders/GI/GlobalSurfaceAtlas.shader | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index f8066549f..0d9532041 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -203,7 +203,7 @@ RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : 
register(u1); Buffer GlobalSurfaceAtlasObjects : register(t0); Buffer GlobalSurfaceAtlasObjectsList : register(t1); -#define GLOBAL_SURFACE_ATLAS_SHARED_CULL_SIZE 255 // Limit of objects that can be culled for a whole group of 4x4x4 threads (64 chunks) +#define GLOBAL_SURFACE_ATLAS_SHARED_CULL_SIZE 511 // Limit of objects that can be culled for a whole group of 4x4x4 threads (64 chunks) groupshared uint SharedCulledObjectsCount; groupshared uint SharedCulledObjects[GLOBAL_SURFACE_ATLAS_SHARED_CULL_SIZE]; From 596ad18ce360aa0e5aad53685e9b7edf19474269 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 12 Aug 2024 18:32:33 +0200 Subject: [PATCH 272/292] Add smooth light brightness fade out at last 10% of `ViewDistance` (if used) --- Source/Engine/Level/Actors/DirectionalLight.cpp | 4 ++-- Source/Engine/Level/Actors/Light.h | 15 ++++++++++++++- Source/Engine/Level/Actors/PointLight.cpp | 4 ++-- Source/Engine/Level/Actors/SpotLight.cpp | 4 ++-- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Level/Actors/DirectionalLight.cpp b/Source/Engine/Level/Actors/DirectionalLight.cpp index 97f9c3b58..5e52b4e74 100644 --- a/Source/Engine/Level/Actors/DirectionalLight.cpp +++ b/Source/Engine/Level/Actors/DirectionalLight.cpp @@ -18,11 +18,11 @@ void DirectionalLight::Draw(RenderContext& renderContext) { float brightness = Brightness; AdjustBrightness(renderContext.View, brightness); - const Float3 position = GetPosition() - renderContext.View.Origin; + Float3 position; if (Brightness > ZeroTolerance && EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::DirectionalLights) && EnumHasAnyFlags(renderContext.View.Pass, DrawPass::GBuffer) - && (ViewDistance < ZeroTolerance || Float3::DistanceSquared(renderContext.View.Position, position) < ViewDistance * ViewDistance)) + && CheckViewDistance(renderContext.View.Position, renderContext.View.Origin, position, brightness)) { RenderDirectionalLightData data; data.Position = position; diff --git 
a/Source/Engine/Level/Actors/Light.h b/Source/Engine/Level/Actors/Light.h index 9c3d6ff73..3138d996f 100644 --- a/Source/Engine/Level/Actors/Light.h +++ b/Source/Engine/Level/Actors/Light.h @@ -29,7 +29,7 @@ public: float Brightness = 3.14f; /// - /// Controls light visibility range. The distance at which the light becomes completely faded. Use a value of 0 to always draw light. + /// Controls light visibility range. The distance at which the light becomes completely faded (blend happens on the last 10% of that range). Use a value of 0 to always draw light. /// API_FIELD(Attributes="EditorOrder(35), Limit(0, float.MaxValue, 10.0f), EditorDisplay(\"Light\")") float ViewDistance = 0.0f; @@ -56,6 +56,19 @@ protected: // Adjust the light brightness used during rendering (called by light types inside SetupLightData callback) void AdjustBrightness(const RenderView& view, float& brightness) const; + FORCE_INLINE bool CheckViewDistance(const Float3& viewPosition, const Float3& viewOrigin, Float3& position, float& brightness) const + { + position = _transform.Translation - viewOrigin; + if (ViewDistance > ZeroTolerance) + { + const float dst2 = Vector3::DistanceSquared(viewPosition, position); + const float dst = Math::Sqrt(dst2); + brightness *= Math::Remap(dst, 0.9f * ViewDistance, ViewDistance, 1.0f, 0.0f); + return dst < ViewDistance; + } + return true; + } + public: // [Actor] void OnEnable() override; diff --git a/Source/Engine/Level/Actors/PointLight.cpp b/Source/Engine/Level/Actors/PointLight.cpp index 3c85be612..fbe3fd9e6 100644 --- a/Source/Engine/Level/Actors/PointLight.cpp +++ b/Source/Engine/Level/Actors/PointLight.cpp @@ -82,13 +82,13 @@ void PointLight::Draw(RenderContext& renderContext) { float brightness = ComputeBrightness(); AdjustBrightness(renderContext.View, brightness); - const Float3 position = GetPosition() - renderContext.View.Origin; + Float3 position; const float radius = GetScaledRadius(); if (EnumHasAnyFlags(renderContext.View.Flags, 
ViewFlags::PointLights) && EnumHasAnyFlags(renderContext.View.Pass, DrawPass::GBuffer) && brightness > ZeroTolerance && radius > ZeroTolerance - && (ViewDistance < ZeroTolerance || Vector3::DistanceSquared(renderContext.View.Position, position) < ViewDistance * ViewDistance)) + && CheckViewDistance(renderContext.View.Position, renderContext.View.Origin, position, brightness)) { RenderPointLightData data; data.Position = position; diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index 38457c94e..9494c14fa 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -130,7 +130,7 @@ void SpotLight::Draw(RenderContext& renderContext) { float brightness = ComputeBrightness(); AdjustBrightness(renderContext.View, brightness); - const Float3 position = GetPosition() - renderContext.View.Origin; + Float3 position; const float radius = GetScaledRadius(); const float outerConeAngle = GetOuterConeAngle(); if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::SpotLights) @@ -138,7 +138,7 @@ void SpotLight::Draw(RenderContext& renderContext) && brightness > ZeroTolerance && radius > ZeroTolerance && outerConeAngle > ZeroTolerance - && (ViewDistance < ZeroTolerance || Vector3::DistanceSquared(renderContext.View.Position, position) < ViewDistance * ViewDistance)) + && CheckViewDistance(renderContext.View.Position, renderContext.View.Origin, position, brightness)) { RenderSpotLightData data; data.Position = position; From 75a97a30fe9756f6d4529cd4f5ffa4a244926484 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 12 Aug 2024 19:16:16 +0200 Subject: [PATCH 273/292] Update assets --- Content/Shaders/GI/DDGI.flax | 4 ++-- Content/Shaders/GI/GlobalSurfaceAtlas.flax | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax index 72e02aebb..8fe864af0 100644 --- a/Content/Shaders/GI/DDGI.flax +++ 
b/Content/Shaders/GI/DDGI.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:571c028a427297ae250a600539291ffb5e830c81cd6b4998f2c713108fa2fb27 -size 32878 +oid sha256:ed501143275171e50f263407114875e70da9915bd3ec773b80744d82a57f6e49 +size 32903 diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax index 184f9f0d2..fa75716d9 100644 --- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax +++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92cb1700c5bff4146d16c277112d741606c6292e50ed52b0ed27108da8976b00 +oid sha256:c87a431fc7230f3d345ce131fdb1727ffe45874b55ed062feff73aabed514f59 size 13194 From e7f1afdfaa00726527c6299d750d77aca929188e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 15 Aug 2024 12:04:39 +0200 Subject: [PATCH 274/292] Fix GlobalSDF tracing artifacts in reflections by using occlusion bias relative to each cascade voxel size #2743 --- .../Editor/MaterialTemplates/Features/SDFReflections.hlsl | 5 ++--- Content/Shaders/SSR.flax | 4 ++-- Source/Shaders/SSR.shader | 5 ++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl b/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl index 193d845fc..cc5a1a0ba 100644 --- a/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl +++ b/Content/Editor/MaterialTemplates/Features/SDFReflections.hlsl @@ -21,9 +21,8 @@ bool TraceSDFSoftwareReflections(GBufferSample gBuffer, float3 reflectWS, out fl { GlobalSDFTrace sdfTrace; float maxDistance = GLOBAL_SDF_WORLD_SIZE; - float selfOcclusionBias = GlobalSDF.CascadeVoxelSize[0]; - sdfTrace.Init(gBuffer.WorldPos + gBuffer.Normal * selfOcclusionBias, reflectWS, 0.0f, maxDistance); - GlobalSDFHit sdfHit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, sdfTrace); + sdfTrace.Init(gBuffer.WorldPos, reflectWS, 0.0f, maxDistance); + GlobalSDFHit sdfHit 
= RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, sdfTrace, 2.0f); if (sdfHit.IsHit()) { float3 hitPosition = sdfHit.GetHitPosition(sdfTrace); diff --git a/Content/Shaders/SSR.flax b/Content/Shaders/SSR.flax index e60bd83b0..98a83c0a3 100644 --- a/Content/Shaders/SSR.flax +++ b/Content/Shaders/SSR.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d75cdf10a4e4f142a8704514dac715b9c410952cdeb083313d4c20ba0ca0caa2 -size 11154 +oid sha256:0911c13d583d7cfef690261f113c84d20820bb95dac57850f15444ebd06d32fb +size 11095 diff --git a/Source/Shaders/SSR.shader b/Source/Shaders/SSR.shader index 7a468aaa5..044aea978 100644 --- a/Source/Shaders/SSR.shader +++ b/Source/Shaders/SSR.shader @@ -140,9 +140,8 @@ float4 PS_RayTracePass(Quad_VS2PS input) : SV_Target0 GlobalSDFTrace sdfTrace; float maxDistance = GLOBAL_SDF_WORLD_SIZE; - float selfOcclusionBias = GlobalSDF.CascadeVoxelSize[0]; - sdfTrace.Init(gBuffer.WorldPos + gBuffer.Normal * selfOcclusionBias, reflectWS, 0.0f, maxDistance); - GlobalSDFHit sdfHit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, sdfTrace); + sdfTrace.Init(gBuffer.WorldPos, reflectWS, 0.0f, maxDistance); + GlobalSDFHit sdfHit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, sdfTrace, 2.0f); if (sdfHit.IsHit()) { float3 hitPosition = sdfHit.GetHitPosition(sdfTrace); From 330d769bdcd763a352c1648eec9160a8ac56ce80 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 15 Aug 2024 23:27:00 +0200 Subject: [PATCH 275/292] Optimize `SceneObjectsFactory::SetupPrefabInstances` when loading large scenes --- Source/Engine/Level/SceneObjectsFactory.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Level/SceneObjectsFactory.cpp b/Source/Engine/Level/SceneObjectsFactory.cpp index dd79f1481..6f7b672fa 100644 --- a/Source/Engine/Level/SceneObjectsFactory.cpp +++ b/Source/Engine/Level/SceneObjectsFactory.cpp @@ -426,6 +426,7 @@ void 
SceneObjectsFactory::SetupPrefabInstances(Context& context, const PrefabSyn PROFILE_CPU_NAMED("SetupPrefabInstances"); const int32 count = data.Data.Size(); ASSERT(count <= data.SceneObjects.Count()); + Dictionary parentIdsLookup; for (int32 i = 0; i < count; i++) { const auto& stream = data.Data[i]; @@ -435,14 +436,19 @@ void SceneObjectsFactory::SetupPrefabInstances(Context& context, const PrefabSyn if (!JsonTools::GetGuidIfValid(prefabId, stream, "PrefabID")) continue; Guid parentId = JsonTools::GetGuid(stream, "ParentID"); - for (int32 j = i - 1; j >= 0; j--) + if (!parentIdsLookup.TryGet(parentId, parentId)) { - // Find ID of the parent to this object (use data in json for relationship) - if (parentId == JsonTools::GetGuid(data.Data[j], "ID") && data.SceneObjects[j]) + Guid parentIdKep = parentId; + for (int32 j = i - 1; j >= 0; j--) { - parentId = data.SceneObjects[j]->GetID(); - break; + // Find ID of the parent to this object (use data in json for relationship) + if (parentId == JsonTools::GetGuid(data.Data[j], "ID") && data.SceneObjects[j]) + { + parentId = data.SceneObjects[j]->GetID(); + break; + } } + parentIdsLookup.Add(parentIdKep, parentId); } const SceneObject* obj = data.SceneObjects[i]; const Guid id = obj ? 
obj->GetID() : JsonTools::GetGuid(stream, "ID"); From c4c25fc21fbaa6b89790643164c6d2c48f859498 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 16 Aug 2024 14:49:43 +0200 Subject: [PATCH 276/292] Optimize `SceneObjectsFactory::SynchronizeNewPrefabInstances` when loading large scenes --- Source/Engine/Level/Prefabs/Prefab.cpp | 6 ++++ Source/Engine/Level/Prefabs/Prefab.h | 5 +++ Source/Engine/Level/SceneObjectsFactory.cpp | 36 ++++++++++++--------- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/Source/Engine/Level/Prefabs/Prefab.cpp b/Source/Engine/Level/Prefabs/Prefab.cpp index 066ae04be..b85be7287 100644 --- a/Source/Engine/Level/Prefabs/Prefab.cpp +++ b/Source/Engine/Level/Prefabs/Prefab.cpp @@ -148,6 +148,10 @@ Asset::LoadResult Prefab::loadAsset() ObjectsDataCache.Add(objectId, &objData); ObjectsCount++; + Guid parentID; + if (JsonTools::GetGuidIfValid(parentID, objData, "ParentID")) + ObjectsHierarchyCache[parentID].Add(objectId); + Guid prefabId = JsonTools::GetGuid(objData, "PrefabID"); if (prefabId.IsValid() && !NestedPrefabs.Contains(prefabId)) { @@ -186,6 +190,8 @@ void Prefab::unload(bool isReloading) NestedPrefabs.Resize(0); ObjectsDataCache.Clear(); ObjectsDataCache.SetCapacity(0); + ObjectsHierarchyCache.Clear(); + ObjectsHierarchyCache.SetCapacity(0); ObjectsCache.Clear(); ObjectsCache.SetCapacity(0); if (_defaultInstance) diff --git a/Source/Engine/Level/Prefabs/Prefab.h b/Source/Engine/Level/Prefabs/Prefab.h index f5339c9ec..5b7de8637 100644 --- a/Source/Engine/Level/Prefabs/Prefab.h +++ b/Source/Engine/Level/Prefabs/Prefab.h @@ -41,6 +41,11 @@ public: /// Dictionary ObjectsDataCache; + /// + /// The object hierarchy cache that maps the PrefabObjectID into the list of children (identified also by PrefabObjectID). Objects without any children are not included for sake of optimization. Used for quick validation of the structure of loaded prefab instances. Valid only if asset is loaded. 
+ /// + Dictionary> ObjectsHierarchyCache; + /// /// The objects cache maps the id of the object contained in the prefab asset (actor or script) to the default instance deserialized from prefab data. Valid only if asset is loaded and GetDefaultInstance was called. /// diff --git a/Source/Engine/Level/SceneObjectsFactory.cpp b/Source/Engine/Level/SceneObjectsFactory.cpp index 6f7b672fa..520154b28 100644 --- a/Source/Engine/Level/SceneObjectsFactory.cpp +++ b/Source/Engine/Level/SceneObjectsFactory.cpp @@ -719,26 +719,21 @@ void SceneObjectsFactory::SynchronizePrefabInstances(Context& context, PrefabSyn void SceneObjectsFactory::SynchronizeNewPrefabInstances(Context& context, PrefabSyncData& data, Prefab* prefab, Actor* actor, const Guid& actorPrefabObjectId, int32 i, const ISerializable::DeserializeStream& stream) { - // Check for RemovedObjects list + // Use cached acceleration structure for prefab hierarchy validation + const auto* hierarchy = prefab->ObjectsHierarchyCache.TryGet(actorPrefabObjectId); + if (!hierarchy) + return; + const Guid actorId = actor->GetID(); const auto removedObjects = SERIALIZE_FIND_MEMBER(stream, "RemovedObjects"); - - // Check if the given actor has new children or scripts added (inside the prefab that it uses) - // TODO: consider caching prefab objects structure maybe to boost this logic? 
- for (auto it = prefab->ObjectsDataCache.Begin(); it.IsNotEnd(); ++it) + for (const Guid& prefabObjectId : *hierarchy) { - // Use only objects that are linked to the current actor - const Guid parentId = JsonTools::GetGuid(*it->Value, "ParentID"); - if (parentId != actorPrefabObjectId) - continue; - // Skip if object was marked to be removed per instance - const Guid prefabObjectId = JsonTools::GetGuid(*it->Value, "ID"); if (removedObjects != stream.MemberEnd()) { auto& list = removedObjects->value; - const int32 size = static_cast(list.Size()); + const rapidjson::SizeType size = list.Size(); bool removed = false; - for (int32 j = 0; j < size; j++) + for (rapidjson::SizeType j = 0; j < size; j++) { if (JsonTools::GetGuid(list[j]) == prefabObjectId) { @@ -754,10 +749,19 @@ void SceneObjectsFactory::SynchronizeNewPrefabInstances(Context& context, Prefab bool spawned = false; int32 childSearchStart = i + 1; // Objects are serialized with parent followed by its children int32 instanceIndex = -1; - if (context.ObjectToInstance.TryGet(actor->GetID(), instanceIndex) && context.Instances[instanceIndex].Prefab == prefab) + if (context.ObjectToInstance.TryGet(actorId, instanceIndex) && context.Instances[instanceIndex].Prefab == prefab) { + // Quickly check if that object exists + auto& prefabInstance = context.Instances[instanceIndex]; + Guid id; + int32 idInstanceIndex; + if (prefabInstance.IdsMapping.TryGet(prefabObjectId, id) && + context.ObjectToInstance.TryGet(id, idInstanceIndex) && + idInstanceIndex == instanceIndex) + continue; + // Start searching from the beginning of that prefab instance (eg. 
in case prefab objects were reordered) - childSearchStart = Math::Min(childSearchStart, context.Instances[instanceIndex].StatIndex); + childSearchStart = Math::Min(childSearchStart, prefabInstance.StatIndex); } for (int32 j = childSearchStart; j < data.InitialCount; j++) { @@ -773,7 +777,7 @@ void SceneObjectsFactory::SynchronizeNewPrefabInstances(Context& context, Prefab // Map prefab object ID to this actor's prefab instance so the new objects gets added to it context.SetupIdsMapping(actor, data.Modifier); - data.Modifier->IdsMapping[actorPrefabObjectId] = actor->GetID(); + data.Modifier->IdsMapping[actorPrefabObjectId] = actorId; Scripting::ObjectsLookupIdMapping.Set(&data.Modifier->IdsMapping); // Create instance (including all children) From 4a90663b1c601ee4ffd55fc98c5feb6f7c13dd80 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 16 Aug 2024 15:03:04 +0200 Subject: [PATCH 277/292] Fix post merge --- Source/Engine/Audio/AudioSource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index 7a55c6f45..a82b0a717 100644 --- a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -404,7 +404,7 @@ void AudioSource::Update() if (!UseStreaming() && Math::NearEqual(GetTime(), 0.0f) && _isActuallyPlayingSth && !_startingToPlay) { int32 queuedBuffers; - AudioBackend::Source::GetQueuedBuffersCount(this, queuedBuffers); + AudioBackend::Source::GetQueuedBuffersCount(SourceID, queuedBuffers); if (queuedBuffers) { if (GetIsLooping()) From cb106be710456e5b638abdaa31d018ef882ddfb8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 16 Aug 2024 22:40:02 +0200 Subject: [PATCH 278/292] Fix importing compressed textures with `DirectXTex` with resizing --- .../TextureTool/TextureTool.DirectXTex.cpp | 63 ++++++++++++++++--- 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp 
b/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp index 31b20fea2..2f20cd503 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.DirectXTex.cpp @@ -50,6 +50,39 @@ namespace return static_cast(format); } + FORCE_INLINE DXGI_FORMAT ToDecompressFormat(const DXGI_FORMAT format) + { + switch (format) + { + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC3_TYPELESS: + return DXGI_FORMAT_R8G8B8A8_TYPELESS; + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC3_UNORM: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM_SRGB: + return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + case DXGI_FORMAT_BC4_TYPELESS: + return DXGI_FORMAT_R8_TYPELESS; + case DXGI_FORMAT_BC4_UNORM: + return DXGI_FORMAT_R8_UNORM; + case DXGI_FORMAT_BC4_SNORM: + return DXGI_FORMAT_R8_SNORM; + case DXGI_FORMAT_BC5_TYPELESS: + return DXGI_FORMAT_R8G8_TYPELESS; + case DXGI_FORMAT_BC5_UNORM: + return DXGI_FORMAT_R8G8_UNORM; + case DXGI_FORMAT_BC5_SNORM: + return DXGI_FORMAT_R8G8_SNORM; + default: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + } + } + HRESULT Compress(const DirectX::Image* srcImages, size_t nimages, const DirectX::TexMetadata& metadata, DXGI_FORMAT format, DirectX::TEX_COMPRESS_FLAGS compress, float threshold, DirectX::ScratchImage& cImages) { #if USE_EDITOR @@ -627,8 +660,6 @@ bool TextureTool::ImportTextureDirectXTex(ImageType type, const StringView& path int32 height = Math::Clamp(options.Resize ? 
options.SizeY : static_cast(sourceHeight * options.Scale), 1, options.MaxSize); if (sourceWidth != width || sourceHeight != height) { - auto& tmpImg = GET_TMP_IMG(); - // During resizing we need to keep texture aspect ratio const bool keepAspectRatio = false; // TODO: expose as import option if (keepAspectRatio) @@ -642,15 +673,27 @@ bool TextureTool::ImportTextureDirectXTex(ImageType type, const StringView& path // Resize source texture LOG(Info, "Resizing texture from {0}x{1} to {2}x{3}.", sourceWidth, sourceHeight, width, height); - result = DirectX::Resize(*currentImage->GetImages(), width, height, DirectX::TEX_FILTER_LINEAR | DirectX::TEX_FILTER_SEPARATE_ALPHA, tmpImg); - if (FAILED(result)) + if (DirectX::IsCompressed(currentImage->GetMetadata().format)) { - errorMsg = String::Format(TEXT("Cannot resize texture, error: {0:x}"), static_cast(result)); - return true; + auto& tmpImg = GET_TMP_IMG(); + result = Decompress(currentImage->GetImages(), currentImage->GetImageCount(), currentImage->GetMetadata(), ToDecompressFormat(currentImage->GetMetadata().format), tmpImg); + if (FAILED(result)) + { + errorMsg = String::Format(TEXT("Cannot decompress texture, error: {0:x}"), static_cast(result)); + return true; + } + SET_CURRENT_IMG(tmpImg); + } + { + auto& tmpImg = GET_TMP_IMG(); + result = DirectX::Resize(*currentImage->GetImages(), width, height, DirectX::TEX_FILTER_LINEAR | DirectX::TEX_FILTER_SEPARATE_ALPHA, tmpImg); + if (FAILED(result)) + { + errorMsg = String::Format(TEXT("Cannot resize texture, error: {0:x}"), static_cast(result)); + return true; + } + SET_CURRENT_IMG(tmpImg); } - - // Use converted image - SET_CURRENT_IMG(tmpImg); } // Cache data @@ -704,7 +747,7 @@ bool TextureTool::ImportTextureDirectXTex(ImageType type, const StringView& path if (!keepAsIs && DirectX::IsCompressed(sourceDxgiFormat)) { auto& tmpImg = GET_TMP_IMG(); - sourceDxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; + sourceDxgiFormat = ToDecompressFormat(sourceDxgiFormat); result = 
Decompress(currentImage->GetImages(), currentImage->GetImageCount(), currentImage->GetMetadata(), sourceDxgiFormat, tmpImg); if (FAILED(result)) { From ed9aaba05570c6a427af9761a094ca31d4e8ae9f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 17 Aug 2024 00:10:03 +0200 Subject: [PATCH 279/292] Fix profiler window tabs layout for charts --- Source/Editor/Windows/Profiler/Assets.cs | 2 ++ Source/Editor/Windows/Profiler/CPU.cs | 2 ++ Source/Editor/Windows/Profiler/GPU.cs | 2 ++ Source/Editor/Windows/Profiler/MemoryGPU.cs | 4 +++- Source/Editor/Windows/Profiler/SingleChart.cs | 3 ++- 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Source/Editor/Windows/Profiler/Assets.cs b/Source/Editor/Windows/Profiler/Assets.cs index d5b735fa3..3cd66e4eb 100644 --- a/Source/Editor/Windows/Profiler/Assets.cs +++ b/Source/Editor/Windows/Profiler/Assets.cs @@ -50,6 +50,8 @@ namespace FlaxEditor.Windows.Profiler { Title = "Assets Memory Usage (CPU)", AnchorPreset = AnchorPresets.HorizontalStretchTop, + Offsets = Margin.Zero, + Height = SingleChart.DefaultHeight, FormatSample = v => Utilities.Utils.FormatBytesCount((int)v), Parent = mainPanel, }; diff --git a/Source/Editor/Windows/Profiler/CPU.cs b/Source/Editor/Windows/Profiler/CPU.cs index 5246957f6..ff9b0e253 100644 --- a/Source/Editor/Windows/Profiler/CPU.cs +++ b/Source/Editor/Windows/Profiler/CPU.cs @@ -69,6 +69,8 @@ namespace FlaxEditor.Windows.Profiler { Title = "Update", AnchorPreset = AnchorPresets.HorizontalStretchTop, + Offsets = Margin.Zero, + Height = SingleChart.DefaultHeight, FormatSample = v => (Mathf.RoundToInt(v * 10.0f) / 10.0f) + " ms", Parent = mainPanel, }; diff --git a/Source/Editor/Windows/Profiler/GPU.cs b/Source/Editor/Windows/Profiler/GPU.cs index dd90f891b..7b837768f 100644 --- a/Source/Editor/Windows/Profiler/GPU.cs +++ b/Source/Editor/Windows/Profiler/GPU.cs @@ -37,6 +37,8 @@ namespace FlaxEditor.Windows.Profiler { Title = "Draw (CPU)", AnchorPreset = AnchorPresets.HorizontalStretchTop, + 
Offsets = Margin.Zero, + Height = SingleChart.DefaultHeight, FormatSample = v => (Mathf.RoundToInt(v * 10.0f) / 10.0f) + " ms", Parent = mainPanel, }; diff --git a/Source/Editor/Windows/Profiler/MemoryGPU.cs b/Source/Editor/Windows/Profiler/MemoryGPU.cs index 00f24821a..dd3c6d6ea 100644 --- a/Source/Editor/Windows/Profiler/MemoryGPU.cs +++ b/Source/Editor/Windows/Profiler/MemoryGPU.cs @@ -51,7 +51,9 @@ namespace FlaxEditor.Windows.Profiler { Title = "GPU Memory Usage", AnchorPreset = AnchorPresets.HorizontalStretchTop, - FormatSample = v => Utilities.Utils.FormatBytesCount((int)v), + Offsets = Margin.Zero, + Height = SingleChart.DefaultHeight, + FormatSample = v => Utilities.Utils.FormatBytesCount((ulong)v), Parent = mainPanel, }; _memoryUsageChart.SelectedSampleChanged += OnSelectedSampleChanged; diff --git a/Source/Editor/Windows/Profiler/SingleChart.cs b/Source/Editor/Windows/Profiler/SingleChart.cs index 4985d88e1..cd24ac635 100644 --- a/Source/Editor/Windows/Profiler/SingleChart.cs +++ b/Source/Editor/Windows/Profiler/SingleChart.cs @@ -12,6 +12,7 @@ namespace FlaxEditor.Windows.Profiler /// internal class SingleChart : Control { + internal const float DefaultHeight = TitleHeight + 60; private const float TitleHeight = 20; private const float PointsOffset = 4; private readonly SamplesBuffer _samples; @@ -63,7 +64,7 @@ namespace FlaxEditor.Windows.Profiler /// /// The maximum samples to collect. 
public SingleChart(int maxSamples = ProfilerMode.MaxSamples) - : base(0, 0, 100, 60 + TitleHeight) + : base(0, 0, 100, DefaultHeight) { _samples = new SamplesBuffer(maxSamples); _sample = string.Empty; From 7650cead3dcf42b77f97352491e7e3c782b87454 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 17 Aug 2024 00:10:53 +0200 Subject: [PATCH 280/292] Disable tooltip on profiler recording button to not affect perf due to tooltip showing --- Source/Editor/Windows/Profiler/ProfilerWindow.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Editor/Windows/Profiler/ProfilerWindow.cs b/Source/Editor/Windows/Profiler/ProfilerWindow.cs index f746e2a6e..0b4ac39b4 100644 --- a/Source/Editor/Windows/Profiler/ProfilerWindow.cs +++ b/Source/Editor/Windows/Profiler/ProfilerWindow.cs @@ -92,7 +92,7 @@ namespace FlaxEditor.Windows.Profiler Parent = this, }; _liveRecordingButton = toolstrip.AddButton(editor.Icons.Play64); - _liveRecordingButton.LinkTooltip("Live profiling events recording"); + //_liveRecordingButton.LinkTooltip("Live profiling events recording"); _liveRecordingButton.AutoCheck = true; _liveRecordingButton.Clicked += OnLiveRecordingChanged; _clearButton = toolstrip.AddButton(editor.Icons.Rotate32, Clear); From 650a2921a3b2a63ec056c87428b91123d2f1c378 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 17 Aug 2024 14:35:13 +0200 Subject: [PATCH 281/292] Add improved `FormatBytesCount` to print large sizes in more detailed way Instead of printing `2 GB` output `2.43 GB` to be more explicit. Deprecate version with `int` in favor of a single `ulong`. 
--- Source/Editor/Content/Items/ContentItem.cs | 2 +- Source/Editor/Utilities/Utils.cs | 16 +++++++--------- Source/Editor/Windows/Assets/AnimationWindow.cs | 2 +- Source/Editor/Windows/Profiler/Assets.cs | 2 +- Source/Editor/Windows/Profiler/CPU.cs | 2 +- Source/Editor/Windows/Profiler/Memory.cs | 4 ++-- Source/Editor/Windows/Profiler/Network.cs | 2 +- 7 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Source/Editor/Content/Items/ContentItem.cs b/Source/Editor/Content/Items/ContentItem.cs index 259c09481..18c5b6d7b 100644 --- a/Source/Editor/Content/Items/ContentItem.cs +++ b/Source/Editor/Content/Items/ContentItem.cs @@ -388,7 +388,7 @@ namespace FlaxEditor.Content { sb.Append("Type: ").Append(TypeDescription).AppendLine(); if (File.Exists(Path)) - sb.Append("Size: ").Append(Utilities.Utils.FormatBytesCount((int)new FileInfo(Path).Length)).AppendLine(); + sb.Append("Size: ").Append(Utilities.Utils.FormatBytesCount((ulong)new FileInfo(Path).Length)).AppendLine(); sb.Append("Path: ").Append(Utilities.Utils.GetAssetNamePathWithExt(Path)).AppendLine(); } diff --git a/Source/Editor/Utilities/Utils.cs b/Source/Editor/Utilities/Utils.cs index b14d58eb6..1487f1a59 100644 --- a/Source/Editor/Utilities/Utils.cs +++ b/Source/Editor/Utilities/Utils.cs @@ -146,19 +146,14 @@ namespace FlaxEditor.Utilities /// /// Formats the amount of bytes to get a human-readable data size in bytes with abbreviation. Eg. 32 kB + /// [Deprecated in v1.9] /// /// The bytes. /// The formatted amount of bytes. 
+ [Obsolete("Use FormatBytesCount with ulong instead")] public static string FormatBytesCount(int bytes) { - int order = 0; - while (bytes >= 1024 && order < MemorySizePostfixes.Length - 1) - { - order++; - bytes /= 1024; - } - - return string.Format("{0:0.##} {1}", bytes, MemorySizePostfixes[order]); + return FormatBytesCount((ulong)bytes); } /// @@ -169,12 +164,15 @@ namespace FlaxEditor.Utilities public static string FormatBytesCount(ulong bytes) { int order = 0; + ulong bytesPrev = bytes; while (bytes >= 1024 && order < MemorySizePostfixes.Length - 1) { + bytesPrev = bytes; order++; bytes /= 1024; } - + if (order >= 3) // GB or higher use up to 2 decimal places for more precision + return string.Format("{0:0.##} {1}", FlaxEngine.Utils.RoundTo2DecimalPlaces(bytesPrev / 1024.0f), MemorySizePostfixes[order]); return string.Format("{0:0.##} {1}", bytes, MemorySizePostfixes[order]); } diff --git a/Source/Editor/Windows/Assets/AnimationWindow.cs b/Source/Editor/Windows/Assets/AnimationWindow.cs index 2411daf86..f6ca7e1b0 100644 --- a/Source/Editor/Windows/Assets/AnimationWindow.cs +++ b/Source/Editor/Windows/Assets/AnimationWindow.cs @@ -181,7 +181,7 @@ namespace FlaxEditor.Windows.Assets group.Label("Frames: " + info.FramesCount); group.Label("Channels: " + info.ChannelsCount); group.Label("Keyframes: " + info.KeyframesCount); - group.Label("Memory Usage: " + Utilities.Utils.FormatBytesCount(info.MemoryUsage)); + group.Label("Memory Usage: " + Utilities.Utils.FormatBytesCount((ulong)info.MemoryUsage)); } base.Initialize(layout); diff --git a/Source/Editor/Windows/Profiler/Assets.cs b/Source/Editor/Windows/Profiler/Assets.cs index 3cd66e4eb..159392138 100644 --- a/Source/Editor/Windows/Profiler/Assets.cs +++ b/Source/Editor/Windows/Profiler/Assets.cs @@ -52,7 +52,7 @@ namespace FlaxEditor.Windows.Profiler AnchorPreset = AnchorPresets.HorizontalStretchTop, Offsets = Margin.Zero, Height = SingleChart.DefaultHeight, - FormatSample = v => 
Utilities.Utils.FormatBytesCount((int)v), + FormatSample = v => Utilities.Utils.FormatBytesCount((ulong)v), Parent = mainPanel, }; _memoryUsageChart.SelectedSampleChanged += OnSelectedSampleChanged; diff --git a/Source/Editor/Windows/Profiler/CPU.cs b/Source/Editor/Windows/Profiler/CPU.cs index ff9b0e253..8c6ff5e78 100644 --- a/Source/Editor/Windows/Profiler/CPU.cs +++ b/Source/Editor/Windows/Profiler/CPU.cs @@ -175,7 +175,7 @@ namespace FlaxEditor.Windows.Profiler private string FormatCellBytes(object x) { - return Utilities.Utils.FormatBytesCount((int)x); + return Utilities.Utils.FormatBytesCount((ulong)x); } /// diff --git a/Source/Editor/Windows/Profiler/Memory.cs b/Source/Editor/Windows/Profiler/Memory.cs index 4272bc0ae..8ab279fb8 100644 --- a/Source/Editor/Windows/Profiler/Memory.cs +++ b/Source/Editor/Windows/Profiler/Memory.cs @@ -36,14 +36,14 @@ namespace FlaxEditor.Windows.Profiler _nativeAllocationsChart = new SingleChart { Title = "Native Memory Allocation", - FormatSample = v => Utilities.Utils.FormatBytesCount((int)v), + FormatSample = v => Utilities.Utils.FormatBytesCount((ulong)v), Parent = layout, }; _nativeAllocationsChart.SelectedSampleChanged += OnSelectedSampleChanged; _managedAllocationsChart = new SingleChart { Title = "Managed Memory Allocation", - FormatSample = v => Utilities.Utils.FormatBytesCount((int)v), + FormatSample = v => Utilities.Utils.FormatBytesCount((ulong)v), Parent = layout, }; _managedAllocationsChart.SelectedSampleChanged += OnSelectedSampleChanged; diff --git a/Source/Editor/Windows/Profiler/Network.cs b/Source/Editor/Windows/Profiler/Network.cs index 577387617..fa5701db9 100644 --- a/Source/Editor/Windows/Profiler/Network.cs +++ b/Source/Editor/Windows/Profiler/Network.cs @@ -319,7 +319,7 @@ namespace FlaxEditor.Windows.Profiler private static string FormatCellBytes(object x) { - return Utilities.Utils.FormatBytesCount((int)x); + return Utilities.Utils.FormatBytesCount((ulong)x); } private static int SortRows(Control x, 
Control y) From ce1a1d6031cf28c813ba8047b9daad9bfaf799e6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 17 Aug 2024 16:48:42 +0200 Subject: [PATCH 282/292] Fix deploying `fbx` files in cooked build from `ModelPrefab` --- Source/Engine/Content/JsonAsset.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index 0dda52592..7f3a2db32 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -110,13 +110,13 @@ uint64 JsonAssetBase::GetMemoryUsage() const #if USE_EDITOR -void FindIds(ISerializable::DeserializeStream& node, Array& output, Array& files) +void FindIds(ISerializable::DeserializeStream& node, Array& output, Array& files, rapidjson_flax::Value* nodeName = nullptr) { if (node.IsObject()) { for (auto i = node.MemberBegin(); i != node.MemberEnd(); ++i) { - FindIds(i->value, output, files); + FindIds(i->value, output, files, &i->name); } } else if (node.IsArray()) @@ -138,7 +138,8 @@ void FindIds(ISerializable::DeserializeStream& node, Array& output, Array< return; } } - if (node.GetStringLength() < 512) + if (node.GetStringLength() < 512 && + (!nodeName || nodeName->GetStringAnsiView() != "ImportPath")) // Ignore path in ImportPath from ModelPrefab (TODO: resave prefabs/scenes before cooking to get rid of editor-only data) { // Try to detect file paths String path = node.GetText(); From bb00653645336c1993818a3daa6fb4e7675ee77f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 Aug 2024 17:49:12 +0200 Subject: [PATCH 283/292] Fix regression in shadow maps rendering batching to ignore transparent materials due to opacity usage --- Source/Engine/Renderer/RenderList.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 2c3a5d0c5..32bb43732 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ 
b/Source/Engine/Renderer/RenderList.cpp @@ -1018,9 +1018,11 @@ bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b, Draw // Batch simple materials during depth-only drawing (when using default vertex shader and no pixel shader) if (pass == DrawPass::Depth) { + const MaterialInfo& aInfo = a.Material->GetInfo(); + const MaterialInfo& bInfo = b.Material->GetInfo(); constexpr MaterialUsageFlags complexUsageFlags = MaterialUsageFlags::UseMask | MaterialUsageFlags::UsePositionOffset | MaterialUsageFlags::UseDisplacement; - const bool aIsSimple = EnumHasNoneFlags(a.Material->GetInfo().UsageFlags, complexUsageFlags); - const bool bIsSimple = EnumHasNoneFlags(b.Material->GetInfo().UsageFlags, complexUsageFlags); + const bool aIsSimple = EnumHasNoneFlags(aInfo.UsageFlags, complexUsageFlags) && aInfo.BlendMode == MaterialBlendMode::Opaque; + const bool bIsSimple = EnumHasNoneFlags(bInfo.UsageFlags, complexUsageFlags) && bInfo.BlendMode == MaterialBlendMode::Opaque; return aIsSimple && bIsSimple; } return false; From fb2af0e0e7dcc208bd21964292f37ab3bd271d38 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 Aug 2024 18:49:03 +0200 Subject: [PATCH 284/292] Fix regression in Sort Key usage --- Source/Engine/Renderer/RenderList.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 32bb43732..e733c60b5 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -485,6 +485,8 @@ struct PackedSortKey { uint64 Data; + PACK_BEGIN() + struct { // Sorting order: By Sort Order -> By Material -> By Geometry -> By Distance @@ -492,7 +494,7 @@ struct PackedSortKey uint8 DrawKey; uint16 BatchKey; uint8 SortKey; - }; + } PACK_END(); }; }; @@ -665,7 +667,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD PackedSortKey key; key.Data = drawCall.SortKey; key.DistanceKey ^= MAX_uint32; //
Reverse depth - key.SortKey ^= MAX_uint16; // Reverse sort order + key.SortKey ^= MAX_uint8; // Reverse sort order sortedKeys[i] = key.Data; } } From 591f0ee6a9dfa021b879ce51a692e5c3c2cfc6a7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 23 Aug 2024 00:26:42 +0200 Subject: [PATCH 285/292] Fix regressions after rendering changes --- Content/Editor/MaterialTemplates/Terrain.shader | 2 ++ Content/Engine/DefaultTerrainMaterial.flax | 4 ++-- .../Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp | 4 ++-- .../Tools/MaterialGenerator/MaterialGenerator.Textures.cpp | 4 ++-- Source/Engine/Visject/ShaderGraph.cpp | 2 ++ 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Content/Editor/MaterialTemplates/Terrain.shader b/Content/Editor/MaterialTemplates/Terrain.shader index f7819c863..3c4567321 100644 --- a/Content/Editor/MaterialTemplates/Terrain.shader +++ b/Content/Editor/MaterialTemplates/Terrain.shader @@ -162,6 +162,8 @@ ObjectData GetObject() return object; } +#define LoadObjectFromCB(var) var = GetObject() + MaterialInput GetMaterialInput(PixelInput input) { MaterialInput output = GetGeometryMaterialInput(input.Geometry); diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index 96bf55cda..b75d68dcd 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1bc64b044a3f7999e020717efd4ef14c5cf6bd2a251ef94b754bec1e3cc5a3da -size 23451 +oid sha256:69b58818698fe9609cee68d29642d75a752db39a1b06508d26994edaa37f0503 +size 23499 diff --git a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp index 01c7017a9..ce7f45712 100644 --- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp +++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.Textures.cpp @@ -311,7 +311,7 
@@ void ParticleEmitterGPUGenerator::ProcessGroupTextures(Box* box, Node* node, Val { auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - Value startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); + Value startCascade = tryGetValue(node->TryGetBox(2), 0, Value::Zero).Cast(VariantType::Uint); value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {0}_Mip, {1}, {2})"), param.ShaderName, worldPosition.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); break; @@ -323,7 +323,7 @@ void ParticleEmitterGPUGenerator::ProcessGroupTextures(Box* box, Node* node, Val auto distanceBox = node->GetBox(2); auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - Value startCascade = tryGetValue(node->GetBox(3), 0, Value::Zero).Cast(VariantType::Uint); + Value startCascade = tryGetValue(node->TryGetBox(3), 0, Value::Zero).Cast(VariantType::Uint); auto distance = writeLocal(VariantType::Float, node); auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2}, {3})"), param.ShaderName, worldPosition.Value, distance.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index 15ee6876d..7df348182 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -665,7 +665,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) { auto param = 
findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - Value startCascade = tryGetValue(node->GetBox(2), 0, Value::Zero).Cast(VariantType::Uint); + Value startCascade = tryGetValue(node->TryGetBox(2), 0, Value::Zero).Cast(VariantType::Uint); value = writeLocal(VariantType::Float, String::Format(TEXT("SampleGlobalSDF({0}, {0}_Tex, {0}_Mip, {1}, {2})"), param.ShaderName, worldPosition.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); break; @@ -677,7 +677,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) auto distanceBox = node->GetBox(2); auto param = findOrAddGlobalSDF(); Value worldPosition = tryGetValue(node->GetBox(1), Value(VariantType::Float3, TEXT("input.WorldPosition.xyz"))).Cast(VariantType::Float3); - Value startCascade = tryGetValue(node->GetBox(3), 0, Value::Zero).Cast(VariantType::Uint); + Value startCascade = tryGetValue(node->TryGetBox(3), 0, Value::Zero).Cast(VariantType::Uint); auto distance = writeLocal(VariantType::Float, node); auto gradient = writeLocal(VariantType::Float3, String::Format(TEXT("SampleGlobalSDFGradient({0}, {0}_Tex, {0}_Mip, {1}, {2}, {3})"), param.ShaderName, worldPosition.Value, distance.Value, startCascade.Value), node); _includes.Add(TEXT("./Flax/GlobalSignDistanceField.hlsl")); diff --git a/Source/Engine/Visject/ShaderGraph.cpp b/Source/Engine/Visject/ShaderGraph.cpp index 886452af6..718e9cba9 100644 --- a/Source/Engine/Visject/ShaderGraph.cpp +++ b/Source/Engine/Visject/ShaderGraph.cpp @@ -1049,6 +1049,8 @@ ShaderGenerator::Value ShaderGenerator::eatBox(Node* caller, Box* box) ShaderGenerator::Value ShaderGenerator::tryGetValue(Box* box, int32 defaultValueBoxIndex, const Value& defaultValue) { + if (!box) + return defaultValue; const auto parentNode = box->GetParent(); if (box->HasConnection()) return eatBox(parentNode, 
box->FirstConnection()); From 499ef51875b9febee667d8c4d9bc52e13d9a89bd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 27 Aug 2024 16:01:46 +0200 Subject: [PATCH 286/292] Fix draw calls sorting to favor distance over geometry and material #2271 #2306 --- Source/Engine/Renderer/RenderList.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index e733c60b5..93a254c21 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -489,10 +489,10 @@ struct PackedSortKey struct { - // Sorting order: By Sort Order -> By Material -> By Geometry -> By Distance - uint32 DistanceKey; + // Sorting order: By Sort Order -> By Distance -> By Material -> By Geometry uint8 DrawKey; uint16 BatchKey; + uint32 DistanceKey; uint8 SortKey; } PACK_END(); }; From 09737023a13fcbbb9b2156f77a00c37f808f70f9 Mon Sep 17 00:00:00 2001 From: Muzz Date: Thu, 29 Aug 2024 19:01:37 +0200 Subject: [PATCH 287/292] Add **Contrast Adaptive Sharpening (CAS)** filter pass #2423 --- Content/Shaders/CAS.flax | 3 + .../Engine/Graphics/PostProcessSettings.cpp | 4 + Source/Engine/Graphics/PostProcessSettings.h | 46 ++++++++- Source/Engine/Renderer/AntiAliasing/FXAA.cpp | 3 +- .../ContrastAdaptiveSharpeningPass.cpp | 97 +++++++++++++++++++ .../Renderer/ContrastAdaptiveSharpeningPass.h | 39 ++++++++ Source/Engine/Renderer/Renderer.cpp | 47 +++++++-- Source/Shaders/CAS.shader | 47 +++++++++ 8 files changed, 273 insertions(+), 13 deletions(-) create mode 100644 Content/Shaders/CAS.flax create mode 100644 Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp create mode 100644 Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h create mode 100644 Source/Shaders/CAS.shader diff --git a/Content/Shaders/CAS.flax b/Content/Shaders/CAS.flax new file mode 100644 index 000000000..238fbc6e9 --- /dev/null +++ b/Content/Shaders/CAS.flax @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:026dcdb363bcb61780b63255c780dee2e0a1c6934229764c5db8664d39244ecb +size 2075 diff --git a/Source/Engine/Graphics/PostProcessSettings.cpp b/Source/Engine/Graphics/PostProcessSettings.cpp index 6a243d960..738cc4505 100644 --- a/Source/Engine/Graphics/PostProcessSettings.cpp +++ b/Source/Engine/Graphics/PostProcessSettings.cpp @@ -198,6 +198,10 @@ void AntiAliasingSettings::BlendWith(AntiAliasingSettings& other, float weight) BLEND_FLOAT(TAA_Sharpness); BLEND_FLOAT(TAA_StationaryBlending); BLEND_FLOAT(TAA_MotionBlending); + BLEND_FLOAT(CAS_SharpeningAmount); + BLEND_FLOAT(CAS_EdgeSharpening); + BLEND_FLOAT(CAS_MinEdgeThreshold); + BLEND_FLOAT(CAS_OverBlurLimit); } void PostFxMaterialsSettings::BlendWith(PostFxMaterialsSettings& other, float weight) diff --git a/Source/Engine/Graphics/PostProcessSettings.h b/Source/Engine/Graphics/PostProcessSettings.h index 19b3cec93..5feb5dd9a 100644 --- a/Source/Engine/Graphics/PostProcessSettings.h +++ b/Source/Engine/Graphics/PostProcessSettings.h @@ -1858,10 +1858,30 @@ API_ENUM(Attributes="Flags") enum class AntiAliasingSettingsOverride : int32 /// TAA_MotionBlending = 1 << 4, + /// + /// Overrides property. + /// + CAS_SharpeningAmount = 1 << 5, + + /// + /// Overrides property. + /// + CAS_EdgeSharpening = 1 << 6, + + /// + /// Overrides property. + /// + CAS_MinEdgeThreshold = 1 << 7, + + /// + /// Overrides property. + /// + CAS_OverBlurLimit = 1 << 8, + /// /// All properties. 
/// - All = Mode | TAA_JitterSpread | TAA_Sharpness | TAA_StationaryBlending | TAA_MotionBlending, + All = Mode | TAA_JitterSpread | TAA_Sharpness | TAA_StationaryBlending | TAA_MotionBlending | CAS_SharpeningAmount | CAS_EdgeSharpening | CAS_MinEdgeThreshold | CAS_OverBlurLimit, }; /// @@ -1909,6 +1929,30 @@ API_STRUCT() struct FLAXENGINE_API AntiAliasingSettings : ISerializable API_FIELD(Attributes="Limit(0, 0.99f, 0.001f), EditorOrder(4), PostProcessSetting((int)AntiAliasingSettingsOverride.TAA_MotionBlending), EditorDisplay(null, \"TAA Motion Blending\"), VisibleIf(nameof(ShowTAASettings))") float TAA_MotionBlending = 0.85f; + /// + /// The sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter. + /// + API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(10), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_SharpeningAmount), EditorDisplay(null, \"CAS Sharpening Amount\"), VisibleIf(nameof(ShowTAASettings), true)") + float CAS_SharpeningAmount = 0.0f; + + /// + /// The edge sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter. + /// + API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(11), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_EdgeSharpening), EditorDisplay(null, \"CAS Edge Sharpening\"), VisibleIf(nameof(ShowTAASettings), true)") + float CAS_EdgeSharpening = 0.5f; + + /// + /// The minimum edge threshold for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter. + /// + API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(12), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_MinEdgeThreshold), EditorDisplay(null, \"CAS Min Edge Threshold\"), VisibleIf(nameof(ShowTAASettings), true)") + float CAS_MinEdgeThreshold = 0.03f; + + /// + /// The over-blur limit for the Contrast Adaptive Sharpening (CAS) pass. 
Ignored when using TAA that contains own contrast filter. + /// + API_FIELD(Attributes = "Limit(0, 100f, 0.001f), EditorOrder(13), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_OverBlurLimit), EditorDisplay(null, \"CAS Over-blur Limit\"), VisibleIf(nameof(ShowTAASettings), true)") + float CAS_OverBlurLimit = 1.0f; + public: /// /// Blends the settings using given weight. diff --git a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp index 57da136c1..83fae260f 100644 --- a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp @@ -65,10 +65,10 @@ void FXAA::Dispose() void FXAA::Render(RenderContext& renderContext, GPUTexture* input, GPUTextureView* output) { auto context = GPUDevice::Instance->GetMainContext(); + context->SetRenderTarget(output); if (checkIfSkipPass()) { // Resources are missing. Do not perform rendering, just copy input frame. - context->SetRenderTarget(output); context->Draw(input); return; } @@ -83,7 +83,6 @@ void FXAA::Render(RenderContext& renderContext, GPUTexture* input, GPUTextureVie context->BindSR(0, input); // Render - context->SetRenderTarget(output); const auto qualityLevel = Math::Clamp(static_cast(Graphics::AAQuality), 0, static_cast(Quality::MAX) - 1); context->SetState(_psFXAA.Get(qualityLevel)); context->DrawFullscreenTriangle(); diff --git a/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp new file mode 100644 index 000000000..2d93d525d --- /dev/null +++ b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp @@ -0,0 +1,97 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. 
+ +#include "ContrastAdaptiveSharpeningPass.h" +#include "RenderList.h" +#include "Engine/Content/Assets/Shader.h" +#include "Engine/Content/Content.h" +#include "Engine/Graphics/GPUContext.h" +#include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/RenderTargetPool.h" +#include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/Shaders/GPUShader.h" + +GPU_CB_STRUCT(Data { + Float2 InputSizeInv; + Float2 Padding; + float SharpeningAmount; + float EdgeSharpening; + float MinEdgeThreshold; + float OverBlurLimit; + }); + +String ContrastAdaptiveSharpeningPass::ToString() const +{ + return TEXT("ContrastAdaptiveSharpening"); +} + +void ContrastAdaptiveSharpeningPass::Dispose() +{ + RendererPass::Dispose(); + + SAFE_DELETE_GPU_RESOURCE(_psCAS); + _shader = nullptr; +} + +bool ContrastAdaptiveSharpeningPass::setupResources() +{ + // Lazy-load shader + if (!_shader) + { + _shader = Content::LoadAsyncInternal(TEXT("Shaders/CAS")); + if (!_shader) + return false; +#if COMPILE_WITH_DEV_ENV + _shader.Get()->OnReloading.Bind(this); +#endif + } + if (!_shader->IsLoaded()) + return true; + const auto shader = _shader->GetShader(); + + // Validate shader constant buffer size + if (shader->GetCB(0)->GetSize() != sizeof(Data)) + { + REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); + return true; + } + + // Create pipeline stage + auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.PS = shader->GetPS("PS_CAS"); + _psCAS = GPUDevice::Instance->CreatePipelineState(); + if (_psCAS->Init(psDesc)) + return true; + + return false; +} + +bool ContrastAdaptiveSharpeningPass::CanRender(const RenderContext& renderContext) +{ + const AntiAliasingSettings& antiAliasing = renderContext.List->Settings.AntiAliasing; + return EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::AntiAliasing) && + antiAliasing.CAS_SharpeningAmount > ZeroTolerance && + !checkIfSkipPass(); +} + +void ContrastAdaptiveSharpeningPass::Render(const RenderContext& 
renderContext, GPUTexture* input, GPUTextureView* output) +{ + ASSERT_LOW_LAYER(CanRender(renderContext)); + PROFILE_GPU_CPU("Contrast Adaptive Sharpening"); + auto device = GPUDevice::Instance; + auto context = device->GetMainContext(); + const AntiAliasingSettings& antiAliasing = renderContext.List->Settings.AntiAliasing; + + Data data; + data.InputSizeInv = Float2::One / input->Size(); + data.SharpeningAmount = antiAliasing.CAS_SharpeningAmount; + data.EdgeSharpening = antiAliasing.CAS_EdgeSharpening; + data.MinEdgeThreshold = antiAliasing.CAS_MinEdgeThreshold; + data.OverBlurLimit = antiAliasing.CAS_OverBlurLimit; + const auto cb = _shader->GetShader()->GetCB(0); + context->UpdateCB(cb, &data); + context->BindCB(0, cb); + context->BindSR(0, input); + context->SetState(_psCAS); + context->SetRenderTarget(output); + context->DrawFullscreenTriangle(); +} diff --git a/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h new file mode 100644 index 000000000..8a449f6cd --- /dev/null +++ b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h @@ -0,0 +1,39 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +#pragma once + +#include "RendererPass.h" +#include "Engine/Graphics/PostProcessSettings.h" + +/// +/// Contrast Adaptive Sharpening (CAS) provides a mixed ability to sharpen and optionally scale an image. Based on AMD FidelityFX implementation. 
+/// +class ContrastAdaptiveSharpeningPass : public RendererPass +{ +private: + bool _loadShader = true; + AssetReference _shader; + GPUPipelineState* _psCAS = nullptr; + +public: + bool CanRender(const RenderContext& renderContext); + void Render(const RenderContext& renderContext, GPUTexture* input, GPUTextureView* output); + +private: +#if COMPILE_WITH_DEV_ENV + void OnShaderReloading(Asset* obj) + { + _psCAS->ReleaseGPU(); + invalidateResources(); + } +#endif + +public: + // [RendererPass] + String ToString() const override; + void Dispose() override; + +protected: + // [RendererPass] + bool setupResources() override; +}; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 13921e079..c1a87385f 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -22,6 +22,7 @@ #include "VolumetricFogPass.h" #include "HistogramPass.h" #include "AtmospherePreCompute.h" +#include "ContrastAdaptiveSharpeningPass.h" #include "GlobalSignDistanceFieldPass.h" #include "GI/GlobalSurfaceAtlasPass.h" #include "GI/DynamicDiffuseGlobalIllumination.h" @@ -126,21 +127,47 @@ void RendererService::Dispose() void RenderAntiAliasingPass(RenderContext& renderContext, GPUTexture* input, GPUTextureView* output, const Viewport& outputViewport) { auto context = GPUDevice::Instance->GetMainContext(); - context->SetViewportAndScissors(outputViewport); const auto aaMode = renderContext.List->Settings.AntiAliasing.Mode; - if (aaMode == AntialiasingMode::FastApproximateAntialiasing) + if (ContrastAdaptiveSharpeningPass::Instance()->CanRender(renderContext)) { - FXAA::Instance()->Render(renderContext, input, output); - } - else if (aaMode == AntialiasingMode::SubpixelMorphologicalAntialiasing) - { - SMAA::Instance()->Render(renderContext, input, output); + if (aaMode == AntialiasingMode::FastApproximateAntialiasing || + aaMode == AntialiasingMode::SubpixelMorphologicalAntialiasing) + { + // AA -> CAS -> Output + auto tmpImage 
= RenderTargetPool::Get(input->GetDescription()); + RENDER_TARGET_POOL_SET_NAME(tmpImage, "TmpImage"); + context->SetViewportAndScissors((float)input->Width(), (float)input->Height()); + if (aaMode == AntialiasingMode::FastApproximateAntialiasing) + FXAA::Instance()->Render(renderContext, input, tmpImage->View()); + else + SMAA::Instance()->Render(renderContext, input, tmpImage->View()); + context->ResetSR(); + context->ResetRenderTarget(); + context->SetViewportAndScissors(outputViewport); + ContrastAdaptiveSharpeningPass::Instance()->Render(renderContext, tmpImage, output); + RenderTargetPool::Release(tmpImage); + } + else + { + // CAS -> Output + context->SetViewportAndScissors(outputViewport); + ContrastAdaptiveSharpeningPass::Instance()->Render(renderContext, input, output); + } } else { - PROFILE_GPU("Copy frame"); - context->SetRenderTarget(output); - context->Draw(input); + // AA -> Output + context->SetViewportAndScissors(outputViewport); + if (aaMode == AntialiasingMode::FastApproximateAntialiasing) + FXAA::Instance()->Render(renderContext, input, output); + else if (aaMode == AntialiasingMode::SubpixelMorphologicalAntialiasing) + SMAA::Instance()->Render(renderContext, input, output); + else + { + PROFILE_GPU("Copy frame"); + context->SetRenderTarget(output); + context->Draw(input); + } } } diff --git a/Source/Shaders/CAS.shader b/Source/Shaders/CAS.shader new file mode 100644 index 000000000..93036107a --- /dev/null +++ b/Source/Shaders/CAS.shader @@ -0,0 +1,47 @@ +// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. + +#include "./Flax/Common.hlsl" + +META_CB_BEGIN(0, Data) +float2 InputSizeInv; +float2 Padding; +float SharpeningAmount; +float EdgeSharpening; +float MinEdgeThreshold; +float OverBlurLimit; +META_CB_END + +Texture2D Input : register(t0); + +// Pixel Shader for Contrast Adaptive Sharpening (CAS) filter. Based on AMD FidelityFX implementation. 
+META_PS(true, FEATURE_LEVEL_ES2) +float4 PS_CAS(Quad_VS2PS input) : SV_Target0 +{ + // Sample the color texture + float4 color = Input.SampleLevel(SamplerLinearClamp, input.TexCoord, 0); + + // Sample neighboring pixels + float3 blurred = color.rgb; + float3 edges = 0.0; + for (int x = -2; x <= 2; x++) + { + for (int y = -2; y <= 2; y++) + { + float2 uv = float2(x, y) * InputSizeInv + input.TexCoord; + float3 neighbor = Input.SampleLevel(SamplerLinearClamp, uv, 0).rgb; + blurred += neighbor; + edges += abs(neighbor - color.rgb); + } + } + blurred /= 25.0; + edges /= 25.0; + + // Sharpen based on edge detection + float edgeAmount = saturate((dot(edges, edges) - MinEdgeThreshold) / (0.001 + dot(edges, edges))); + float sharpen = (1.0 - edgeAmount) * SharpeningAmount + edgeAmount * EdgeSharpening; + float3 sharpened = color.rgb + (color.rgb - blurred) * sharpen; + + // Limit sharpening to avoid over-blurring + sharpened = lerp(color.rgb, sharpened, saturate(OverBlurLimit / (OverBlurLimit + dot(abs(sharpened - color.rgb), float3(1.0, 1.0, 1.0))))); + return float4(sharpened, color.a); +} From 6a4d9877e6b6501cfa94aca5f3667e6af07d1171 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 29 Aug 2024 19:34:17 +0200 Subject: [PATCH 288/292] Remove unused variable --- Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h index 8a449f6cd..d98f7fa3c 100644 --- a/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h +++ b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.h @@ -11,7 +11,6 @@ class ContrastAdaptiveSharpeningPass : public RendererPass { private: - bool _loadShader = true; AssetReference _shader; GPUPipelineState* _psCAS = nullptr; From eb825f516e8d7c655202eb37e488c5fd20afef8e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 22 Sep 2024 10:40:10 +0200 Subject: [PATCH 289/292] Fix terrain
splatmap cache clearing --- Source/Engine/Terrain/TerrainPatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Terrain/TerrainPatch.cpp b/Source/Engine/Terrain/TerrainPatch.cpp index 592c2e7f9..96dc35d49 100644 --- a/Source/Engine/Terrain/TerrainPatch.cpp +++ b/Source/Engine/Terrain/TerrainPatch.cpp @@ -1220,8 +1220,8 @@ Color32* TerrainPatch::GetSplatMapData(int32 index) void TerrainPatch::ClearSplatMapCache() { PROFILE_CPU_NAMED("Terrain.ClearSplatMapCache"); - if (_cachedSplatMap) - _cachedSplatMap->Clear(); + for (int32 i = 0; i < TERRAIN_MAX_SPLATMAPS_COUNT; i++) + _cachedSplatMap[i].Clear(); } void TerrainPatch::ClearCache() From 428a56af2867b45a6b5b94d0df975340bb456bf2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 22 Sep 2024 10:53:01 +0200 Subject: [PATCH 290/292] Fix old api usage warning --- Source/Editor/Managed/ManagedEditor.Internal.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Managed/ManagedEditor.Internal.cpp b/Source/Editor/Managed/ManagedEditor.Internal.cpp index d8ea556c8..12244fcce 100644 --- a/Source/Editor/Managed/ManagedEditor.Internal.cpp +++ b/Source/Editor/Managed/ManagedEditor.Internal.cpp @@ -610,7 +610,8 @@ Array ManagedEditor::GetAssetReferences(const Guid& assetId) Array result; if (auto* asset = Content::Load(assetId)) { - asset->GetReferences(result); + Array files; + asset->GetReferences(result, files); } return result; } From c943b3a526a7dfdf9f43d2eda5863339c0781b6a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 22 Sep 2024 11:08:31 +0200 Subject: [PATCH 291/292] Restore single editor tab window header on platforms with not yet implemented drag features (mac, linux) #2770 --- Source/Editor/GUI/Docking/DockPanelProxy.cs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Source/Editor/GUI/Docking/DockPanelProxy.cs b/Source/Editor/GUI/Docking/DockPanelProxy.cs index f489b0c39..bc38e2953 100644 ---
a/Source/Editor/GUI/Docking/DockPanelProxy.cs +++ b/Source/Editor/GUI/Docking/DockPanelProxy.cs @@ -14,6 +14,11 @@ namespace FlaxEditor.GUI.Docking { private DockPanel _panel; private double _dragEnterTime = -1; + #if PLATFORM_WINDOWS + private const bool HideTabForSingleTab = true; + #else + private const bool HideTabForSingleTab = false; + #endif /// /// The is mouse down flag (left button). @@ -51,7 +56,7 @@ namespace FlaxEditor.GUI.Docking public DockWindow StartDragAsyncWindow; private Rectangle HeaderRectangle => new Rectangle(0, 0, Width, DockPanel.DefaultHeaderHeight); - private bool IsSingleFloatingWindow => _panel.TabsCount == 1 && _panel.IsFloating && _panel.ChildPanelsCount == 0; + private bool IsSingleFloatingWindow => HideTabForSingleTab && _panel.TabsCount == 1 && _panel.IsFloating && _panel.ChildPanelsCount == 0; /// /// Initializes a new instance of the class. From da5570e89fef407cbb84205c950e42438f65f3e5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 22 Sep 2024 11:28:26 +0200 Subject: [PATCH 292/292] Fix terrain shader compilation regression --- Content/Editor/MaterialTemplates/Terrain.shader | 3 +-- Content/Engine/DefaultTerrainMaterial.flax | 4 ++-- Source/Shaders/MaterialCommon.hlsl | 2 ++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Content/Editor/MaterialTemplates/Terrain.shader b/Content/Editor/MaterialTemplates/Terrain.shader index 3c4567321..d2996d7a3 100644 --- a/Content/Editor/MaterialTemplates/Terrain.shader +++ b/Content/Editor/MaterialTemplates/Terrain.shader @@ -3,6 +3,7 @@ #define MATERIAL 1 #define USE_PER_VIEW_CONSTANTS 1 +#define LoadObjectFromCB(var) var = GetObject() @3 // Enables/disables smooth terrain chunks LOD transitions (with morphing higher LOD near edges to the lower LOD in the neighbour) #define USE_SMOOTH_LOD_TRANSITION 1 @@ -162,8 +163,6 @@ ObjectData GetObject() return object; } -#define LoadObjectFromCB(var) var = GetObject() - MaterialInput GetMaterialInput(PixelInput input) { 
MaterialInput output = GetGeometryMaterialInput(input.Geometry); diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index b75d68dcd..9d956c6ab 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69b58818698fe9609cee68d29642d75a752db39a1b06508d26994edaa37f0503 -size 23499 +oid sha256:876072ac78d8f346101e45a2b7f52f759bc1e437ccefe1a79ee4fbad68b2c04f +size 30264 diff --git a/Source/Shaders/MaterialCommon.hlsl b/Source/Shaders/MaterialCommon.hlsl index f8fcd69f3..325f8d065 100644 --- a/Source/Shaders/MaterialCommon.hlsl +++ b/Source/Shaders/MaterialCommon.hlsl @@ -119,6 +119,7 @@ ObjectData LoadObject(Buffer objectsBuffer, uint objectIndex) return object; } +#ifndef LoadObjectFromCB // Loads the object data from the constant buffer into the variable #define LoadObjectFromCB(var) \ var = (ObjectData)0; \ @@ -129,6 +130,7 @@ ObjectData LoadObject(Buffer objectsBuffer, uint objectIndex) var.WorldDeterminantSign = WorldDeterminantSign; \ var.LODDitherFactor = LODDitherFactor; \ var.LightmapArea = LightmapArea; +#endif // Material properties struct Material