From d2888d00078948cd8d8cb15e100eefca4af908fe Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Perrier Date: Sat, 9 Jan 2021 23:21:31 +0100 Subject: [PATCH 01/57] Adding Convert feature to Actor context menu. --- .../Windows/SceneTreeWindow.ContextMenu.cs | 22 ++++++++++++++++ Source/Editor/Windows/SceneTreeWindow.cs | 25 ++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs index b6756b438..87f13b13f 100644 --- a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs +++ b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs @@ -55,6 +55,28 @@ namespace FlaxEditor.Windows b = contextMenu.AddButton("Duplicate", Editor.SceneEditing.Duplicate); b.Enabled = hasSthSelected; + var convertMenu = contextMenu.AddChildMenu("Convert"); + var convertActorCm = convertMenu.ContextMenu; + for (int i = 0; i < SpawnActorsGroups.Length; i++) + { + var group = SpawnActorsGroups[i]; + + if (group.Types.Length == 1) + { + var type = group.Types[0].Value; + convertActorCm.AddButton(group.Types[0].Key, () => Convert(type)); + } + else + { + var groupCm = convertActorCm.AddChildMenu(group.Name).ContextMenu; + for (int j = 0; j < group.Types.Length; j++) + { + var type = group.Types[j].Value; + groupCm.AddButton(group.Types[j].Key, () => Convert(type)); + } + } + } + b = contextMenu.AddButton("Delete", Editor.SceneEditing.Delete); b.Enabled = hasSthSelected; diff --git a/Source/Editor/Windows/SceneTreeWindow.cs b/Source/Editor/Windows/SceneTreeWindow.cs index 3db4df799..997f764d2 100644 --- a/Source/Editor/Windows/SceneTreeWindow.cs +++ b/Source/Editor/Windows/SceneTreeWindow.cs @@ -122,7 +122,30 @@ namespace FlaxEditor.Windows // Spawn it Editor.SceneEditing.Spawn(actor, parentActor); } - + + private void Convert(Type to) + { + if (!Editor.SceneEditing.HasSthSelected || !(Editor.SceneEditing.Selection[0] is ActorNode)) + return; + Actor old = ((ActorNode)Editor.SceneEditing.Selection[0]).Actor; + Actor actor = (Actor)FlaxEngine.Object.New(to); + actor.Transform = old.Transform; + if (old.Parent != null) + actor.Parent = old.Parent; + actor.StaticFlags = old.StaticFlags; + actor.HideFlags = old.HideFlags; + actor.Layer = old.Layer; + actor.Tag = old.Tag; + actor.Name = old.Name; + actor.IsActive = old.IsActive; + for (var i = old.Children.Length - 1; i >= 0 ; i--) + { + old.Children[i].Parent = actor; + } + Editor.SceneEditing.Delete(); + Editor.SceneEditing.Spawn(actor, actor.Parent); + } + /// /// Focuses search box. /// From f4a587faf92102f310f2ffa64e8028ba51b60dfd Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Perrier Date: Tue, 16 Feb 2021 17:36:23 +0100 Subject: [PATCH 02/57] Support undo. --- Source/Editor/Modules/SceneEditingModule.cs | 78 +++++++++++++++++++ .../Windows/SceneTreeWindow.ContextMenu.cs | 34 ++++---- Source/Editor/Windows/SceneTreeWindow.cs | 25 +----- 3 files changed, 97 insertions(+), 40 deletions(-) diff --git a/Source/Editor/Modules/SceneEditingModule.cs b/Source/Editor/Modules/SceneEditingModule.cs index 78ffd26b0..7827e5003 100644 --- a/Source/Editor/Modules/SceneEditingModule.cs +++ b/Source/Editor/Modules/SceneEditingModule.cs @@ -250,6 +250,84 @@ namespace FlaxEditor.Modules } } + public void Convert(Type to) + { + if (!Editor.SceneEditing.HasSthSelected || !(Editor.SceneEditing.Selection[0] is ActorNode)) + return; + + if (Level.IsAnySceneLoaded == false) + throw new InvalidOperationException("Cannot spawn actor when no scene is loaded."); + + var actionList = new List(); + + Actor old = ((ActorNode)Editor.SceneEditing.Selection[0]).Actor; + Actor actor = (Actor)FlaxEngine.Object.New(to); + + actionList.Add(new SelectionChangeAction(Selection.ToArray(), new SceneGraphNode[0], OnSelectionUndo)); + actionList.Add(new DeleteActorsAction(new List + { + Editor.SceneEditing.Selection[0] + })); + SelectionDeleteBegin?.Invoke(); + actionList[1].Do(); + SelectionDeleteEnd?.Invoke(); + + bool isPlayMode = Editor.StateMachine.IsPlayMode; + + SpawnBegin?.Invoke(); + + actor.Transform = old.Transform; + if (old.Parent != null) + actor.Parent = old.Parent; + actor.StaticFlags = old.StaticFlags; + actor.HideFlags = old.HideFlags; + actor.Layer = old.Layer; + actor.Tag = old.Tag; + actor.Name = old.Name; + actor.IsActive = old.IsActive; + for (var i = old.Children.Length - 1; i >= 0 ; i--) + { + old.Children[i].Parent = actor; + } + + Level.SpawnActor(actor, actor.Parent); + + if (isPlayMode) + actor.StaticFlags = StaticFlags.None; + + var actorNode = Editor.Instance.Scene.GetActorNode(actor); + if (actorNode == null) + throw new InvalidOperationException("Failed to create scene node for the spawned actor."); + + actorNode.PostSpawn(); + Editor.Scene.MarkSceneEdited(actor.Scene); + + actionList.Add(new DeleteActorsAction(new List + { + actorNode + }, true)); + actionList.Add(new SelectionChangeAction(Selection.ToArray(), new SceneGraphNode[]{actorNode}, OnSelectionUndo)); + var actions = new MultiUndoAction(actionList); + Undo.AddAction(actions); + SpawnEnd?.Invoke(); + + var options = Editor.Options.Options; + + // Auto CSG mesh rebuild + if (!isPlayMode && options.General.AutoRebuildCSG) + { + if (actor is BoxBrush && actor.Scene) + actor.Scene.BuildCSG(options.General.AutoRebuildCSGTimeoutMs); + } + + // Auto NavMesh rebuild + if (!isPlayMode && options.General.AutoRebuildNavMesh && actor.Scene && (actor.StaticFlags & StaticFlags.Navigation) == StaticFlags.Navigation) + { + var bounds = actor.BoxWithChildren; + Navigation.BuildNavMesh(actor.Scene, bounds, options.General.AutoRebuildNavMeshTimeoutMs); + } + } + /// /// Deletes the selected objects. Supports undo/redo. /// diff --git a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs index 87f13b13f..aee456d81 100644 --- a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs +++ b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs @@ -55,28 +55,30 @@ namespace FlaxEditor.Windows b = contextMenu.AddButton("Duplicate", Editor.SceneEditing.Duplicate); b.Enabled = hasSthSelected; - var convertMenu = contextMenu.AddChildMenu("Convert"); - var convertActorCm = convertMenu.ContextMenu; - for (int i = 0; i < SpawnActorsGroups.Length; i++) + if (Editor.SceneEditing.SelectionCount == 1) { - var group = SpawnActorsGroups[i]; + var convertMenu = contextMenu.AddChildMenu("Convert"); + var convertActorCm = convertMenu.ContextMenu; + for (int i = 0; i < SpawnActorsGroups.Length; i++) + { + var group = SpawnActorsGroups[i]; - if (group.Types.Length == 1) - { - var type = group.Types[0].Value; - convertActorCm.AddButton(group.Types[0].Key, () => Convert(type)); - } - else - { - var groupCm = convertActorCm.AddChildMenu(group.Name).ContextMenu; - for (int j = 0; j < group.Types.Length; j++) + if (group.Types.Length == 1) { - var type = group.Types[j].Value; - groupCm.AddButton(group.Types[j].Key, () => Convert(type)); + var type = group.Types[0].Value; + convertActorCm.AddButton(group.Types[0].Key, () => Editor.SceneEditing.Convert(type)); + } + else + { + var groupCm = convertActorCm.AddChildMenu(group.Name).ContextMenu; + for (int j = 0; j < group.Types.Length; j++) + { + var type = group.Types[j].Value; + groupCm.AddButton(group.Types[j].Key, () => Editor.SceneEditing.Convert(type)); + } } } } - b = contextMenu.AddButton("Delete", Editor.SceneEditing.Delete); b.Enabled = hasSthSelected; diff --git a/Source/Editor/Windows/SceneTreeWindow.cs b/Source/Editor/Windows/SceneTreeWindow.cs index 997f764d2..3db4df799 100644 --- a/Source/Editor/Windows/SceneTreeWindow.cs +++ b/Source/Editor/Windows/SceneTreeWindow.cs @@ -122,30 +122,7 @@ namespace FlaxEditor.Windows // Spawn it Editor.SceneEditing.Spawn(actor, parentActor); } - - private void Convert(Type to) - { - if (!Editor.SceneEditing.HasSthSelected || !(Editor.SceneEditing.Selection[0] is ActorNode)) - return; - Actor old = ((ActorNode)Editor.SceneEditing.Selection[0]).Actor; - Actor actor = (Actor)FlaxEngine.Object.New(to); - actor.Transform = old.Transform; - if (old.Parent != null) - actor.Parent = old.Parent; - actor.StaticFlags = old.StaticFlags; - actor.HideFlags = old.HideFlags; - actor.Layer = old.Layer; - actor.Tag = old.Tag; - actor.Name = old.Name; - actor.IsActive = old.IsActive; - for (var i = old.Children.Length - 1; i >= 0 ; i--) - { - old.Children[i].Parent = actor; - } - Editor.SceneEditing.Delete(); - Editor.SceneEditing.Spawn(actor, actor.Parent); - } - + /// /// Focuses search box. /// From bc229d0bb659512202d78e90830f758abbb0b4f7 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Perrier Date: Tue, 16 Feb 2021 18:25:44 +0100 Subject: [PATCH 03/57] Support converting actor with scripts. --- Source/Editor/Modules/SceneEditingModule.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Modules/SceneEditingModule.cs b/Source/Editor/Modules/SceneEditingModule.cs index 7827e5003..d4fb8fe70 100644 --- a/Source/Editor/Modules/SceneEditingModule.cs +++ b/Source/Editor/Modules/SceneEditingModule.cs @@ -285,6 +285,10 @@ namespace FlaxEditor.Modules actor.Tag = old.Tag; actor.Name = old.Name; actor.IsActive = old.IsActive; + for (var i = old.ScriptsCount - 1; i >=0; i--) + { + old.Scripts[i].Parent = actor; + } for (var i = old.Children.Length - 1; i >= 0 ; i--) { old.Children[i].Parent = actor; @@ -306,7 +310,7 @@ namespace FlaxEditor.Modules { actorNode }, true)); - actionList.Add(new SelectionChangeAction(Selection.ToArray(), new SceneGraphNode[]{actorNode}, OnSelectionUndo)); + actionList.Add(new SelectionChangeAction(new SceneGraphNode[0], new SceneGraphNode[]{actorNode}, OnSelectionUndo)); var actions = new MultiUndoAction(actionList); Undo.AddAction(actions); SpawnEnd?.Invoke(); From 1c5b779ae92e4b68eb8491cf7938e3dd56b5205c Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Perrier Date: Tue, 16 Feb 2021 18:27:02 +0100 Subject: [PATCH 04/57] Add Convert docs. --- Source/Editor/Modules/SceneEditingModule.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Editor/Modules/SceneEditingModule.cs b/Source/Editor/Modules/SceneEditingModule.cs index d4fb8fe70..1cec21305 100644 --- a/Source/Editor/Modules/SceneEditingModule.cs +++ b/Source/Editor/Modules/SceneEditingModule.cs @@ -250,6 +250,10 @@ namespace FlaxEditor.Modules } } + /// + /// Converts the selected actor to another type. + /// + /// The type to convert in. public void Convert(Type to) { if (!Editor.SceneEditing.HasSthSelected || !(Editor.SceneEditing.Selection[0] is ActorNode)) From 25f61e931e9ee0d729338814a5d01f63bca9876f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 10:58:58 +0100 Subject: [PATCH 05/57] Add support for using other VC++ toolset for Window and add cmd arg for selecting compiler manually --- Source/Tools/Flax.Build/Configuration.cs | 6 ++++++ .../Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs | 2 +- .../Flax.Build/Platforms/Windows/WindowsToolchainBase.cs | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Source/Tools/Flax.Build/Configuration.cs b/Source/Tools/Flax.Build/Configuration.cs index 28970f289..bd9785d35 100644 --- a/Source/Tools/Flax.Build/Configuration.cs +++ b/Source/Tools/Flax.Build/Configuration.cs @@ -201,6 +201,12 @@ namespace Flax.Build [CommandLine("customProjectFormat", "", "Generates code project files for a custom project format type. Valid only with -genproject option.")] public static string ProjectFormatCustom = null; + /// + /// Overrides the compiler to use for building. Eg. v140 overrides the toolset when building for Windows. + /// + [CommandLine("compiler", "", "Overrides the compiler to use for building. Eg. v140 overrides the toolset when building for Windows.")] + public static string Compiler = null; + /// /// Custom configuration defines provided via command line for the build tool. /// diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs index 8f247a189..40f338a65 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs @@ -20,7 +20,7 @@ namespace Flax.Build.Platforms /// The platform. /// The target architecture. public WindowsToolchain(WindowsPlatform platform, TargetArchitecture architecture) - : base(platform, architecture, WindowsPlatformToolset.v140, WindowsPlatformSDK.Latest) + : base(platform, architecture, WindowsPlatformToolset.Latest, WindowsPlatformSDK.Latest) { } diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs index 4d3289a86..919971f68 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchainBase.cs @@ -78,6 +78,13 @@ namespace Flax.Build.Platforms var toolsets = WindowsPlatformBase.GetToolsets(); var sdks = WindowsPlatformBase.GetSDKs(); + // Pick the overriden toolset + if (Configuration.Compiler != null) + { + if (Enum.TryParse(Configuration.Compiler, out WindowsPlatformToolset compiler)) + toolsetVer = compiler; + } + // Pick the newest installed Visual Studio version if using the default toolset if (toolsetVer == WindowsPlatformToolset.Default) { From 7cc5560adfe5d4f302ee74e6e9463359c262f49e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 10:59:08 +0100 Subject: [PATCH 06/57] Fix compilation warnings --- Source/Engine/Core/Utilities.h | 6 ++--- Source/Engine/Debug/DebugDraw.cpp | 28 ++++++++++++------------ Source/Engine/Level/Actors/SpotLight.cpp | 4 ++-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Source/Engine/Core/Utilities.h b/Source/Engine/Core/Utilities.h index 8d77c508d..531509d61 100644 --- a/Source/Engine/Core/Utilities.h +++ b/Source/Engine/Core/Utilities.h @@ -11,21 +11,21 @@ namespace Utilities template FORCE_INLINE T RoundTo1DecimalPlace(T value) { - return round(value * 10) / 10; + return (T)round((double)value * 10) / (T)10; } // Round floating point value up to 2 decimal places template FORCE_INLINE T RoundTo2DecimalPlaces(T value) { - return round(value * 100) / 100; + return (T)round((double)value * 100.0) / (T)100; } // Round floating point value up to 3 decimal places template FORCE_INLINE T RoundTo3DecimalPlaces(T value) { - return round(value * 1000) / 1000; + return (T)round((double)value * 1000.0) / (T)1000; } // Converts size of the file (in bytes) to the best fitting string diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp index e394b31f4..bae424c4e 100644 --- a/Source/Engine/Debug/DebugDraw.cpp +++ b/Source/Engine/Debug/DebugDraw.cpp @@ -313,10 +313,10 @@ bool DebugDrawService::Init() for (float a = 0.0f; a < TWO_PI; a += step) { // Calculate sines and cosines - float sinA = sin(a); - float cosA = cos(a); - float sinB = sin(a + step); - float cosB = cos(a + step); + float sinA = Math::Sin(a); + float cosA = Math::Cos(a); + float sinB = Math::Sin(a + step); + float cosB = Math::Cos(a + step); // XY loop SphereCache[index++] = Vector3(cosA, sinA, 0.0f); @@ -337,10 +337,10 @@ bool DebugDrawService::Init() for (float a = 0.0f; a < TWO_PI; a += step) { // Calculate sines and cosines - float sinA = sin(a); - float cosA = cos(a); - float sinB = sin(a + step); - float cosB = cos(a + step); + float sinA = Math::Sin(a); + float cosA = Math::Cos(a); + float sinB = Math::Sin(a + step); + float cosB = Math::Cos(a + step); CircleCache[index++] = Vector3(cosA, sinA, 0.0f); CircleCache[index++] = Vector3(cosB, sinB, 0.0f); @@ -1049,10 +1049,10 @@ void DebugDraw::DrawWireTube(const Vector3& position, const Quaternion& orientat { // Calculate sines and cosines // TODO: optimize this stuff - float sinA = sin(a) * radius; - float cosA = cos(a) * radius; - float sinB = sin(a + step) * radius; - float cosB = cos(a + step) * radius; + float sinA = Math::Sin(a) * radius; + float cosA = Math::Cos(a) * radius; + float sinB = Math::Sin(a + step) * radius; + float cosB = Math::Cos(a + step) * radius; // First XY loop DRAW_WIRE_BOX_LINE(cosA, sinA, -halfLength, cosB, sinB, -halfLength); @@ -1098,8 +1098,8 @@ void DebugDraw::DrawWireCylinder(const Vector3& position, const Quaternion& orie { // Cache data float theta = i * angleBetweenFacets; - float x = cos(theta) * radius; - float z = sin(theta) * radius; + float x = Math::Cos(theta) * radius; + float z = Math::Sin(theta) * radius; // Top cap CylinderCache[index++] = Vector3(x, verticalOffset, z); diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index 57d4d97f3..c601506c4 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -198,8 +198,8 @@ void SpotLight::OnDebugDrawSelected() Vector3 up = _transform.GetUp(); Vector3 forward = GetDirection(); float radius = GetScaledRadius(); - float discRadius = radius * tan(_outerConeAngle * DegreesToRadians); - float falloffDiscRadius = radius * tan(_innerConeAngle * DegreesToRadians); + float discRadius = radius * Math::Tan(_outerConeAngle * DegreesToRadians); + float falloffDiscRadius = radius * Math::Tan(_innerConeAngle * DegreesToRadians); Vector3 position = GetPosition(); DEBUG_DRAW_LINE(position, position + forward * radius + up * discRadius, color, 0, true); From d6546fb41c58c58854d2af6adbfd07925c6e9621 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 11:00:48 +0100 Subject: [PATCH 07/57] Update VulkanMemoryAllocator to `3.0.0-development` (2021-02-16) --- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 2 + .../VulkanMemoryAllocator/LICENSE.txt | 2 +- .../VulkanMemoryAllocator/vk_mem_alloc.h | 4864 ++++++++++++----- 3 files changed, 3612 insertions(+), 1256 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 5457872d5..1205cde98 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1825,7 +1825,9 @@ bool GPUDeviceVulkan::Init() #endif #undef INIT_FUNC VmaAllocatorCreateInfo allocatorInfo = {}; + allocatorInfo.vulkanApiVersion = VK_API_VERSION_1_0; allocatorInfo.physicalDevice = gpu; + allocatorInfo.instance = Instance; allocatorInfo.device = Device; allocatorInfo.pVulkanFunctions = &vulkanFunctions; VALIDATE_VULKAN_RESULT(vmaCreateAllocator(&allocatorInfo, &Allocator)); diff --git a/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt b/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt index 67b0d01dc..71e824f80 100644 --- a/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt +++ b/Source/ThirdParty/VulkanMemoryAllocator/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h b/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h index 12256dee6..a410c63e4 100644 --- a/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h +++ b/Source/ThirdParty/VulkanMemoryAllocator/vk_mem_alloc.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -23,15 +23,11 @@ #ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H #define AMD_VULKAN_MEMORY_ALLOCATOR_H -#ifdef __cplusplus -extern "C" { -#endif - /** \mainpage Vulkan Memory Allocator -Version 2.2.1-development (2018-12-14) +Version 3.0.0-development (2021-02-16) -Copyright (c) 2017-2018 Advanced Micro Devices, Inc. All rights reserved. \n +Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. \n License: MIT Documentation of all members: vk_mem_alloc.h @@ -52,8 +48,12 @@ Documentation of all members: vk_mem_alloc.h - \subpage memory_mapping - [Mapping functions](@ref memory_mapping_mapping_functions) - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) - - [Cache control](@ref memory_mapping_cache_control) + - [Cache flush and invalidate](@ref memory_mapping_cache_control) - [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable) + - \subpage staying_within_budget + - [Querying for budget](@ref staying_within_budget_querying_for_budget) + - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) + - \subpage resource_aliasing - \subpage custom_memory_pools - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) - [Linear allocation algorithm](@ref linear_algorithm) @@ -63,10 +63,10 @@ Documentation of all members: vk_mem_alloc.h - [Ring buffer](@ref linear_algorithm_ring_buffer) - [Buddy allocation algorithm](@ref buddy_algorithm) - \subpage defragmentation - - [Defragmenting CPU memory](@ref defragmentation_cpu) - - [Defragmenting GPU memory](@ref defragmentation_gpu) - - [Additional notes](@ref defragmentation_additional_notes) - - [Writing custom allocation algorithm](@ref defragmentation_custom_algorithm) + - [Defragmenting CPU memory](@ref defragmentation_cpu) + - [Defragmenting GPU memory](@ref defragmentation_gpu) + - [Additional notes](@ref defragmentation_additional_notes) + - [Writing custom allocation algorithm](@ref defragmentation_custom_algorithm) - \subpage lost_allocations - \subpage statistics - [Numeric statistics](@ref statistics_numeric_statistics) @@ -80,6 +80,7 @@ Documentation of all members: vk_mem_alloc.h - [Corruption detection](@ref debugging_memory_usage_corruption_detection) - \subpage record_and_replay - \subpage usage_patterns + - [Common mistakes](@ref usage_patterns_common_mistakes) - [Simple patterns](@ref usage_patterns_simple) - [Advanced patterns](@ref usage_patterns_advanced) - \subpage configuration @@ -88,6 +89,8 @@ Documentation of all members: vk_mem_alloc.h - [Device memory allocation callbacks](@ref allocation_callbacks) - [Device heap memory limit](@ref heap_memory_limit) - \subpage vk_khr_dedicated_allocation + - \subpage enabling_buffer_device_address + - \subpage vk_amd_device_coherent_memory - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) @@ -139,24 +142,42 @@ before including these headers (like `WIN32_LEAN_AND_MEAN` or `WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define them before every `#include` of this library. +You may need to configure the way you import Vulkan functions. + +- By default, VMA assumes you you link statically with Vulkan API. If this is not the case, + `#define VMA_STATIC_VULKAN_FUNCTIONS 0` before `#include` of the VMA implementation and use another way. +- You can `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1` and make sure `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` globals are defined. + All the remaining Vulkan functions will be fetched automatically. +- Finally, you can provide your own pointers to all Vulkan functions needed by VMA using structure member + VmaAllocatorCreateInfo::pVulkanFunctions, if you fetched them in some custom way e.g. using some loader like [Volk](https://github.com/zeux/volk). + \section quick_start_initialization Initialization At program startup: --# Initialize Vulkan to have `VkPhysicalDevice` and `VkDevice` object. +-# Initialize Vulkan to have `VkPhysicalDevice`, `VkDevice` and `VkInstance` object. -# Fill VmaAllocatorCreateInfo structure and create #VmaAllocator object by calling vmaCreateAllocator(). \code VmaAllocatorCreateInfo allocatorInfo = {}; +allocatorInfo.vulkanApiVersion = VK_API_VERSION_1_2; allocatorInfo.physicalDevice = physicalDevice; allocatorInfo.device = device; +allocatorInfo.instance = instance; VmaAllocator allocator; vmaCreateAllocator(&allocatorInfo, &allocator); \endcode +Only members `physicalDevice`, `device`, `instance` are required. +However, you should inform the library which Vulkan version do you use by setting +VmaAllocatorCreateInfo::vulkanApiVersion and which extensions did you enable +by setting VmaAllocatorCreateInfo::flags (like #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT for VK_KHR_buffer_device_address). +Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions. + + \section quick_start_resource_allocation Resource allocation When you want to create a buffer or image: @@ -206,7 +227,8 @@ You can also combine multiple methods. -# If you already have a buffer or an image created, you want to allocate memory for it and then you will bind it yourself, you can use function vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). - For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory(). + For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() + or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). -# If you want to create a buffer or an image, allocate memory for it and bind them together, all in one call, you can use function vmaCreateBuffer(), vmaCreateImage(). This is the easiest and recommended way to use this library. @@ -297,6 +319,7 @@ VmaAllocation allocation; vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); \endcode + \section choosing_memory_type_custom_memory_pools Custom memory pools If you allocate from custom memory pool, all the ways of specifying memory @@ -421,16 +444,18 @@ There are some exceptions though, when you should consider mapping memory only f which requires unmapping before GPU can see updated texture. - Keeping many large memory blocks mapped may impact performance or stability of some debugging tools. -\section memory_mapping_cache_control Cache control - +\section memory_mapping_cache_control Cache flush and invalidate + Memory in Vulkan doesn't need to be unmapped before using it on GPU, but unless a memory types has `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set, -you need to manually invalidate cache before reading of mapped pointer -and flush cache after writing to mapped pointer. +you need to manually **invalidate** cache before reading of mapped pointer +and **flush** cache after writing to mapped pointer. +Map/unmap operations don't do that automatically. Vulkan provides following functions for this purpose `vkFlushMappedMemoryRanges()`, `vkInvalidateMappedMemoryRanges()`, but this library provides more convenient functions that refer to given allocation object: vmaFlushAllocation(), -vmaInvalidateAllocation(). +vmaInvalidateAllocation(), +or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations(). Regions of memory specified for flush/invalidate must be aligned to `VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library. @@ -440,7 +465,7 @@ within blocks are aligned to this value, so their offsets are always multiply of Please note that memory allocated with #VMA_MEMORY_USAGE_CPU_ONLY is guaranteed to be `HOST_COHERENT`. -Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA) +Also, Windows drivers from all 3 **PC** GPU vendors (AMD, Intel, NVIDIA) currently provide `HOST_COHERENT` flag on all memory types that are `HOST_VISIBLE`, so on this platform you may not need to bother. @@ -472,7 +497,7 @@ vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allo VkMemoryPropertyFlags memFlags; vmaGetMemoryTypeProperties(allocator, allocInfo.memoryType, &memFlags); -if((memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) +if((memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) { // Allocation ended up in mappable memory. You can map it and access it directly. void* mappedData; @@ -507,7 +532,7 @@ VmaAllocation alloc; VmaAllocationInfo allocInfo; vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); -if(allocInfo.pUserData != nullptr) +if(allocInfo.pMappedData != nullptr) { // Allocation ended up in mappable memory. // It's persistently mapped. You can access it directly. @@ -521,6 +546,186 @@ else \endcode +\page staying_within_budget Staying within budget + +When developing a graphics-intensive game or program, it is important to avoid allocating +more GPU memory than it's physically available. When the memory is over-committed, +various bad things can happen, depending on the specific GPU, graphics driver, and +operating system: + +- It may just work without any problems. +- The application may slow down because some memory blocks are moved to system RAM + and the GPU has to access them through PCI Express bus. +- A new allocation may take very long time to complete, even few seconds, and possibly + freeze entire system. +- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +- It may even result in GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST` + returned somewhere later. + +\section staying_within_budget_querying_for_budget Querying for budget + +To query for current memory usage and available budget, use function vmaGetBudget(). +Returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap. + +Please note that this function returns different information and works faster than +vmaCalculateStats(). vmaGetBudget() can be called every frame or even before every +allocation, while vmaCalculateStats() is intended to be used rarely, +only to obtain statistical information, e.g. for debugging purposes. + +It is recommended to use VK_EXT_memory_budget device extension to obtain information +about the budget from Vulkan device. VMA is able to use this extension automatically. +When not enabled, the allocator behaves same way, but then it estimates current usage +and available budget based on its internal information and Vulkan memory heap sizes, +which may be less precise. In order to use this extension: + +1. Make sure extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2 + required by it are available and enable them. Please note that the first is a device + extension and the second is instance extension! +2. Use flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating #VmaAllocator object. +3. Make sure to call vmaSetCurrentFrameIndex() every frame. Budget is queried from + Vulkan inside of it to avoid overhead of querying it with every allocation. + +\section staying_within_budget_controlling_memory_usage Controlling memory usage + +There are many ways in which you can try to stay within the budget. + +First, when making new allocation requires allocating a new memory block, the library +tries not to exceed the budget automatically. If a block with default recommended size +(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even +dedicated memory for just this resource. + +If the size of the requested resource plus current memory usage is more than the +budget, by default the library still tries to create it, leaving it to the Vulkan +implementation whether the allocation succeeds or fails. You can change this behavior +by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is +not made if it would exceed the budget or if the budget is already exceeded. +Some other allocations become lost instead to make room for it, if the mechanism of +[lost allocations](@ref lost_allocations) is used. +If that is not possible, the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag +when creating resources that are not essential for the application (e.g. the texture +of a specific object) and not to pass it when creating critically important resources +(e.g. render targets). + +Finally, you can also use #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure +a new allocation is created only when it fits inside one of the existing memory blocks. +If it would require to allocate a new block, if fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +This also ensures that the function call is very fast because it never goes to Vulkan +to obtain a new block. + +Please note that creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount +set to more than 0 will try to allocate memory blocks without checking whether they +fit within budget. + + +\page resource_aliasing Resource aliasing (overlap) + +New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory +management, give an opportunity to alias (overlap) multiple resources in the +same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL). +It can be useful to save video memory, but it must be used with caution. + +For example, if you know the flow of your whole render frame in advance, you +are going to use some intermediate textures or buffers only during a small range of render passes, +and you know these ranges don't overlap in time, you can bind these resources to +the same place in memory, even if they have completely different parameters (width, height, format etc.). + +![Resource aliasing (overlap)](../gfx/Aliasing.png) + +Such scenario is possible using VMA, but you need to create your images manually. +Then you need to calculate parameters of an allocation to be made using formula: + +- allocation size = max(size of each image) +- allocation alignment = max(alignment of each image) +- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image) + +Following example shows two different images bound to the same place in memory, +allocated to fit largest of them. + +\code +// A 512x512 texture to be sampled. +VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +img1CreateInfo.imageType = VK_IMAGE_TYPE_2D; +img1CreateInfo.extent.width = 512; +img1CreateInfo.extent.height = 512; +img1CreateInfo.extent.depth = 1; +img1CreateInfo.mipLevels = 10; +img1CreateInfo.arrayLayers = 1; +img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB; +img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; +img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +// A full screen texture to be used as color attachment. +VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +img2CreateInfo.imageType = VK_IMAGE_TYPE_2D; +img2CreateInfo.extent.width = 1920; +img2CreateInfo.extent.height = 1080; +img2CreateInfo.extent.depth = 1; +img2CreateInfo.mipLevels = 1; +img2CreateInfo.arrayLayers = 1; +img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VkImage img1; +res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1); +VkImage img2; +res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2); + +VkMemoryRequirements img1MemReq; +vkGetImageMemoryRequirements(device, img1, &img1MemReq); +VkMemoryRequirements img2MemReq; +vkGetImageMemoryRequirements(device, img2, &img2MemReq); + +VkMemoryRequirements finalMemReq = {}; +finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size); +finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment); +finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits; +// Validate if(finalMemReq.memoryTypeBits != 0) + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + +VmaAllocation alloc; +res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr); + +res = vmaBindImageMemory(allocator, alloc, img1); +res = vmaBindImageMemory(allocator, alloc, img2); + +// You can use img1, img2 here, but not at the same time! + +vmaFreeMemory(allocator, alloc); +vkDestroyImage(allocator, img2, nullptr); +vkDestroyImage(allocator, img1, nullptr); +\endcode + +Remember that using resouces that alias in memory requires proper synchronization. +You need to issue a memory barrier to make sure commands that use `img1` and `img2` +don't overlap on GPU timeline. +You also need to treat a resource after aliasing as uninitialized - containing garbage data. +For example, if you use `img1` and then want to use `img2`, you need to issue +an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`. + +Additional considerations: + +- Vulkan also allows to interpret contents of memory between aliasing resources consistently in some cases. +See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ALIAS_BIT` flag. +- You can create more complex layout where different images and buffers are bound +at different offsets inside one large allocation. For example, one can imagine +a big texture used in some render passes, aliasing with a set of many small buffers +used between in some further passes. To bind a resource at non-zero offset of an allocation, +use vmaBindBufferMemory2() / vmaBindImageMemory2(). +- Before allocating memory for the resources you want to alias, check `memoryTypeBits` +returned in memory requirements of each resource to make sure the bits overlap. +Some GPUs may expose multiple memory types suitable e.g. only for buffers or +images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your +resources may be disjoint. Aliasing them is not possible in that case. + + \page custom_memory_pools Custom memory pools A memory pool contains a number of `VkDeviceMemory` blocks. @@ -744,7 +949,7 @@ allocations. To mitigate this problem, you can use defragmentation feature: structure #VmaDefragmentationInfo2, function vmaDefragmentationBegin(), vmaDefragmentationEnd(). -Given set of allocations, +Given set of allocations, this function can move them to compact used memory, ensure more continuous free space and possibly also free some `VkDeviceMemory` blocks. @@ -761,7 +966,8 @@ What it doesn't do, so you need to do it yourself: - Recreate buffers and images that were bound to allocations that were defragmented and bind them with their new places in memory. You must use `vkDestroyBuffer()`, `vkDestroyImage()`, - `vkCreateBuffer()`, `vkCreateImage()` for that purpose and NOT vmaDestroyBuffer(), + `vkCreateBuffer()`, `vkCreateImage()`, vmaBindBufferMemory(), vmaBindImageMemory() + for that purpose and NOT vmaDestroyBuffer(), vmaDestroyImage(), vmaCreateBuffer(), vmaCreateImage(), because you don't need to destroy or create allocation objects! - Recreate views and update descriptors that point to these buffers and images. @@ -809,13 +1015,13 @@ for(uint32_t i = 0; i < allocCount; ++i) // Create new buffer with same parameters. VkBufferCreateInfo bufferInfo = ...; vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - + // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - + // Bind new buffer to new memory region. Data contained in it is already moved. VmaAllocationInfo allocInfo; vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vkBindBufferMemory(device, buffers[i], allocInfo.deviceMemory, allocInfo.offset); + vmaBindBufferMemory(allocator, allocations[i], buffers[i]); } } \endcode @@ -887,13 +1093,13 @@ for(uint32_t i = 0; i < allocCount; ++i) // Create new buffer with same parameters. VkBufferCreateInfo bufferInfo = ...; vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - + // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - + // Bind new buffer to new memory region. Data contained in it is already moved. VmaAllocationInfo allocInfo; vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vkBindBufferMemory(device, buffers[i], allocInfo.deviceMemory, allocInfo.offset); + vmaBindBufferMemory(allocator, allocations[i], buffers[i]); } } \endcode @@ -907,19 +1113,22 @@ in function vmaDefragmentationBegin(). \section defragmentation_additional_notes Additional notes -While using defragmentation, you may experience validation layer warnings, which you just need to ignore. -See [Validation layer warnings](@ref general_considerations_validation_layer_warnings). +It is only legal to defragment allocations bound to: -If you defragment allocations bound to images, these images should be created with -`VK_IMAGE_CREATE_ALIAS_BIT` flag, to make sure that new image created with same -parameters and pointing to data copied to another memory region will interpret -its contents consistently. Otherwise you may experience corrupted data on some -implementations, e.g. due to different pixel swizzling used internally by the graphics driver. +- buffers +- images created with `VK_IMAGE_CREATE_ALIAS_BIT`, `VK_IMAGE_TILING_LINEAR`, and + being currently in `VK_IMAGE_LAYOUT_GENERAL` or `VK_IMAGE_LAYOUT_PREINITIALIZED`. + +Defragmentation of images created with `VK_IMAGE_TILING_OPTIMAL` or in any other +layout may give undefined results. If you defragment allocations bound to images, new images to be bound to new memory region after defragmentation should be created with `VK_IMAGE_LAYOUT_PREINITIALIZED` -and then transitioned to their original layout from before defragmentation using -an image memory barrier. +and then transitioned to their original layout from before defragmentation if +needed using an image memory barrier. + +While using defragmentation, you may experience validation layer warnings, which you just need to ignore. +See [Validation layer warnings](@ref general_considerations_validation_layer_warnings). Please don't expect memory to be fully compacted after defragmentation. Algorithms inside are based on some heuristics that try to maximize number of Vulkan @@ -1186,6 +1395,9 @@ printf("Image name: %s\n", imageName); That string is also printed in JSON report created by vmaBuildStatsString(). +\note Passing string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. +You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. + \page debugging_memory_usage Debugging incorrect memory usage @@ -1277,7 +1489,7 @@ which indicates a serious bug. You can also explicitly request checking margins of all allocations in all memory blocks that belong to specified memory types by using function vmaCheckCorruption(), -or in memory blocks that belong to specified custom pool, by using function +or in memory blocks that belong to specified custom pool, by using function vmaCheckPoolCorruption(). Margin validation (corruption detection) works only for memory types that are @@ -1301,6 +1513,13 @@ application. It can be useful to: \section record_and_replay_usage Usage +Recording functionality is disabled by default. +To enable it, define following macro before every include of this library: + +\code +#define VMA_RECORDING_ENABLED 1 +\endcode + To record sequence of calls to a file: Fill in VmaAllocatorCreateInfo::pRecordSettings member while creating #VmaAllocator object. File is opened and written during whole lifetime of the allocator. @@ -1327,7 +1546,6 @@ It's a human-readable, text file in CSV format (Comma Separated Values). coded and tested only on Windows. Inclusion of recording code is driven by `VMA_RECORDING_ENABLED` macro. Support for other platforms should be easy to add. Contributions are welcomed. -- Currently calls to vmaDefragment() function are not recorded. \page usage_patterns Recommended usage patterns @@ -1336,6 +1554,27 @@ See also slides from talk: [Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) +\section usage_patterns_common_mistakes Common mistakes + +Use of CPU_TO_GPU instead of CPU_ONLY memory + +#VMA_MEMORY_USAGE_CPU_TO_GPU is recommended only for resources that will be +mapped and written by the CPU, as well as read directly by the GPU - like some +buffers or textures updated every frame (dynamic). If you create a staging copy +of a resource to be written by CPU and then used as a source of transfer to +another resource placed in the GPU memory, that staging resource should be +created with #VMA_MEMORY_USAGE_CPU_ONLY. Please read the descriptions of these +enums carefully for details. + +Unnecessary use of custom pools + +\ref custom_memory_pools may be useful for special purposes - when you want to +keep certain type of resources separate e.g. to reserve minimum amount of memory +for them, limit maximum amount of memory they can occupy, or make some of them +push out the other through the mechanism of \ref lost_allocations. For most +resources this is not needed and so it is not recommended to create #VmaPool +objects and allocations out of them. Allocating from the default pool is sufficient. + \section usage_patterns_simple Simple patterns \subsection usage_patterns_simple_render_targets Render targets @@ -1391,6 +1630,7 @@ This is a more complex situation. Different solutions are possible, and the best one depends on specific GPU type, but you can use this simple approach for the start. Prefer to write to such resource sequentially (e.g. using `memcpy`). Don't perform random access or any reads from it on CPU, as it may be very slow. +Also note that textures written directly from the host through a mapped pointer need to be in LINEAR not OPTIMAL layout. \subsection usage_patterns_readback Readback @@ -1423,10 +1663,10 @@ directly instead of submitting explicit transfer (see below). For resources that you frequently write on CPU and read on GPU, many solutions are possible: -# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY, - second copy in system memory using #VMA_MEMORY_USAGE_CPU_ONLY and submit explicit tranfer each time. --# Create just single copy using #VMA_MEMORY_USAGE_CPU_TO_GPU, map it and fill it on CPU, + second copy in system memory using #VMA_MEMORY_USAGE_CPU_ONLY and submit explicit transfer each time. +-# Create just a single copy using #VMA_MEMORY_USAGE_CPU_TO_GPU, map it and fill it on CPU, read it directly on GPU. --# Create just single copy using #VMA_MEMORY_USAGE_CPU_ONLY, map it and fill it on CPU, +-# Create just a single copy using #VMA_MEMORY_USAGE_CPU_ONLY, map it and fill it on CPU, read it directly on GPU. Which solution is the most efficient depends on your resource and especially on the GPU. @@ -1454,6 +1694,10 @@ solutions are possible: You should take some measurements to decide which option is faster in case of your specific resource. +Note that textures accessed directly from the host through a mapped pointer need to be in LINEAR layout, +which may slow down their usage on the device. +Textures accessed only by the device and transfer operations can use OPTIMAL layout. + If you don't want to specialize your code for specific types of GPUs, you can still make an simple optimization for cases when your resource ends up in mappable memory to use it directly in this case instead of creating CPU-side staging copy. @@ -1469,14 +1713,38 @@ mutex, atomic etc. The library uses its own implementation of containers by default, but you can switch to using STL containers instead. +For example, define `VMA_ASSERT(expr)` before including the library to provide +custom implementation of the assertion, compatible with your project. +By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration +and empty otherwise. + \section config_Vulkan_functions Pointers to Vulkan functions -The library uses Vulkan functions straight from the `vulkan.h` header by default. -If you want to provide your own pointers to these functions, e.g. fetched using -`vkGetInstanceProcAddr()` and `vkGetDeviceProcAddr()`: +There are multiple ways to import pointers to Vulkan functions in the library. +In the simplest case you don't need to do anything. +If the compilation or linking of your program or the initialization of the #VmaAllocator +doesn't work for you, you can try to reconfigure it. + +First, the allocator tries to fetch pointers to Vulkan functions linked statically, +like this: + +\code +m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; +\endcode + +If you want to disable this feature, set configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`. + +Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions. +You can fetch them e.g. using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or +by using a helper library like [volk](https://github.com/zeux/volk). + +Third, VMA tries to fetch remaining pointers that are still null by calling +`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. +If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. + +Finally, all the function pointers required by the library (considering selected +Vulkan version and enabled extensions) are checked with `VMA_ASSERT` if they are not null. --# Define `VMA_STATIC_VULKAN_FUNCTIONS 0`. --# Provide valid pointers through VmaAllocatorCreateInfo::pVulkanFunctions. \section custom_memory_allocator Custom host memory allocator @@ -1498,11 +1766,11 @@ VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. When device memory of certain heap runs out of free space, new allocations may fail (returning error code) or they may succeed, silently pushing some existing memory blocks from GPU VRAM to system RAM (which degrades performance). This -behavior is implementation-dependant - it depends on GPU vendor and graphics +behavior is implementation-dependent - it depends on GPU vendor and graphics driver. On AMD cards it can be controlled while creating Vulkan device object by using -VK_AMD_memory_allocation_behavior extension, if available. +VK_AMD_memory_overallocation_behavior extension, if available. Alternatively, if you want to test how your program behaves with limited amount of Vulkan device memory available without switching your graphics card to one that really has @@ -1556,11 +1824,115 @@ unaware of it. To learn more about this extension, see: -- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.0-extensions/html/vkspec.html#VK_KHR_dedicated_allocation) +- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap44.html#VK_KHR_dedicated_allocation) - [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5) +\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory + +VK_AMD_device_coherent_memory is a device extension that enables access to +additional memory types with `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and +`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flag. It is useful mostly for +allocation of buffers intended for writing "breadcrumb markers" in between passes +or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases. + +When the extension is available but has not been enabled, Vulkan physical device +still exposes those memory types, but their usage is forbidden. VMA automatically +takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt +to allocate memory of such type is made. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_amd_device_coherent_memory_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +to VmaAllocatorCreateInfo::flags. + +\section vk_amd_device_coherent_memory_usage Usage + +After following steps described above, you can create VMA allocations and custom pools +out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible +devices. There are multiple ways to do it, for example: + +- You can request or prefer to allocate out of such memory types by adding + `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags + or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with + other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage. +- If you manually found memory type index to use for this purpose, force allocation + from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`. + +\section vk_amd_device_coherent_memory_more_information More information + +To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap44.html#VK_AMD_device_coherent_memory) + +Example use of this extension can be found in the code of the sample and test suite +accompanying this library. + + +\page enabling_buffer_device_address Enabling buffer device address + +Device extension VK_KHR_buffer_device_address +allow to fetch raw GPU pointer to a buffer and pass it for usage in a shader code. +It is promoted to core Vulkan 1.2. + +If you want to use this feature in connection with VMA, follow these steps: + +\section enabling_buffer_device_address_initialization Initialization + +1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains +"VK_KHR_buffer_device_address". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures*::bufferDeviceAddress` is true. + +3) (For Vulkan version < 1.2) While creating device with `vkCreateDevice`, enable this extension - add +"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +to VmaAllocatorCreateInfo::flags. + +\section enabling_buffer_device_address_usage Usage + +After following steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA. +The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to +allocated memory blocks wherever it might be needed. + +Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`. +The second part of this functionality related to "capture and replay" is not supported, +as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage. + +\section enabling_buffer_device_address_more_information More information + +To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address) + +Example use of this extension can be found in the code of the sample and test suite +accompanying this library. + \page general_considerations General considerations \section general_considerations_thread_safety Thread safety @@ -1619,10 +1991,15 @@ Features deliberately excluded from the scope of this library: - Data transfer. Uploading (straming) and downloading data of buffers and images between CPU and GPU memory and related synchronization is responsibility of the user. + Defining some "texture" object that would automatically stream its data from a + staging copy in CPU memory to GPU memory would rather be a feature of another, + higher-level library implemented on top of VMA. - Allocations for imported/exported external memory. They tend to require explicit memory type index and dedicated allocation anyway, so they don't interact with main features of this library. Such special purpose allocations should be made manually, using `vkCreateBuffer()` and `vkAllocateMemory()`. +- Sub-allocation of parts of one large buffer. Although recommended as a good practice, + it is the user's responsibility to implement such logic on top of VMA. - Recreation of buffers and images. Although the library has functions for buffer and image creation (vmaCreateBuffer(), vmaCreateImage()), you need to recreate these objects yourself after defragmentation. That's because the big @@ -1632,8 +2009,9 @@ Features deliberately excluded from the scope of this library: objects in CPU memory (not Vulkan memory), allocation failures are not checked and handled gracefully, because that would complicate code significantly and is usually not needed in desktop PC applications anyway. + Success of an allocation is just checked with an assert. - Code free of any compiler warnings. Maintaining the library to compile and - work correctly on so many different platforms is hard enough. Being free of + work correctly on so many different platforms is hard enough. Being free of any warnings, on any version of any compiler, is simply not feasible. - This is a C++ library with C interface. Bindings or ports to any other programming languages are welcomed as external projects and @@ -1641,28 +2019,66 @@ Features deliberately excluded from the scope of this library: */ +#ifdef __cplusplus +extern "C" { +#endif + /* Define this macro to 0/1 to disable/enable support for recording functionality, available through VmaAllocatorCreateInfo::pRecordSettings. */ #ifndef VMA_RECORDING_ENABLED - #ifdef _WIN32 - #define VMA_RECORDING_ENABLED 1 - #else - #define VMA_RECORDING_ENABLED 0 - #endif + #define VMA_RECORDING_ENABLED 0 #endif -#ifndef NOMINMAX +#if !defined(NOMINMAX) && defined(VMA_IMPLEMENTATION) #define NOMINMAX // For windows.h #endif +#if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS + extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; + extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; + extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; + extern PFN_vkAllocateMemory vkAllocateMemory; + extern PFN_vkFreeMemory vkFreeMemory; + extern PFN_vkMapMemory vkMapMemory; + extern PFN_vkUnmapMemory vkUnmapMemory; + extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges; + extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges; + extern PFN_vkBindBufferMemory vkBindBufferMemory; + extern PFN_vkBindImageMemory vkBindImageMemory; + extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; + extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; + extern PFN_vkCreateBuffer vkCreateBuffer; + extern PFN_vkDestroyBuffer vkDestroyBuffer; + extern PFN_vkCreateImage vkCreateImage; + extern PFN_vkDestroyImage vkDestroyImage; + extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer; + #if VMA_VULKAN_VERSION >= 1001000 + extern PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2; + extern PFN_vkGetImageMemoryRequirements2 vkGetImageMemoryRequirements2; + extern PFN_vkBindBufferMemory2 vkBindBufferMemory2; + extern PFN_vkBindImageMemory2 vkBindImageMemory2; + extern PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2; + #endif // #if VMA_VULKAN_VERSION >= 1001000 +#endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES + #ifndef VULKAN_H_ #include #endif -#if VMA_RECORDING_ENABLED - #include +// Define this macro to declare maximum supported Vulkan version in format AAABBBCCC, +// where AAA = major, BBB = minor, CCC = patch. +// If you want to use version > 1.0, it still needs to be enabled via VmaAllocatorCreateInfo::vulkanApiVersion. +#if !defined(VMA_VULKAN_VERSION) + #if defined(VK_VERSION_1_2) + #define VMA_VULKAN_VERSION 1002000 + #elif defined(VK_VERSION_1_1) + #define VMA_VULKAN_VERSION 1001000 + #else + #define VMA_VULKAN_VERSION 1000000 + #endif #endif #if !defined(VMA_DEDICATED_ALLOCATION) @@ -1673,6 +2089,105 @@ available through VmaAllocatorCreateInfo::pRecordSettings. #endif #endif +#if !defined(VMA_BIND_MEMORY2) + #if VK_KHR_bind_memory2 + #define VMA_BIND_MEMORY2 1 + #else + #define VMA_BIND_MEMORY2 0 + #endif +#endif + +#if !defined(VMA_MEMORY_BUDGET) + #if VK_EXT_memory_budget && (VK_KHR_get_physical_device_properties2 || VMA_VULKAN_VERSION >= 1001000) + #define VMA_MEMORY_BUDGET 1 + #else + #define VMA_MEMORY_BUDGET 0 + #endif +#endif + +// Defined to 1 when VK_KHR_buffer_device_address device extension or equivalent core Vulkan 1.2 feature is defined in its headers. +#if !defined(VMA_BUFFER_DEVICE_ADDRESS) + #if VK_KHR_buffer_device_address || VMA_VULKAN_VERSION >= 1002000 + #define VMA_BUFFER_DEVICE_ADDRESS 1 + #else + #define VMA_BUFFER_DEVICE_ADDRESS 0 + #endif +#endif + +// Defined to 1 when VK_EXT_memory_priority device extension is defined in Vulkan headers. +#if !defined(VMA_MEMORY_PRIORITY) + #if VK_EXT_memory_priority + #define VMA_MEMORY_PRIORITY 1 + #else + #define VMA_MEMORY_PRIORITY 0 + #endif +#endif + +// Define these macros to decorate all public functions with additional code, +// before and after returned type, appropriately. This may be useful for +// exporting the functions when compiling VMA as a separate library. Example: +// #define VMA_CALL_PRE __declspec(dllexport) +// #define VMA_CALL_POST __cdecl +#ifndef VMA_CALL_PRE + #define VMA_CALL_PRE +#endif +#ifndef VMA_CALL_POST + #define VMA_CALL_POST +#endif + +// Define this macro to decorate pointers with an attribute specifying the +// length of the array they point to if they are not null. +// +// The length may be one of +// - The name of another parameter in the argument list where the pointer is declared +// - The name of another member in the struct where the pointer is declared +// - The name of a member of a struct type, meaning the value of that member in +// the context of the call. For example +// VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount"), +// this means the number of memory heaps available in the device associated +// with the VmaAllocator being dealt with. +#ifndef VMA_LEN_IF_NOT_NULL + #define VMA_LEN_IF_NOT_NULL(len) +#endif + +// The VMA_NULLABLE macro is defined to be _Nullable when compiling with Clang. +// see: https://clang.llvm.org/docs/AttributeReference.html#nullable +#ifndef VMA_NULLABLE + #ifdef __clang__ + #define VMA_NULLABLE _Nullable + #else + #define VMA_NULLABLE + #endif +#endif + +// The VMA_NOT_NULL macro is defined to be _Nonnull when compiling with Clang. +// see: https://clang.llvm.org/docs/AttributeReference.html#nonnull +#ifndef VMA_NOT_NULL + #ifdef __clang__ + #define VMA_NOT_NULL _Nonnull + #else + #define VMA_NOT_NULL + #endif +#endif + +// If non-dispatchable handles are represented as pointers then we can give +// then nullability annotations +#ifndef VMA_NOT_NULL_NON_DISPATCHABLE + #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) + #define VMA_NOT_NULL_NON_DISPATCHABLE VMA_NOT_NULL + #else + #define VMA_NOT_NULL_NON_DISPATCHABLE + #endif +#endif + +#ifndef VMA_NULLABLE_NON_DISPATCHABLE + #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) + #define VMA_NULLABLE_NON_DISPATCHABLE VMA_NULLABLE + #else + #define VMA_NULLABLE_NON_DISPATCHABLE + #endif +#endif + /** \struct VmaAllocator \brief Represents main object of this library initialized. @@ -1686,16 +2201,18 @@ VK_DEFINE_HANDLE(VmaAllocator) /// Callback function called after successful vkAllocateMemory. typedef void (VKAPI_PTR *PFN_vmaAllocateDeviceMemoryFunction)( - VmaAllocator allocator, - uint32_t memoryType, - VkDeviceMemory memory, - VkDeviceSize size); + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); /// Callback function called before vkFreeMemory. typedef void (VKAPI_PTR *PFN_vmaFreeDeviceMemoryFunction)( - VmaAllocator allocator, - uint32_t memoryType, - VkDeviceMemory memory, - VkDeviceSize size); + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); /** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. @@ -1706,9 +2223,11 @@ Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. */ typedef struct VmaDeviceMemoryCallbacks { /// Optional, can be null. - PFN_vmaAllocateDeviceMemoryFunction pfnAllocate; + PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; /// Optional, can be null. - PFN_vmaFreeDeviceMemoryFunction pfnFree; + PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; + /// Optional, can be null. + void* VMA_NULLABLE pUserData; } VmaDeviceMemoryCallbacks; /// Flags for created #VmaAllocator. @@ -1720,6 +2239,9 @@ typedef enum VmaAllocatorCreateFlagBits { VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001, /** \brief Enables usage of VK_KHR_dedicated_allocation extension. + The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. + When it's `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + Using this extenion will automatically allocate dedicated blocks of memory for some buffers and images instead of suballocating place for them out of bigger memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT @@ -1731,15 +2253,95 @@ typedef enum VmaAllocatorCreateFlagBits { VmaAllocatorCreateInfo::device, and you want them to be used internally by this library: - - VK_KHR_get_memory_requirements2 - - VK_KHR_dedicated_allocation + - VK_KHR_get_memory_requirements2 (device extension) + - VK_KHR_dedicated_allocation (device extension) -When this flag is set, you can experience following warnings reported by Vulkan -validation layer. You can ignore them. + When this flag is set, you can experience following warnings reported by Vulkan + validation layer. You can ignore them. -> vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer. + > vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer. */ VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002, + /** + Enables usage of VK_KHR_bind_memory2 extension. + + The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. + When it's `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + + You may set this flag only if you found out that this device extension is supported, + you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + and you want it to be used internally by this library. + + The extension provides functions `vkBindBufferMemory2KHR` and `vkBindImageMemory2KHR`, + which allow to pass a chain of `pNext` structures while binding. + This flag is required if you use `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2(). + */ + VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004, + /** + Enables usage of VK_EXT_memory_budget extension. + + You may set this flag only if you found out that this device extension is supported, + you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + and you want it to be used internally by this library, along with another instance extension + VK_KHR_get_physical_device_properties2, which is required by it (or Vulkan 1.1, where this extension is promoted). + + The extension provides query for current memory usage and budget, which will probably + be more accurate than an estimation used by the library otherwise. + */ + VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008, + /** + Enables usage of VK_AMD_device_coherent_memory extension. + + You may set this flag only if you: + + - found out that this device extension is supported and enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device, + - want it to be used internally by this library. + + The extension and accompanying device feature provide access to memory types with + `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. + They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crash/hang/TDR. + + When the extension is not enabled, such memory types are still enumerated, but their usage is illegal. + To protect from this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such memory type, + returning `VK_ERROR_FEATURE_NOT_PRESENT`. + */ + VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010, + /** + Enables usage of "buffer device address" feature, which allows you to use function + `vkGetBufferDeviceAddress*` to get raw GPU pointer to a buffer and pass it for usage inside a shader. + + You may set this flag only if you: + + 1. (For Vulkan version < 1.2) Found as available and enabled device extension + VK_KHR_buffer_device_address. + This extension is promoted to core Vulkan 1.2. + 2. Found as available and enabled device feature `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress`. + + When this flag is set, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT` using VMA. + The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` to + allocated memory blocks wherever it might be needed. + + For more information, see documentation chapter \ref enabling_buffer_device_address. + */ + VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT = 0x00000020, + /** + Enables usage of VK_EXT_memory_priority extension in the library. + + You may set this flag only if you found available and enabled this device extension, + along with `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority == VK_TRUE`, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + + When this flag is used, VmaAllocationCreateInfo::priority and VmaPoolCreateInfo::priority + are used to set priorities of allocated Vulkan memory. Without it, these variables are ignored. + + A priority must be a floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. + Larger values are higher priority. The granularity of the priorities is implementation-dependent. + It is automatically passed to every call to `vkAllocateMemory` done by the library using structure `VkMemoryPriorityAllocateInfoEXT`. + The value to be used for default priority is 0.5. + For more details, see the documentation of the VK_EXT_memory_priority extension. + */ + VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; @@ -1750,26 +2352,33 @@ typedef VkFlags VmaAllocatorCreateFlags; Used in VmaAllocatorCreateInfo::pVulkanFunctions. */ typedef struct VmaVulkanFunctions { - PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; - PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; - PFN_vkAllocateMemory vkAllocateMemory; - PFN_vkFreeMemory vkFreeMemory; - PFN_vkMapMemory vkMapMemory; - PFN_vkUnmapMemory vkUnmapMemory; - PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges; - PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges; - PFN_vkBindBufferMemory vkBindBufferMemory; - PFN_vkBindImageMemory vkBindImageMemory; - PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; - PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; - PFN_vkCreateBuffer vkCreateBuffer; - PFN_vkDestroyBuffer vkDestroyBuffer; - PFN_vkCreateImage vkCreateImage; - PFN_vkDestroyImage vkDestroyImage; - PFN_vkCmdCopyBuffer vkCmdCopyBuffer; -#if VMA_DEDICATED_ALLOCATION - PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; - PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; + PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties; + PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory; + PFN_vkFreeMemory VMA_NULLABLE vkFreeMemory; + PFN_vkMapMemory VMA_NULLABLE vkMapMemory; + PFN_vkUnmapMemory VMA_NULLABLE vkUnmapMemory; + PFN_vkFlushMappedMemoryRanges VMA_NULLABLE vkFlushMappedMemoryRanges; + PFN_vkInvalidateMappedMemoryRanges VMA_NULLABLE vkInvalidateMappedMemoryRanges; + PFN_vkBindBufferMemory VMA_NULLABLE vkBindBufferMemory; + PFN_vkBindImageMemory VMA_NULLABLE vkBindImageMemory; + PFN_vkGetBufferMemoryRequirements VMA_NULLABLE vkGetBufferMemoryRequirements; + PFN_vkGetImageMemoryRequirements VMA_NULLABLE vkGetImageMemoryRequirements; + PFN_vkCreateBuffer VMA_NULLABLE vkCreateBuffer; + PFN_vkDestroyBuffer VMA_NULLABLE vkDestroyBuffer; + PFN_vkCreateImage VMA_NULLABLE vkCreateImage; + PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage; + PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer; +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR; + PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR; +#endif +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR; + PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR; +#endif +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR; #endif } VmaVulkanFunctions; @@ -1781,7 +2390,7 @@ typedef enum VmaRecordFlagBits { It may degrade performance though. */ VMA_RECORD_FLUSH_AFTER_CALL_BIT = 0x00000001, - + VMA_RECORD_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaRecordFlagBits; typedef VkFlags VmaRecordFlags; @@ -1798,7 +2407,7 @@ typedef struct VmaRecordSettings It will be opened for the whole time #VmaAllocator object is alive. If opening this file fails, creation of the whole allocator object fails. */ - const char* pFilePath; + const char* VMA_NOT_NULL pFilePath; } VmaRecordSettings; /// Description of a Allocator to be created. @@ -1808,19 +2417,19 @@ typedef struct VmaAllocatorCreateInfo VmaAllocatorCreateFlags flags; /// Vulkan physical device. /** It must be valid throughout whole lifetime of created allocator. */ - VkPhysicalDevice physicalDevice; + VkPhysicalDevice VMA_NOT_NULL physicalDevice; /// Vulkan device. /** It must be valid throughout whole lifetime of created allocator. */ - VkDevice device; + VkDevice VMA_NOT_NULL device; /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps > 1 GiB. Optional. /** Set to 0 to use default, which is currently 256 MiB. */ VkDeviceSize preferredLargeHeapBlockSize; /// Custom CPU memory allocation callbacks. Optional. /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */ - const VkAllocationCallbacks* pAllocationCallbacks; + const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional. /** Optional, can be null. */ - const VmaDeviceMemoryCallbacks* pDeviceMemoryCallbacks; + const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks; /** \brief Maximum number of additional frames that are in use at the same time as current frame. This value is used only when you make allocations with @@ -1859,52 +2468,88 @@ typedef struct VmaAllocatorCreateInfo blocks to system RAM. This driver behavior can also be controlled using VK_AMD_memory_overallocation_behavior extension. */ - const VkDeviceSize* pHeapSizeLimit; - /** \brief Pointers to Vulkan functions. Can be null if you leave define `VMA_STATIC_VULKAN_FUNCTIONS 1`. + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pHeapSizeLimit; - If you leave define `VMA_STATIC_VULKAN_FUNCTIONS 1` in configuration section, - you can pass null as this member, because the library will fetch pointers to - Vulkan functions internally in a static way, like: + /** \brief Pointers to Vulkan functions. Can be null. - vulkanFunctions.vkAllocateMemory = &vkAllocateMemory; - - Fill this member if you want to provide your own pointers to Vulkan functions, - e.g. fetched using `vkGetInstanceProcAddr()` and `vkGetDeviceProcAddr()`. + For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions). */ - const VmaVulkanFunctions* pVulkanFunctions; + const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions; /** \brief Parameters for recording of VMA calls. Can be null. If not null, it enables recording of calls to VMA functions to a file. If support for recording is not enabled using `VMA_RECORDING_ENABLED` macro, creation of the allocator object fails with `VK_ERROR_FEATURE_NOT_PRESENT`. */ - const VmaRecordSettings* pRecordSettings; + const VmaRecordSettings* VMA_NULLABLE pRecordSettings; + /** \brief Handle to Vulkan instance object. + + Starting from version 3.0.0 this member is no longer optional, it must be set! + */ + VkInstance VMA_NOT_NULL instance; + /** \brief Optional. The highest version of Vulkan that the application is designed to use. + + It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`. + The patch version number specified is ignored. Only the major and minor versions are considered. + It must be less or equal (preferably equal) to value as passed to `vkCreateInstance` as `VkApplicationInfo::apiVersion`. + Only versions 1.0, 1.1, 1.2 are supported by the current implementation. + Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`. + */ + uint32_t vulkanApiVersion; } VmaAllocatorCreateInfo; /// Creates Allocator object. -VkResult vmaCreateAllocator( - const VmaAllocatorCreateInfo* pCreateInfo, - VmaAllocator* pAllocator); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocator VMA_NULLABLE * VMA_NOT_NULL pAllocator); /// Destroys allocator object. -void vmaDestroyAllocator( - VmaAllocator allocator); +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator VMA_NULLABLE allocator); + +/** \brief Information about existing #VmaAllocator object. +*/ +typedef struct VmaAllocatorInfo +{ + /** \brief Handle to Vulkan instance object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::instance. + */ + VkInstance VMA_NOT_NULL instance; + /** \brief Handle to Vulkan physical device object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::physicalDevice. + */ + VkPhysicalDevice VMA_NOT_NULL physicalDevice; + /** \brief Handle to Vulkan device object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::device. + */ + VkDevice VMA_NOT_NULL device; +} VmaAllocatorInfo; + +/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. + +It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to +`VkPhysicalDevice`, `VkDevice` etc. every time using this function. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator VMA_NOT_NULL allocator, VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); /** PhysicalDeviceProperties are fetched from physicalDevice by the allocator. You can access it here, without fetching it again on your own. */ -void vmaGetPhysicalDeviceProperties( - VmaAllocator allocator, - const VkPhysicalDeviceProperties** ppPhysicalDeviceProperties); +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceProperties* VMA_NULLABLE * VMA_NOT_NULL ppPhysicalDeviceProperties); /** PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. You can access it here, without fetching it again on your own. */ -void vmaGetMemoryProperties( - VmaAllocator allocator, - const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties); +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE * VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); /** \brief Given Memory Type Index, returns Property Flags of this memory type. @@ -1912,10 +2557,10 @@ void vmaGetMemoryProperties( This is just a convenience function. Same information can be obtained using vmaGetMemoryProperties(). */ -void vmaGetMemoryTypeProperties( - VmaAllocator allocator, +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( + VmaAllocator VMA_NOT_NULL allocator, uint32_t memoryTypeIndex, - VkMemoryPropertyFlags* pFlags); + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); /** \brief Sets index of the current frame. @@ -1925,8 +2570,8 @@ This function must be used if you make allocations with when a new frame begins. Allocations queried using vmaGetAllocationInfo() cannot become lost in the current frame. */ -void vmaSetCurrentFrameIndex( - VmaAllocator allocator, +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( + VmaAllocator VMA_NOT_NULL allocator, uint32_t frameIndex); /** \brief Calculated statistics of memory usage in entire allocator. @@ -1955,26 +2600,91 @@ typedef struct VmaStats VmaStatInfo total; } VmaStats; -/// Retrieves statistics from current state of the Allocator. -void vmaCalculateStats( - VmaAllocator allocator, - VmaStats* pStats); +/** \brief Retrieves statistics from current state of the Allocator. +This function is called "calculate" not "get" because it has to traverse all +internal data structures, so it may be quite slow. For faster but more brief statistics +suitable to be called every frame or every allocation, use vmaGetBudget(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( + VmaAllocator VMA_NOT_NULL allocator, + VmaStats* VMA_NOT_NULL pStats); + +/** \brief Statistics of current memory usage and available budget, in bytes, for specific memory heap. +*/ +typedef struct VmaBudget +{ + /** \brief Sum size of all `VkDeviceMemory` blocks allocated from particular heap, in bytes. + */ + VkDeviceSize blockBytes; + + /** \brief Sum size of all allocations created in particular heap, in bytes. + + Usually less or equal than `blockBytes`. + Difference `blockBytes - allocationBytes` is the amount of memory allocated but unused - + available for new allocations or wasted due to fragmentation. + + It might be greater than `blockBytes` if there are some allocations in lost state, as they account + to this value as well. + */ + VkDeviceSize allocationBytes; + + /** \brief Estimated current memory usage of the program, in bytes. + + Fetched from system using `VK_EXT_memory_budget` extension if enabled. + + It might be different than `blockBytes` (usually higher) due to additional implicit objects + also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or + `VkDeviceMemory` blocks allocated outside of this library, if any. + */ + VkDeviceSize usage; + + /** \brief Estimated amount of memory available to the program, in bytes. + + Fetched from system using `VK_EXT_memory_budget` extension if enabled. + + It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors + external to the program, like other programs also consuming system resources. + Difference `budget - usage` is the amount of additional memory that can probably + be allocated without problems. Exceeding the budget may result in various problems. + */ + VkDeviceSize budget; +} VmaBudget; + +/** \brief Retrieves information about current memory budget for all memory heaps. + +\param[out] pBudget Must point to array with number of elements at least equal to number of memory heaps in physical device used. + +This function is called "get" not "calculate" because it is very fast, suitable to be called +every frame or every allocation. For more detailed statistics use vmaCalculateStats(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetBudget( + VmaAllocator VMA_NOT_NULL allocator, + VmaBudget* VMA_NOT_NULL pBudget); + +#ifndef VMA_STATS_STRING_ENABLED #define VMA_STATS_STRING_ENABLED 1 +#endif #if VMA_STATS_STRING_ENABLED /// Builds and returns statistics as string in JSON format. /** @param[out] ppStatsString Must be freed using vmaFreeStatsString() function. */ -void vmaBuildStatsString( - VmaAllocator allocator, - char** ppStatsString, +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( + VmaAllocator VMA_NOT_NULL allocator, + char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap); -void vmaFreeStatsString( - VmaAllocator allocator, - char* pStatsString); +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( + VmaAllocator VMA_NOT_NULL allocator, + char* VMA_NULLABLE pStatsString); #endif // #if VMA_STATS_STRING_ENABLED @@ -2000,7 +2710,7 @@ typedef enum VmaMemoryUsage It is roughly equivalent of `D3D12_HEAP_TYPE_DEFAULT`. Usage: - + - Resources written and read by device, e.g. images used as attachments. - Resources transferred from host once (immutable) or infrequently and read by device multiple times, e.g. textures to be sampled, vertex buffers, uniform @@ -2025,7 +2735,7 @@ typedef enum VmaMemoryUsage Memory that is both mappable on host (guarantees to be `HOST_VISIBLE`) and preferably fast to access by GPU. CPU access is typically uncached. Writes may be write-combined. - Usage: Resources written frequently by host (dynamic), read by device. E.g. textures, vertex buffers, uniform buffers updated every frame or every draw call. + Usage: Resources written frequently by host (dynamic), read by device. E.g. textures (with LINEAR layout), vertex buffers, uniform buffers updated every frame or every draw call. */ VMA_MEMORY_USAGE_CPU_TO_GPU = 3, /** Memory mappable on host (guarantees to be `HOST_VISIBLE`) and cached. @@ -2037,34 +2747,49 @@ typedef enum VmaMemoryUsage - Any resources read or accessed randomly on host, e.g. CPU-side copy of vertex buffer used as source of transfer, but also used for collision detection. */ VMA_MEMORY_USAGE_GPU_TO_CPU = 4, + /** CPU memory - memory that is preferably not `DEVICE_LOCAL`, but also not guaranteed to be `HOST_VISIBLE`. + + Usage: Staging copy of resources moved from GPU memory to CPU memory as part + of custom paging/residency mechanism, to be moved back to GPU memory when needed. + */ + VMA_MEMORY_USAGE_CPU_COPY = 5, + /** Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. + Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. + + Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. + + Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + */ + VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, + VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF } VmaMemoryUsage; /// Flags to be passed as VmaAllocationCreateInfo::flags. typedef enum VmaAllocationCreateFlagBits { /** \brief Set this flag if the allocation should have its own memory block. - + Use it for special, big resources, like fullscreen images used as attachments. - + You should not use this flag if VmaAllocationCreateInfo::pool is not null. */ VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. - + If new allocation cannot be placed in any of the existing blocks, allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. - + You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. - + If VmaAllocationCreateInfo::pool is not null, this flag is implied and ignored. */ VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. - + Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. - Is it valid to use this flag for allocation made from memory type that is not + It is valid to use this flag for allocation made from memory type that is not `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is useful if you need an allocation that is efficient to use on GPU (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that @@ -2104,6 +2829,16 @@ typedef enum VmaAllocationCreateFlagBits { This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. */ VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, + /** Create both buffer/image and allocation, but don't bind them together. + It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. + The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). + Otherwise it is ignored. + */ + VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, + /** Create allocation only if additional device memory required for it, if any, won't exceed + memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + */ + VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, /** Allocation strategy that chooses smallest possible free range for the allocation. @@ -2147,19 +2882,19 @@ typedef struct VmaAllocationCreateInfo /// Use #VmaAllocationCreateFlagBits enum. VmaAllocationCreateFlags flags; /** \brief Intended usage of memory. - + You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in other way. \n If `pool` is not null, this member is ignored. */ VmaMemoryUsage usage; /** \brief Flags that must be set in a Memory Type chosen for an allocation. - + Leave 0 if you specify memory requirements in other way. \n If `pool` is not null, this member is ignored.*/ VkMemoryPropertyFlags requiredFlags; /** \brief Flags that preferably should be set in a memory type chosen for an allocation. - - Set to 0 if no additional flags are prefered. \n + + Set to 0 if no additional flags are preferred. \n If `pool` is not null, this member is ignored. */ VkMemoryPropertyFlags preferredFlags; /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation. @@ -2175,14 +2910,21 @@ typedef struct VmaAllocationCreateInfo Leave `VK_NULL_HANDLE` to allocate from default pool. If not null, members: `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored. */ - VmaPool pool; + VmaPool VMA_NULLABLE pool; /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData(). - + If #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is used, it must be either null or pointer to a null-terminated string. The string will be then copied to internal buffer, so it doesn't need to be valid after allocation call. */ - void* pUserData; + void* VMA_NULLABLE pUserData; + /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. + + It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object + and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + Otherwise, it has the priority of a memory block where it is placed and this variable is ignored. + */ + float priority; } VmaAllocationCreateInfo; /** @@ -2201,11 +2943,11 @@ device doesn't support any memory type with requested features for the specific type of resource you want to use it for. Please check parameters of your resource, like image layout (OPTIMAL versus LINEAR) or mip level count. */ -VkResult vmaFindMemoryTypeIndex( - VmaAllocator allocator, +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( + VmaAllocator VMA_NOT_NULL allocator, uint32_t memoryTypeBits, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - uint32_t* pMemoryTypeIndex); + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); /** \brief Helps to find memoryTypeIndex, given VkBufferCreateInfo and VmaAllocationCreateInfo. @@ -2219,11 +2961,11 @@ It is just a convenience function, equivalent to calling: - `vmaFindMemoryTypeIndex` - `vkDestroyBuffer` */ -VkResult vmaFindMemoryTypeIndexForBufferInfo( - VmaAllocator allocator, - const VkBufferCreateInfo* pBufferCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - uint32_t* pMemoryTypeIndex); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); /** \brief Helps to find memoryTypeIndex, given VkImageCreateInfo and VmaAllocationCreateInfo. @@ -2237,11 +2979,11 @@ It is just a convenience function, equivalent to calling: - `vmaFindMemoryTypeIndex` - `vkDestroyImage` */ -VkResult vmaFindMemoryTypeIndexForImageInfo( - VmaAllocator allocator, - const VkImageCreateInfo* pImageCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - uint32_t* pMemoryTypeIndex); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); /// Flags to be passed as VmaPoolCreateInfo::flags. typedef enum VmaPoolCreateFlagBits { @@ -2328,7 +3070,7 @@ typedef struct VmaPoolCreateInfo { /** \brief Maximum number of blocks that can be allocated in this pool. Optional. Set to 0 to use default, which is `SIZE_MAX`, which means no limit. - + Set to same value as VmaPoolCreateInfo::minBlockCount to have fixed amount of memory allocated throughout whole lifetime of this pool. */ @@ -2347,6 +3089,12 @@ typedef struct VmaPoolCreateInfo { become lost, set this value to 0. */ uint32_t frameInUseCount; + /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations. + + It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object. + Otherwise, this variable is ignored. + */ + float priority; } VmaPoolCreateInfo; /** \brief Describes parameter of existing #VmaPool. @@ -2382,16 +3130,16 @@ typedef struct VmaPoolStats { @param pCreateInfo Parameters of pool to create. @param[out] pPool Handle to created pool. */ -VkResult vmaCreatePool( - VmaAllocator allocator, - const VmaPoolCreateInfo* pCreateInfo, - VmaPool* pPool); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator VMA_NOT_NULL allocator, + const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaPool VMA_NULLABLE * VMA_NOT_NULL pPool); /** \brief Destroys #VmaPool object and frees Vulkan device memory. */ -void vmaDestroyPool( - VmaAllocator allocator, - VmaPool pool); +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NULLABLE pool); /** \brief Retrieves statistics of existing #VmaPool object. @@ -2399,10 +3147,10 @@ void vmaDestroyPool( @param pool Pool object. @param[out] pPoolStats Statistics of specified pool. */ -void vmaGetPoolStats( - VmaAllocator allocator, - VmaPool pool, - VmaPoolStats* pPoolStats); +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaPoolStats* VMA_NOT_NULL pPoolStats); /** \brief Marks all allocations in given pool as lost if they are not used in current frame or VmaPoolCreateInfo::frameInUseCount back from now. @@ -2410,10 +3158,10 @@ void vmaGetPoolStats( @param pool Pool. @param[out] pLostAllocationCount Number of allocations marked as lost. Optional - pass null if you don't need this information. */ -void vmaMakePoolAllocationsLost( - VmaAllocator allocator, - VmaPool pool, - size_t* pLostAllocationCount); +VMA_CALL_PRE void VMA_CALL_POST vmaMakePoolAllocationsLost( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + size_t* VMA_NULLABLE pLostAllocationCount); /** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions. @@ -2429,7 +3177,28 @@ Possible return values: `VMA_ASSERT` is also fired in that case. - Other value: Error returned by Vulkan, e.g. memory mapping failure. */ -VkResult vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NOT_NULL pool); + +/** \brief Retrieves name of a custom pool. + +After the call `ppName` is either null or points to an internally-owned null-terminated string +containing name of the pool that was previously set. The pointer becomes invalid when the pool is +destroyed or its name is changed using vmaSetPoolName(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + const char* VMA_NULLABLE * VMA_NOT_NULL ppName); + +/** \brief Sets name of a custom pool. + +`pName` can be either null or pointer to a null-terminated string with new name for the pool. +Function makes internal copy of the string, so it can be changed or freed immediately after this call. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + const char* VMA_NULLABLE pName); /** \struct VmaAllocation \brief Represents single memory allocation. @@ -2461,20 +3230,25 @@ VK_DEFINE_HANDLE(VmaAllocation) */ typedef struct VmaAllocationInfo { /** \brief Memory type index that this allocation was allocated from. - + It never changes. */ uint32_t memoryType; /** \brief Handle to Vulkan memory object. Same memory object can be shared by multiple allocations. - + It can change after call to vmaDefragment() if this allocation is passed to the function, or if allocation is lost. If the allocation is lost, it is equal to `VK_NULL_HANDLE`. */ - VkDeviceMemory deviceMemory; - /** \brief Offset into deviceMemory object to the beginning of this allocation, in bytes. (deviceMemory, offset) pair is unique to this allocation. + VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; + /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. + + You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function + vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image, + not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation + and apply this offset automatically. It can change after call to vmaDefragment() if this allocation is passed to the function, or if allocation is lost. */ @@ -2482,22 +3256,28 @@ typedef struct VmaAllocationInfo { /** \brief Size of this allocation, in bytes. It never changes, unless allocation is lost. + + \note Allocation size returned in this variable may be greater than the size + requested for the resource e.g. as `VkBufferCreateInfo::size`. Whole size of the + allocation is accessible for operations on memory e.g. using a pointer after + mapping with vmaMapMemory(), but operations on the resource e.g. using + `vkCmdCopyBuffer` must be limited to the size of the resource. */ VkDeviceSize size; /** \brief Pointer to the beginning of this allocation as mapped data. If the allocation hasn't been mapped using vmaMapMemory() and hasn't been - created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value null. + created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. It can change after call to vmaMapMemory(), vmaUnmapMemory(). It can also change after call to vmaDefragment() if this allocation is passed to the function. */ - void* pMappedData; + void* VMA_NULLABLE pMappedData; /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). It can change after call to vmaSetAllocationUserData() for this allocation. */ - void* pUserData; + void* VMA_NULLABLE pUserData; } VmaAllocationInfo; /** \brief General purpose memory allocation. @@ -2510,12 +3290,12 @@ You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(), vmaCreateBuffer(), vmaCreateImage() instead whenever possible. */ -VkResult vmaAllocateMemory( - VmaAllocator allocator, - const VkMemoryRequirements* pVkMemoryRequirements, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( + VmaAllocator VMA_NOT_NULL allocator, + const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements, + const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); /** \brief General purpose memory allocation for multiple allocation objects at once. @@ -2536,13 +3316,13 @@ All allocations are made using same parameters. All of them are created out of t If any allocation fails, all allocations already made within this function call are also freed, so that when returned result is not `VK_SUCCESS`, `pAllocation` array is always entirely filled with `VK_NULL_HANDLE`. */ -VkResult vmaAllocateMemoryPages( - VmaAllocator allocator, - const VkMemoryRequirements* pVkMemoryRequirements, - const VmaAllocationCreateInfo* pCreateInfo, +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( + VmaAllocator VMA_NOT_NULL allocator, + const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements, + const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo, size_t allocationCount, - VmaAllocation* pAllocations, - VmaAllocationInfo* pAllocationInfo); + VmaAllocation VMA_NULLABLE * VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, + VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo); /** @param[out] pAllocation Handle to allocated memory. @@ -2550,28 +3330,28 @@ VkResult vmaAllocateMemoryPages( You should free the memory using vmaFreeMemory(). */ -VkResult vmaAllocateMemoryForBuffer( - VmaAllocator allocator, - VkBuffer buffer, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, + const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); /// Function similar to vmaAllocateMemoryForBuffer(). -VkResult vmaAllocateMemoryForImage( - VmaAllocator allocator, - VkImage image, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, + const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); /** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage(). Passing `VK_NULL_HANDLE` as `allocation` is valid. Such function call is just skipped. */ -void vmaFreeMemory( - VmaAllocator allocator, - VmaAllocation allocation); +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( + VmaAllocator VMA_NOT_NULL allocator, + const VmaAllocation VMA_NULLABLE allocation); /** \brief Frees memory and destroys multiple allocations. @@ -2583,39 +3363,26 @@ It may be internally optimized to be more efficient than calling vmaFreeMemory() Allocations in `pAllocations` array can come from any memory pools and types. Passing `VK_NULL_HANDLE` as elements of `pAllocations` array is valid. Such entries are just skipped. */ -void vmaFreeMemoryPages( - VmaAllocator allocator, +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( + VmaAllocator VMA_NOT_NULL allocator, size_t allocationCount, - VmaAllocation* pAllocations); + const VmaAllocation VMA_NULLABLE * VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations); -/** \brief Tries to resize an allocation in place, if there is enough free memory after it. +/** \brief Deprecated. -Tries to change allocation's size without moving or reallocating it. -You can both shrink and grow allocation size. -When growing, it succeeds only when the allocation belongs to a memory block with enough -free space after it. - -Returns `VK_SUCCESS` if allocation's size has been successfully changed. -Returns `VK_ERROR_OUT_OF_POOL_MEMORY` if allocation's size could not be changed. - -After successful call to this function, VmaAllocationInfo::size of this allocation changes. -All other parameters stay the same: memory pool and type, alignment, offset, mapped pointer. - -- Calling this function on allocation that is in lost state fails with result `VK_ERROR_VALIDATION_FAILED_EXT`. -- Calling this function with `newSize` same as current allocation size does nothing and returns `VK_SUCCESS`. -- Resizing dedicated allocations, as well as allocations created in pools that use linear - or buddy algorithm, is not supported. - The function returns `VK_ERROR_FEATURE_NOT_PRESENT` in such cases. - Support may be added in the future. +\deprecated +In version 2.2.0 it used to try to change allocation's size without moving or reallocating it. +In current version it returns `VK_SUCCESS` only if `newSize` equals current allocation's size. +Otherwise returns `VK_ERROR_OUT_OF_POOL_MEMORY`, indicating that allocation's size could not be changed. */ -VkResult vmaResizeAllocation( - VmaAllocator allocator, - VmaAllocation allocation, +VMA_CALL_PRE VkResult VMA_CALL_POST vmaResizeAllocation( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, VkDeviceSize newSize); /** \brief Returns current information about specified allocation and atomically marks it as used in current frame. -Current paramters of given allocation are returned in `pAllocationInfo`. +Current paramteres of given allocation are returned in `pAllocationInfo`. This function also atomically "touches" allocation - marks it as used in current frame, just like vmaTouchAllocation(). @@ -2629,10 +3396,10 @@ you can avoid calling it too often. (e.g. due to defragmentation or allocation becoming lost). - If you just want to check if allocation is not lost, vmaTouchAllocation() will work faster. */ -void vmaGetAllocationInfo( - VmaAllocator allocator, - VmaAllocation allocation, - VmaAllocationInfo* pAllocationInfo); +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo); /** \brief Returns `VK_TRUE` if allocation is not lost and atomically marks it as used in current frame. @@ -2648,9 +3415,9 @@ Lost allocation and the buffer/image still need to be destroyed. If the allocation has been created without #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag, this function always returns `VK_TRUE`. */ -VkBool32 vmaTouchAllocation( - VmaAllocator allocator, - VmaAllocation allocation); +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaTouchAllocation( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation); /** \brief Sets pUserData in given allocation to new value. @@ -2665,10 +3432,10 @@ If the flag was not used, the value of pointer `pUserData` is just copied to allocation's `pUserData`. It is opaque, so you can use it however you want - e.g. as a pointer, ordinal number or some handle to you own data. */ -void vmaSetAllocationUserData( - VmaAllocator allocator, - VmaAllocation allocation, - void* pUserData); +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + void* VMA_NULLABLE pUserData); /** \brief Creates new allocation that is in lost state from the beginning. @@ -2680,9 +3447,9 @@ Returned allocation is not tied to any specific memory pool or memory type and not bound to any image or buffer. It has size = 0. It cannot be turned into a real, non-empty allocation. */ -void vmaCreateLostAllocation( - VmaAllocator allocator, - VmaAllocation* pAllocation); +VMA_CALL_PRE void VMA_CALL_POST vmaCreateLostAllocation( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation); /** \brief Maps memory represented by given allocation and returns pointer to it. @@ -2717,23 +3484,33 @@ This function fails when used on allocation made in memory type that is not This function always fails when called for allocation that was created with #VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag. Such allocations cannot be mapped. + +This function doesn't automatically flush or invalidate caches. +If the allocation is made from a memory types that is not `HOST_COHERENT`, +you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification. */ -VkResult vmaMapMemory( - VmaAllocator allocator, - VmaAllocation allocation, - void** ppData); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + void* VMA_NULLABLE * VMA_NOT_NULL ppData); /** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory(). For details, see description of vmaMapMemory(). + +This function doesn't automatically flush or invalidate caches. +If the allocation is made from a memory types that is not `HOST_COHERENT`, +you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification. */ -void vmaUnmapMemory( - VmaAllocator allocator, - VmaAllocation allocation); +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation); /** \brief Flushes memory of given allocation. Calls `vkFlushMappedMemoryRanges()` for memory associated with given range of given allocation. +It needs to be called after writing to a mapped memory for memory types that are not `HOST_COHERENT`. +Unmap operation doesn't do that automatically. - `offset` must be relative to the beginning of allocation. - `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` the the end of given allocation. @@ -2742,12 +3519,25 @@ Calls `vkFlushMappedMemoryRanges()` for memory associated with given range of gi - If `size` is 0, this call is ignored. - If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`, this call is ignored. + +Warning! `offset` and `size` are relative to the contents of given `allocation`. +If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively. +Do not pass allocation's offset as `offset`!!! + +This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. */ -void vmaFlushAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize offset, + VkDeviceSize size); /** \brief Invalidates memory of given allocation. Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given range of given allocation. +It needs to be called before reading from a mapped memory for memory types that are not `HOST_COHERENT`. +Map operation doesn't do that automatically. - `offset` must be relative to the beginning of allocation. - `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` the the end of given allocation. @@ -2756,8 +3546,61 @@ Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given range - If `size` is 0, this call is ignored. - If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`, this call is ignored. + +Warning! `offset` and `size` are relative to the contents of given `allocation`. +If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively. +Do not pass allocation's offset as `offset`!!! + +This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if +it is called, otherwise `VK_SUCCESS`. */ -void vmaInvalidateAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize offset, + VkDeviceSize size); + +/** \brief Flushes memory of given set of allocations. + +Calls `vkFlushMappedMemoryRanges()` for memory associated with given ranges of given allocations. +For more information, see documentation of vmaFlushAllocation(). + +\param allocator +\param allocationCount +\param allocations +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all ofsets are zero. +\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. + +This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t allocationCount, + const VmaAllocation VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); + +/** \brief Invalidates memory of given set of allocations. + +Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given ranges of given allocations. +For more information, see documentation of vmaInvalidateAllocation(). + +\param allocator +\param allocationCount +\param allocations +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all ofsets are zero. +\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. + +This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t allocationCount, + const VmaAllocation VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); /** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. @@ -2775,7 +3618,7 @@ Possible return values: `VMA_ASSERT` is also fired in that case. - Other value: Error returned by Vulkan, e.g. memory mapping failure. */ -VkResult vmaCheckCorruption(VmaAllocator allocator, uint32_t memoryTypeBits); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption(VmaAllocator VMA_NOT_NULL allocator, uint32_t memoryTypeBits); /** \struct VmaDefragmentationContext \brief Represents Opaque object that represents started defragmentation process. @@ -2787,6 +3630,7 @@ VK_DEFINE_HANDLE(VmaDefragmentationContext) /// Flags to be used in vmaDefragmentationBegin(). None at the moment. Reserved for future use. typedef enum VmaDefragmentationFlagBits { + VMA_DEFRAGMENTATION_FLAG_INCREMENTAL = 0x1, VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaDefragmentationFlagBits; typedef VkFlags VmaDefragmentationFlags; @@ -2810,13 +3654,13 @@ typedef struct VmaDefragmentationInfo2 { It is safe to pass allocations that are in the lost state - they are ignored. All allocations not present in this array are considered non-moveable during this defragmentation. */ - VmaAllocation* pAllocations; + const VmaAllocation VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations; /** \brief Optional, output. Pointer to array that will be filled with information whether the allocation at certain index has been changed during defragmentation. The array should have `allocationCount` elements. You can pass null if you are not interested in this information. */ - VkBool32* pAllocationsChanged; + VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged; /** \brief Numer of pools in `pPools` array. */ uint32_t poolCount; @@ -2835,9 +3679,9 @@ typedef struct VmaDefragmentationInfo2 { Using this array is equivalent to specifying all allocations from the pools in `pAllocations`. It might be more efficient. */ - VmaPool* pPools; + const VmaPool VMA_NOT_NULL * VMA_NULLABLE VMA_LEN_IF_NOT_NULL(poolCount) pPools; /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on CPU side, like `memcpy()`, `memmove()`. - + `VK_WHOLE_SIZE` means no limit. */ VkDeviceSize maxCpuBytesToMove; @@ -2847,7 +3691,7 @@ typedef struct VmaDefragmentationInfo2 { */ uint32_t maxCpuAllocationsToMove; /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on GPU side, posted to `commandBuffer`. - + `VK_WHOLE_SIZE` means no limit. */ VkDeviceSize maxGpuBytesToMove; @@ -2864,16 +3708,31 @@ typedef struct VmaDefragmentationInfo2 { Passing null means that only CPU defragmentation will be performed. */ - VkCommandBuffer commandBuffer; + VkCommandBuffer VMA_NULLABLE commandBuffer; } VmaDefragmentationInfo2; +typedef struct VmaDefragmentationPassMoveInfo { + VmaAllocation VMA_NOT_NULL allocation; + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory; + VkDeviceSize offset; +} VmaDefragmentationPassMoveInfo; + +/** \brief Parameters for incremental defragmentation steps. + +To be used with function vmaBeginDefragmentationPass(). +*/ +typedef struct VmaDefragmentationPassInfo { + uint32_t moveCount; + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(moveCount) pMoves; +} VmaDefragmentationPassInfo; + /** \brief Deprecated. Optional configuration parameters to be passed to function vmaDefragment(). \deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. */ typedef struct VmaDefragmentationInfo { /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places. - + Default is `VK_WHOLE_SIZE`, which means no limit. */ VkDeviceSize maxBytesToMove; @@ -2921,21 +3780,34 @@ Warning! Between the call to vmaDefragmentationBegin() and vmaDefragmentationEnd They become valid after call to vmaDefragmentationEnd(). - If `pInfo->commandBuffer` is not null, you must submit that command buffer and make sure it finished execution before calling vmaDefragmentationEnd(). + +For more information and important limitations regarding defragmentation, see documentation chapter: +[Defragmentation](@ref defragmentation). */ -VkResult vmaDefragmentationBegin( - VmaAllocator allocator, - const VmaDefragmentationInfo2* pInfo, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext *pContext); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( + VmaAllocator VMA_NOT_NULL allocator, + const VmaDefragmentationInfo2* VMA_NOT_NULL pInfo, + VmaDefragmentationStats* VMA_NULLABLE pStats, + VmaDefragmentationContext VMA_NULLABLE * VMA_NOT_NULL pContext); /** \brief Ends defragmentation process. Use this function to finish defragmentation started by vmaDefragmentationBegin(). It is safe to pass `context == null`. The function then does nothing. */ -VkResult vmaDefragmentationEnd( - VmaAllocator allocator, - VmaDefragmentationContext context); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NULLABLE context); + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NULLABLE context, + VmaDefragmentationPassInfo* VMA_NOT_NULL pInfo +); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NULLABLE context +); /** \brief Deprecated. Compacts memory by moving allocations. @@ -2977,13 +3849,13 @@ you should measure that on your platform. For more information, see [Defragmentation](@ref defragmentation) chapter. */ -VkResult vmaDefragment( - VmaAllocator allocator, - VmaAllocation* pAllocations, +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( + VmaAllocator VMA_NOT_NULL allocator, + const VmaAllocation VMA_NOT_NULL * VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo *pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats); + VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged, + const VmaDefragmentationInfo* VMA_NULLABLE pDefragmentationInfo, + VmaDefragmentationStats* VMA_NULLABLE pDefragmentationStats); /** \brief Binds buffer to allocation. @@ -2997,10 +3869,27 @@ allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from mul It is recommended to use function vmaCreateBuffer() instead of this one. */ -VkResult vmaBindBufferMemory( - VmaAllocator allocator, - VmaAllocation allocation, - VkBuffer buffer); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer); + +/** \brief Binds buffer to allocation with additional parameters. + +@param allocationLocalOffset Additional offset to be added while binding, relative to the beginnig of the `allocation`. Normally it should be 0. +@param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindBufferMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, + const void* VMA_NULLABLE pNext); /** \brief Binds image to allocation. @@ -3014,10 +3903,27 @@ allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from mul It is recommended to use function vmaCreateImage() instead of this one. */ -VkResult vmaBindImageMemory( - VmaAllocator allocator, - VmaAllocation allocation, - VkImage image); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image); + +/** \brief Binds image to allocation with additional parameters. + +@param allocationLocalOffset Additional offset to be added while binding, relative to the beginnig of the `allocation`. Normally it should be 0. +@param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindImageMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, + const void* VMA_NULLABLE pNext); /** @param[out] pBuffer Buffer that was created. @@ -3037,21 +3943,25 @@ If the function succeeded, you must destroy both buffer and allocation when you no longer need them using either convenience function vmaDestroyBuffer() or separately, using `vkDestroyBuffer()` and vmaFreeMemory(). -If VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, +If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, VK_KHR_dedicated_allocation extension is used internally to query driver whether it requires or prefers the new buffer to have dedicated allocation. If yes, and if dedicated allocation is possible (VmaAllocationCreateInfo::pool is null -and VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated +and #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated allocation for this buffer, just like when using -VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +#VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + +\note This function creates a new `VkBuffer`. Sub-allocation of parts of one large buffer, +although recommended as a good practice, is out of scope of this library and could be implemented +by the user as a higher-level logic on top of VMA. */ -VkResult vmaCreateBuffer( - VmaAllocator allocator, - const VkBufferCreateInfo* pBufferCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkBuffer* pBuffer, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE * VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); /** \brief Destroys Vulkan buffer and frees allocated memory. @@ -3064,19 +3974,19 @@ vmaFreeMemory(allocator, allocation); It it safe to pass null as buffer and/or allocation. */ -void vmaDestroyBuffer( - VmaAllocator allocator, - VkBuffer buffer, - VmaAllocation allocation); +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE buffer, + VmaAllocation VMA_NULLABLE allocation); /// Function similar to vmaCreateBuffer(). -VkResult vmaCreateImage( - VmaAllocator allocator, - const VkImageCreateInfo* pImageCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkImage* pImage, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( + VmaAllocator VMA_NOT_NULL allocator, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE * VMA_NOT_NULL pImage, + VmaAllocation VMA_NULLABLE * VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); /** \brief Destroys Vulkan image and frees allocated memory. @@ -3089,10 +3999,10 @@ vmaFreeMemory(allocator, allocation); It it safe to pass null as image and/or allocation. */ -void vmaDestroyImage( - VmaAllocator allocator, - VkImage image, - VmaAllocation allocation); +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation); #ifdef __cplusplus } @@ -3111,6 +4021,17 @@ void vmaDestroyImage( #include #include #include +#include + +#if VMA_RECORDING_ENABLED + #include + #if defined(_WIN32) + #include + #else + #include + #include + #endif +#endif /******************************************************************************* CONFIGURATION SECTION @@ -3124,12 +4045,23 @@ Define this macro to 1 to make the library fetch pointers to Vulkan functions internally, like: vulkanFunctions.vkAllocateMemory = &vkAllocateMemory; - -Define to 0 if you are going to provide you own pointers to Vulkan functions via -VmaAllocatorCreateInfo::pVulkanFunctions. */ #if !defined(VMA_STATIC_VULKAN_FUNCTIONS) && !defined(VK_NO_PROTOTYPES) -#define VMA_STATIC_VULKAN_FUNCTIONS 1 + #define VMA_STATIC_VULKAN_FUNCTIONS 1 +#endif + +/* +Define this macro to 1 to make the library fetch pointers to Vulkan functions +internally, like: + + vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(m_hDevice, vkAllocateMemory); +*/ +#if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS) + #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 + #if defined(VK_NO_PROTOTYPES) + extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; + #endif #endif // Define this macro to 1 to make the library use STL containers instead of its own implementation. @@ -3161,6 +4093,10 @@ the containers. #endif #endif +/* +THESE INCLUDES ARE NOT ENABLED BY DEFAULT. +Library has its own container implementation. +*/ #if VMA_USE_STL_VECTOR #include #endif @@ -3180,7 +4116,6 @@ remove them if not needed. #include // for assert #include // for min, max #include -#include // for std::atomic #ifndef VMA_NULL // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. @@ -3189,7 +4124,7 @@ remove them if not needed. #if defined(__ANDROID_API__) && (__ANDROID_API__ < 16) #include -void *aligned_alloc(size_t alignment, size_t size) +static void* vma_aligned_alloc(size_t alignment, size_t size) { // alignment must be >= sizeof(void*) if(alignment < sizeof(void*)) @@ -3199,10 +4134,27 @@ void *aligned_alloc(size_t alignment, size_t size) return memalign(alignment, size); } -#elif defined(__APPLE__) || defined(__ANDROID__) +#elif defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)) #include -void *aligned_alloc(size_t alignment, size_t size) + +#if defined(__APPLE__) +#include +#endif + +static void* vma_aligned_alloc(size_t alignment, size_t size) { +#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) +#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 + // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only + // with the MacOSX11.0 SDK in Xcode 12 (which is what adds + // MAC_OS_X_VERSION_10_16), even though the function is marked + // availabe for 10.15. That's why the preprocessor checks for 10.16 but + // the __builtin_available checks for 10.15. + // People who use C++17 could call aligned_alloc with the 10.15 SDK already. + if (__builtin_available(macOS 10.15, iOS 13, *)) + return aligned_alloc(alignment, size); +#endif +#endif // alignment must be >= sizeof(void*) if(alignment < sizeof(void*)) { @@ -3214,6 +4166,28 @@ void *aligned_alloc(size_t alignment, size_t size) return pointer; return VMA_NULL; } +#elif defined(_WIN32) +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return _aligned_malloc(size, alignment); +} +#else +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return aligned_alloc(alignment, size); +} +#endif + +#if defined(_WIN32) +static void vma_aligned_free(void* ptr) +{ + _aligned_free(ptr); +} +#else +static void vma_aligned_free(void* ptr) +{ + free(ptr); +} #endif // If your compiler is not compatible with C++11 and definition of @@ -3223,20 +4197,20 @@ void *aligned_alloc(size_t alignment, size_t size) // Normal assert to check for programmer's errors, especially in Debug configuration. #ifndef VMA_ASSERT - #ifdef _DEBUG - #define VMA_ASSERT(expr) assert(expr) - #else + #ifdef NDEBUG #define VMA_ASSERT(expr) + #else + #define VMA_ASSERT(expr) assert(expr) #endif #endif // Assert that will be called very often, like inside data structures e.g. operator[]. // Making it non-empty can make program slow. #ifndef VMA_HEAVY_ASSERT - #ifdef _DEBUG - #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr) - #else + #ifdef NDEBUG #define VMA_HEAVY_ASSERT(expr) + #else + #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr) #endif #endif @@ -3245,19 +4219,16 @@ void *aligned_alloc(size_t alignment, size_t size) #endif #ifndef VMA_SYSTEM_ALIGNED_MALLOC - #if defined(_WIN32) - #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) (_aligned_malloc((size), (alignment))) - #else - #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) (aligned_alloc((alignment), (size) )) - #endif + #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) vma_aligned_alloc((alignment), (size)) #endif -#ifndef VMA_SYSTEM_FREE - #if defined(_WIN32) - #define VMA_SYSTEM_FREE(ptr) _aligned_free(ptr) +#ifndef VMA_SYSTEM_ALIGNED_FREE + // VMA_SYSTEM_FREE is the old name, but might have been defined by the user + #if defined(VMA_SYSTEM_FREE) + #define VMA_SYSTEM_ALIGNED_FREE(ptr) VMA_SYSTEM_FREE(ptr) #else - #define VMA_SYSTEM_FREE(ptr) free(ptr) - #endif + #define VMA_SYSTEM_ALIGNED_FREE(ptr) vma_aligned_free(ptr) + #endif #endif #ifndef VMA_MIN @@ -3308,6 +4279,7 @@ void *aligned_alloc(size_t alignment, size_t size) public: void Lock() { m_Mutex.lock(); } void Unlock() { m_Mutex.unlock(); } + bool TryLock() { return m_Mutex.try_lock(); } private: std::mutex m_Mutex; }; @@ -3324,22 +4296,27 @@ void *aligned_alloc(size_t alignment, size_t size) public: void LockRead() { m_Mutex.lock_shared(); } void UnlockRead() { m_Mutex.unlock_shared(); } + bool TryLockRead() { return m_Mutex.try_lock_shared(); } void LockWrite() { m_Mutex.lock(); } void UnlockWrite() { m_Mutex.unlock(); } + bool TryLockWrite() { return m_Mutex.try_lock(); } private: std::shared_mutex m_Mutex; }; #define VMA_RW_MUTEX VmaRWMutex - #elif defined(_WIN32) + #elif defined(_WIN32) && defined(WINVER) && WINVER >= 0x0600 // Use SRWLOCK from WinAPI. + // Minimum supported client = Windows Vista, server = Windows Server 2008. class VmaRWMutex { public: VmaRWMutex() { InitializeSRWLock(&m_Lock); } void LockRead() { AcquireSRWLockShared(&m_Lock); } void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } + bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock) != FALSE; } void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } + bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock) != FALSE; } private: SRWLOCK m_Lock; }; @@ -3351,8 +4328,10 @@ void *aligned_alloc(size_t alignment, size_t size) public: void LockRead() { m_Mutex.Lock(); } void UnlockRead() { m_Mutex.Unlock(); } + bool TryLockRead() { return m_Mutex.TryLock(); } void LockWrite() { m_Mutex.Lock(); } void UnlockWrite() { m_Mutex.Unlock(); } + bool TryLockWrite() { return m_Mutex.TryLock(); } private: VMA_MUTEX m_Mutex; }; @@ -3361,15 +4340,16 @@ void *aligned_alloc(size_t alignment, size_t size) #endif // #ifndef VMA_RW_MUTEX /* -If providing your own implementation, you need to implement a subset of std::atomic: - -- Constructor(uint32_t desired) -- uint32_t load() const -- void store(uint32_t desired) -- bool compare_exchange_weak(uint32_t& expected, uint32_t desired) +If providing your own implementation, you need to implement a subset of std::atomic. */ #ifndef VMA_ATOMIC_UINT32 - #define VMA_ATOMIC_UINT32 std::atomic + #include + #define VMA_ATOMIC_UINT32 std::atomic +#endif + +#ifndef VMA_ATOMIC_UINT64 + #include + #define VMA_ATOMIC_UINT64 std::atomic #endif #ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY @@ -3458,6 +4438,12 @@ static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; END OF CONFIGURATION */ +// # Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. + +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040; +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080; +static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000; + static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; static VkAllocationCallbacks VmaEmptyAllocationCallbacks = { @@ -3466,34 +4452,12 @@ static VkAllocationCallbacks VmaEmptyAllocationCallbacks = { // Returns number of bits set to 1 in (v). static inline uint32_t VmaCountBitsSet(uint32_t v) { - uint32_t c = v - ((v >> 1) & 0x55555555); - c = ((c >> 2) & 0x33333333) + (c & 0x33333333); - c = ((c >> 4) + c) & 0x0F0F0F0F; - c = ((c >> 8) + c) & 0x00FF00FF; - c = ((c >> 16) + c) & 0x0000FFFF; - return c; -} - -// Aligns given value up to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 16. -// Use types like uint32_t, uint64_t as T. -template -static inline T VmaAlignUp(T val, T align) -{ - return (val + align - 1) / align * align; -} -// Aligns given value down to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 8. -// Use types like uint32_t, uint64_t as T. -template -static inline T VmaAlignDown(T val, T align) -{ - return val / align * align; -} - -// Division with mathematical rounding to nearest number. -template -static inline T VmaRoundDiv(T x, T y) -{ - return (x + (y / (T)2)) / y; + uint32_t c = v - ((v >> 1) & 0x55555555); + c = ((c >> 2) & 0x33333333) + (c & 0x33333333); + c = ((c >> 4) + c) & 0x0F0F0F0F; + c = ((c >> 8) + c) & 0x00FF00FF; + c = ((c >> 16) + c) & 0x0000FFFF; + return c; } /* @@ -3507,10 +4471,34 @@ inline bool VmaIsPow2(T x) return (x & (x-1)) == 0; } +// Aligns given value up to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 16. +// Use types like uint32_t, uint64_t as T. +template +static inline T VmaAlignUp(T val, T alignment) +{ + VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); + return (val + alignment - 1) & ~(alignment - 1); +} +// Aligns given value down to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 8. +// Use types like uint32_t, uint64_t as T. +template +static inline T VmaAlignDown(T val, T alignment) +{ + VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); + return val & ~(alignment - 1); +} + +// Division with mathematical rounding to nearest number. +template +static inline T VmaRoundDiv(T x, T y) +{ + return (x + (y / (T)2)) / y; +} + // Returns smallest power of 2 greater or equal to v. static inline uint32_t VmaNextPow2(uint32_t v) { - v--; + v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; @@ -3521,7 +4509,7 @@ static inline uint32_t VmaNextPow2(uint32_t v) } static inline uint64_t VmaNextPow2(uint64_t v) { - v--; + v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; @@ -3560,6 +4548,8 @@ static inline bool VmaStrIsEmpty(const char* pStr) return pStr == VMA_NULL || *pStr == '\0'; } +#if VMA_STATS_STRING_ENABLED + static const char* VmaAlgorithmToStr(uint32_t algorithm) { switch(algorithm) @@ -3576,6 +4566,8 @@ static const char* VmaAlgorithmToStr(uint32_t algorithm) } } +#endif // #if VMA_STATS_STRING_ENABLED + #ifndef VMA_SORT template @@ -3662,7 +4654,7 @@ static inline bool VmaIsBufferImageGranularityConflict( { VMA_SWAP(suballocType1, suballocType2); } - + switch(suballocType1) { case VMA_SUBALLOCATION_TYPE_FREE: @@ -3691,16 +4683,21 @@ static inline bool VmaIsBufferImageGranularityConflict( static void VmaWriteMagicValue(void* pData, VkDeviceSize offset) { +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION uint32_t* pDst = (uint32_t*)((char*)pData + offset); const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); for(size_t i = 0; i < numberCount; ++i, ++pDst) { *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE; } +#else + // no-op +#endif } static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) { +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset); const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); for(size_t i = 0; i < numberCount; ++i, ++pSrc) @@ -3710,9 +4707,22 @@ static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) return false; } } +#endif return true; } +/* +Fills structure with parameters of an example buffer to be used for transfers +during GPU memory defragmentation. +*/ +static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo) +{ + memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo)); + outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size. +} + // Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). struct VmaMutexLock { @@ -3773,12 +4783,12 @@ Returned value is the found element, if present in the collection or place where new element with value (key) should be inserted. */ template -static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT &key, CmpLess cmp) +static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT &key, const CmpLess& cmp) { size_t down = 0, up = (end - beg); while(down < up) { - const size_t mid = (down + up) / 2; + const size_t mid = down + (up - down) / 2; // Overflow-safe midpoint calculation if(cmp(*(beg+mid), key)) { down = mid + 1; @@ -3791,6 +4801,19 @@ static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT &key, Cm return beg + down; } +template +IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) +{ + IterT it = VmaBinaryFindFirstNotLess( + beg, end, value, cmp); + if(it == end || + (!cmp(*it, value) && !cmp(value, *it))) + { + return it; + } + return end; +} + /* Returns true if all pointers in the array are not-null and unique. Warning! O(n^2) complexity. Use only inside VMA_HEAVY_ASSERT. @@ -3817,15 +4840,23 @@ static bool VmaValidatePointerArray(uint32_t count, const T* arr) return true; } +template +static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) +{ + newStruct->pNext = mainStruct->pNext; + mainStruct->pNext = newStruct; +} + //////////////////////////////////////////////////////////////////////////////// // Memory allocation static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment) { + void* result = VMA_NULL; if((pAllocationCallbacks != VMA_NULL) && (pAllocationCallbacks->pfnAllocation != VMA_NULL)) { - return (*pAllocationCallbacks->pfnAllocation)( + result = (*pAllocationCallbacks->pfnAllocation)( pAllocationCallbacks->pUserData, size, alignment, @@ -3833,8 +4864,10 @@ static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t } else { - return VMA_SYSTEM_ALIGNED_MALLOC(size, alignment); + result = VMA_SYSTEM_ALIGNED_MALLOC(size, alignment); } + VMA_ASSERT(result != VMA_NULL && "CPU memory allocation failed."); + return result; } static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr) @@ -3846,7 +4879,7 @@ static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr } else { - VMA_SYSTEM_FREE(ptr); + VMA_SYSTEM_ALIGNED_FREE(ptr); } } @@ -3886,6 +4919,30 @@ static void vma_delete_array(const VkAllocationCallbacks* pAllocationCallbacks, } } +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr) +{ + if(srcStr != VMA_NULL) + { + const size_t len = strlen(srcStr); + char* const result = vma_new_array(allocs, char, len + 1); + memcpy(result, srcStr, len + 1); + return result; + } + else + { + return VMA_NULL; + } +} + +static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str) +{ + if(str != VMA_NULL) + { + const size_t len = strlen(str); + vma_delete_array(allocs, str, len + 1); + } +} + // STL-compatible allocator. template class VmaStlAllocator @@ -3893,7 +4950,7 @@ class VmaStlAllocator public: const VkAllocationCallbacks* const m_pCallbacks; typedef T value_type; - + VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) { } template VmaStlAllocator(const VmaStlAllocator& src) : m_pCallbacks(src.m_pCallbacks) { } @@ -3956,7 +5013,12 @@ public: m_Capacity(count) { } - + + // This version of the constructor is here for compatibility with pre-C++14 std::vector. + // value is unused. + VmaVector(size_t count, const T& value, const AllocatorT& allocator) + : VmaVector(count, allocator) {} + VmaVector(const VmaVector& src) : m_Allocator(src.m_Allocator), m_pArray(src.m_Count ? (T*)VmaAllocateArray(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL), @@ -3968,7 +5030,7 @@ public: memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); } } - + ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); @@ -3986,12 +5048,12 @@ public: } return *this; } - + bool empty() const { return m_Count == 0; } size_t size() const { return m_Count; } T* data() { return m_pArray; } const T* data() const { return m_pArray; } - + T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); @@ -4027,12 +5089,12 @@ public: void reserve(size_t newCapacity, bool freeMemory = false) { newCapacity = VMA_MAX(newCapacity, m_Count); - + if((newCapacity < m_Capacity) && !freeMemory) { newCapacity = m_Capacity; } - + if(newCapacity != m_Capacity) { T* const newArray = newCapacity ? VmaAllocateArray(m_Allocator, newCapacity) : VMA_NULL; @@ -4182,19 +5244,174 @@ bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& return false; } -template -IterT VmaVectorFindSorted(const IterT& beg, const IterT& end, const KeyT& value) +//////////////////////////////////////////////////////////////////////////////// +// class VmaSmallVector + +/* +This is a vector (a variable-sized array), optimized for the case when the array is small. + +It contains some number of elements in-place, which allows it to avoid heap allocation +when the actual number of elements is below that threshold. This allows normal "small" +cases to be fast without losing generality for large inputs. +*/ + +template +class VmaSmallVector { - CmpLess comparator; - IterT it = VmaBinaryFindFirstNotLess( - beg, end, value, comparator); - if(it == end || - (!comparator(*it, value) && !comparator(value, *it))) +public: + typedef T value_type; + + VmaSmallVector(const AllocatorT& allocator) : + m_Count(0), + m_DynamicArray(allocator) { - return it; } - return end; -} + VmaSmallVector(size_t count, const AllocatorT& allocator) : + m_Count(count), + m_DynamicArray(count > N ? count : 0, allocator) + { + } + template + VmaSmallVector(const VmaSmallVector& src) = delete; + template + VmaSmallVector& operator=(const VmaSmallVector& rhs) = delete; + + bool empty() const { return m_Count == 0; } + size_t size() const { return m_Count; } + T* data() { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; } + const T* data() const { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; } + + T& operator[](size_t index) + { + VMA_HEAVY_ASSERT(index < m_Count); + return data()[index]; + } + const T& operator[](size_t index) const + { + VMA_HEAVY_ASSERT(index < m_Count); + return data()[index]; + } + + T& front() + { + VMA_HEAVY_ASSERT(m_Count > 0); + return data()[0]; + } + const T& front() const + { + VMA_HEAVY_ASSERT(m_Count > 0); + return data()[0]; + } + T& back() + { + VMA_HEAVY_ASSERT(m_Count > 0); + return data()[m_Count - 1]; + } + const T& back() const + { + VMA_HEAVY_ASSERT(m_Count > 0); + return data()[m_Count - 1]; + } + + void resize(size_t newCount, bool freeMemory = false) + { + if(newCount > N && m_Count > N) + { + // Any direction, staying in m_DynamicArray + m_DynamicArray.resize(newCount, freeMemory); + } + else if(newCount > N && m_Count <= N) + { + // Growing, moving from m_StaticArray to m_DynamicArray + m_DynamicArray.resize(newCount, freeMemory); + if(m_Count > 0) + { + memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T)); + } + } + else if(newCount <= N && m_Count > N) + { + // Shrinking, moving from m_DynamicArray to m_StaticArray + if(newCount > 0) + { + memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T)); + } + m_DynamicArray.resize(0, freeMemory); + } + else + { + // Any direction, staying in m_StaticArray - nothing to do here + } + m_Count = newCount; + } + + void clear(bool freeMemory = false) + { + m_DynamicArray.clear(freeMemory); + m_Count = 0; + } + + void insert(size_t index, const T& src) + { + VMA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + T* const dataPtr = data(); + if(index < oldCount) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_StaticArray to m_DynamicArray. + memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T)); + } + dataPtr[index] = src; + } + + void remove(size_t index) + { + VMA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if(index < oldCount - 1) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray. + T* const dataPtr = data(); + memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T)); + } + resize(oldCount - 1); + } + + void push_back(const T& src) + { + const size_t newIndex = size(); + resize(newIndex + 1); + data()[newIndex] = src; + } + + void pop_back() + { + VMA_HEAVY_ASSERT(m_Count > 0); + resize(size() - 1); + } + + void push_front(const T& src) + { + insert(0, src); + } + + void pop_front() + { + VMA_HEAVY_ASSERT(m_Count > 0); + remove(0); + } + + typedef T* iterator; + + iterator begin() { return data(); } + iterator end() { return data() + m_Count; } + +private: + size_t m_Count; + T m_StaticArray[N]; // Used when m_Size <= N + VmaVector m_DynamicArray; // Used when m_Size > N +}; //////////////////////////////////////////////////////////////////////////////// // class VmaPoolAllocator @@ -4211,15 +5428,14 @@ class VmaPoolAllocator public: VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity); ~VmaPoolAllocator(); - void Clear(); - T* Alloc(); + template T* Alloc(Types... args); void Free(T* ptr); private: union Item { uint32_t NextFreeIndex; - T Value; + alignas(T) char Value[sizeof(T)]; }; struct ItemBlock @@ -4228,7 +5444,7 @@ private: uint32_t Capacity; uint32_t FirstFreeIndex; }; - + const VkAllocationCallbacks* m_pAllocationCallbacks; const uint32_t m_FirstBlockCapacity; VmaVector< ItemBlock, VmaStlAllocator > m_ItemBlocks; @@ -4247,12 +5463,6 @@ VmaPoolAllocator::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCa template VmaPoolAllocator::~VmaPoolAllocator() -{ - Clear(); -} - -template -void VmaPoolAllocator::Clear() { for(size_t i = m_ItemBlocks.size(); i--; ) vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity); @@ -4260,7 +5470,7 @@ void VmaPoolAllocator::Clear() } template -T* VmaPoolAllocator::Alloc() +template T* VmaPoolAllocator::Alloc(Types... args) { for(size_t i = m_ItemBlocks.size(); i--; ) { @@ -4270,7 +5480,9 @@ T* VmaPoolAllocator::Alloc() { Item* const pItem = &block.pItems[block.FirstFreeIndex]; block.FirstFreeIndex = pItem->NextFreeIndex; - return &pItem->Value; + T* result = (T*)&pItem->Value; + new(result)T(std::forward(args)...); // Explicit constructor call. + return result; } } @@ -4278,7 +5490,9 @@ T* VmaPoolAllocator::Alloc() ItemBlock& newBlock = CreateNewBlock(); Item* const pItem = &newBlock.pItems[0]; newBlock.FirstFreeIndex = pItem->NextFreeIndex; - return &pItem->Value; + T* result = (T*)&pItem->Value; + new(result)T(std::forward(args)...); // Explicit constructor call. + return result; } template @@ -4288,14 +5502,15 @@ void VmaPoolAllocator::Free(T* ptr) for(size_t i = m_ItemBlocks.size(); i--; ) { ItemBlock& block = m_ItemBlocks[i]; - + // Casting to union. Item* pItemPtr; memcpy(&pItemPtr, &ptr, sizeof(pItemPtr)); - + // Check if pItemPtr is in address range of this block. if((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity)) { + ptr->~T(); // Explicit destructor call. const uint32_t index = static_cast(pItemPtr - block.pItems); pItemPtr->NextFreeIndex = block.FirstFreeIndex; block.FirstFreeIndex = index; @@ -4368,7 +5583,7 @@ public: ItemType* PushFront(const T& value); void PopBack(); void PopFront(); - + // Item can be null - it means PushBack. ItemType* InsertBefore(ItemType* pItem); // Item can be null - it means PushFront. @@ -4678,7 +5893,7 @@ public: VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } - + private: VmaRawList* m_pList; VmaListItem* m_pItem; @@ -4706,7 +5921,7 @@ public: m_pItem(src.m_pItem) { } - + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); @@ -4761,7 +5976,7 @@ public: VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } - + private: const_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), @@ -4840,7 +6055,7 @@ public: void insert(const PairType& pair); iterator find(const KeyT& key); void erase(iterator it); - + private: VmaVector< PairType, VmaStlAllocator > m_Vector; }; @@ -4924,28 +6139,27 @@ public: }; /* - This struct cannot have constructor or destructor. It must be POD because it is - allocated using VmaPoolAllocator. + This struct is allocated using VmaPoolAllocator. */ - void Ctor(uint32_t currentFrameIndex, bool userDataString) + VmaAllocation_T(uint32_t currentFrameIndex, bool userDataString) : + m_Alignment{1}, + m_Size{0}, + m_pUserData{VMA_NULL}, + m_LastUseFrameIndex{currentFrameIndex}, + m_MemoryTypeIndex{0}, + m_Type{(uint8_t)ALLOCATION_TYPE_NONE}, + m_SuballocationType{(uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN}, + m_MapCount{0}, + m_Flags{userDataString ? (uint8_t)FLAG_USER_DATA_STRING : (uint8_t)0} { - m_Alignment = 1; - m_Size = 0; - m_pUserData = VMA_NULL; - m_LastUseFrameIndex = currentFrameIndex; - m_Type = (uint8_t)ALLOCATION_TYPE_NONE; - m_SuballocationType = (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN; - m_MapCount = 0; - m_Flags = userDataString ? (uint8_t)FLAG_USER_DATA_STRING : 0; - #if VMA_STATS_STRING_ENABLED m_CreationFrameIndex = currentFrameIndex; m_BufferImageUsage = 0; #endif } - void Dtor() + ~VmaAllocation_T() { VMA_ASSERT((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) == 0 && "Allocation was not unmapped before destruction."); @@ -4958,6 +6172,7 @@ public: VkDeviceSize offset, VkDeviceSize alignment, VkDeviceSize size, + uint32_t memoryTypeIndex, VmaSuballocationType suballocationType, bool mapped, bool canBecomeLost) @@ -4967,6 +6182,7 @@ public: m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; m_Alignment = alignment; m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; m_MapCount = mapped ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; m_SuballocationType = (uint8_t)suballocationType; m_BlockAllocation.m_Block = block; @@ -4979,6 +6195,7 @@ public: VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); VMA_ASSERT(m_LastUseFrameIndex.load() == VMA_FRAME_INDEX_LOST); m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; + m_MemoryTypeIndex = 0; m_BlockAllocation.m_Block = VMA_NULL; m_BlockAllocation.m_Offset = 0; m_BlockAllocation.m_CanBecomeLost = true; @@ -4987,9 +6204,8 @@ public: void ChangeBlockAllocation( VmaAllocator hAllocator, VmaDeviceMemoryBlock* block, - VkDeviceSize offset); + VkDeviceSize offset); - void ChangeSize(VkDeviceSize newSize); void ChangeOffset(VkDeviceSize newOffset); // pMappedData not null means allocation is created with MAPPED flag. @@ -5005,9 +6221,9 @@ public: m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; m_Alignment = 0; m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; m_SuballocationType = (uint8_t)suballocationType; m_MapCount = (pMappedData != VMA_NULL) ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; - m_DedicatedAllocation.m_MemoryTypeIndex = memoryTypeIndex; m_DedicatedAllocation.m_hMemory = hMemory; m_DedicatedAllocation.m_pMappedData = pMappedData; } @@ -5027,11 +6243,11 @@ public: } VkDeviceSize GetOffset() const; VkDeviceMemory GetMemory() const; - uint32_t GetMemoryTypeIndex() const; + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } bool IsPersistentMap() const { return (m_MapCount & MAP_COUNT_FLAG_PERSISTENT_MAP) != 0; } void* GetMappedData() const; bool CanBecomeLost() const; - + uint32_t GetLastUseFrameIndex() const { return m_LastUseFrameIndex.load(); @@ -5044,7 +6260,7 @@ public: - If hAllocation.LastUseFrameIndex + frameInUseCount < allocator.CurrentFrameIndex, makes it lost by setting LastUseFrameIndex = VMA_FRAME_INDEX_LOST and returns true. - Else, returns false. - + If hAllocation is already lost, assert - you should not call it then. If hAllocation was not created with CAN_BECOME_LOST_BIT, assert. */ @@ -5086,6 +6302,7 @@ private: VkDeviceSize m_Size; void* m_pUserData; VMA_ATOMIC_UINT32 m_LastUseFrameIndex; + uint32_t m_MemoryTypeIndex; uint8_t m_Type; // ALLOCATION_TYPE uint8_t m_SuballocationType; // VmaSuballocationType // Bit 0x80 is set when allocation was created with VMA_ALLOCATION_CREATE_MAPPED_BIT. @@ -5104,7 +6321,6 @@ private: // Allocation for an object that has its own private VkDeviceMemory. struct DedicatedAllocation { - uint32_t m_MemoryTypeIndex; VkDeviceMemory m_hMemory; void* m_pMappedData; // Not null means memory is mapped. }; @@ -5260,9 +6476,6 @@ public: virtual void Free(const VmaAllocation allocation) = 0; virtual void FreeAtOffset(VkDeviceSize offset) = 0; - // Tries to resize (grow or shrink) space for given allocation, in place. - virtual bool ResizeAllocation(const VmaAllocation alloc, VkDeviceSize newSize) { return false; } - protected: const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; } @@ -5341,11 +6554,9 @@ public: virtual void Free(const VmaAllocation allocation); virtual void FreeAtOffset(VkDeviceSize offset); - virtual bool ResizeAllocation(const VmaAllocation alloc, VkDeviceSize newSize); - //////////////////////////////////////////////////////////////////////////////// // For defragmentation - + bool IsBufferImageGranularityConflictPossible( VkDeviceSize bufferImageGranularity, VmaSuballocationType& inOutPrevSuballocType) const; @@ -5556,7 +6767,7 @@ private: SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } - + // Number of items in 1st vector with hAllocation = null at the beginning. size_t m_1stNullItemsBeginCount; // Number of other items in 1st vector with hAllocation = null somewhere in the middle. @@ -5767,7 +6978,7 @@ public: uint32_t algorithm); // Always call before destruction. void Destroy(VmaAllocator allocator); - + VmaPool GetParentPool() const { return m_hParentPool; } VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } @@ -5789,11 +7000,15 @@ public: VkResult BindBufferMemory( const VmaAllocator hAllocator, const VmaAllocation hAllocation, - VkBuffer hBuffer); + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); VkResult BindImageMemory( const VmaAllocator hAllocator, const VmaAllocation hAllocation, - VkImage hImage); + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); private: VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. @@ -5826,6 +7041,9 @@ struct VmaDefragmentationMove VkDeviceSize srcOffset; VkDeviceSize dstOffset; VkDeviceSize size; + VmaAllocation hAllocation; + VmaDeviceMemoryBlock* pSrcBlock; + VmaDeviceMemoryBlock* pDstBlock; }; class VmaDefragmentationAlgorithm; @@ -5849,14 +7067,16 @@ public: size_t maxBlockCount, VkDeviceSize bufferImageGranularity, uint32_t frameInUseCount, - bool isCustomPool, bool explicitBlockSize, - uint32_t algorithm); + uint32_t algorithm, + float priority); ~VmaBlockVector(); VkResult CreateMinBlocks(); + VmaAllocator GetAllocator() const { return m_hAllocator; } VmaPool GetParentPool() const { return m_hParentPool; } + bool IsCustomPool() const { return m_hParentPool != VMA_NULL; } uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; } VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } @@ -5865,7 +7085,7 @@ public: void GetPoolStats(VmaPoolStats* pStats); - bool IsEmpty() const { return m_Blocks.empty(); } + bool IsEmpty(); bool IsCorruptionDetectionEnabled() const; VkResult Allocate( @@ -5877,8 +7097,7 @@ public: size_t allocationCount, VmaAllocation* pAllocations); - void Free( - VmaAllocation hAllocation); + void Free(const VmaAllocation hAllocation); // Adds statistics of this BlockVector to pStats. void AddStats(VmaStats* pStats); @@ -5895,12 +7114,21 @@ public: // Saves results in pCtx->res. void Defragment( class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, + VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, VkCommandBuffer commandBuffer); void DefragmentationEnd( class VmaBlockVectorDefragmentationContext* pCtx, + uint32_t flags, + VmaDefragmentationStats* pStats); + + uint32_t ProcessDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, + VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves); + + void CommitDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, VmaDefragmentationStats* pStats); //////////////////////////////////////////////////////////////////////////////// @@ -5922,14 +7150,14 @@ private: const size_t m_MaxBlockCount; const VkDeviceSize m_BufferImageGranularity; const uint32_t m_FrameInUseCount; - const bool m_IsCustomPool; const bool m_ExplicitBlockSize; const uint32_t m_Algorithm; - /* There can be at most one allocation that is completely empty - a - hysteresis to avoid pessimistic case of alternating creation and destruction - of a VkDeviceMemory. */ - bool m_HasEmptyBlock; + const float m_Priority; VMA_RW_MUTEX m_Mutex; + + /* There can be at most one allocation that is completely empty (except when minBlockCount > 0) - + a hysteresis to avoid pessimistic case of alternating creation and destruction of a VkDeviceMemory. */ + bool m_HasEmptyBlock; // Incrementally sorted by sumFreeSize, ascending. VmaVector< VmaDeviceMemoryBlock*, VmaStlAllocator > m_Blocks; uint32_t m_NextBlockId; @@ -5972,7 +7200,7 @@ private: // Saves result to pCtx->res. void ApplyDefragmentationMovesGpu( class VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, + VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkCommandBuffer commandBuffer); /* @@ -5980,6 +7208,8 @@ private: - updated with new data. */ void FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats); + + void UpdateHasEmptyBlock(); }; struct VmaPool_T @@ -5997,12 +7227,16 @@ public: uint32_t GetId() const { return m_Id; } void SetId(uint32_t id) { VMA_ASSERT(m_Id == 0); m_Id = id; } + const char* GetName() const { return m_Name; } + void SetName(const char* pName); + #if VMA_STATS_STRING_ENABLED //void PrintDetailedMap(class VmaStringBuilder& sb); #endif private: uint32_t m_Id; + char* m_Name; }; /* @@ -6035,7 +7269,8 @@ public: virtual VkResult Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) = 0; + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags) = 0; virtual VkDeviceSize GetBytesMoved() const = 0; virtual uint32_t GetAllocationsMoved() const = 0; @@ -6080,7 +7315,8 @@ public: virtual VkResult Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags); virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } @@ -6181,7 +7417,8 @@ private: VkResult DefragmentRound( VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); + uint32_t maxAllocationsToMove, + bool freeOldAllocations); size_t CalcBlocksWithNonMovableCount() const; @@ -6207,7 +7444,8 @@ public: virtual VkResult Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags); virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } @@ -6287,7 +7525,7 @@ private: } } } - + if(bestIndex != SIZE_MAX) { outBlockInfoIndex = m_FreeSpaces[bestIndex].blockInfoIndex; @@ -6346,12 +7584,6 @@ struct VmaBlockDefragmentationContext }; uint32_t flags; VkBuffer hBuffer; - - VmaBlockDefragmentationContext() : - flags(0), - hBuffer(VK_NULL_HANDLE) - { - } }; class VmaBlockVectorDefragmentationContext @@ -6361,13 +7593,16 @@ public: VkResult res; bool mutexLocked; VmaVector< VmaBlockDefragmentationContext, VmaStlAllocator > blockContexts; + VmaVector< VmaDefragmentationMove, VmaStlAllocator > defragmentationMoves; + uint32_t defragmentationMovesProcessed; + uint32_t defragmentationMovesCommitted; + bool hasDefragmentationPlan; VmaBlockVectorDefragmentationContext( VmaAllocator hAllocator, VmaPool hCustomPool, // Optional. VmaBlockVector* pBlockVector, - uint32_t currFrameIndex, - uint32_t flags); + uint32_t currFrameIndex); ~VmaBlockVectorDefragmentationContext(); VmaPool GetCustomPool() const { return m_hCustomPool; } @@ -6377,7 +7612,7 @@ public: void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); void AddAll() { m_AllAllocations = true; } - void Begin(bool overlappingMoveSupported); + void Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags); private: const VmaAllocator m_hAllocator; @@ -6386,7 +7621,6 @@ private: // Redundant, for convenience not to fetch from m_hCustomPool->m_BlockVector or m_hAllocator->m_pBlockVectors. VmaBlockVector* const m_pBlockVector; const uint32_t m_CurrFrameIndex; - const uint32_t m_AlgorithmFlags; // Owner of this object. VmaDefragmentationAlgorithm* m_pAlgorithm; @@ -6412,10 +7646,10 @@ public: VmaDefragmentationStats* pStats); ~VmaDefragmentationContext_T(); - void AddPools(uint32_t poolCount, VmaPool* pPools); + void AddPools(uint32_t poolCount, const VmaPool* pPools); void AddAllocations( uint32_t allocationCount, - VmaAllocation* pAllocations, + const VmaAllocation* pAllocations, VkBool32* pAllocationsChanged); /* @@ -6427,13 +7661,22 @@ public: VkResult Defragment( VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats); + VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags); + + VkResult DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo); + VkResult DefragmentPassEnd(); private: const VmaAllocator m_hAllocator; const uint32_t m_CurrFrameIndex; const uint32_t m_Flags; VmaDefragmentationStats* const m_pStats; + + VkDeviceSize m_MaxCpuBytesToMove; + uint32_t m_MaxCpuAllocationsToMove; + VkDeviceSize m_MaxGpuBytesToMove; + uint32_t m_MaxGpuAllocationsToMove; + // Owner of these objects. VmaBlockVectorDefragmentationContext* m_DefaultPoolContexts[VK_MAX_MEMORY_TYPES]; // Owner of these objects. @@ -6450,7 +7693,11 @@ public: void WriteConfiguration( const VkPhysicalDeviceProperties& devProps, const VkPhysicalDeviceMemoryProperties& memProps, - bool dedicatedAllocationExtensionEnabled); + uint32_t vulkanApiVersion, + bool dedicatedAllocationExtensionEnabled, + bool bindMemory2ExtensionEnabled, + bool memoryBudgetExtensionEnabled, + bool deviceCoherentMemoryExtensionEnabled); ~VmaRecorder(); void RecordCreateAllocator(uint32_t frameIndex); @@ -6485,10 +7732,6 @@ public: void RecordFreeMemoryPages(uint32_t frameIndex, uint64_t allocationCount, const VmaAllocation* pAllocations); - void RecordResizeAllocation( - uint32_t frameIndex, - VmaAllocation allocation, - VkDeviceSize newSize); void RecordSetAllocationUserData(uint32_t frameIndex, VmaAllocation allocation, const void* pUserData); @@ -6525,6 +7768,9 @@ public: VmaDefragmentationContext ctx); void RecordDefragmentationEnd(uint32_t frameIndex, VmaDefragmentationContext ctx); + void RecordSetPoolName(uint32_t frameIndex, + VmaPool pool, + const char* name); private: struct CallParams @@ -6548,8 +7794,7 @@ private: VmaRecordFlags m_Flags; FILE* m_File; VMA_MUTEX m_FileMutex; - int64_t m_Freq; - int64_t m_StartCounter; + std::chrono::time_point m_RecordingStartTime; void GetBasicParams(CallParams& outParams); @@ -6582,7 +7827,7 @@ class VmaAllocationObjectAllocator public: VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks); - VmaAllocation Allocate(); + template VmaAllocation Allocate(Types... args); void Free(VmaAllocation hAlloc); private: @@ -6590,22 +7835,77 @@ private: VmaPoolAllocator m_Allocator; }; +struct VmaCurrentBudgetData +{ + VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; + +#if VMA_MEMORY_BUDGET + VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch; + VMA_RW_MUTEX m_BudgetMutex; + uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS]; + uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS]; + uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS]; +#endif // #if VMA_MEMORY_BUDGET + + VmaCurrentBudgetData() + { + for(uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) + { + m_BlockBytes[heapIndex] = 0; + m_AllocationBytes[heapIndex] = 0; +#if VMA_MEMORY_BUDGET + m_VulkanUsage[heapIndex] = 0; + m_VulkanBudget[heapIndex] = 0; + m_BlockBytesAtBudgetFetch[heapIndex] = 0; +#endif + } + +#if VMA_MEMORY_BUDGET + m_OperationsSinceBudgetFetch = 0; +#endif + } + + void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) + { + m_AllocationBytes[heapIndex] += allocationSize; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif + } + + void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) + { + VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); // DELME + m_AllocationBytes[heapIndex] -= allocationSize; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif + } +}; + // Main allocator object. struct VmaAllocator_T { VMA_CLASS_NO_COPY(VmaAllocator_T) public: bool m_UseMutex; - bool m_UseKhrDedicatedAllocation; + uint32_t m_VulkanApiVersion; + bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseExtMemoryBudget; + bool m_UseAmdDeviceCoherentMemory; + bool m_UseKhrBufferDeviceAddress; + bool m_UseExtMemoryPriority; VkDevice m_hDevice; + VkInstance m_hInstance; bool m_AllocationCallbacksSpecified; VkAllocationCallbacks m_AllocationCallbacks; VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; VmaAllocationObjectAllocator m_AllocationObjectAllocator; - - // Number of bytes free out of limit, or VK_WHOLE_SIZE if no limit for that heap. - VkDeviceSize m_HeapSizeLimit[VK_MAX_MEMORY_HEAPS]; - VMA_MUTEX m_HeapSizeLimitMutex; + + // Each bit (1 << i) is set if HeapSizeLimit is enabled for that heap, so cannot allocate more than the heap size. + uint32_t m_HeapSizeLimitMask; VkPhysicalDeviceProperties m_PhysicalDeviceProperties; VkPhysicalDeviceMemoryProperties m_MemProps; @@ -6618,6 +7918,8 @@ public: AllocationVectorType* m_pDedicatedAllocations[VK_MAX_MEMORY_TYPES]; VMA_RW_MUTEX m_DedicatedAllocationsMutex[VK_MAX_MEMORY_TYPES]; + VmaCurrentBudgetData m_Budget; + VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo); VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo); ~VmaAllocator_T(); @@ -6631,6 +7933,8 @@ public: return m_VulkanFunctions; } + VkPhysicalDevice GetPhysicalDevice() const { return m_PhysicalDevice; } + VkDeviceSize GetBufferImageGranularity() const { return VMA_MAX( @@ -6665,6 +7969,8 @@ public: return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; } + uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; } + #if VMA_RECORDING_ENABLED VmaRecorder* GetRecorder() const { return m_pRecorder; } #endif @@ -6686,6 +7992,7 @@ public: bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, + VkBufferUsageFlags dedicatedBufferUsage, // UINT32_MAX when unknown. VkImage dedicatedImage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, @@ -6703,6 +8010,9 @@ public: void CalculateStats(VmaStats* pStats); + void GetBudget( + VmaBudget* outBudget, uint32_t firstHeap, uint32_t heapCount); + #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json); #endif @@ -6714,6 +8024,12 @@ public: VkResult DefragmentationEnd( VmaDefragmentationContext context); + VkResult DefragmentationPassBegin( + VmaDefragmentationPassInfo* pInfo, + VmaDefragmentationContext context); + VkResult DefragmentationPassEnd( + VmaDefragmentationContext context); + void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); bool TouchAllocation(VmaAllocation hAllocation); @@ -6732,28 +8048,62 @@ public: void CreateLostAllocation(VmaAllocation* pAllocation); + // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping. VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); + // Call to Vulkan function vkFreeMemory with accompanying bookkeeping. void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory); + // Call to Vulkan function vkBindBufferMemory or vkBindBufferMemory2KHR. + VkResult BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext); + // Call to Vulkan function vkBindImageMemory or vkBindImageMemory2KHR. + VkResult BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext); VkResult Map(VmaAllocation hAllocation, void** ppData); void Unmap(VmaAllocation hAllocation); - VkResult BindBufferMemory(VmaAllocation hAllocation, VkBuffer hBuffer); - VkResult BindImageMemory(VmaAllocation hAllocation, VkImage hImage); + VkResult BindBufferMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); + VkResult BindImageMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); - void FlushOrInvalidateAllocation( + VkResult FlushOrInvalidateAllocation( VmaAllocation hAllocation, VkDeviceSize offset, VkDeviceSize size, VMA_CACHE_OPERATION op); + VkResult FlushOrInvalidateAllocations( + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + VMA_CACHE_OPERATION op); void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); + /* + Returns bit mask of memory types that can support defragmentation on GPU as + they support creation of required buffer for copy operations. + */ + uint32_t GetGpuDefragmentationMemoryTypeBits(); + private: VkDeviceSize m_PreferredLargeHeapBlockSize; VkPhysicalDevice m_PhysicalDevice; VMA_ATOMIC_UINT32 m_CurrentFrameIndex; - + VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. + VMA_RW_MUTEX m_PoolsMutex; // Protected by m_PoolsMutex. Sorted by pointer value. VmaVector > m_Pools; @@ -6761,12 +8111,27 @@ private: VmaVulkanFunctions m_VulkanFunctions; + // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types. + uint32_t m_GlobalMemoryTypeBits; + #if VMA_RECORDING_ENABLED VmaRecorder* m_pRecorder; #endif void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions); +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + void ImportVulkanFunctions_Static(); +#endif + + void ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions); + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + void ImportVulkanFunctions_Dynamic(); +#endif + + void ValidateVulkanFunctions(); + VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex); VkResult AllocateMemoryOfType( @@ -6774,6 +8139,7 @@ private: VkDeviceSize alignment, bool dedicatedAllocation, VkBuffer dedicatedBuffer, + VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, @@ -6797,16 +8163,35 @@ private: VkDeviceSize size, VmaSuballocationType suballocType, uint32_t memTypeIndex, + bool withinBudget, bool map, bool isUserDataString, void* pUserData, + float priority, VkBuffer dedicatedBuffer, + VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, size_t allocationCount, VmaAllocation* pAllocations); - // Tries to free pMemory as Dedicated Memory. Returns true if found and freed. - void FreeDedicatedMemory(VmaAllocation allocation); + void FreeDedicatedMemory(const VmaAllocation allocation); + + /* + Calculates and returns bit mask of memory types that can support defragmentation + on GPU as they support creation of required buffer for copy operations. + */ + uint32_t CalculateGpuDefragmentationMemoryTypeBits() const; + + uint32_t CalculateGlobalMemoryTypeBits() const; + + bool GetFlushOrInvalidateRange( + VmaAllocation allocation, + VkDeviceSize offset, VkDeviceSize size, + VkMappedMemoryRange& outRange) const; + +#if VMA_MEMORY_BUDGET + void UpdateVulkanBudget(); +#endif // #if VMA_MEMORY_BUDGET }; //////////////////////////////////////////////////////////////////////////////// @@ -6892,15 +8277,29 @@ void VmaStringBuilder::Add(const char* pStr) void VmaStringBuilder::AddNumber(uint32_t num) { char buf[11]; - VmaUint32ToStr(buf, sizeof(buf), num); - Add(buf); + buf[10] = '\0'; + char *p = &buf[10]; + do + { + *--p = '0' + (num % 10); + num /= 10; + } + while(num); + Add(p); } void VmaStringBuilder::AddNumber(uint64_t num) { char buf[21]; - VmaUint64ToStr(buf, sizeof(buf), num); - Add(buf); + buf[20] = '\0'; + char *p = &buf[20]; + do + { + *--p = '0' + (num % 10); + num /= 10; + } + while(num); + Add(p); } void VmaStringBuilder::AddPointer(const void* ptr) @@ -6926,10 +8325,10 @@ public: void BeginObject(bool singleLine = false); void EndObject(); - + void BeginArray(bool singleLine = false); void EndArray(); - + void WriteString(const char* pStr); void BeginString(const char* pStr = VMA_NULL); void ContinueString(const char* pStr); @@ -6937,7 +8336,7 @@ public: void ContinueString(uint64_t n); void ContinueString_Pointer(const void* ptr); void EndString(const char* pStr = VMA_NULL); - + void WriteNumber(uint32_t n); void WriteNumber(uint64_t n); void WriteBool(bool b); @@ -7185,7 +8584,7 @@ void VmaJsonWriter::WriteIndent(bool oneLess) if(!m_Stack.empty() && !m_Stack.back().singleLineMode) { m_SB.AddNewLine(); - + size_t count = m_Stack.size(); if(count > 0 && oneLess) { @@ -7212,11 +8611,7 @@ void VmaAllocation_T::SetUserData(VmaAllocator hAllocator, void* pUserData) if(pUserData != VMA_NULL) { - const char* const newStrSrc = (char*)pUserData; - const size_t newStrLen = strlen(newStrSrc); - char* const newStrDst = vma_new_array(hAllocator, char, newStrLen + 1); - memcpy(newStrDst, newStrSrc, newStrLen + 1); - m_pUserData = newStrDst; + m_pUserData = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), (const char*)pUserData); } } else @@ -7247,12 +8642,6 @@ void VmaAllocation_T::ChangeBlockAllocation( m_BlockAllocation.m_Offset = offset; } -void VmaAllocation_T::ChangeSize(VkDeviceSize newSize) -{ - VMA_ASSERT(newSize > 0); - m_Size = newSize; -} - void VmaAllocation_T::ChangeOffset(VkDeviceSize newOffset) { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); @@ -7287,20 +8676,6 @@ VkDeviceMemory VmaAllocation_T::GetMemory() const } } -uint32_t VmaAllocation_T::GetMemoryTypeIndex() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_Block->GetMemoryTypeIndex(); - case ALLOCATION_TYPE_DEDICATED: - return m_DedicatedAllocation.m_MemoryTypeIndex; - default: - VMA_ASSERT(0); - return UINT32_MAX; - } -} - void* VmaAllocation_T::GetMappedData() const { switch(m_Type) @@ -7425,13 +8800,8 @@ void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const void VmaAllocation_T::FreeUserDataString(VmaAllocator hAllocator) { VMA_ASSERT(IsUserDataString()); - if(m_pUserData != VMA_NULL) - { - char* const oldStr = (char*)m_pUserData; - const size_t oldStrLen = strlen(oldStr); - vma_delete_array(hAllocator, oldStr, oldStrLen + 1); - m_pUserData = VMA_NULL; - } + VmaFreeString(hAllocator->GetAllocationCallbacks(), (char*)m_pUserData); + m_pUserData = VMA_NULL; } void VmaAllocation_T::BlockAllocMap() @@ -7628,7 +8998,7 @@ void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, VmaAllocation hAllocation) const { json.BeginObject(true); - + json.WriteString("Offset"); json.WriteNumber(offset); @@ -7642,7 +9012,7 @@ void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, VkDeviceSize size) const { json.BeginObject(true); - + json.WriteString("Offset"); json.WriteNumber(offset); @@ -7702,7 +9072,7 @@ void VmaBlockMetadata_Generic::Init(VkDeviceSize size) bool VmaBlockMetadata_Generic::Validate() const { VMA_VALIDATE(!m_Suballocations.empty()); - + // Expected offset of new suballocation as calculated from previous ones. VkDeviceSize calculatedOffset = 0; // Expected number of free suballocations as calculated from traversing their list. @@ -7720,7 +9090,7 @@ bool VmaBlockMetadata_Generic::Validate() const ++suballocItem) { const VmaSuballocation& subAlloc = *suballocItem; - + // Actual offset of this suballocation doesn't match expected one. VMA_VALIDATE(subAlloc.offset == calculatedOffset); @@ -7763,7 +9133,7 @@ bool VmaBlockMetadata_Generic::Validate() const for(size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i) { VmaSuballocationList::iterator suballocItem = m_FreeSuballocationsBySize[i]; - + // Only free suballocations can be registered in m_FreeSuballocationsBySize. VMA_VALIDATE(suballocItem->type == VMA_SUBALLOCATION_TYPE_FREE); // They must be sorted by size ascending. @@ -7805,7 +9175,7 @@ void VmaBlockMetadata_Generic::CalcAllocationStatInfo(VmaStatInfo& outInfo) cons const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); outInfo.allocationCount = rangeCount - m_FreeCount; outInfo.unusedRangeCount = m_FreeCount; - + outInfo.unusedBytes = m_SumFreeSize; outInfo.usedBytes = GetSize() - outInfo.unusedBytes; @@ -8064,7 +9434,7 @@ bool VmaBlockMetadata_Generic::MakeRequestedAllocationsLost( VMA_HEAVY_ASSERT(Validate()); VMA_ASSERT(pAllocationRequest->item != m_Suballocations.end()); VMA_ASSERT(pAllocationRequest->item->type == VMA_SUBALLOCATION_TYPE_FREE); - + return true; } @@ -8208,133 +9578,6 @@ void VmaBlockMetadata_Generic::FreeAtOffset(VkDeviceSize offset) VMA_ASSERT(0 && "Not found!"); } -bool VmaBlockMetadata_Generic::ResizeAllocation(const VmaAllocation alloc, VkDeviceSize newSize) -{ - typedef VmaSuballocationList::iterator iter_type; - for(iter_type suballocItem = m_Suballocations.begin(); - suballocItem != m_Suballocations.end(); - ++suballocItem) - { - VmaSuballocation& suballoc = *suballocItem; - if(suballoc.hAllocation == alloc) - { - iter_type nextItem = suballocItem; - ++nextItem; - - // Should have been ensured on higher level. - VMA_ASSERT(newSize != alloc->GetSize() && newSize > 0); - - // Shrinking. - if(newSize < alloc->GetSize()) - { - const VkDeviceSize sizeDiff = suballoc.size - newSize; - - // There is next item. - if(nextItem != m_Suballocations.end()) - { - // Next item is free. - if(nextItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - // Grow this next item backward. - UnregisterFreeSuballocation(nextItem); - nextItem->offset -= sizeDiff; - nextItem->size += sizeDiff; - RegisterFreeSuballocation(nextItem); - } - // Next item is not free. - else - { - // Create free item after current one. - VmaSuballocation newFreeSuballoc; - newFreeSuballoc.hAllocation = VK_NULL_HANDLE; - newFreeSuballoc.offset = suballoc.offset + newSize; - newFreeSuballoc.size = sizeDiff; - newFreeSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - iter_type newFreeSuballocIt = m_Suballocations.insert(nextItem, newFreeSuballoc); - RegisterFreeSuballocation(newFreeSuballocIt); - - ++m_FreeCount; - } - } - // This is the last item. - else - { - // Create free item at the end. - VmaSuballocation newFreeSuballoc; - newFreeSuballoc.hAllocation = VK_NULL_HANDLE; - newFreeSuballoc.offset = suballoc.offset + newSize; - newFreeSuballoc.size = sizeDiff; - newFreeSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - m_Suballocations.push_back(newFreeSuballoc); - - iter_type newFreeSuballocIt = m_Suballocations.end(); - RegisterFreeSuballocation(--newFreeSuballocIt); - - ++m_FreeCount; - } - - suballoc.size = newSize; - m_SumFreeSize += sizeDiff; - } - // Growing. - else - { - const VkDeviceSize sizeDiff = newSize - suballoc.size; - - // There is next item. - if(nextItem != m_Suballocations.end()) - { - // Next item is free. - if(nextItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - // There is not enough free space, including margin. - if(nextItem->size < sizeDiff + VMA_DEBUG_MARGIN) - { - return false; - } - - // There is more free space than required. - if(nextItem->size > sizeDiff) - { - // Move and shrink this next item. - UnregisterFreeSuballocation(nextItem); - nextItem->offset += sizeDiff; - nextItem->size -= sizeDiff; - RegisterFreeSuballocation(nextItem); - } - // There is exactly the amount of free space required. - else - { - // Remove this next free item. - UnregisterFreeSuballocation(nextItem); - m_Suballocations.erase(nextItem); - --m_FreeCount; - } - } - // Next item is not free - there is no space to grow. - else - { - return false; - } - } - // This is the last item - there is no space to grow. - else - { - return false; - } - - suballoc.size = newSize; - m_SumFreeSize -= sizeDiff; - } - - // We cannot call Validate() here because alloc object is updated to new size outside of this call. - return true; - } - } - VMA_ASSERT(0 && "Not found!"); - return false; -} - bool VmaBlockMetadata_Generic::ValidateFreeSuballocationList() const { VkDeviceSize lastSize = 0; @@ -8368,7 +9611,7 @@ bool VmaBlockMetadata_Generic::CheckAllocation( VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); VMA_ASSERT(suballocItem != m_Suballocations.cend()); VMA_ASSERT(pOffset != VMA_NULL); - + *itemsToMakeLostCount = 0; *pSumFreeSize = 0; *pSumItemSize = 0; @@ -8401,19 +9644,19 @@ bool VmaBlockMetadata_Generic::CheckAllocation( // Start from offset equal to beginning of this suballocation. *pOffset = suballocItem->offset; - + // Apply VMA_DEBUG_MARGIN at the beginning. if(VMA_DEBUG_MARGIN > 0) { *pOffset += VMA_DEBUG_MARGIN; } - + // Apply alignment. *pOffset = VmaAlignUp(*pOffset, allocAlignment); // Check previous suballocations for BufferImageGranularity conflicts. // Make bigger alignment if necessary. - if(bufferImageGranularity > 1) + if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment) { bool bufferImageGranularityConflict = false; VmaSuballocationList::const_iterator prevSuballocItem = suballocItem; @@ -8438,14 +9681,14 @@ bool VmaBlockMetadata_Generic::CheckAllocation( *pOffset = VmaAlignUp(*pOffset, bufferImageGranularity); } } - + // Now that we have final *pOffset, check if we are past suballocItem. // If yes, return false - this function should be called for another suballocItem as starting point. if(*pOffset >= suballocItem->offset + suballocItem->size) { return false; } - + // Calculate padding at the beginning based on current offset. const VkDeviceSize paddingBegin = *pOffset - suballocItem->offset; @@ -8497,7 +9740,7 @@ bool VmaBlockMetadata_Generic::CheckAllocation( // Check next suballocations for BufferImageGranularity conflicts. // If conflict exists, we must mark more allocations lost or fail. - if(bufferImageGranularity > 1) + if(allocSize % bufferImageGranularity || *pOffset % bufferImageGranularity) { VmaSuballocationList::const_iterator nextSuballocItem = lastSuballocItem; ++nextSuballocItem; @@ -8544,19 +9787,19 @@ bool VmaBlockMetadata_Generic::CheckAllocation( // Start from offset equal to beginning of this suballocation. *pOffset = suballoc.offset; - + // Apply VMA_DEBUG_MARGIN at the beginning. if(VMA_DEBUG_MARGIN > 0) { *pOffset += VMA_DEBUG_MARGIN; } - + // Apply alignment. *pOffset = VmaAlignUp(*pOffset, allocAlignment); - + // Check previous suballocations for BufferImageGranularity conflicts. // Make bigger alignment if necessary. - if(bufferImageGranularity > 1) + if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment) { bool bufferImageGranularityConflict = false; VmaSuballocationList::const_iterator prevSuballocItem = suballocItem; @@ -8581,7 +9824,7 @@ bool VmaBlockMetadata_Generic::CheckAllocation( *pOffset = VmaAlignUp(*pOffset, bufferImageGranularity); } } - + // Calculate padding at the beginning based on current offset. const VkDeviceSize paddingBegin = *pOffset - suballoc.offset; @@ -8596,7 +9839,7 @@ bool VmaBlockMetadata_Generic::CheckAllocation( // Check next suballocations for BufferImageGranularity conflicts. // If conflict exists, allocation cannot be made here. - if(bufferImageGranularity > 1) + if(allocSize % bufferImageGranularity || *pOffset % bufferImageGranularity) { VmaSuballocationList::const_iterator nextSuballocItem = suballocItem; ++nextSuballocItem; @@ -8628,7 +9871,7 @@ void VmaBlockMetadata_Generic::MergeFreeWithNext(VmaSuballocationList::iterator { VMA_ASSERT(item != m_Suballocations.end()); VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - + VmaSuballocationList::iterator nextItem = item; ++nextItem; VMA_ASSERT(nextItem != m_Suballocations.end()); @@ -8645,7 +9888,7 @@ VmaSuballocationList::iterator VmaBlockMetadata_Generic::FreeSuballocation(VmaSu VmaSuballocation& suballoc = *suballocItem; suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; suballoc.hAllocation = VK_NULL_HANDLE; - + // Update totals. ++m_FreeCount; m_SumFreeSize += suballoc.size; @@ -8653,7 +9896,7 @@ VmaSuballocationList::iterator VmaBlockMetadata_Generic::FreeSuballocation(VmaSu // Merge with previous and/or next suballocation if it's also free. bool mergeWithNext = false; bool mergeWithPrev = false; - + VmaSuballocationList::iterator nextItem = suballocItem; ++nextItem; if((nextItem != m_Suballocations.end()) && (nextItem->type == VMA_SUBALLOCATION_TYPE_FREE)) @@ -8951,7 +10194,7 @@ VkDeviceSize VmaBlockMetadata_Linear::GetUnusedRangeSizeMax() const { return size; } - + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); switch(m_2ndVectorMode) @@ -9038,7 +10281,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if(nextAlloc2ndIndex < suballoc2ndCount) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9049,13 +10292,13 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. outInfo.usedBytes += suballoc.size; outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); outInfo.allocationSizeMax = VMA_MIN(outInfo.allocationSizeMax, suballoc.size); - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc2ndIndex; @@ -9095,7 +10338,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if(nextAlloc1stIndex < suballoc1stCount) { const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9106,13 +10349,13 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. outInfo.usedBytes += suballoc.size; outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); outInfo.allocationSizeMax = VMA_MIN(outInfo.allocationSizeMax, suballoc.size); - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc1stIndex; @@ -9151,7 +10394,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if(nextAlloc2ndIndex != SIZE_MAX) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9162,13 +10405,13 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, unusedRangeSize); outInfo.unusedRangeSizeMax = VMA_MIN(outInfo.unusedRangeSizeMax, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. outInfo.usedBytes += suballoc.size; outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); outInfo.allocationSizeMax = VMA_MIN(outInfo.allocationSizeMax, suballoc.size); - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; --nextAlloc2ndIndex; @@ -9224,7 +10467,7 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const if(nextAlloc2ndIndex < suballoc2ndCount) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9234,11 +10477,11 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const ++inoutStats.unusedRangeCount; inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++inoutStats.allocationCount; - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc2ndIndex; @@ -9277,7 +10520,7 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const if(nextAlloc1stIndex < suballoc1stCount) { const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9287,11 +10530,11 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const ++inoutStats.unusedRangeCount; inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++inoutStats.allocationCount; - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc1stIndex; @@ -9329,7 +10572,7 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const if(nextAlloc2ndIndex != SIZE_MAX) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9339,11 +10582,11 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const ++inoutStats.unusedRangeCount; inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++inoutStats.allocationCount; - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; --nextAlloc2ndIndex; @@ -9401,19 +10644,19 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const if(nextAlloc2ndIndex < suballoc2ndCount) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { // There is free space from lastOffset to suballoc.offset. ++unusedRangeCount; } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++alloc2ndCount; usedBytes += suballoc.size; - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc2ndIndex; @@ -9450,19 +10693,19 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const if(nextAlloc1stIndex < suballoc1stCount) { const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { // There is free space from lastOffset to suballoc.offset. ++unusedRangeCount; } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++alloc1stCount; usedBytes += suballoc.size; - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc1stIndex; @@ -9497,19 +10740,19 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const if(nextAlloc2ndIndex != SIZE_MAX) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { // There is free space from lastOffset to suballoc.offset. ++unusedRangeCount; } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++alloc2ndCount; usedBytes += suballoc.size; - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; --nextAlloc2ndIndex; @@ -9552,7 +10795,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const if(nextAlloc2ndIndex < suballoc2ndCount) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9560,11 +10803,11 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.hAllocation); - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc2ndIndex; @@ -9599,7 +10842,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const if(nextAlloc1stIndex < suballoc1stCount) { const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9607,11 +10850,11 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.hAllocation); - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc1stIndex; @@ -9647,7 +10890,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const if(nextAlloc2ndIndex != SIZE_MAX) { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - + // 1. Process free space before this allocation. if(lastOffset < suballoc.offset) { @@ -9655,11 +10898,11 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); } - + // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.hAllocation); - + // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; --nextAlloc2ndIndex; @@ -9764,7 +11007,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( // Check next suballocations from 2nd for BufferImageGranularity conflicts. // Make bigger alignment if necessary. - if(bufferImageGranularity > 1 && !suballocations2nd.empty()) + if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) { bool bufferImageGranularityConflict = false; for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) @@ -9869,7 +11112,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( // Check previous suballocations for BufferImageGranularity conflicts. // Make bigger alignment if necessary. - if(bufferImageGranularity > 1 && !suballocations1st.empty()) + if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) { bool bufferImageGranularityConflict = false; for(size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) @@ -9901,7 +11144,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( { // Check next suballocations for BufferImageGranularity conflicts. // If conflict exists, allocation cannot be made here. - if(bufferImageGranularity > 1 && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + if((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) { for(size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) { @@ -9959,7 +11202,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( // Check previous suballocations for BufferImageGranularity conflicts. // Make bigger alignment if necessary. - if(bufferImageGranularity > 1 && !suballocations2nd.empty()) + if(bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) { bool bufferImageGranularityConflict = false; for(size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) @@ -10017,7 +11260,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( // Check next suballocations for BufferImageGranularity conflicts. // If conflict exists, we must mark more allocations lost or fail. - if(bufferImageGranularity > 1) + if(allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) { while(index1st < suballocations1st.size()) { @@ -10063,7 +11306,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( { // Check next suballocations for BufferImageGranularity conflicts. // If conflict exists, allocation cannot be made here. - if(bufferImageGranularity > 1) + if(allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) { for(size_t nextSuballocIndex = index1st; nextSuballocIndex < suballocations1st.size(); @@ -10111,7 +11354,7 @@ bool VmaBlockMetadata_Linear::MakeRequestedAllocationsLost( } VMA_ASSERT(m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER); - + // We always start from 1st. SuballocationVectorType* suballocations = &AccessSuballocations1st(); size_t index = m_1stNullItemsBeginCount; @@ -10160,14 +11403,14 @@ bool VmaBlockMetadata_Linear::MakeRequestedAllocationsLost( CleanupAfterFree(); //VMA_HEAVY_ASSERT(Validate()); // Already called by ClanupAfterFree(). - + return true; } uint32_t VmaBlockMetadata_Linear::MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) { uint32_t lostAllocationCount = 0; - + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); for(size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) { @@ -10374,10 +11617,11 @@ void VmaBlockMetadata_Linear::FreeAtOffset(VkDeviceSize offset) VmaSuballocation refSuballoc; refSuballoc.offset = offset; // Rest of members stays uninitialized intentionally for better performance. - SuballocationVectorType::iterator it = VmaVectorFindSorted( + SuballocationVectorType::iterator it = VmaBinaryFindSorted( suballocations1st.begin() + m_1stNullItemsBeginCount, suballocations1st.end(), - refSuballoc); + refSuballoc, + VmaSuballocationOffsetLess()); if(it != suballocations1st.end()) { it->type = VMA_SUBALLOCATION_TYPE_FREE; @@ -10396,8 +11640,8 @@ void VmaBlockMetadata_Linear::FreeAtOffset(VkDeviceSize offset) refSuballoc.offset = offset; // Rest of members stays uninitialized intentionally for better performance. SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? - VmaVectorFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc) : - VmaVectorFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc); + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); if(it != suballocations2nd.end()) { it->type = VMA_SUBALLOCATION_TYPE_FREE; @@ -10468,7 +11712,7 @@ void VmaBlockMetadata_Linear::CleanupAfterFree() suballocations2nd[0].hAllocation == VK_NULL_HANDLE) { --m_2ndNullItemsCount; - suballocations2nd.remove(0); + VmaVectorRemove(suballocations2nd, 0); } if(ShouldCompact1st()) @@ -10590,7 +11834,7 @@ bool VmaBlockMetadata_Buddy::Validate() const node = node->free.next) { VMA_VALIDATE(node->type == Node::TYPE_FREE); - + if(node->free.next == VMA_NULL) { VMA_VALIDATE(m_FreeList[level].back == node); @@ -10776,7 +12020,7 @@ void VmaBlockMetadata_Buddy::Alloc( const uint32_t targetLevel = AllocSizeToLevel(allocSize); uint32_t currLevel = (uint32_t)(uintptr_t)request.customData; - + Node* currNode = m_FreeList[currLevel].front; VMA_ASSERT(currNode != VMA_NULL && currNode->type == Node::TYPE_FREE); while(currNode->offset != request.offset) @@ -10784,14 +12028,14 @@ void VmaBlockMetadata_Buddy::Alloc( currNode = currNode->free.next; VMA_ASSERT(currNode != VMA_NULL && currNode->type == Node::TYPE_FREE); } - + // Go down, splitting free nodes. while(currLevel < targetLevel) { // currNode is already first free node at currLevel. // Remove it from list of free nodes at this currLevel. RemoveFromFreeList(currLevel, currNode); - + const uint32_t childrenLevel = currLevel + 1; // Create two free sub-nodes. @@ -10953,7 +12197,7 @@ void VmaBlockMetadata_Buddy::FreeAtOffset(VmaAllocation alloc, VkDeviceSize offs vma_delete(GetAllocationCallbacks(), node->buddy); vma_delete(GetAllocationCallbacks(), node); parent->type = Node::TYPE_FREE; - + node = parent; --level; //m_SumFreeSize += LevelToNodeSize(level) % 2; // Useful only when level node sizes can be non power of 2. @@ -11067,7 +12311,7 @@ void VmaBlockMetadata_Buddy::PrintDetailedMapNode(class VmaJsonWriter& json, con PrintDetailedMap_UnusedRange(json, node->offset, levelNodeSize); break; case Node::TYPE_ALLOCATION: - { + { PrintDetailedMap_Allocation(json, node->offset, node->allocation.alloc); const VkDeviceSize allocSize = node->allocation.alloc->GetSize(); if(allocSize < levelNodeSize) @@ -11156,7 +12400,7 @@ bool VmaDeviceMemoryBlock::Validate() const { VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && (m_pMetadata->GetSize() != 0)); - + return m_pMetadata->Validate(); } @@ -11287,33 +12531,35 @@ VkResult VmaDeviceMemoryBlock::ValidateMagicValueAroundAllocation(VmaAllocator h VkResult VmaDeviceMemoryBlock::BindBufferMemory( const VmaAllocator hAllocator, const VmaAllocation hAllocation, - VkBuffer hBuffer) + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext) { VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - return hAllocator->GetVulkanFunctions().vkBindBufferMemory( - hAllocator->m_hDevice, - hBuffer, - m_hMemory, - hAllocation->GetOffset()); + return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); } VkResult VmaDeviceMemoryBlock::BindImageMemory( const VmaAllocator hAllocator, const VmaAllocation hAllocation, - VkImage hImage) + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext) { VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - return hAllocator->GetVulkanFunctions().vkBindImageMemory( - hAllocator->m_hDevice, - hImage, - m_hMemory, - hAllocation->GetOffset()); + return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); } static void InitStatInfo(VmaStatInfo& outInfo) @@ -11358,10 +12604,11 @@ VmaPool_T::VmaPool_T( createInfo.maxBlockCount, (createInfo.flags & VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(), createInfo.frameInUseCount, - true, // isCustomPool createInfo.blockSize != 0, // explicitBlockSize - createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK), // algorithm - m_Id(0) + createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, + createInfo.priority), // algorithm + m_Id(0), + m_Name(VMA_NULL) { } @@ -11369,6 +12616,21 @@ VmaPool_T::~VmaPool_T() { } +void VmaPool_T::SetName(const char* pName) +{ + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); + + if(pName != VMA_NULL) + { + m_Name = VmaCreateStringCopy(allocs, pName); + } + else + { + m_Name = VMA_NULL; + } +} + #if VMA_STATS_STRING_ENABLED #endif // #if VMA_STATS_STRING_ENABLED @@ -11382,9 +12644,9 @@ VmaBlockVector::VmaBlockVector( size_t maxBlockCount, VkDeviceSize bufferImageGranularity, uint32_t frameInUseCount, - bool isCustomPool, bool explicitBlockSize, - uint32_t algorithm) : + uint32_t algorithm, + float priority) : m_hAllocator(hAllocator), m_hParentPool(hParentPool), m_MemoryTypeIndex(memoryTypeIndex), @@ -11393,9 +12655,9 @@ VmaBlockVector::VmaBlockVector( m_MaxBlockCount(maxBlockCount), m_BufferImageGranularity(bufferImageGranularity), m_FrameInUseCount(frameInUseCount), - m_IsCustomPool(isCustomPool), m_ExplicitBlockSize(explicitBlockSize), m_Algorithm(algorithm), + m_Priority(priority), m_HasEmptyBlock(false), m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), m_NextBlockId(0) @@ -11446,6 +12708,12 @@ void VmaBlockVector::GetPoolStats(VmaPoolStats* pStats) } } +bool VmaBlockVector::IsEmpty() +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + return m_Blocks.empty(); +} + bool VmaBlockVector::IsCorruptionDetectionEnabled() const { const uint32_t requiredMemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; @@ -11518,9 +12786,20 @@ VkResult VmaBlockVector::AllocatePage( bool canMakeOtherLost = (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT) != 0; const bool mapped = (createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; const bool isUserDataString = (createInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + + VkDeviceSize freeMemory; + { + const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); + VmaBudget heapBudget = {}; + m_hAllocator->GetBudget(&heapBudget, heapIndex, 1); + freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0; + } + + const bool canFallbackToDedicated = !IsCustomPool(); const bool canCreateNewBlock = ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) && - (m_Blocks.size() < m_MaxBlockCount); + (m_Blocks.size() < m_MaxBlockCount) && + (freeMemory >= size || !canFallbackToDedicated); uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK; // If linearAlgorithm is used, canMakeOtherLost is available only when used as ring buffer. @@ -11587,7 +12866,7 @@ VkResult VmaBlockVector::AllocatePage( pAllocation); if(res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from last block #%u", (uint32_t)(m_Blocks.size() - 1)); + VMA_DEBUG_LOG(" Returned from last block #%u", pCurrBlock->GetId()); return VK_SUCCESS; } } @@ -11613,7 +12892,7 @@ VkResult VmaBlockVector::AllocatePage( pAllocation); if(res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from existing block #%u", (uint32_t)blockIndex); + VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); return VK_SUCCESS; } } @@ -11637,7 +12916,7 @@ VkResult VmaBlockVector::AllocatePage( pAllocation); if(res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from existing block #%u", (uint32_t)blockIndex); + VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); return VK_SUCCESS; } } @@ -11672,7 +12951,8 @@ VkResult VmaBlockVector::AllocatePage( } size_t newBlockIndex = 0; - VkResult res = CreateBlock(newBlockSize, &newBlockIndex); + VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. if(!m_ExplicitBlockSize) { @@ -11683,7 +12963,8 @@ VkResult VmaBlockVector::AllocatePage( { newBlockSize = smallerNewBlockSize; ++newBlockSizeShift; - res = CreateBlock(newBlockSize, &newBlockIndex); + res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; } else { @@ -11709,7 +12990,7 @@ VkResult VmaBlockVector::AllocatePage( pAllocation); if(res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Created new block Size=%llu", newBlockSize); + VMA_DEBUG_LOG(" Created new block #%u Size=%llu", pBlock->GetId(), newBlockSize); return VK_SUCCESS; } else @@ -11823,26 +13104,23 @@ VkResult VmaBlockVector::AllocatePage( m_FrameInUseCount, &bestRequest)) { - // We no longer have an empty Allocation. - if(pBestRequestBlock->m_pMetadata->IsEmpty()) - { - m_HasEmptyBlock = false; - } // Allocate from this pBlock. - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(); - (*pAllocation)->Ctor(currentFrameIndex, isUserDataString); + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(currentFrameIndex, isUserDataString); pBestRequestBlock->m_pMetadata->Alloc(bestRequest, suballocType, size, *pAllocation); + UpdateHasEmptyBlock(); (*pAllocation)->InitBlockAllocation( pBestRequestBlock, bestRequest.offset, alignment, size, + m_MemoryTypeIndex, suballocType, mapped, (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); VMA_HEAVY_ASSERT(pBestRequestBlock->Validate()); VMA_DEBUG_LOG(" Returned from existing block"); (*pAllocation)->SetUserData(m_hAllocator, createInfo.pUserData); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); @@ -11875,10 +13153,18 @@ VkResult VmaBlockVector::AllocatePage( } void VmaBlockVector::Free( - VmaAllocation hAllocation) + const VmaAllocation hAllocation) { VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; + bool budgetExceeded = false; + { + const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); + VmaBudget heapBudget = {}; + m_hAllocator->GetBudget(&heapBudget, heapIndex, 1); + budgetExceeded = heapBudget.usage >= heapBudget.budget; + } + // Scope for lock. { VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); @@ -11901,42 +13187,39 @@ void VmaBlockVector::Free( VMA_DEBUG_LOG(" Freed from MemoryTypeIndex=%u", m_MemoryTypeIndex); + const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; // pBlock became empty after this deallocation. if(pBlock->m_pMetadata->IsEmpty()) { - // Already has empty Allocation. We don't want to have two, so delete this one. - if(m_HasEmptyBlock && m_Blocks.size() > m_MinBlockCount) + // Already has empty block. We don't want to have two, so delete this one. + if((m_HasEmptyBlock || budgetExceeded) && canDeleteBlock) { pBlockToDelete = pBlock; Remove(pBlock); } - // We now have first empty block. - else - { - m_HasEmptyBlock = true; - } + // else: We now have an empty block - leave it. } // pBlock didn't become empty, but we have another empty block - find and free that one. // (This is optional, heuristics.) - else if(m_HasEmptyBlock) + else if(m_HasEmptyBlock && canDeleteBlock) { VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); - if(pLastBlock->m_pMetadata->IsEmpty() && m_Blocks.size() > m_MinBlockCount) + if(pLastBlock->m_pMetadata->IsEmpty()) { pBlockToDelete = pLastBlock; m_Blocks.pop_back(); - m_HasEmptyBlock = false; } } + UpdateHasEmptyBlock(); IncrementallySortBlocks(); } - // Destruction of a free Allocation. Deferred until this point, outside of mutex + // Destruction of a free block. Deferred until this point, outside of mutex // lock, for performance reason. if(pBlockToDelete != VMA_NULL) { - VMA_DEBUG_LOG(" Deleted empty allocation"); + VMA_DEBUG_LOG(" Deleted empty block"); pBlockToDelete->Destroy(m_hAllocator); vma_delete(m_hAllocator, pBlockToDelete); } @@ -12025,26 +13308,22 @@ VkResult VmaBlockVector::AllocateFromBlock( return res; } } - - // We no longer have an empty Allocation. - if(pBlock->m_pMetadata->IsEmpty()) - { - m_HasEmptyBlock = false; - } - - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(); - (*pAllocation)->Ctor(currentFrameIndex, isUserDataString); + + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(currentFrameIndex, isUserDataString); pBlock->m_pMetadata->Alloc(currRequest, suballocType, size, *pAllocation); + UpdateHasEmptyBlock(); (*pAllocation)->InitBlockAllocation( pBlock, currRequest.offset, alignment, size, + m_MemoryTypeIndex, suballocType, mapped, (allocFlags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); VMA_HEAVY_ASSERT(pBlock->Validate()); (*pAllocation)->SetUserData(m_hAllocator, pUserData); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); @@ -12064,6 +13343,26 @@ VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIn VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; allocInfo.memoryTypeIndex = m_MemoryTypeIndex; allocInfo.allocationSize = blockSize; + +#if VMA_BUFFER_DEVICE_ADDRESS + // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature. + VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; + if(m_hAllocator->m_UseKhrBufferDeviceAddress) + { + allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); + } +#endif // #if VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + if(m_hAllocator->m_UseExtMemoryPriority) + { + priorityInfo.priority = m_Priority; + VmaPnextChainPushFront(&allocInfo, &priorityInfo); + } +#endif // #if VMA_MEMORY_PRIORITY + VkDeviceMemory mem = VK_NULL_HANDLE; VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem); if(res < 0) @@ -12112,7 +13411,7 @@ void VmaBlockVector::ApplyDefragmentationMovesCpu( void* pMappedData; }; VmaVector< BlockInfo, VmaStlAllocator > - blockInfo(blockCount, VmaStlAllocator(m_hAllocator->GetAllocationCallbacks())); + blockInfo(blockCount, BlockInfo(), VmaStlAllocator(m_hAllocator->GetAllocationCallbacks())); memset(blockInfo.data(), 0, blockCount * sizeof(BlockInfo)); // Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED. @@ -12214,7 +13513,7 @@ void VmaBlockVector::ApplyDefragmentationMovesCpu( void VmaBlockVector::ApplyDefragmentationMovesGpu( class VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, + VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkCommandBuffer commandBuffer) { const size_t blockCount = m_Blocks.size(); @@ -12227,17 +13526,21 @@ void VmaBlockVector::ApplyDefragmentationMovesGpu( for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) { const VmaDefragmentationMove& move = moves[moveIndex]; - pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; + + //if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN) + { + // Old school move still require us to map the whole block + pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; + pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; + } } VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); // Go over all blocks. Create and bind buffer for whole block if necessary. { - VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT; + VkBufferCreateInfo bufCreateInfo; + VmaFillGpuDefragmentationBufferCreateInfo(bufCreateInfo); for(size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) { @@ -12260,9 +13563,6 @@ void VmaBlockVector::ApplyDefragmentationMovesGpu( // Go over all moves. Post data transfer commands to command buffer. if(pDefragCtx->res == VK_SUCCESS) { - const VkDeviceSize nonCoherentAtomSize = m_hAllocator->m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; - VkMappedMemoryRange memRange = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE }; - for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) { const VmaDefragmentationMove& move = moves[moveIndex]; @@ -12290,7 +13590,6 @@ void VmaBlockVector::ApplyDefragmentationMovesGpu( void VmaBlockVector::FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats) { - m_HasEmptyBlock = false; for(size_t blockIndex = m_Blocks.size(); blockIndex--; ) { VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; @@ -12310,10 +13609,25 @@ void VmaBlockVector::FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationSt } else { - m_HasEmptyBlock = true; + break; } } } + UpdateHasEmptyBlock(); +} + +void VmaBlockVector::UpdateHasEmptyBlock() +{ + m_HasEmptyBlock = false; + for(size_t index = 0, count = m_Blocks.size(); index < count; ++index) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; + if(pBlock->m_pMetadata->IsEmpty()) + { + m_HasEmptyBlock = true; + break; + } + } } #if VMA_STATS_STRING_ENABLED @@ -12324,8 +13638,15 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) json.BeginObject(); - if(m_IsCustomPool) + if(IsCustomPool()) { + const char* poolName = m_hParentPool->GetName(); + if(poolName != VMA_NULL && poolName[0] != '\0') + { + json.WriteString("Name"); + json.WriteString(poolName); + } + json.WriteString("MemoryTypeIndex"); json.WriteNumber(m_MemoryTypeIndex); @@ -12385,22 +13706,22 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) void VmaBlockVector::Defragment( class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, + VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, VkCommandBuffer commandBuffer) { pCtx->res = VK_SUCCESS; - + const VkMemoryPropertyFlags memPropFlags = m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags; const bool isHostVisible = (memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; - const bool isHostCoherent = (memPropFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; const bool canDefragmentOnCpu = maxCpuBytesToMove > 0 && maxCpuAllocationsToMove > 0 && isHostVisible; const bool canDefragmentOnGpu = maxGpuBytesToMove > 0 && maxGpuAllocationsToMove > 0 && - !IsCorruptionDetectionEnabled(); + !IsCorruptionDetectionEnabled() && + ((1u << m_MemoryTypeIndex) & m_hAllocator->GetGpuDefragmentationMemoryTypeBits()) != 0; // There are options to defragment this memory type. if(canDefragmentOnCpu || canDefragmentOnGpu) @@ -12422,19 +13743,28 @@ void VmaBlockVector::Defragment( if(m_hAllocator->m_UseMutex) { - m_Mutex.LockWrite(); - pCtx->mutexLocked = true; + if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) + { + if(!m_Mutex.TryLockWrite()) + { + pCtx->res = VK_ERROR_INITIALIZATION_FAILED; + return; + } + } + else + { + m_Mutex.LockWrite(); + pCtx->mutexLocked = true; + } } - pCtx->Begin(overlappingMoveSupported); + pCtx->Begin(overlappingMoveSupported, flags); // Defragment. const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove; const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove; - VmaVector< VmaDefragmentationMove, VmaStlAllocator > moves = - VmaVector< VmaDefragmentationMove, VmaStlAllocator >(VmaStlAllocator(m_hAllocator->GetAllocationCallbacks())); - pCtx->res = pCtx->GetAlgorithm()->Defragment(moves, maxBytesToMove, maxAllocationsToMove); + pCtx->res = pCtx->GetAlgorithm()->Defragment(pCtx->defragmentationMoves, maxBytesToMove, maxAllocationsToMove, flags); // Accumulate statistics. if(pStats != VMA_NULL) @@ -12456,16 +13786,27 @@ void VmaBlockVector::Defragment( maxCpuAllocationsToMove -= allocationsMoved; } } - + + if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) + { + if(m_hAllocator->m_UseMutex) + m_Mutex.UnlockWrite(); + + if(pCtx->res >= VK_SUCCESS && !pCtx->defragmentationMoves.empty()) + pCtx->res = VK_NOT_READY; + + return; + } + if(pCtx->res >= VK_SUCCESS) { if(defragmentOnGpu) { - ApplyDefragmentationMovesGpu(pCtx, moves, commandBuffer); + ApplyDefragmentationMovesGpu(pCtx, pCtx->defragmentationMoves, commandBuffer); } else { - ApplyDefragmentationMovesCpu(pCtx, moves); + ApplyDefragmentationMovesCpu(pCtx, pCtx->defragmentationMoves); } } } @@ -12473,22 +13814,36 @@ void VmaBlockVector::Defragment( void VmaBlockVector::DefragmentationEnd( class VmaBlockVectorDefragmentationContext* pCtx, + uint32_t flags, VmaDefragmentationStats* pStats) { - // Destroy buffers. - for(size_t blockIndex = pCtx->blockContexts.size(); blockIndex--; ) + if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL && m_hAllocator->m_UseMutex) { - VmaBlockDefragmentationContext& blockCtx = pCtx->blockContexts[blockIndex]; - if(blockCtx.hBuffer) - { - (*m_hAllocator->GetVulkanFunctions().vkDestroyBuffer)( - m_hAllocator->m_hDevice, blockCtx.hBuffer, m_hAllocator->GetAllocationCallbacks()); - } + VMA_ASSERT(pCtx->mutexLocked == false); + + // Incremental defragmentation doesn't hold the lock, so when we enter here we don't actually have any + // lock protecting us. Since we mutate state here, we have to take the lock out now + m_Mutex.LockWrite(); + pCtx->mutexLocked = true; } - if(pCtx->res >= VK_SUCCESS) + // If the mutex isn't locked we didn't do any work and there is nothing to delete. + if(pCtx->mutexLocked || !m_hAllocator->m_UseMutex) { - FreeEmptyBlocks(pStats); + // Destroy buffers. + for(size_t blockIndex = pCtx->blockContexts.size(); blockIndex--;) + { + VmaBlockDefragmentationContext &blockCtx = pCtx->blockContexts[blockIndex]; + if(blockCtx.hBuffer) + { + (*m_hAllocator->GetVulkanFunctions().vkDestroyBuffer)(m_hAllocator->m_hDevice, blockCtx.hBuffer, m_hAllocator->GetAllocationCallbacks()); + } + } + + if(pCtx->res >= VK_SUCCESS) + { + FreeEmptyBlocks(pStats); + } } if(pCtx->mutexLocked) @@ -12498,6 +13853,48 @@ void VmaBlockVector::DefragmentationEnd( } } +uint32_t VmaBlockVector::ProcessDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, + VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves) +{ + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + + const uint32_t moveCount = VMA_MIN(uint32_t(pCtx->defragmentationMoves.size()) - pCtx->defragmentationMovesProcessed, maxMoves); + + for(uint32_t i = 0; i < moveCount; ++ i) + { + VmaDefragmentationMove& move = pCtx->defragmentationMoves[pCtx->defragmentationMovesProcessed + i]; + + pMove->allocation = move.hAllocation; + pMove->memory = move.pDstBlock->GetDeviceMemory(); + pMove->offset = move.dstOffset; + + ++ pMove; + } + + pCtx->defragmentationMovesProcessed += moveCount; + + return moveCount; +} + +void VmaBlockVector::CommitDefragmentations( + class VmaBlockVectorDefragmentationContext *pCtx, + VmaDefragmentationStats* pStats) +{ + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + + for(uint32_t i = pCtx->defragmentationMovesCommitted; i < pCtx->defragmentationMovesProcessed; ++ i) + { + const VmaDefragmentationMove &move = pCtx->defragmentationMoves[i]; + + move.pSrcBlock->m_pMetadata->FreeAtOffset(move.srcOffset); + move.hAllocation->ChangeBlockAllocation(m_hAllocator, move.pDstBlock, move.dstOffset); + } + + pCtx->defragmentationMovesCommitted = pCtx->defragmentationMovesProcessed; + FreeEmptyBlocks(pStats); +} + size_t VmaBlockVector::CalcAllocationCount() const { size_t result = 0; @@ -12596,8 +13993,8 @@ VmaDefragmentationAlgorithm_Generic::VmaDefragmentationAlgorithm_Generic( uint32_t currentFrameIndex, bool overlappingMoveSupported) : VmaDefragmentationAlgorithm(hAllocator, pBlockVector, currentFrameIndex), - m_AllAllocations(false), m_AllocationCount(0), + m_AllAllocations(false), m_BytesMoved(0), m_AllocationsMoved(0), m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) @@ -12648,7 +14045,8 @@ void VmaDefragmentationAlgorithm_Generic::AddAllocation(VmaAllocation hAlloc, Vk VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) + uint32_t maxAllocationsToMove, + bool freeOldAllocations) { if(m_Blocks.empty()) { @@ -12703,7 +14101,7 @@ VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( srcAllocIndex = m_Blocks[srcBlockIndex]->m_Allocations.size() - 1; } } - + BlockInfo* pSrcBlockInfo = m_Blocks[srcBlockIndex]; AllocationInfo& allocInfo = pSrcBlockInfo->m_Allocations[srcAllocIndex]; @@ -12740,12 +14138,16 @@ VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( return VK_SUCCESS; } - VmaDefragmentationMove move; + VmaDefragmentationMove move = {}; move.srcBlockIndex = pSrcBlockInfo->m_OriginalBlockIndex; move.dstBlockIndex = pDstBlockInfo->m_OriginalBlockIndex; move.srcOffset = srcOffset; move.dstOffset = dstAllocRequest.offset; move.size = size; + move.hAllocation = allocInfo.m_hAllocation; + move.pSrcBlock = pSrcBlockInfo->m_pBlock; + move.pDstBlock = pDstBlockInfo->m_pBlock; + moves.push_back(move); pDstBlockInfo->m_pBlock->m_pMetadata->Alloc( @@ -12753,9 +14155,12 @@ VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( suballocType, size, allocInfo.m_hAllocation); - pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset); - - allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset); + + if(freeOldAllocations) + { + pSrcBlockInfo->m_pBlock->m_pMetadata->FreeAtOffset(srcOffset); + allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.offset); + } if(allocInfo.m_pChanged != VMA_NULL) { @@ -12808,7 +14213,8 @@ size_t VmaDefragmentationAlgorithm_Generic::CalcBlocksWithNonMovableCount() cons VkResult VmaDefragmentationAlgorithm_Generic::Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags) { if(!m_AllAllocations && m_AllocationCount == 0) { @@ -12836,7 +14242,7 @@ VkResult VmaDefragmentationAlgorithm_Generic::Defragment( } pBlockInfo->CalcHasNonMovableAllocations(); - + // This is a choice based on research. // Option 1: pBlockInfo->SortAllocationsByOffsetDescending(); @@ -12854,7 +14260,7 @@ VkResult VmaDefragmentationAlgorithm_Generic::Defragment( VkResult result = VK_SUCCESS; for(uint32_t round = 0; (round < roundCount) && (result == VK_SUCCESS); ++round) { - result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove); + result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove, !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)); } return result; @@ -12906,7 +14312,8 @@ VmaDefragmentationAlgorithm_Fast::~VmaDefragmentationAlgorithm_Fast() VkResult VmaDefragmentationAlgorithm_Fast::Defragment( VmaVector< VmaDefragmentationMove, VmaStlAllocator >& moves, VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) + uint32_t maxAllocationsToMove, + VmaDefragmentationFlags flags) { VMA_ASSERT(m_AllAllocations || m_pBlockVector->CalcAllocationCount() == m_AllocationCount); @@ -12962,6 +14369,7 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( } const VkDeviceSize srcAllocOffset = srcSuballocIt->offset; + VmaDefragmentationMove move = {}; // Try to place it in one of free spaces from the database. size_t freeSpaceInfoIndex; VkDeviceSize dstAllocOffset; @@ -12971,7 +14379,6 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( size_t freeSpaceOrigBlockIndex = m_BlockInfos[freeSpaceInfoIndex].origBlockIndex; VmaDeviceMemoryBlock* pFreeSpaceBlock = m_pBlockVector->GetBlock(freeSpaceOrigBlockIndex); VmaBlockMetadata_Generic* pFreeSpaceMetadata = (VmaBlockMetadata_Generic*)pFreeSpaceBlock->m_pMetadata; - VkDeviceSize freeSpaceBlockSize = pFreeSpaceMetadata->GetSize(); // Same block if(freeSpaceInfoIndex == srcBlockInfoIndex) @@ -12985,7 +14392,7 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( suballoc.hAllocation->ChangeOffset(dstAllocOffset); m_BytesMoved += srcAllocSize; ++m_AllocationsMoved; - + VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; ++nextSuballocIt; pSrcMetadata->m_Suballocations.erase(srcSuballocIt); @@ -12993,10 +14400,12 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( InsertSuballoc(pFreeSpaceMetadata, suballoc); - VmaDefragmentationMove move = { - srcOrigBlockIndex, freeSpaceOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = freeSpaceOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } // Different block @@ -13019,10 +14428,12 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( InsertSuballoc(pFreeSpaceMetadata, suballoc); - VmaDefragmentationMove move = { - srcOrigBlockIndex, freeSpaceOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = freeSpaceOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } } @@ -13077,10 +14488,13 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( m_BytesMoved += srcAllocSize; ++m_AllocationsMoved; ++srcSuballocIt; - VmaDefragmentationMove move = { - srcOrigBlockIndex, dstOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = dstOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } } @@ -13106,10 +14520,12 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( pDstMetadata->m_Suballocations.push_back(suballoc); - VmaDefragmentationMove move = { - srcOrigBlockIndex, dstOrigBlockIndex, - srcAllocOffset, dstAllocOffset, - srcAllocSize }; + move.srcBlockIndex = srcOrigBlockIndex; + move.dstBlockIndex = dstOrigBlockIndex; + move.srcOffset = srcAllocOffset; + move.dstOffset = dstAllocOffset; + move.size = srcAllocSize; + moves.push_back(move); } } @@ -13117,7 +14533,7 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment( } m_BlockInfos.clear(); - + PostprocessMetadata(); return VK_SUCCESS; @@ -13159,7 +14575,7 @@ void VmaDefragmentationAlgorithm_Fast::PostprocessMetadata() VmaBlockMetadata_Generic* const pMetadata = (VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata; const VkDeviceSize blockSize = pMetadata->GetSize(); - + // No allocations in this block - entire area is free. if(pMetadata->m_Suballocations.empty()) { @@ -13255,16 +14671,18 @@ VmaBlockVectorDefragmentationContext::VmaBlockVectorDefragmentationContext( VmaAllocator hAllocator, VmaPool hCustomPool, VmaBlockVector* pBlockVector, - uint32_t currFrameIndex, - uint32_t algorithmFlags) : + uint32_t currFrameIndex) : res(VK_SUCCESS), mutexLocked(false), blockContexts(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), + defragmentationMoves(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), + defragmentationMovesProcessed(0), + defragmentationMovesCommitted(0), + hasDefragmentationPlan(0), m_hAllocator(hAllocator), m_hCustomPool(hCustomPool), m_pBlockVector(pBlockVector), m_CurrFrameIndex(currFrameIndex), - m_AlgorithmFlags(algorithmFlags), m_pAlgorithm(VMA_NULL), m_Allocations(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), m_AllAllocations(false) @@ -13282,7 +14700,7 @@ void VmaBlockVectorDefragmentationContext::AddAllocation(VmaAllocation hAlloc, V m_Allocations.push_back(info); } -void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported) +void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags) { const bool allAllocations = m_AllAllocations || m_Allocations.size() == m_pBlockVector->CalcAllocationCount(); @@ -13296,10 +14714,12 @@ void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported) - VMA_DEBUG_MARGIN is 0. - All allocations in this block vector are moveable. - There is no possibility of image/buffer granularity conflict. + - The defragmentation is not incremental */ if(VMA_DEBUG_MARGIN == 0 && allAllocations && - !m_pBlockVector->IsBufferImageGranularityConflictPossible()) + !m_pBlockVector->IsBufferImageGranularityConflictPossible() && + !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)) { m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Fast)( m_hAllocator, m_pBlockVector, m_CurrFrameIndex, overlappingMoveSupported); @@ -13345,7 +14765,7 @@ VmaDefragmentationContext_T::~VmaDefragmentationContext_T() for(size_t i = m_CustomPoolContexts.size(); i--; ) { VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[i]; - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_pStats); + pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); vma_delete(m_hAllocator, pBlockVectorCtx); } for(size_t i = m_hAllocator->m_MemProps.memoryTypeCount; i--; ) @@ -13353,13 +14773,13 @@ VmaDefragmentationContext_T::~VmaDefragmentationContext_T() VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[i]; if(pBlockVectorCtx) { - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_pStats); + pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); vma_delete(m_hAllocator, pBlockVectorCtx); } } } -void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, VmaPool* pPools) +void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, const VmaPool* pPools) { for(uint32_t poolIndex = 0; poolIndex < poolCount; ++poolIndex) { @@ -13369,7 +14789,7 @@ void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, VmaPool* pPools) if(pool->m_BlockVector.GetAlgorithm() == 0) { VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL; - + for(size_t i = m_CustomPoolContexts.size(); i--; ) { if(m_CustomPoolContexts[i]->GetCustomPool() == pool) @@ -13378,15 +14798,14 @@ void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, VmaPool* pPools) break; } } - + if(!pBlockVectorDefragCtx) { pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( m_hAllocator, pool, &pool->m_BlockVector, - m_CurrFrameIndex, - m_Flags); + m_CurrFrameIndex); m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); } @@ -13397,7 +14816,7 @@ void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, VmaPool* pPools) void VmaDefragmentationContext_T::AddAllocations( uint32_t allocationCount, - VmaAllocation* pAllocations, + const VmaAllocation* pAllocations, VkBool32* pAllocationsChanged) { // Dispatch pAllocations among defragmentators. Create them when necessary. @@ -13433,8 +14852,7 @@ void VmaDefragmentationContext_T::AddAllocations( m_hAllocator, hAllocPool, &hAllocPool->m_BlockVector, - m_CurrFrameIndex, - m_Flags); + m_CurrFrameIndex); m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); } } @@ -13450,8 +14868,7 @@ void VmaDefragmentationContext_T::AddAllocations( m_hAllocator, VMA_NULL, // hCustomPool m_hAllocator->m_pBlockVectors[memTypeIndex], - m_CurrFrameIndex, - m_Flags); + m_CurrFrameIndex); m_DefaultPoolContexts[memTypeIndex] = pBlockVectorDefragCtx; } } @@ -13469,13 +14886,30 @@ void VmaDefragmentationContext_T::AddAllocations( VkResult VmaDefragmentationContext_T::Defragment( VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats) + VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags) { if(pStats) { memset(pStats, 0, sizeof(VmaDefragmentationStats)); } + if(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) + { + // For incremental defragmetnations, we just earmark how much we can move + // The real meat is in the defragmentation steps + m_MaxCpuBytesToMove = maxCpuBytesToMove; + m_MaxCpuAllocationsToMove = maxCpuAllocationsToMove; + + m_MaxGpuBytesToMove = maxGpuBytesToMove; + m_MaxGpuAllocationsToMove = maxGpuAllocationsToMove; + + if(m_MaxCpuBytesToMove == 0 && m_MaxCpuAllocationsToMove == 0 && + m_MaxGpuBytesToMove == 0 && m_MaxGpuAllocationsToMove == 0) + return VK_SUCCESS; + + return VK_NOT_READY; + } + if(commandBuffer == VK_NULL_HANDLE) { maxGpuBytesToMove = 0; @@ -13495,7 +14929,7 @@ VkResult VmaDefragmentationContext_T::Defragment( VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); pBlockVectorCtx->GetBlockVector()->Defragment( pBlockVectorCtx, - pStats, + pStats, flags, maxCpuBytesToMove, maxCpuAllocationsToMove, maxGpuBytesToMove, maxGpuAllocationsToMove, commandBuffer); @@ -13515,7 +14949,7 @@ VkResult VmaDefragmentationContext_T::Defragment( VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); pBlockVectorCtx->GetBlockVector()->Defragment( pBlockVectorCtx, - pStats, + pStats, flags, maxCpuBytesToMove, maxCpuAllocationsToMove, maxGpuBytesToMove, maxGpuAllocationsToMove, commandBuffer); @@ -13528,6 +14962,132 @@ VkResult VmaDefragmentationContext_T::Defragment( return res; } +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo) +{ + VmaDefragmentationPassMoveInfo* pCurrentMove = pInfo->pMoves; + uint32_t movesLeft = pInfo->moveCount; + + // Process default pools. + for(uint32_t memTypeIndex = 0; + memTypeIndex < m_hAllocator->GetMemoryTypeCount(); + ++memTypeIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; + if(pBlockVectorCtx) + { + VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); + + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + pBlockVectorCtx->GetBlockVector()->Defragment( + pBlockVectorCtx, + m_pStats, m_Flags, + m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, + m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, + VK_NULL_HANDLE); + + if(pBlockVectorCtx->res < VK_SUCCESS) + continue; + + pBlockVectorCtx->hasDefragmentationPlan = true; + } + + const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( + pBlockVectorCtx, + pCurrentMove, movesLeft); + + movesLeft -= processed; + pCurrentMove += processed; + } + } + + // Process custom pools. + for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); + customCtxIndex < customCtxCount; + ++customCtxIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; + VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); + + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + pBlockVectorCtx->GetBlockVector()->Defragment( + pBlockVectorCtx, + m_pStats, m_Flags, + m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, + m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, + VK_NULL_HANDLE); + + if(pBlockVectorCtx->res < VK_SUCCESS) + continue; + + pBlockVectorCtx->hasDefragmentationPlan = true; + } + + const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( + pBlockVectorCtx, + pCurrentMove, movesLeft); + + movesLeft -= processed; + pCurrentMove += processed; + } + + pInfo->moveCount = pInfo->moveCount - movesLeft; + + return VK_SUCCESS; +} +VkResult VmaDefragmentationContext_T::DefragmentPassEnd() +{ + VkResult res = VK_SUCCESS; + + // Process default pools. + for(uint32_t memTypeIndex = 0; + memTypeIndex < m_hAllocator->GetMemoryTypeCount(); + ++memTypeIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; + if(pBlockVectorCtx) + { + VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); + + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + res = VK_NOT_READY; + continue; + } + + pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( + pBlockVectorCtx, m_pStats); + + if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) + res = VK_NOT_READY; + } + } + + // Process custom pools. + for(size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); + customCtxIndex < customCtxCount; + ++customCtxIndex) + { + VmaBlockVectorDefragmentationContext *pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; + VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); + + if(!pBlockVectorCtx->hasDefragmentationPlan) + { + res = VK_NOT_READY; + continue; + } + + pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( + pBlockVectorCtx, m_pStats); + + if(pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) + res = VK_NOT_READY; + } + + return res; +} + //////////////////////////////////////////////////////////////////////////////// // VmaRecorder @@ -13537,8 +15097,7 @@ VmaRecorder::VmaRecorder() : m_UseMutex(true), m_Flags(0), m_File(VMA_NULL), - m_Freq(INT64_MAX), - m_StartCounter(INT64_MAX) + m_RecordingStartTime(std::chrono::high_resolution_clock::now()) { } @@ -13547,19 +15106,27 @@ VkResult VmaRecorder::Init(const VmaRecordSettings& settings, bool useMutex) m_UseMutex = useMutex; m_Flags = settings.flags; - QueryPerformanceFrequency((LARGE_INTEGER*)&m_Freq); - QueryPerformanceCounter((LARGE_INTEGER*)&m_StartCounter); - +#if defined(_WIN32) // Open file for writing. errno_t err = fopen_s(&m_File, settings.pFilePath, "wb"); + if(err != 0) { return VK_ERROR_INITIALIZATION_FAILED; } +#else + // Open file for writing. + m_File = fopen(settings.pFilePath, "wb"); + + if(m_File == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } +#endif // Write header. fprintf(m_File, "%s\n", "Vulkan Memory Allocator,Calls recording"); - fprintf(m_File, "%s\n", "1,5"); + fprintf(m_File, "%s\n", "1,8"); return VK_SUCCESS; } @@ -13755,20 +15322,6 @@ void VmaRecorder::RecordFreeMemoryPages(uint32_t frameIndex, Flush(); } -void VmaRecorder::RecordResizeAllocation( - uint32_t frameIndex, - VmaAllocation allocation, - VkDeviceSize newSize) -{ - CallParams callParams; - GetBasicParams(callParams); - - VmaMutexLock lock(m_FileMutex, m_UseMutex); - fprintf(m_File, "%u,%.3f,%u,vmaResizeAllocation,%p,%llu\n", callParams.threadId, callParams.time, frameIndex, - allocation, newSize); - Flush(); -} - void VmaRecorder::RecordSetAllocationUserData(uint32_t frameIndex, VmaAllocation allocation, const void* pUserData) @@ -14006,6 +15559,19 @@ void VmaRecorder::RecordDefragmentationEnd(uint32_t frameIndex, Flush(); } +void VmaRecorder::RecordSetPoolName(uint32_t frameIndex, + VmaPool pool, + const char* name) +{ + CallParams callParams; + GetBasicParams(callParams); + + VmaMutexLock lock(m_FileMutex, m_UseMutex); + fprintf(m_File, "%u,%.3f,%u,vmaSetPoolName,%p,%s\n", callParams.threadId, callParams.time, frameIndex, + pool, name != VMA_NULL ? name : ""); + Flush(); +} + VmaRecorder::UserDataString::UserDataString(VmaAllocationCreateFlags allocFlags, const void* pUserData) { if(pUserData != VMA_NULL) @@ -14016,7 +15582,8 @@ VmaRecorder::UserDataString::UserDataString(VmaAllocationCreateFlags allocFlags, } else { - sprintf_s(m_PtrStr, "%p", pUserData); + // If VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is not specified, convert the string's memory address to a string and store it. + snprintf(m_PtrStr, 17, "%p", pUserData); m_Str = m_PtrStr; } } @@ -14029,10 +15596,16 @@ VmaRecorder::UserDataString::UserDataString(VmaAllocationCreateFlags allocFlags, void VmaRecorder::WriteConfiguration( const VkPhysicalDeviceProperties& devProps, const VkPhysicalDeviceMemoryProperties& memProps, - bool dedicatedAllocationExtensionEnabled) + uint32_t vulkanApiVersion, + bool dedicatedAllocationExtensionEnabled, + bool bindMemory2ExtensionEnabled, + bool memoryBudgetExtensionEnabled, + bool deviceCoherentMemoryExtensionEnabled) { fprintf(m_File, "Config,Begin\n"); + fprintf(m_File, "VulkanApiVersion,%u,%u\n", VK_VERSION_MAJOR(vulkanApiVersion), VK_VERSION_MINOR(vulkanApiVersion)); + fprintf(m_File, "PhysicalDevice,apiVersion,%u\n", devProps.apiVersion); fprintf(m_File, "PhysicalDevice,driverVersion,%u\n", devProps.driverVersion); fprintf(m_File, "PhysicalDevice,vendorID,%u\n", devProps.vendorID); @@ -14058,6 +15631,9 @@ void VmaRecorder::WriteConfiguration( } fprintf(m_File, "Extension,VK_KHR_dedicated_allocation,%u\n", dedicatedAllocationExtensionEnabled ? 1 : 0); + fprintf(m_File, "Extension,VK_KHR_bind_memory2,%u\n", bindMemory2ExtensionEnabled ? 1 : 0); + fprintf(m_File, "Extension,VK_EXT_memory_budget,%u\n", memoryBudgetExtensionEnabled ? 1 : 0); + fprintf(m_File, "Extension,VK_AMD_device_coherent_memory,%u\n", deviceCoherentMemoryExtensionEnabled ? 1 : 0); fprintf(m_File, "Macro,VMA_DEBUG_ALWAYS_DEDICATED_MEMORY,%u\n", VMA_DEBUG_ALWAYS_DEDICATED_MEMORY ? 1 : 0); fprintf(m_File, "Macro,VMA_DEBUG_ALIGNMENT,%llu\n", (VkDeviceSize)VMA_DEBUG_ALIGNMENT); @@ -14074,11 +15650,22 @@ void VmaRecorder::WriteConfiguration( void VmaRecorder::GetBasicParams(CallParams& outParams) { - outParams.threadId = GetCurrentThreadId(); + #if defined(_WIN32) + outParams.threadId = GetCurrentThreadId(); + #else + // Use C++11 features to get thread id and convert it to uint32_t. + // There is room for optimization since sstream is quite slow. + // Is there a better way to convert std::this_thread::get_id() to uint32_t? + std::thread::id thread_id = std::this_thread::get_id(); + std::stringstream thread_id_to_string_converter; + thread_id_to_string_converter << thread_id; + std::string thread_id_as_string = thread_id_to_string_converter.str(); + outParams.threadId = static_cast(std::stoi(thread_id_as_string.c_str())); + #endif - LARGE_INTEGER counter; - QueryPerformanceCounter(&counter); - outParams.time = (double)(counter.QuadPart - m_StartCounter) / (double)m_Freq; + auto current_time = std::chrono::high_resolution_clock::now(); + + outParams.time = std::chrono::duration(current_time - m_RecordingStartTime).count(); } void VmaRecorder::PrintPointerList(uint64_t count, const VmaAllocation* pItems) @@ -14111,10 +15698,10 @@ VmaAllocationObjectAllocator::VmaAllocationObjectAllocator(const VkAllocationCal { } -VmaAllocation VmaAllocationObjectAllocator::Allocate() +template VmaAllocation VmaAllocationObjectAllocator::Allocate(Types... args) { VmaMutexLock mutexLock(m_Mutex); - return m_Allocator.Alloc(); + return m_Allocator.Alloc(std::forward(args)...); } void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) @@ -14128,50 +15715,102 @@ void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0), + m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0), m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0), + m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0), + m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0), + m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), + m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0), + m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), m_hDevice(pCreateInfo->device), + m_hInstance(pCreateInfo->instance), m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ? *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks), m_AllocationObjectAllocator(&m_AllocationCallbacks), + m_HeapSizeLimitMask(0), m_PreferredLargeHeapBlockSize(0), m_PhysicalDevice(pCreateInfo->physicalDevice), m_CurrentFrameIndex(0), + m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), m_Pools(VmaStlAllocator(GetAllocationCallbacks())), - m_NextPoolId(0) + m_NextPoolId(0), + m_GlobalMemoryTypeBits(UINT32_MAX) #if VMA_RECORDING_ENABLED ,m_pRecorder(VMA_NULL) #endif { + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_UseKhrDedicatedAllocation = false; + m_UseKhrBindMemory2 = false; + } + if(VMA_DEBUG_DETECT_CORRUPTION) { // Needs to be multiply of uint32_t size because we are going to write VMA_CORRUPTION_DETECTION_MAGIC_VALUE to it. VMA_ASSERT(VMA_DEBUG_MARGIN % sizeof(uint32_t) == 0); } - VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device); + VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device && pCreateInfo->instance); -#if !(VMA_DEDICATED_ALLOCATION) - if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0) + if(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0)) { - VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros."); +#if !(VMA_DEDICATED_ALLOCATION) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros."); + } +#endif +#if !(VMA_BIND_MEMORY2) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros."); + } +#endif + } +#if !(VMA_MEMORY_BUDGET) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros."); + } +#endif +#if !(VMA_BUFFER_DEVICE_ADDRESS) + if(m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if VMA_VULKAN_VERSION < 1002000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 2, 0)) + { + VMA_ASSERT(0 && "vulkanApiVersion >= VK_API_VERSION_1_2 but required Vulkan version is disabled by preprocessor macros."); + } +#endif +#if VMA_VULKAN_VERSION < 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_ASSERT(0 && "vulkanApiVersion >= VK_API_VERSION_1_1 but required Vulkan version is disabled by preprocessor macros."); + } +#endif +#if !(VMA_MEMORY_PRIORITY) + if(m_UseExtMemoryPriority) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); } #endif memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); memset(&m_MemProps, 0, sizeof(m_MemProps)); - + memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors)); memset(&m_pDedicatedAllocations, 0, sizeof(m_pDedicatedAllocations)); - - for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) - { - m_HeapSizeLimit[i] = VK_WHOLE_SIZE; - } + memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions)); if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL) { + m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData; m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate; m_DeviceMemoryCallbacks.pfnFree = pCreateInfo->pDeviceMemoryCallbacks->pfnFree; } @@ -14189,6 +15828,8 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ? pCreateInfo->preferredLargeHeapBlockSize : static_cast(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); + m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits(); + if(pCreateInfo->pHeapSizeLimit != VMA_NULL) { for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) @@ -14196,7 +15837,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex]; if(limit != VK_WHOLE_SIZE) { - m_HeapSizeLimit[heapIndex] = limit; + m_HeapSizeLimitMask |= 1u << heapIndex; if(limit < m_MemProps.memoryHeaps[heapIndex].size) { m_MemProps.memoryHeaps[heapIndex].size = limit; @@ -14218,9 +15859,9 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : SIZE_MAX, GetBufferImageGranularity(), pCreateInfo->frameInUseCount, - false, // isCustomPool false, // explicitBlockSize - false); // linearAlgorithm + false, // linearAlgorithm + 0.5f); // priority (0.5 is the default per Vulkan spec) // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, // becase minBlockCount is 0. m_pDedicatedAllocations[memTypeIndex] = vma_new(this, AllocationVectorType)(VmaStlAllocator(GetAllocationCallbacks())); @@ -14245,7 +15886,11 @@ VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) m_pRecorder->WriteConfiguration( m_PhysicalDeviceProperties, m_MemProps, - m_UseKhrDedicatedAllocation); + m_VulkanApiVersion, + m_UseKhrDedicatedAllocation, + m_UseKhrBindMemory2, + m_UseExtMemoryBudget, + m_UseAmdDeviceCoherentMemory); m_pRecorder->RecordCreateAllocator(GetCurrentFrameIndex()); #else VMA_ASSERT(0 && "VmaAllocatorCreateInfo::pRecordSettings used, but not supported due to VMA_RECORDING_ENABLED not defined to 1."); @@ -14253,6 +15898,13 @@ VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) #endif } +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + UpdateVulkanBudget(); + } +#endif // #if VMA_MEMORY_BUDGET + return res; } @@ -14265,7 +15917,7 @@ VmaAllocator_T::~VmaAllocator_T() vma_delete(this, m_pRecorder); } #endif - + VMA_ASSERT(m_Pools.empty()); for(size_t i = GetMemoryTypeCount(); i--; ) @@ -14283,66 +15935,174 @@ VmaAllocator_T::~VmaAllocator_T() void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) { #if VMA_STATIC_VULKAN_FUNCTIONS == 1 - m_VulkanFunctions.vkGetPhysicalDeviceProperties = &vkGetPhysicalDeviceProperties; - m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = &vkGetPhysicalDeviceMemoryProperties; - m_VulkanFunctions.vkAllocateMemory = &vkAllocateMemory; - m_VulkanFunctions.vkFreeMemory = &vkFreeMemory; - m_VulkanFunctions.vkMapMemory = &vkMapMemory; - m_VulkanFunctions.vkUnmapMemory = &vkUnmapMemory; - m_VulkanFunctions.vkFlushMappedMemoryRanges = &vkFlushMappedMemoryRanges; - m_VulkanFunctions.vkInvalidateMappedMemoryRanges = &vkInvalidateMappedMemoryRanges; - m_VulkanFunctions.vkBindBufferMemory = &vkBindBufferMemory; - m_VulkanFunctions.vkBindImageMemory = &vkBindImageMemory; - m_VulkanFunctions.vkGetBufferMemoryRequirements = &vkGetBufferMemoryRequirements; - m_VulkanFunctions.vkGetImageMemoryRequirements = &vkGetImageMemoryRequirements; - m_VulkanFunctions.vkCreateBuffer = &vkCreateBuffer; - m_VulkanFunctions.vkDestroyBuffer = &vkDestroyBuffer; - m_VulkanFunctions.vkCreateImage = &vkCreateImage; - m_VulkanFunctions.vkDestroyImage = &vkDestroyImage; - m_VulkanFunctions.vkCmdCopyBuffer = &vkCmdCopyBuffer; -#if VMA_DEDICATED_ALLOCATION - if(m_UseKhrDedicatedAllocation) + ImportVulkanFunctions_Static(); +#endif + + if(pVulkanFunctions != VMA_NULL) { - m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = - (PFN_vkGetBufferMemoryRequirements2KHR)vkGetDeviceProcAddr(m_hDevice, "vkGetBufferMemoryRequirements2KHR"); - m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = - (PFN_vkGetImageMemoryRequirements2KHR)vkGetDeviceProcAddr(m_hDevice, "vkGetImageMemoryRequirements2KHR"); + ImportVulkanFunctions_Custom(pVulkanFunctions); } -#endif // #if VMA_DEDICATED_ALLOCATION + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + ImportVulkanFunctions_Dynamic(); +#endif + + ValidateVulkanFunctions(); +} + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Static() +{ + // Vulkan 1.0 + m_VulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties; + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties; + m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; + m_VulkanFunctions.vkFreeMemory = (PFN_vkFreeMemory)vkFreeMemory; + m_VulkanFunctions.vkMapMemory = (PFN_vkMapMemory)vkMapMemory; + m_VulkanFunctions.vkUnmapMemory = (PFN_vkUnmapMemory)vkUnmapMemory; + m_VulkanFunctions.vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkFlushMappedMemoryRanges; + m_VulkanFunctions.vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkInvalidateMappedMemoryRanges; + m_VulkanFunctions.vkBindBufferMemory = (PFN_vkBindBufferMemory)vkBindBufferMemory; + m_VulkanFunctions.vkBindImageMemory = (PFN_vkBindImageMemory)vkBindImageMemory; + m_VulkanFunctions.vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetBufferMemoryRequirements; + m_VulkanFunctions.vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetImageMemoryRequirements; + m_VulkanFunctions.vkCreateBuffer = (PFN_vkCreateBuffer)vkCreateBuffer; + m_VulkanFunctions.vkDestroyBuffer = (PFN_vkDestroyBuffer)vkDestroyBuffer; + m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage; + m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage; + m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer; + + // Vulkan 1.1 +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2)vkGetBufferMemoryRequirements2; + m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; + m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; + m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; + } +#endif +} + #endif // #if VMA_STATIC_VULKAN_FUNCTIONS == 1 +void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) +{ + VMA_ASSERT(pVulkanFunctions != VMA_NULL); + #define VMA_COPY_IF_NOT_NULL(funcName) \ if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; - if(pVulkanFunctions != VMA_NULL) - { - VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); - VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); - VMA_COPY_IF_NOT_NULL(vkAllocateMemory); - VMA_COPY_IF_NOT_NULL(vkFreeMemory); - VMA_COPY_IF_NOT_NULL(vkMapMemory); - VMA_COPY_IF_NOT_NULL(vkUnmapMemory); - VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges); - VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges); - VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); - VMA_COPY_IF_NOT_NULL(vkBindImageMemory); - VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); - VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); - VMA_COPY_IF_NOT_NULL(vkCreateBuffer); - VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); - VMA_COPY_IF_NOT_NULL(vkCreateImage); - VMA_COPY_IF_NOT_NULL(vkDestroyImage); - VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); -#if VMA_DEDICATED_ALLOCATION - VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); - VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); + VMA_COPY_IF_NOT_NULL(vkAllocateMemory); + VMA_COPY_IF_NOT_NULL(vkFreeMemory); + VMA_COPY_IF_NOT_NULL(vkMapMemory); + VMA_COPY_IF_NOT_NULL(vkUnmapMemory); + VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); + VMA_COPY_IF_NOT_NULL(vkBindImageMemory); + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkCreateBuffer); + VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); + VMA_COPY_IF_NOT_NULL(vkCreateImage); + VMA_COPY_IF_NOT_NULL(vkDestroyImage); + VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR); + VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); +#endif + +#if VMA_MEMORY_BUDGET + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); #endif - } #undef VMA_COPY_IF_NOT_NULL +} - // If these asserts are hit, you must either #define VMA_STATIC_VULKAN_FUNCTIONS 1 - // or pass valid pointers as VmaAllocatorCreateInfo::pVulkanFunctions. +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Dynamic() +{ +#define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)vkGetInstanceProcAddr(m_hInstance, functionNameString); +#define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)vkGetDeviceProcAddr(m_hDevice, functionNameString); + + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties"); + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); + VMA_FETCH_DEVICE_FUNC(vkAllocateMemory, PFN_vkAllocateMemory, "vkAllocateMemory"); + VMA_FETCH_DEVICE_FUNC(vkFreeMemory, PFN_vkFreeMemory, "vkFreeMemory"); + VMA_FETCH_DEVICE_FUNC(vkMapMemory, PFN_vkMapMemory, "vkMapMemory"); + VMA_FETCH_DEVICE_FUNC(vkUnmapMemory, PFN_vkUnmapMemory, "vkUnmapMemory"); + VMA_FETCH_DEVICE_FUNC(vkFlushMappedMemoryRanges, PFN_vkFlushMappedMemoryRanges, "vkFlushMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkInvalidateMappedMemoryRanges, PFN_vkInvalidateMappedMemoryRanges, "vkInvalidateMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory, PFN_vkBindBufferMemory, "vkBindBufferMemory"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory, PFN_vkBindImageMemory, "vkBindImageMemory"); + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements, PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements, PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkCreateBuffer, PFN_vkCreateBuffer, "vkCreateBuffer"); + VMA_FETCH_DEVICE_FUNC(vkDestroyBuffer, PFN_vkDestroyBuffer, "vkDestroyBuffer"); + VMA_FETCH_DEVICE_FUNC(vkCreateImage, PFN_vkCreateImage, "vkCreateImage"); + VMA_FETCH_DEVICE_FUNC(vkDestroyImage, PFN_vkDestroyImage, "vkDestroyImage"); + VMA_FETCH_DEVICE_FUNC(vkCmdCopyBuffer, PFN_vkCmdCopyBuffer, "vkCmdCopyBuffer"); + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2, "vkGetPhysicalDeviceMemoryProperties2"); + } +#endif + +#if VMA_DEDICATED_ALLOCATION + if(m_UseKhrDedicatedAllocation) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2KHR, "vkGetBufferMemoryRequirements2KHR"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2KHR, "vkGetImageMemoryRequirements2KHR"); + } +#endif + +#if VMA_BIND_MEMORY2 + if(m_UseKhrBindMemory2) + { + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2KHR, "vkBindBufferMemory2KHR"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2KHR, "vkBindImageMemory2KHR"); + } +#endif // #if VMA_BIND_MEMORY2 + +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + } +#endif // #if VMA_MEMORY_BUDGET + +#undef VMA_FETCH_DEVICE_FUNC +#undef VMA_FETCH_INSTANCE_FUNC +} + +#endif // #if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ValidateVulkanFunctions() +{ VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL); VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL); @@ -14360,13 +16120,29 @@ void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunc VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL); -#if VMA_DEDICATED_ALLOCATION - if(m_UseKhrDedicatedAllocation) + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) { VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL); VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL); } #endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrBindMemory2) + { + VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_UseExtMemoryBudget || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); + } +#endif } VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) @@ -14374,7 +16150,7 @@ VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; const bool isSmallHeap = heapSize <= VMA_SMALL_HEAP_MAX_SIZE; - return isSmallHeap ? (heapSize / 8) : m_PreferredLargeHeapBlockSize; + return VmaAlignUp(isSmallHeap ? (heapSize / 8) : m_PreferredLargeHeapBlockSize, (VkDeviceSize)32); } VkResult VmaAllocator_T::AllocateMemoryOfType( @@ -14382,6 +16158,7 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( VkDeviceSize alignment, bool dedicatedAllocation, VkBuffer dedicatedBuffer, + VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, @@ -14400,6 +16177,11 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( { finalCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; } + // If memory is lazily allocated, it should be always dedicated. + if(finalCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) + { + finalCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } VmaBlockVector* const blockVector = m_pBlockVectors[memTypeIndex]; VMA_ASSERT(blockVector); @@ -14430,10 +16212,13 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( size, suballocType, memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, finalCreateInfo.pUserData, + finalCreateInfo.priority, dedicatedBuffer, + dedicatedBufferUsage, dedicatedImage, allocationCount, pAllocations); @@ -14465,10 +16250,13 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( size, suballocType, memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, finalCreateInfo.pUserData, + finalCreateInfo.priority, dedicatedBuffer, + dedicatedBufferUsage, dedicatedImage, allocationCount, pAllocations); @@ -14492,40 +16280,85 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( VkDeviceSize size, VmaSuballocationType suballocType, uint32_t memTypeIndex, + bool withinBudget, bool map, bool isUserDataString, void* pUserData, + float priority, VkBuffer dedicatedBuffer, + VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, size_t allocationCount, VmaAllocation* pAllocations) { VMA_ASSERT(allocationCount > 0 && pAllocations); + if(withinBudget) + { + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + VmaBudget heapBudget = {}; + GetBudget(&heapBudget, heapIndex, 1); + if(heapBudget.usage + size * allocationCount > heapBudget.budget) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; allocInfo.memoryTypeIndex = memTypeIndex; allocInfo.allocationSize = size; -#if VMA_DEDICATED_ALLOCATION +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; - if(m_UseKhrDedicatedAllocation) + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) { if(dedicatedBuffer != VK_NULL_HANDLE) { VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); dedicatedAllocInfo.buffer = dedicatedBuffer; - allocInfo.pNext = &dedicatedAllocInfo; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); } else if(dedicatedImage != VK_NULL_HANDLE) { dedicatedAllocInfo.image = dedicatedImage; - allocInfo.pNext = &dedicatedAllocInfo; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); } } -#endif // #if VMA_DEDICATED_ALLOCATION +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + +#if VMA_BUFFER_DEVICE_ADDRESS + VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; + if(m_UseKhrBufferDeviceAddress) + { + bool canContainBufferWithDeviceAddress = true; + if(dedicatedBuffer != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = dedicatedBufferUsage == UINT32_MAX || // Usage flags unknown + (dedicatedBufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) != 0; + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = false; + } + if(canContainBufferWithDeviceAddress) + { + allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); + } + } +#endif // #if VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + if(m_UseExtMemoryPriority) + { + priorityInfo.priority = priority; + VmaPnextChainPushFront(&allocInfo, &priorityInfo); + } +#endif // #if VMA_MEMORY_PRIORITY size_t allocIndex; - VkResult res; + VkResult res = VK_SUCCESS; for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) { res = AllocateDedicatedMemoryPage( @@ -14565,7 +16398,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( { VmaAllocation currAlloc = pAllocations[allocIndex]; VkDeviceMemory hMemory = currAlloc->GetMemory(); - + /* There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory before vkFreeMemory. @@ -14575,11 +16408,10 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); } */ - - FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); + FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); currAlloc->SetUserData(this, VMA_NULL); - currAlloc->Dtor(); m_AllocationObjectAllocator.Free(currAlloc); } @@ -14625,10 +16457,10 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } } - *pAllocation = m_AllocationObjectAllocator.Allocate(); - (*pAllocation)->Ctor(m_CurrentFrameIndex.load(), isUserDataString); + *pAllocation = m_AllocationObjectAllocator.Allocate(m_CurrentFrameIndex.load(), isUserDataString); (*pAllocation)->InitDedicatedAllocation(memTypeIndex, hMemory, suballocType, pMappedData, size); (*pAllocation)->SetUserData(this, pUserData); + m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); @@ -14643,8 +16475,8 @@ void VmaAllocator_T::GetBufferMemoryRequirements( bool& requiresDedicatedAllocation, bool& prefersDedicatedAllocation) const { -#if VMA_DEDICATED_ALLOCATION - if(m_UseKhrDedicatedAllocation) +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) { VkBufferMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR }; memReqInfo.buffer = hBuffer; @@ -14652,7 +16484,7 @@ void VmaAllocator_T::GetBufferMemoryRequirements( VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; - memReq2.pNext = &memDedicatedReq; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); @@ -14661,7 +16493,7 @@ void VmaAllocator_T::GetBufferMemoryRequirements( prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); } else -#endif // #if VMA_DEDICATED_ALLOCATION +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 { (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq); requiresDedicatedAllocation = false; @@ -14675,8 +16507,8 @@ void VmaAllocator_T::GetImageMemoryRequirements( bool& requiresDedicatedAllocation, bool& prefersDedicatedAllocation) const { -#if VMA_DEDICATED_ALLOCATION - if(m_UseKhrDedicatedAllocation) +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) { VkImageMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR }; memReqInfo.image = hImage; @@ -14684,7 +16516,7 @@ void VmaAllocator_T::GetImageMemoryRequirements( VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; - memReq2.pNext = &memDedicatedReq; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); @@ -14693,7 +16525,7 @@ void VmaAllocator_T::GetImageMemoryRequirements( prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); } else -#endif // #if VMA_DEDICATED_ALLOCATION +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 { (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq); requiresDedicatedAllocation = false; @@ -14706,6 +16538,7 @@ VkResult VmaAllocator_T::AllocateMemory( bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, + VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, @@ -14757,11 +16590,20 @@ VkResult VmaAllocator_T::AllocateMemory( const VkDeviceSize alignmentForPool = VMA_MAX( vkMemReq.alignment, GetMemoryTypeMinAlignment(createInfo.pool->m_BlockVector.GetMemoryTypeIndex())); + + VmaAllocationCreateInfo createInfoForPool = createInfo; + // If memory type is not HOST_VISIBLE, disable MAPPED. + if((createInfoForPool.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && + (m_MemProps.memoryTypes[createInfo.pool->m_BlockVector.GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + createInfoForPool.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; + } + return createInfo.pool->m_BlockVector.Allocate( m_CurrentFrameIndex.load(), vkMemReq.size, alignmentForPool, - createInfo, + createInfoForPool, suballocType, allocationCount, pAllocations); @@ -14783,6 +16625,7 @@ VkResult VmaAllocator_T::AllocateMemory( alignmentForMemType, requiresDedicatedAllocation || prefersDedicatedAllocation, dedicatedBuffer, + dedicatedBufferUsage, dedicatedImage, createInfo, memTypeIndex, @@ -14808,12 +16651,13 @@ VkResult VmaAllocator_T::AllocateMemory( alignmentForMemType = VMA_MAX( vkMemReq.alignment, GetMemoryTypeMinAlignment(memTypeIndex)); - + res = AllocateMemoryOfType( vkMemReq.size, alignmentForMemType, requiresDedicatedAllocation || prefersDedicatedAllocation, dedicatedBuffer, + dedicatedBufferUsage, dedicatedImage, createInfo, memTypeIndex, @@ -14887,8 +16731,9 @@ void VmaAllocator_T::FreeMemory( } } + // Do this regardless of whether the allocation is lost. Lost allocations still account to Budget.AllocationBytes. + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); allocation->SetUserData(this, VMA_NULL); - allocation->Dtor(); m_AllocationObjectAllocator.Free(allocation); } } @@ -14898,6 +16743,7 @@ VkResult VmaAllocator_T::ResizeAllocation( const VmaAllocation alloc, VkDeviceSize newSize) { + // This function is deprecated and so it does nothing. It's left for backward compatibility. if(newSize == 0 || alloc->GetLastUseFrameIndex() == VMA_FRAME_INDEX_LOST) { return VK_ERROR_VALIDATION_FAILED_EXT; @@ -14906,26 +16752,7 @@ VkResult VmaAllocator_T::ResizeAllocation( { return VK_SUCCESS; } - - switch(alloc->GetType()) - { - case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - return VK_ERROR_FEATURE_NOT_PRESENT; - case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: - if(alloc->GetBlock()->m_pMetadata->ResizeAllocation(alloc, newSize)) - { - alloc->ChangeSize(newSize); - VMA_HEAVY_ASSERT(alloc->GetBlock()->m_pMetadata->Validate()); - return VK_SUCCESS; - } - else - { - return VK_ERROR_OUT_OF_POOL_MEMORY; - } - default: - VMA_ASSERT(0); - return VK_ERROR_VALIDATION_FAILED_EXT; - } + return VK_ERROR_OUT_OF_POOL_MEMORY; } void VmaAllocator_T::CalculateStats(VmaStats* pStats) @@ -14936,7 +16763,7 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats) InitStatInfo(pStats->memoryType[i]); for(size_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) InitStatInfo(pStats->memoryHeap[i]); - + // Process default pools. for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { @@ -14979,6 +16806,58 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats) VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]); } +void VmaAllocator_T::GetBudget(VmaBudget* outBudget, uint32_t firstHeap, uint32_t heapCount) +{ +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + if(m_Budget.m_OperationsSinceBudgetFetch < 30) + { + VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex); + for(uint32_t i = 0; i < heapCount; ++i, ++outBudget) + { + const uint32_t heapIndex = firstHeap + i; + + outBudget->blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudget->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + + if(m_Budget.m_VulkanUsage[heapIndex] + outBudget->blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) + { + outBudget->usage = m_Budget.m_VulkanUsage[heapIndex] + + outBudget->blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + } + else + { + outBudget->usage = 0; + } + + // Have to take MIN with heap size because explicit HeapSizeLimit is included in it. + outBudget->budget = VMA_MIN( + m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size); + } + } + else + { + UpdateVulkanBudget(); // Outside of mutex lock + GetBudget(outBudget, firstHeap, heapCount); // Recursion + } + } + else +#endif + { + for(uint32_t i = 0; i < heapCount; ++i, ++outBudget) + { + const uint32_t heapIndex = firstHeap + i; + + outBudget->blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudget->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + + outBudget->usage = outBudget->blockBytes; + outBudget->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. + } + } +} + static const uint32_t VMA_VENDOR_ID_AMD = 4098; VkResult VmaAllocator_T::DefragmentationBegin( @@ -15001,7 +16880,7 @@ VkResult VmaAllocator_T::DefragmentationBegin( VkResult res = (*pContext)->Defragment( info.maxCpuBytesToMove, info.maxCpuAllocationsToMove, info.maxGpuBytesToMove, info.maxGpuAllocationsToMove, - info.commandBuffer, pStats); + info.commandBuffer, pStats, info.flags); if(res != VK_NOT_READY) { @@ -15019,6 +16898,19 @@ VkResult VmaAllocator_T::DefragmentationEnd( return VK_SUCCESS; } +VkResult VmaAllocator_T::DefragmentationPassBegin( + VmaDefragmentationPassInfo* pInfo, + VmaDefragmentationContext context) +{ + return context->DefragmentPassBegin(pInfo); +} +VkResult VmaAllocator_T::DefragmentationPassEnd( + VmaDefragmentationContext context) +{ + return context->DefragmentPassEnd(); + +} + void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) { if(hAllocation->CanBecomeLost()) @@ -15157,6 +17049,12 @@ VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPoo { return VK_ERROR_INITIALIZATION_FAILED; } + // Memory type index out of range or forbidden. + if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || + ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); @@ -15200,6 +17098,13 @@ void VmaAllocator_T::GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats) void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) { m_CurrentFrameIndex.store(frameIndex); + +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + UpdateVulkanBudget(); + } +#endif // #if VMA_MEMORY_BUDGET } void VmaAllocator_T::MakePoolAllocationsLost( @@ -15268,8 +17173,7 @@ VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) void VmaAllocator_T::CreateLostAllocation(VmaAllocation* pAllocation) { - *pAllocation = m_AllocationObjectAllocator.Allocate(); - (*pAllocation)->Ctor(VMA_FRAME_INDEX_LOST, false); + *pAllocation = m_AllocationObjectAllocator.Allocate(VMA_FRAME_INDEX_LOST, false); (*pAllocation)->InitLost(); } @@ -15277,31 +17181,47 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc { const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); - VkResult res; - if(m_HeapSizeLimit[heapIndex] != VK_WHOLE_SIZE) + // HeapSizeLimit is in effect for this heap. + if((m_HeapSizeLimitMask & (1u << heapIndex)) != 0) { - VmaMutexLock lock(m_HeapSizeLimitMutex, m_UseMutex); - if(m_HeapSizeLimit[heapIndex] >= pAllocateInfo->allocationSize) + const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; + VkDeviceSize blockBytes = m_Budget.m_BlockBytes[heapIndex]; + for(;;) { - res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); - if(res == VK_SUCCESS) + const VkDeviceSize blockBytesAfterAllocation = blockBytes + pAllocateInfo->allocationSize; + if(blockBytesAfterAllocation > heapSize) { - m_HeapSizeLimit[heapIndex] -= pAllocateInfo->allocationSize; + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + if(m_Budget.m_BlockBytes[heapIndex].compare_exchange_strong(blockBytes, blockBytesAfterAllocation)) + { + break; } - } - else - { - res = VK_ERROR_OUT_OF_DEVICE_MEMORY; } } else { - res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); + m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; } - if(res == VK_SUCCESS && m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) + // VULKAN CALL vkAllocateMemory. + VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); + + if(res == VK_SUCCESS) { - (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize); +#if VMA_MEMORY_BUDGET + ++m_Budget.m_OperationsSinceBudgetFetch; +#endif + + // Informative callback. + if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) + { + (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData); + } + } + else + { + m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; } return res; @@ -15309,18 +17229,77 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory) { + // Informative callback. if(m_DeviceMemoryCallbacks.pfnFree != VMA_NULL) { - (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size); + (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size, m_DeviceMemoryCallbacks.pUserData); } + // VULKAN CALL vkFreeMemory. (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); - if(m_HeapSizeLimit[heapIndex] != VK_WHOLE_SIZE) + m_Budget.m_BlockBytes[MemoryTypeIndexToHeapIndex(memoryType)] -= size; +} + +VkResult VmaAllocator_T::BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext) +{ + if(pNext != VMA_NULL) { - VmaMutexLock lock(m_HeapSizeLimitMutex, m_UseMutex); - m_HeapSizeLimit[heapIndex] += size; +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL) + { + VkBindBufferMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.buffer = buffer; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindBufferMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif // #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindBufferMemory)(m_hDevice, buffer, memory, memoryOffset); + } +} + +VkResult VmaAllocator_T::BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext) +{ + if(pNext != VMA_NULL) + { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL) + { + VkBindImageMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.image = image; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindImageMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif // #if VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindImageMemory)(m_hDevice, image, memory, memoryOffset); } } @@ -15372,23 +17351,23 @@ void VmaAllocator_T::Unmap(VmaAllocation hAllocation) } } -VkResult VmaAllocator_T::BindBufferMemory(VmaAllocation hAllocation, VkBuffer hBuffer) +VkResult VmaAllocator_T::BindBufferMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext) { VkResult res = VK_SUCCESS; switch(hAllocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - res = GetVulkanFunctions().vkBindBufferMemory( - m_hDevice, - hBuffer, - hAllocation->GetMemory(), - 0); //memoryOffset + res = BindVulkanBuffer(hAllocation->GetMemory(), allocationLocalOffset, hBuffer, pNext); break; case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: { - VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); + VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block. Is the allocation lost?"); - res = pBlock->BindBufferMemory(this, hAllocation, hBuffer); + res = pBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext); break; } default: @@ -15397,23 +17376,23 @@ VkResult VmaAllocator_T::BindBufferMemory(VmaAllocation hAllocation, VkBuffer hB return res; } -VkResult VmaAllocator_T::BindImageMemory(VmaAllocation hAllocation, VkImage hImage) +VkResult VmaAllocator_T::BindImageMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext) { VkResult res = VK_SUCCESS; switch(hAllocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - res = GetVulkanFunctions().vkBindImageMemory( - m_hDevice, - hImage, - hAllocation->GetMemory(), - 0); //memoryOffset + res = BindVulkanImage(hAllocation->GetMemory(), allocationLocalOffset, hImage, pNext); break; case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: { VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block. Is the allocation lost?"); - res = pBlock->BindImageMemory(this, hAllocation, hImage); + res = pBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext); break; } default: @@ -15422,83 +17401,74 @@ VkResult VmaAllocator_T::BindImageMemory(VmaAllocation hAllocation, VkImage hIma return res; } -void VmaAllocator_T::FlushOrInvalidateAllocation( +VkResult VmaAllocator_T::FlushOrInvalidateAllocation( VmaAllocation hAllocation, VkDeviceSize offset, VkDeviceSize size, VMA_CACHE_OPERATION op) { - const uint32_t memTypeIndex = hAllocation->GetMemoryTypeIndex(); - if(size > 0 && IsMemoryTypeNonCoherent(memTypeIndex)) + VkResult res = VK_SUCCESS; + + VkMappedMemoryRange memRange = {}; + if(GetFlushOrInvalidateRange(hAllocation, offset, size, memRange)) { - const VkDeviceSize allocationSize = hAllocation->GetSize(); - VMA_ASSERT(offset <= allocationSize); - - const VkDeviceSize nonCoherentAtomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; - - VkMappedMemoryRange memRange = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE }; - memRange.memory = hAllocation->GetMemory(); - - switch(hAllocation->GetType()) - { - case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - memRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); - if(size == VK_WHOLE_SIZE) - { - memRange.size = allocationSize - memRange.offset; - } - else - { - VMA_ASSERT(offset + size <= allocationSize); - memRange.size = VMA_MIN( - VmaAlignUp(size + (offset - memRange.offset), nonCoherentAtomSize), - allocationSize - memRange.offset); - } - break; - - case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: - { - // 1. Still within this allocation. - memRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); - if(size == VK_WHOLE_SIZE) - { - size = allocationSize - offset; - } - else - { - VMA_ASSERT(offset + size <= allocationSize); - } - memRange.size = VmaAlignUp(size + (offset - memRange.offset), nonCoherentAtomSize); - - // 2. Adjust to whole block. - const VkDeviceSize allocationOffset = hAllocation->GetOffset(); - VMA_ASSERT(allocationOffset % nonCoherentAtomSize == 0); - const VkDeviceSize blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize(); - memRange.offset += allocationOffset; - memRange.size = VMA_MIN(memRange.size, blockSize - memRange.offset); - - break; - } - - default: - VMA_ASSERT(0); - } - switch(op) { case VMA_CACHE_FLUSH: - (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &memRange); + res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &memRange); break; case VMA_CACHE_INVALIDATE: - (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &memRange); + res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &memRange); break; default: VMA_ASSERT(0); } } // else: Just ignore this call. + return res; } -void VmaAllocator_T::FreeDedicatedMemory(VmaAllocation allocation) +VkResult VmaAllocator_T::FlushOrInvalidateAllocations( + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + VMA_CACHE_OPERATION op) +{ + typedef VmaStlAllocator RangeAllocator; + typedef VmaSmallVector RangeVector; + RangeVector ranges = RangeVector(RangeAllocator(GetAllocationCallbacks())); + + for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + const VmaAllocation alloc = allocations[allocIndex]; + const VkDeviceSize offset = offsets != VMA_NULL ? offsets[allocIndex] : 0; + const VkDeviceSize size = sizes != VMA_NULL ? sizes[allocIndex] : VK_WHOLE_SIZE; + VkMappedMemoryRange newRange; + if(GetFlushOrInvalidateRange(alloc, offset, size, newRange)) + { + ranges.push_back(newRange); + } + } + + VkResult res = VK_SUCCESS; + if(!ranges.empty()) + { + switch(op) + { + case VMA_CACHE_FLUSH: + res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + case VMA_CACHE_INVALIDATE: + res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + default: + VMA_ASSERT(0); + } + } + // else: Just ignore this call. + return res; +} + +void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) { VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); @@ -15512,7 +17482,7 @@ void VmaAllocator_T::FreeDedicatedMemory(VmaAllocation allocation) } VkDeviceMemory hMemory = allocation->GetMemory(); - + /* There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory before vkFreeMemory. @@ -15522,12 +17492,164 @@ void VmaAllocator_T::FreeDedicatedMemory(VmaAllocation allocation) (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); } */ - + FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); VMA_DEBUG_LOG(" Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex); } +uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const +{ + VkBufferCreateInfo dummyBufCreateInfo; + VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo); + + uint32_t memoryTypeBits = 0; + + // Create buffer. + VkBuffer buf = VK_NULL_HANDLE; + VkResult res = (*GetVulkanFunctions().vkCreateBuffer)( + m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf); + if(res == VK_SUCCESS) + { + // Query for supported memory types. + VkMemoryRequirements memReq; + (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq); + memoryTypeBits = memReq.memoryTypeBits; + + // Destroy buffer. + (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks()); + } + + return memoryTypeBits; +} + +uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const +{ + // Make sure memory information is already fetched. + VMA_ASSERT(GetMemoryTypeCount() > 0); + + uint32_t memoryTypeBits = UINT32_MAX; + + if(!m_UseAmdDeviceCoherentMemory) + { + // Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) + { + memoryTypeBits &= ~(1u << memTypeIndex); + } + } + } + + return memoryTypeBits; +} + +bool VmaAllocator_T::GetFlushOrInvalidateRange( + VmaAllocation allocation, + VkDeviceSize offset, VkDeviceSize size, + VkMappedMemoryRange& outRange) const +{ + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + if(size > 0 && IsMemoryTypeNonCoherent(memTypeIndex)) + { + const VkDeviceSize nonCoherentAtomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; + const VkDeviceSize allocationSize = allocation->GetSize(); + VMA_ASSERT(offset <= allocationSize); + + outRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + outRange.pNext = VMA_NULL; + outRange.memory = allocation->GetMemory(); + + switch(allocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); + if(size == VK_WHOLE_SIZE) + { + outRange.size = allocationSize - outRange.offset; + } + else + { + VMA_ASSERT(offset + size <= allocationSize); + outRange.size = VMA_MIN( + VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize), + allocationSize - outRange.offset); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + // 1. Still within this allocation. + outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); + if(size == VK_WHOLE_SIZE) + { + size = allocationSize - offset; + } + else + { + VMA_ASSERT(offset + size <= allocationSize); + } + outRange.size = VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize); + + // 2. Adjust to whole block. + const VkDeviceSize allocationOffset = allocation->GetOffset(); + VMA_ASSERT(allocationOffset % nonCoherentAtomSize == 0); + const VkDeviceSize blockSize = allocation->GetBlock()->m_pMetadata->GetSize(); + outRange.offset += allocationOffset; + outRange.size = VMA_MIN(outRange.size, blockSize - outRange.offset); + + break; + } + default: + VMA_ASSERT(0); + } + return true; + } + return false; +} + +#if VMA_MEMORY_BUDGET + +void VmaAllocator_T::UpdateVulkanBudget() +{ + VMA_ASSERT(m_UseExtMemoryBudget); + + VkPhysicalDeviceMemoryProperties2KHR memProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR }; + + VkPhysicalDeviceMemoryBudgetPropertiesEXT budgetProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT }; + VmaPnextChainPushFront(&memProps, &budgetProps); + + GetVulkanFunctions().vkGetPhysicalDeviceMemoryProperties2KHR(m_PhysicalDevice, &memProps); + + { + VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex); + + for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) + { + m_Budget.m_VulkanUsage[heapIndex] = budgetProps.heapUsage[heapIndex]; + m_Budget.m_VulkanBudget[heapIndex] = budgetProps.heapBudget[heapIndex]; + m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = m_Budget.m_BlockBytes[heapIndex].load(); + + // Some bugged drivers return the budget incorrectly, e.g. 0 or much bigger than heap size. + if(m_Budget.m_VulkanBudget[heapIndex] == 0) + { + m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. + } + else if(m_Budget.m_VulkanBudget[heapIndex] > m_MemProps.memoryHeaps[heapIndex].size) + { + m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size; + } + if(m_Budget.m_VulkanUsage[heapIndex] == 0 && m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] > 0) + { + m_Budget.m_VulkanUsage[heapIndex] = m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + } + } + m_Budget.m_OperationsSinceBudgetFetch = 0; + } +} + +#endif // #if VMA_MEMORY_BUDGET + void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern) { if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && @@ -15549,6 +17671,17 @@ void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pat } } +uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() +{ + uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load(); + if(memoryTypeBits == UINT32_MAX) + { + memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits(); + m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits); + } + return memoryTypeBits; +} + #if VMA_STATS_STRING_ENABLED void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) @@ -15571,7 +17704,7 @@ void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) json.BeginString("Type "); json.ContinueString(memTypeIndex); json.EndString(); - + json.BeginArray(); for(size_t i = 0; i < pDedicatedAllocVector->size(); ++i) @@ -15642,17 +17775,19 @@ void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) //////////////////////////////////////////////////////////////////////////////// // Public interface -VkResult vmaCreateAllocator( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( const VmaAllocatorCreateInfo* pCreateInfo, VmaAllocator* pAllocator) { VMA_ASSERT(pCreateInfo && pAllocator); + VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 || + (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 2)); VMA_DEBUG_LOG("vmaCreateAllocator"); *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo); return (*pAllocator)->Init(pCreateInfo); } -void vmaDestroyAllocator( +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( VmaAllocator allocator) { if(allocator != VK_NULL_HANDLE) @@ -15663,7 +17798,15 @@ void vmaDestroyAllocator( } } -void vmaGetPhysicalDeviceProperties( +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator allocator, VmaAllocatorInfo* pAllocatorInfo) +{ + VMA_ASSERT(allocator && pAllocatorInfo); + pAllocatorInfo->instance = allocator->m_hInstance; + pAllocatorInfo->physicalDevice = allocator->GetPhysicalDevice(); + pAllocatorInfo->device = allocator->m_hDevice; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( VmaAllocator allocator, const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties) { @@ -15671,7 +17814,7 @@ void vmaGetPhysicalDeviceProperties( *ppPhysicalDeviceProperties = &allocator->m_PhysicalDeviceProperties; } -void vmaGetMemoryProperties( +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( VmaAllocator allocator, const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties) { @@ -15679,7 +17822,7 @@ void vmaGetMemoryProperties( *ppPhysicalDeviceMemoryProperties = &allocator->m_MemProps; } -void vmaGetMemoryTypeProperties( +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( VmaAllocator allocator, uint32_t memoryTypeIndex, VkMemoryPropertyFlags* pFlags) @@ -15689,7 +17832,7 @@ void vmaGetMemoryTypeProperties( *pFlags = allocator->m_MemProps.memoryTypes[memoryTypeIndex].propertyFlags; } -void vmaSetCurrentFrameIndex( +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( VmaAllocator allocator, uint32_t frameIndex) { @@ -15701,7 +17844,7 @@ void vmaSetCurrentFrameIndex( allocator->SetCurrentFrameIndex(frameIndex); } -void vmaCalculateStats( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( VmaAllocator allocator, VmaStats* pStats) { @@ -15710,9 +17853,18 @@ void vmaCalculateStats( allocator->CalculateStats(pStats); } +VMA_CALL_PRE void VMA_CALL_POST vmaGetBudget( + VmaAllocator allocator, + VmaBudget* pBudget) +{ + VMA_ASSERT(allocator && pBudget); + VMA_DEBUG_GLOBAL_MUTEX_LOCK + allocator->GetBudget(pBudget, 0, allocator->GetMemoryHeapCount()); +} + #if VMA_STATS_STRING_ENABLED -void vmaBuildStatsString( +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( VmaAllocator allocator, char** ppStatsString, VkBool32 detailedMap) @@ -15725,12 +17877,15 @@ void vmaBuildStatsString( VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); json.BeginObject(); + VmaBudget budget[VK_MAX_MEMORY_HEAPS]; + allocator->GetBudget(budget, 0, allocator->GetMemoryHeapCount()); + VmaStats stats; allocator->CalculateStats(&stats); json.WriteString("Total"); VmaPrintStatInfo(json, stats.total); - + for(uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) { json.BeginString("Heap "); @@ -15749,6 +17904,20 @@ void vmaBuildStatsString( } json.EndArray(); + json.WriteString("Budget"); + json.BeginObject(); + { + json.WriteString("BlockBytes"); + json.WriteNumber(budget[heapIndex].blockBytes); + json.WriteString("AllocationBytes"); + json.WriteNumber(budget[heapIndex].allocationBytes); + json.WriteString("Usage"); + json.WriteNumber(budget[heapIndex].usage); + json.WriteString("Budget"); + json.WriteNumber(budget[heapIndex].budget); + } + json.EndObject(); + if(stats.memoryHeap[heapIndex].blockCount > 0) { json.WriteString("Stats"); @@ -15788,6 +17957,18 @@ void vmaBuildStatsString( { json.WriteString("LAZILY_ALLOCATED"); } + if((flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) != 0) + { + json.WriteString(" PROTECTED"); + } + if((flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) + { + json.WriteString(" DEVICE_COHERENT"); + } + if((flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) != 0) + { + json.WriteString(" DEVICE_UNCACHED"); + } json.EndArray(); if(stats.memoryType[typeIndex].blockCount > 0) @@ -15820,7 +18001,7 @@ void vmaBuildStatsString( *ppStatsString = pChars; } -void vmaFreeStatsString( +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( VmaAllocator allocator, char* pStatsString) { @@ -15837,7 +18018,7 @@ void vmaFreeStatsString( /* This function is not protected by any mutex because it just reads immutable data. */ -VkResult vmaFindMemoryTypeIndex( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( VmaAllocator allocator, uint32_t memoryTypeBits, const VmaAllocationCreateInfo* pAllocationCreateInfo, @@ -15847,19 +18028,16 @@ VkResult vmaFindMemoryTypeIndex( VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + memoryTypeBits &= allocator->GetGlobalMemoryTypeBits(); + if(pAllocationCreateInfo->memoryTypeBits != 0) { memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; } - + uint32_t requiredFlags = pAllocationCreateInfo->requiredFlags; uint32_t preferredFlags = pAllocationCreateInfo->preferredFlags; - - const bool mapped = (pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - if(mapped) - { - preferredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - } + uint32_t notPreferredFlags = 0; // Convert usage to requiredFlags and preferredFlags. switch(pAllocationCreateInfo->usage) @@ -15884,12 +18062,26 @@ VkResult vmaFindMemoryTypeIndex( break; case VMA_MEMORY_USAGE_GPU_TO_CPU: requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - preferredFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + preferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + break; + case VMA_MEMORY_USAGE_CPU_COPY: + notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: + requiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; break; default: + VMA_ASSERT(0); break; } + // Avoid DEVICE_COHERENT unless explicitly requested. + if(((pAllocationCreateInfo->requiredFlags | pAllocationCreateInfo->preferredFlags) & + (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) + { + notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY; + } + *pMemoryTypeIndex = UINT32_MAX; uint32_t minCost = UINT32_MAX; for(uint32_t memTypeIndex = 0, memTypeBit = 1; @@ -15905,7 +18097,8 @@ VkResult vmaFindMemoryTypeIndex( if((requiredFlags & ~currFlags) == 0) { // Calculate cost as number of bits from preferredFlags not present in this memory type. - uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags); + uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + + VmaCountBitsSet(currFlags & notPreferredFlags); // Remember memory type with lowest cost. if(currCost < minCost) { @@ -15922,7 +18115,7 @@ VkResult vmaFindMemoryTypeIndex( return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; } -VkResult vmaFindMemoryTypeIndexForBufferInfo( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( VmaAllocator allocator, const VkBufferCreateInfo* pBufferCreateInfo, const VmaAllocationCreateInfo* pAllocationCreateInfo, @@ -15955,7 +18148,7 @@ VkResult vmaFindMemoryTypeIndexForBufferInfo( return res; } -VkResult vmaFindMemoryTypeIndexForImageInfo( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( VmaAllocator allocator, const VkImageCreateInfo* pImageCreateInfo, const VmaAllocationCreateInfo* pAllocationCreateInfo, @@ -15988,44 +18181,44 @@ VkResult vmaFindMemoryTypeIndexForImageInfo( return res; } -VkResult vmaCreatePool( - VmaAllocator allocator, - const VmaPoolCreateInfo* pCreateInfo, - VmaPool* pPool) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator allocator, + const VmaPoolCreateInfo* pCreateInfo, + VmaPool* pPool) { VMA_ASSERT(allocator && pCreateInfo && pPool); - + VMA_DEBUG_LOG("vmaCreatePool"); - + VMA_DEBUG_GLOBAL_MUTEX_LOCK - + VkResult res = allocator->CreatePool(pCreateInfo, pPool); - + #if VMA_RECORDING_ENABLED if(allocator->GetRecorder() != VMA_NULL) { allocator->GetRecorder()->RecordCreatePool(allocator->GetCurrentFrameIndex(), *pCreateInfo, *pPool); } #endif - + return res; } -void vmaDestroyPool( +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( VmaAllocator allocator, VmaPool pool) { VMA_ASSERT(allocator); - + if(pool == VK_NULL_HANDLE) { return; } - + VMA_DEBUG_LOG("vmaDestroyPool"); - + VMA_DEBUG_GLOBAL_MUTEX_LOCK - + #if VMA_RECORDING_ENABLED if(allocator->GetRecorder() != VMA_NULL) { @@ -16036,7 +18229,7 @@ void vmaDestroyPool( allocator->DestroyPool(pool); } -void vmaGetPoolStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( VmaAllocator allocator, VmaPool pool, VmaPoolStats* pPoolStats) @@ -16048,7 +18241,7 @@ void vmaGetPoolStats( allocator->GetPoolStats(pool, pPoolStats); } -void vmaMakePoolAllocationsLost( +VMA_CALL_PRE void VMA_CALL_POST vmaMakePoolAllocationsLost( VmaAllocator allocator, VmaPool pool, size_t* pLostAllocationCount) @@ -16067,7 +18260,7 @@ void vmaMakePoolAllocationsLost( allocator->MakePoolAllocationsLost(pool, pLostAllocationCount); } -VkResult vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) { VMA_ASSERT(allocator && pool); @@ -16078,7 +18271,42 @@ VkResult vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) return allocator->CheckPoolCorruption(pool); } -VkResult vmaAllocateMemory( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char** ppName) +{ + VMA_ASSERT(allocator && pool && ppName); + + VMA_DEBUG_LOG("vmaGetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *ppName = pool->GetName(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char* pName) +{ + VMA_ASSERT(allocator && pool); + + VMA_DEBUG_LOG("vmaSetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + pool->SetName(pName); + +#if VMA_RECORDING_ENABLED + if(allocator->GetRecorder() != VMA_NULL) + { + allocator->GetRecorder()->RecordSetPoolName(allocator->GetCurrentFrameIndex(), pool, pName); + } +#endif +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( VmaAllocator allocator, const VkMemoryRequirements* pVkMemoryRequirements, const VmaAllocationCreateInfo* pCreateInfo, @@ -16091,11 +18319,12 @@ VkResult vmaAllocateMemory( VMA_DEBUG_GLOBAL_MUTEX_LOCK - VkResult result = allocator->AllocateMemory( + VkResult result = allocator->AllocateMemory( *pVkMemoryRequirements, false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer + UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, @@ -16112,16 +18341,16 @@ VkResult vmaAllocateMemory( *pAllocation); } #endif - + if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) { allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); } - return result; + return result; } -VkResult vmaAllocateMemoryPages( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( VmaAllocator allocator, const VkMemoryRequirements* pVkMemoryRequirements, const VmaAllocationCreateInfo* pCreateInfo, @@ -16140,11 +18369,12 @@ VkResult vmaAllocateMemoryPages( VMA_DEBUG_GLOBAL_MUTEX_LOCK - VkResult result = allocator->AllocateMemory( + VkResult result = allocator->AllocateMemory( *pVkMemoryRequirements, false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer + UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, @@ -16162,7 +18392,7 @@ VkResult vmaAllocateMemoryPages( pAllocations); } #endif - + if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) { for(size_t i = 0; i < allocationCount; ++i) @@ -16171,10 +18401,10 @@ VkResult vmaAllocateMemoryPages( } } - return result; + return result; } -VkResult vmaAllocateMemoryForBuffer( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( VmaAllocator allocator, VkBuffer buffer, const VmaAllocationCreateInfo* pCreateInfo, @@ -16199,6 +18429,7 @@ VkResult vmaAllocateMemoryForBuffer( requiresDedicatedAllocation, prefersDedicatedAllocation, buffer, // dedicatedBuffer + UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage *pCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, @@ -16223,10 +18454,10 @@ VkResult vmaAllocateMemoryForBuffer( allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); } - return result; + return result; } -VkResult vmaAllocateMemoryForImage( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( VmaAllocator allocator, VkImage image, const VmaAllocationCreateInfo* pCreateInfo, @@ -16250,6 +18481,7 @@ VkResult vmaAllocateMemoryForImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer + UINT32_MAX, // dedicatedBufferUsage image, // dedicatedImage *pCreateInfo, VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, @@ -16274,22 +18506,22 @@ VkResult vmaAllocateMemoryForImage( allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); } - return result; + return result; } -void vmaFreeMemory( +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( VmaAllocator allocator, VmaAllocation allocation) { VMA_ASSERT(allocator); - + if(allocation == VK_NULL_HANDLE) { return; } - + VMA_DEBUG_LOG("vmaFreeMemory"); - + VMA_DEBUG_GLOBAL_MUTEX_LOCK #if VMA_RECORDING_ENABLED @@ -16300,16 +18532,16 @@ void vmaFreeMemory( allocation); } #endif - + allocator->FreeMemory( 1, // allocationCount &allocation); } -void vmaFreeMemoryPages( +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( VmaAllocator allocator, size_t allocationCount, - VmaAllocation* pAllocations) + const VmaAllocation* pAllocations) { if(allocationCount == 0) { @@ -16317,9 +18549,9 @@ void vmaFreeMemoryPages( } VMA_ASSERT(allocator); - + VMA_DEBUG_LOG("vmaFreeMemoryPages"); - + VMA_DEBUG_GLOBAL_MUTEX_LOCK #if VMA_RECORDING_ENABLED @@ -16331,35 +18563,25 @@ void vmaFreeMemoryPages( pAllocations); } #endif - + allocator->FreeMemory(allocationCount, pAllocations); } -VkResult vmaResizeAllocation( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaResizeAllocation( VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize newSize) { VMA_ASSERT(allocator && allocation); - + VMA_DEBUG_LOG("vmaResizeAllocation"); - + VMA_DEBUG_GLOBAL_MUTEX_LOCK -#if VMA_RECORDING_ENABLED - if(allocator->GetRecorder() != VMA_NULL) - { - allocator->GetRecorder()->RecordResizeAllocation( - allocator->GetCurrentFrameIndex(), - allocation, - newSize); - } -#endif - return allocator->ResizeAllocation(allocation, newSize); } -void vmaGetAllocationInfo( +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( VmaAllocator allocator, VmaAllocation allocation, VmaAllocationInfo* pAllocationInfo) @@ -16380,7 +18602,7 @@ void vmaGetAllocationInfo( allocator->GetAllocationInfo(allocation, pAllocationInfo); } -VkBool32 vmaTouchAllocation( +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaTouchAllocation( VmaAllocator allocator, VmaAllocation allocation) { @@ -16400,7 +18622,7 @@ VkBool32 vmaTouchAllocation( return allocator->TouchAllocation(allocation); } -void vmaSetAllocationUserData( +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( VmaAllocator allocator, VmaAllocation allocation, void* pUserData) @@ -16422,7 +18644,7 @@ void vmaSetAllocationUserData( #endif } -void vmaCreateLostAllocation( +VMA_CALL_PRE void VMA_CALL_POST vmaCreateLostAllocation( VmaAllocator allocator, VmaAllocation* pAllocation) { @@ -16442,7 +18664,7 @@ void vmaCreateLostAllocation( #endif } -VkResult vmaMapMemory( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( VmaAllocator allocator, VmaAllocation allocation, void** ppData) @@ -16465,7 +18687,7 @@ VkResult vmaMapMemory( return res; } -void vmaUnmapMemory( +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( VmaAllocator allocator, VmaAllocation allocation) { @@ -16485,7 +18707,7 @@ void vmaUnmapMemory( allocator->Unmap(allocation); } -void vmaFlushAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) { VMA_ASSERT(allocator && allocation); @@ -16493,7 +18715,7 @@ void vmaFlushAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDevi VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); + const VkResult res = allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); #if VMA_RECORDING_ENABLED if(allocator->GetRecorder() != VMA_NULL) @@ -16503,9 +18725,11 @@ void vmaFlushAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDevi allocation, offset, size); } #endif + + return res; } -void vmaInvalidateAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(VmaAllocator allocator, VmaAllocation allocation, VkDeviceSize offset, VkDeviceSize size) { VMA_ASSERT(allocator && allocation); @@ -16513,7 +18737,7 @@ void vmaInvalidateAllocation(VmaAllocator allocator, VmaAllocation allocation, V VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); + const VkResult res = allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); #if VMA_RECORDING_ENABLED if(allocator->GetRecorder() != VMA_NULL) @@ -16523,9 +18747,75 @@ void vmaInvalidateAllocation(VmaAllocator allocator, VmaAllocation allocation, V allocation, offset, size); } #endif + + return res; } -VkResult vmaCheckCorruption(VmaAllocator allocator, uint32_t memoryTypeBits) +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaFlushAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + const VkResult res = allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); + +#if VMA_RECORDING_ENABLED + if(allocator->GetRecorder() != VMA_NULL) + { + //TODO + } +#endif + + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaInvalidateAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + const VkResult res = allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); + +#if VMA_RECORDING_ENABLED + if(allocator->GetRecorder() != VMA_NULL) + { + //TODO + } +#endif + + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption(VmaAllocator allocator, uint32_t memoryTypeBits) { VMA_ASSERT(allocator); @@ -16536,9 +18826,9 @@ VkResult vmaCheckCorruption(VmaAllocator allocator, uint32_t memoryTypeBits) return allocator->CheckCorruption(memoryTypeBits); } -VkResult vmaDefragment( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( VmaAllocator allocator, - VmaAllocation* pAllocations, + const VmaAllocation* pAllocations, size_t allocationCount, VkBool32* pAllocationsChanged, const VmaDefragmentationInfo *pDefragmentationInfo, @@ -16571,7 +18861,7 @@ VkResult vmaDefragment( return res; } -VkResult vmaDefragmentationBegin( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( VmaAllocator allocator, const VmaDefragmentationInfo2* pInfo, VmaDefragmentationStats* pStats, @@ -16607,7 +18897,7 @@ VkResult vmaDefragmentationBegin( return res; } -VkResult vmaDefragmentationEnd( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( VmaAllocator allocator, VmaDefragmentationContext context) { @@ -16635,7 +18925,43 @@ VkResult vmaDefragmentationEnd( } } -VkResult vmaBindBufferMemory( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator allocator, + VmaDefragmentationContext context, + VmaDefragmentationPassInfo* pInfo + ) +{ + VMA_ASSERT(allocator); + VMA_ASSERT(pInfo); + + VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(context == VK_NULL_HANDLE) + { + pInfo->moveCount = 0; + return VK_SUCCESS; + } + + return allocator->DefragmentationPassBegin(pInfo, context); +} +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator allocator, + VmaDefragmentationContext context) +{ + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(context == VK_NULL_HANDLE) + return VK_SUCCESS; + + return allocator->DefragmentationPassEnd(context); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( VmaAllocator allocator, VmaAllocation allocation, VkBuffer buffer) @@ -16646,10 +18972,26 @@ VkResult vmaBindBufferMemory( VMA_DEBUG_GLOBAL_MUTEX_LOCK - return allocator->BindBufferMemory(allocation, buffer); + return allocator->BindBufferMemory(allocation, 0, buffer, VMA_NULL); } -VkResult vmaBindImageMemory( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer buffer, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && buffer); + + VMA_DEBUG_LOG("vmaBindBufferMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindBufferMemory(allocation, allocationLocalOffset, buffer, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( VmaAllocator allocator, VmaAllocation allocation, VkImage image) @@ -16660,10 +19002,26 @@ VkResult vmaBindImageMemory( VMA_DEBUG_GLOBAL_MUTEX_LOCK - return allocator->BindImageMemory(allocation, image); + return allocator->BindImageMemory(allocation, 0, image, VMA_NULL); } -VkResult vmaCreateBuffer( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkImage image, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && image); + + VMA_DEBUG_LOG("vmaBindImageMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindImageMemory(allocation, allocationLocalOffset, image, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( VmaAllocator allocator, const VkBufferCreateInfo* pBufferCreateInfo, const VmaAllocationCreateInfo* pAllocationCreateInfo, @@ -16677,9 +19035,15 @@ VkResult vmaCreateBuffer( { return VK_ERROR_VALIDATION_FAILED_EXT; } - + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_VALIDATION_FAILED_EXT; + } + VMA_DEBUG_LOG("vmaCreateBuffer"); - + VMA_DEBUG_GLOBAL_MUTEX_LOCK *pBuffer = VK_NULL_HANDLE; @@ -16700,30 +19064,13 @@ VkResult vmaCreateBuffer( allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, requiresDedicatedAllocation, prefersDedicatedAllocation); - // Make sure alignment requirements for specific buffer usages reported - // in Physical Device Properties are included in alignment reported by memory requirements. - if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) != 0) - { - VMA_ASSERT(vkMemReq.alignment % - allocator->m_PhysicalDeviceProperties.limits.minTexelBufferOffsetAlignment == 0); - } - if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) != 0) - { - VMA_ASSERT(vkMemReq.alignment % - allocator->m_PhysicalDeviceProperties.limits.minUniformBufferOffsetAlignment == 0); - } - if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) != 0) - { - VMA_ASSERT(vkMemReq.alignment % - allocator->m_PhysicalDeviceProperties.limits.minStorageBufferOffsetAlignment == 0); - } - // 3. Allocate memory using allocator. res = allocator->AllocateMemory( vkMemReq, requiresDedicatedAllocation, prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer + pBufferCreateInfo->usage, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, @@ -16744,7 +19091,10 @@ VkResult vmaCreateBuffer( if(res >= 0) { // 3. Bind buffer with memory. - res = allocator->BindBufferMemory(*pAllocation, *pBuffer); + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } if(res >= 0) { // All steps succeeded. @@ -16773,7 +19123,7 @@ VkResult vmaCreateBuffer( return res; } -void vmaDestroyBuffer( +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( VmaAllocator allocator, VkBuffer buffer, VmaAllocation allocation) @@ -16811,7 +19161,7 @@ void vmaDestroyBuffer( } } -VkResult vmaCreateImage( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( VmaAllocator allocator, const VkImageCreateInfo* pImageCreateInfo, const VmaAllocationCreateInfo* pAllocationCreateInfo, @@ -16848,7 +19198,7 @@ VkResult vmaCreateImage( VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR; - + // 2. Allocate memory using allocator. VkMemoryRequirements vkMemReq = {}; bool requiresDedicatedAllocation = false; @@ -16861,6 +19211,7 @@ VkResult vmaCreateImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer + UINT32_MAX, // dedicatedBufferUsage *pImage, // dedicatedImage *pAllocationCreateInfo, suballocType, @@ -16881,7 +19232,10 @@ VkResult vmaCreateImage( if(res >= 0) { // 3. Bind image with memory. - res = allocator->BindImageMemory(*pAllocation, *pImage); + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); + } if(res >= 0) { // All steps succeeded. @@ -16910,7 +19264,7 @@ VkResult vmaCreateImage( return res; } -void vmaDestroyImage( +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( VmaAllocator allocator, VkImage image, VmaAllocation allocation) From 57e4831a5f1ed063c182e1a4e6eb5f6ae52ff6c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 11:17:34 +0100 Subject: [PATCH 08/57] Update readme for new setup --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c5b83aae..cfc8ad8cc 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Flax Visual Studio extension provides better programming workflow, C# scripts de * Install Visual Studio 2015 or newer * Install Windows 8.1 SDK or newer -* Install Microsoft Visual C++ 2015.3 v140 toolset for desktop (x86, x64) +* Install Microsoft Visual C++ 2015 v140 toolset or newer * Clone repo (with LFS) * Run **GenerateProjectFiles.bat** * Open `Flax.sln` and set solution configuration to **Editor.Development** and solution platform to **Win64** From d896f4f699778719b4dc663ef42c4a90f7b254d2 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Perrier Date: Wed, 17 Feb 2021 11:38:42 +0100 Subject: [PATCH 09/57] Fix script error on undo. --- Source/Editor/Modules/SceneEditingModule.cs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Source/Editor/Modules/SceneEditingModule.cs b/Source/Editor/Modules/SceneEditingModule.cs index 1cec21305..19edaf7f7 100644 --- a/Source/Editor/Modules/SceneEditingModule.cs +++ b/Source/Editor/Modules/SceneEditingModule.cs @@ -268,9 +268,10 @@ namespace FlaxEditor.Modules Actor actor = (Actor)FlaxEngine.Object.New(to); actionList.Add(new SelectionChangeAction(Selection.ToArray(), new SceneGraphNode[0], OnSelectionUndo)); + actionList[0].Do(); actionList.Add(new DeleteActorsAction(new List { - Editor.SceneEditing.Selection[0] + Editor.Instance.Scene.GetActorNode(old) })); SelectionDeleteBegin?.Invoke(); actionList[1].Do(); @@ -291,9 +292,12 @@ namespace FlaxEditor.Modules actor.IsActive = old.IsActive; for (var i = old.ScriptsCount - 1; i >=0; i--) { - old.Scripts[i].Parent = actor; + var script = old.Scripts[i]; + script.Actor = actor; + Guid newid = Guid.NewGuid(); + FlaxEngine.Object.Internal_ChangeID(FlaxEngine.Object.GetUnmanagedPtr(script), ref newid); } - for (var i = old.Children.Length - 1; i >= 0 ; i--) + for (var i = old.Children.Length - 1; i >= 0; i--) { old.Children[i].Parent = actor; } @@ -315,6 +319,7 @@ namespace FlaxEditor.Modules actorNode }, true)); actionList.Add(new SelectionChangeAction(new SceneGraphNode[0], new SceneGraphNode[]{actorNode}, OnSelectionUndo)); + actionList[3].Do(); var actions = new MultiUndoAction(actionList); Undo.AddAction(actions); SpawnEnd?.Invoke(); From 62bfc94f2471071c2035b02005d4abae5ad95729 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 12:31:03 +0100 Subject: [PATCH 10/57] Add EditorScene for using gameplay logic in editor preview windows --- Source/Editor/Utilities/EditorScene.cpp | 23 +++++++++++++++++++++++ Source/Editor/Utilities/EditorScene.h | 19 +++++++++++++++++++ Source/Engine/Level/Actor.cpp | 2 +- Source/Engine/Level/Scene.cs | 2 +- Source/Engine/Level/Scene/Scene.h | 2 +- 5 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 Source/Editor/Utilities/EditorScene.cpp create mode 100644 Source/Editor/Utilities/EditorScene.h diff --git a/Source/Editor/Utilities/EditorScene.cpp b/Source/Editor/Utilities/EditorScene.cpp new file mode 100644 index 000000000..4350a31ff --- /dev/null +++ b/Source/Editor/Utilities/EditorScene.cpp @@ -0,0 +1,23 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#include "EditorScene.h" + +EditorScene::EditorScene(const SpawnParams& params) + : Scene(params) +{ + // Mock editor preview scene to be in gameplay + EditorScene::PostSpawn(); + SceneBeginData beginData; + EditorScene::BeginPlay(&beginData); + beginData.OnDone(); +} + +void EditorScene::Update() +{ + for (auto& e : Ticking.Update.Ticks) + e.Call(); + for (auto& e : Ticking.LateUpdate.Ticks) + e.Call(); + for (auto& e : Ticking.FixedUpdate.Ticks) + e.Call(); +} diff --git a/Source/Editor/Utilities/EditorScene.h b/Source/Editor/Utilities/EditorScene.h new file mode 100644 index 000000000..998ec0141 --- /dev/null +++ b/Source/Editor/Utilities/EditorScene.h @@ -0,0 +1,19 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Level/Scene/Scene.h" + +/// +/// Scene for editor previews with support of object drawing and updating in separation of global scenes collection. It mocks the gameplay to preview scene objects. +/// +API_CLASS() class EditorScene final : public Scene +{ +DECLARE_SCENE_OBJECT(EditorScene); +public: + + /// + /// Updates the gameplay. + /// + API_FUNCTION() void Update(); +}; diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 324e6ada1..42e937976 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -198,7 +198,7 @@ void Actor::SetParent(Actor* value, bool worldPositionsStays, bool canBreakPrefa // Check if value won't change if (_parent == value) return; - if (!IsInMainThread()) + if (IsDuringPlay() && !IsInMainThread()) { LOG(Error, "Editing scene hierarchy is only allowed on a main thread."); return; diff --git a/Source/Engine/Level/Scene.cs b/Source/Engine/Level/Scene.cs index c4b92a6bc..da8cd978d 100644 --- a/Source/Engine/Level/Scene.cs +++ b/Source/Engine/Level/Scene.cs @@ -2,7 +2,7 @@ namespace FlaxEngine { - public sealed partial class Scene + partial class Scene { /// /// The scene asset typename. Type of the serialized scene asset data. Hidden class for the scene assets. Actors deserialization rules are strictly controlled under the hood by the C++ core parts. Mostly because scene asset has the same ID as scene root actor so loading both managed objects for scene asset and scene will crash (due to object ids conflict). diff --git a/Source/Engine/Level/Scene/Scene.h b/Source/Engine/Level/Scene/Scene.h index d3c1502c6..c576e7eca 100644 --- a/Source/Engine/Level/Scene/Scene.h +++ b/Source/Engine/Level/Scene/Scene.h @@ -19,7 +19,7 @@ class NavMesh; /// /// The scene root object that contains a hierarchy of actors. /// -API_CLASS() class FLAXENGINE_API Scene final : public Actor +API_CLASS() class FLAXENGINE_API Scene : public Actor { DECLARE_SCENE_OBJECT(Scene); friend Level; From d88335c51fdc8fb10711180a717658f350b69cbf Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 12:31:30 +0100 Subject: [PATCH 11/57] Add support for audio clip playback preview in editor window --- .../Editor/Windows/Assets/AudioClipWindow.cs | 72 ++++++++++++++++++- Source/Engine/Audio/AudioSource.cpp | 2 +- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/Source/Editor/Windows/Assets/AudioClipWindow.cs b/Source/Editor/Windows/Assets/AudioClipWindow.cs index 009a1ff3d..0dc71c50d 100644 --- a/Source/Editor/Windows/Assets/AudioClipWindow.cs +++ b/Source/Editor/Windows/Assets/AudioClipWindow.cs @@ -5,6 +5,7 @@ using FlaxEditor.Content; using FlaxEditor.Content.Import; using FlaxEditor.CustomEditors; using FlaxEditor.CustomEditors.Editors; +using FlaxEditor.GUI; using FlaxEditor.Viewport.Previews; using FlaxEngine; using FlaxEngine.GUI; @@ -115,6 +116,10 @@ namespace FlaxEditor.Windows.Assets private readonly SplitPanel _split; private readonly AudioClipPreview _preview; private readonly CustomEditorPresenter _propertiesEditor; + private readonly ToolStripButton _playButton; + private readonly ToolStripButton _pauseButton; + private EditorScene _previewScene; + private AudioSource _previewSource; private readonly PropertiesProxy _properties; private bool _isWaitingForLoad; @@ -132,7 +137,7 @@ namespace FlaxEditor.Windows.Assets Parent = this }; - // AudioClip preview + // Preview _preview = new AudioClipPreview { DrawMode = AudioClipPreview.DrawModes.Fill, @@ -141,7 +146,7 @@ namespace FlaxEditor.Windows.Assets Parent = _split.Panel1 }; - // AudioClip properties editor + // Properties editor _propertiesEditor = new CustomEditorPresenter(null); _propertiesEditor.Panel.Parent = _split.Panel2; _properties = new PropertiesProxy(); @@ -150,14 +155,47 @@ namespace FlaxEditor.Windows.Assets // Toolstrip _toolstrip.AddButton(Editor.Icons.Import32, () => Editor.ContentImporting.Reimport((BinaryAssetItem)Item)).LinkTooltip("Reimport"); _toolstrip.AddSeparator(); + _playButton = (ToolStripButton)_toolstrip.AddButton(Editor.Icons.Play32, OnPlay).LinkTooltip("Play/stop audio"); + _pauseButton = (ToolStripButton)_toolstrip.AddButton(Editor.Icons.Pause32, OnPause).LinkTooltip("Pause audio"); + _toolstrip.AddSeparator(); _toolstrip.AddButton(editor.Icons.Docs32, () => Platform.OpenUrl(Utilities.Constants.DocsUrl + "manual/audio/audio-clip.html")).LinkTooltip("See documentation to learn more"); } + private void OnPlay() + { + if (!_previewScene) + { + _previewScene = new EditorScene(); + } + if (!_previewSource) + { + _previewSource = new AudioSource + { + Parent = _previewScene, + Clip = _asset, + }; + } + if (_previewSource.State == AudioSource.States.Playing) + _previewSource.Stop(); + else + _previewSource.Play(); + UpdateToolstrip(); + } + + private void OnPause() + { + if (_previewSource) + _previewSource.Pause(); + UpdateToolstrip(); + } + /// protected override void UnlinkItem() { _properties.OnClean(); _preview.Asset = null; + if (_previewSource) + _previewSource.Clip = null; _isWaitingForLoad = false; base.UnlinkItem(); @@ -188,6 +226,29 @@ namespace FlaxEditor.Windows.Assets base.OnClose(); } + /// + public override void OnDestroy() + { + if (_previewSource) + { + _previewSource.Stop(); + Object.Destroy(_previewSource); + _previewSource = null; + } + Object.Destroy(ref _previewScene); + + base.OnDestroy(); + } + + /// + protected override void UpdateToolstrip() + { + base.UpdateToolstrip(); + + _playButton.Icon = _previewSource && _previewSource.State == AudioSource.States.Playing ? Editor.Icons.Stop32 : Editor.Icons.Play32; + _pauseButton.Enabled = _previewSource && _previewSource.State == AudioSource.States.Playing; + } + /// public override void Update(float deltaTime) { @@ -196,16 +257,21 @@ namespace FlaxEditor.Windows.Assets // Check if need to load if (_isWaitingForLoad && _asset.IsLoaded) { - // Clear flag _isWaitingForLoad = false; // Init properties and parameters proxy _properties.OnLoad(this); _propertiesEditor.BuildLayout(); + if (_previewSource) + _previewSource.Stop(); // Setup ClearEditedFlag(); } + + // Tick scene + if (_previewScene) + _previewScene.Update(); } /// diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index e85722e32..b07ea95ee 100644 --- a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -342,7 +342,7 @@ void AudioSource::Update() { // Update the velocity const Vector3 pos = GetPosition(); - const float dt = Time::Update.UnscaledDeltaTime.GetTotalSeconds(); + const float dt = Math::Max(Time::Update.UnscaledDeltaTime.GetTotalSeconds(), ZeroTolerance); const auto prevVelocity = _velocity; _velocity = (pos - _prevPos) / dt; _prevPos = pos; From cae0b5e91ee2382067cb28ca1f3f55fd84c450b2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 12:51:39 +0100 Subject: [PATCH 12/57] Add info group for selected Rigidbody in play mode (display speed, velocity, angular velocity, etc.) --- .../Dedicated/RigidBodyEditor.cs | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Source/Editor/CustomEditors/Dedicated/RigidBodyEditor.cs b/Source/Editor/CustomEditors/Dedicated/RigidBodyEditor.cs index 02a1359c5..33a40c370 100644 --- a/Source/Editor/CustomEditors/Dedicated/RigidBodyEditor.cs +++ b/Source/Editor/CustomEditors/Dedicated/RigidBodyEditor.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using FlaxEditor.CustomEditors.GUI; using FlaxEngine; +using FlaxEngine.GUI; namespace FlaxEditor.CustomEditors.Dedicated { @@ -15,6 +16,7 @@ namespace FlaxEditor.CustomEditors.Dedicated { private readonly List _labels = new List(64); private const int MassOrder = 110; + private Label _infoLabel; /// protected override void SpawnProperty(LayoutElementsContainer itemLayout, ValueContainer itemValues, ItemInfo item) @@ -63,6 +65,22 @@ namespace FlaxEditor.CustomEditors.Dedicated check = value.OverrideMass; _labels[i].CheckBox.Checked = check; } + if (_infoLabel != null) + { + string text = string.Empty; + if (Editor.IsPlayMode) + { + var linearVelocity = value.LinearVelocity; + var centerOfMass = value.CenterOfMass; + text = $"Speed: {linearVelocity.Length}\n" + + $"Linear Velocity: {linearVelocity}\n" + + $"Angular Velocity: {value.AngularVelocity}\n" + + $"Center of Mass (local): {centerOfMass}\n" + + $"Center Of Mass (world): {value.Transform.LocalToWorld(centerOfMass)}\n" + + $"Is Sleeping: {value.IsSleeping}"; + } + _infoLabel.Text = text; + } } /// @@ -71,6 +89,13 @@ namespace FlaxEditor.CustomEditors.Dedicated _labels.Clear(); base.Initialize(layout); + + // Add info box + if (IsSingleObject && Values[0] is RigidBody && Editor.IsPlayMode) + { + _infoLabel = layout.Label(string.Empty).Label; + _infoLabel.AutoHeight = true; + } } } } From ed04eece8ab3f9157f469dc0e73661de2d216125 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 17 Feb 2021 13:02:47 +0100 Subject: [PATCH 13/57] Add support for mapping objects in SceneAnimationPlayer to reuse it for different objects --- .../SceneAnimations/SceneAnimationPlayer.cpp | 29 ++++++++++++++----- .../SceneAnimations/SceneAnimationPlayer.h | 8 +++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp b/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp index 09b7cf874..ea24d126f 100644 --- a/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp +++ b/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp @@ -213,6 +213,11 @@ void SceneAnimationPlayer::Tick(float dt) _lastTime = _time = time; } +void SceneAnimationPlayer::MapObject(const Guid& from, const Guid& to) +{ + _objectsMapping[from] = to; +} + void SceneAnimationPlayer::Restore(SceneAnimation* anim, int32 stateIndexOffset) { // Restore all tracks @@ -268,7 +273,9 @@ void SceneAnimationPlayer::Restore(SceneAnimation* anim, int32 stateIndexOffset) case SceneAnimation::Track::Types::ObjectReferenceProperty: { value = &_restoreData[state.RestoreStateIndex]; - auto obj = Scripting::FindObject(*(Guid*)value); + Guid id = *(Guid*)value; + _objectsMapping.TryGet(id, id); + auto obj = Scripting::FindObject(id); value = obj ? obj->GetOrCreateManagedInstance() : nullptr; break; } @@ -358,7 +365,9 @@ bool SceneAnimationPlayer::TickPropertyTrack(int32 trackIndex, int32 stateIndexO void* value = (void*)((byte*)trackDataKeyframes->Keyframes + keyframeSize * (leftKey) + sizeof(float)); if (track.Type == SceneAnimation::Track::Types::ObjectReferenceProperty) { - auto obj = Scripting::FindObject(*(Guid*)value); + Guid id = *(Guid*)value; + _objectsMapping.TryGet(id, id); + auto obj = Scripting::FindObject(id); value = obj ? obj->GetOrCreateManagedInstance() : nullptr; *(void**)target = value; } @@ -680,10 +689,12 @@ void SceneAnimationPlayer::Tick(SceneAnimation* anim, float time, float dt, int3 // Find actor const auto trackData = track.GetData(); + Guid id = trackData->ID; + _objectsMapping.TryGet(id, id); state.Object = Scripting::FindObject(trackData->ID); if (!state.Object) { - LOG(Warning, "Failed to find {3} of ID={0} for track '{1}' in scene animation '{2}'", trackData->ID, track.Name, anim->ToString(), TEXT("actor")); + LOG(Warning, "Failed to find {3} of ID={0} for track '{1}' in scene animation '{2}'", id, track.Name, anim->ToString(), TEXT("actor")); break; } } @@ -708,10 +719,12 @@ void SceneAnimationPlayer::Tick(SceneAnimation* anim, float time, float dt, int3 break; // Find script - state.Object = Scripting::FindObject