From 2dc404cbd36a2f4af8f5bb0a7a2723eefe7b800e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 04:40:32 +0200 Subject: [PATCH 001/211] Add new memory profiler --- Source/Editor/Windows/Profiler/Memory.cs | 186 ++++++++- Source/Engine/Core/Types/StringView.h | 8 + Source/Engine/Engine/Engine.cpp | 10 + Source/Engine/Platform/Base/PlatformBase.cpp | 15 + Source/Engine/Platform/Base/PlatformBase.h | 2 +- Source/Engine/Profiler/Profiler.h | 1 + Source/Engine/Profiler/ProfilerMemory.cpp | 413 +++++++++++++++++++ Source/Engine/Profiler/ProfilerMemory.h | 258 ++++++++++++ 8 files changed, 891 insertions(+), 2 deletions(-) create mode 100644 Source/Engine/Profiler/ProfilerMemory.cpp create mode 100644 Source/Engine/Profiler/ProfilerMemory.h diff --git a/Source/Editor/Windows/Profiler/Memory.cs b/Source/Editor/Windows/Profiler/Memory.cs index d7bdc43af..f33bec4cc 100644 --- a/Source/Editor/Windows/Profiler/Memory.cs +++ b/Source/Editor/Windows/Profiler/Memory.cs @@ -2,6 +2,8 @@ #if USE_PROFILER using System; +using System.Collections.Generic; +using FlaxEditor.GUI; using FlaxEngine; using FlaxEngine.GUI; @@ -13,9 +15,21 @@ namespace FlaxEditor.Windows.Profiler /// internal sealed class Memory : ProfilerMode { + private struct FrameData + { + public ProfilerMemory.GroupsArray Usage; + public ProfilerMemory.GroupsArray Peek; + public ProfilerMemory.GroupsArray Count; + } + private readonly SingleChart _nativeAllocationsChart; private readonly SingleChart _managedAllocationsChart; - + private readonly Table _table; + private SamplesBuffer _frames; + private List _tableRowsCache; + private string[] _groupNames; + private int[] _groupOrder; + public Memory() : base("Memory") { @@ -50,6 +64,58 @@ namespace FlaxEditor.Windows.Profiler Parent = layout, }; _managedAllocationsChart.SelectedSampleChanged += OnSelectedSampleChanged; + + // Table + var style = Style.Current; + var headerColor = style.LightBackground; + var textColor = style.Foreground; + _table = new 
Table + { + Columns = new[] + { + new ColumnDefinition + { + UseExpandCollapseMode = true, + CellAlignment = TextAlignment.Near, + Title = "Group", + TitleBackgroundColor = headerColor, + TitleColor = textColor, + }, + new ColumnDefinition + { + Title = "Usage", + TitleBackgroundColor = headerColor, + FormatValue = FormatCellBytes, + TitleColor = textColor, + }, + new ColumnDefinition + { + Title = "Peek", + TitleBackgroundColor = headerColor, + FormatValue = FormatCellBytes, + TitleColor = textColor, + }, + new ColumnDefinition + { + Title = "Count", + TitleBackgroundColor = headerColor, + TitleColor = textColor, + }, + }, + Parent = layout, + }; + _table.Splits = new[] + { + 0.5f, + 0.2f, + 0.2f, + 0.1f, + }; + } + + private string FormatCellBytes(object x) + { + return Utilities.Utils.FormatBytesCount(Convert.ToUInt64(x)); } /// @@ -57,6 +123,7 @@ namespace FlaxEditor.Windows.Profiler { _nativeAllocationsChart.Clear(); _managedAllocationsChart.Clear(); + _frames?.Clear(); } /// @@ -84,6 +151,19 @@ namespace FlaxEditor.Windows.Profiler _nativeAllocationsChart.AddSample(nativeMemoryAllocation); _managedAllocationsChart.AddSample(managedMemoryAllocation); + + // Gather memory profiler stats for groups + var frame = new FrameData + { + Usage = ProfilerMemory.GetGroups(0), + Peek = ProfilerMemory.GetGroups(1), + Count = ProfilerMemory.GetGroups(2), + }; + if (_frames == null) + _frames = new SamplesBuffer(); + if (_groupNames == null) + _groupNames = ProfilerMemory.GetGroupNames(); + _frames.Add(frame); } /// @@ -91,6 +171,110 @@ namespace FlaxEditor.Windows.Profiler { _nativeAllocationsChart.SelectedSampleIndex = selectedFrame; _managedAllocationsChart.SelectedSampleIndex = selectedFrame; + + UpdateTable(selectedFrame); + } + + /// + public override void OnDestroy() + { + _tableRowsCache?.Clear(); + _groupNames = null; + _groupOrder = null; + + base.OnDestroy(); + } + + private void UpdateTable(int selectedFrame) + { + if (_frames == null) + return; + if 
(_tableRowsCache == null) + _tableRowsCache = new List(); + _table.IsLayoutLocked = true; + + RecycleTableRows(_table, _tableRowsCache); + UpdateTableInner(selectedFrame); + + _table.UnlockChildrenRecursive(); + _table.PerformLayout(); + } + + private unsafe void UpdateTableInner(int selectedFrame) + { + if (_frames.Count == 0) + return; + var frame = _frames.Get(selectedFrame); + var totalUage = frame.Usage.Values0[(int)ProfilerMemory.Groups.TotalTracked]; + var totalPeek = frame.Peek.Values0[(int)ProfilerMemory.Groups.TotalTracked]; + var totalCount = frame.Count.Values0[(int)ProfilerMemory.Groups.TotalTracked]; + + // Sort by memory size + if (_groupOrder == null) + _groupOrder = new int[(int)ProfilerMemory.Groups.MAX]; + for (int i = 0; i < (int)ProfilerMemory.Groups.MAX; i++) + _groupOrder[i] = i; + Array.Sort(_groupOrder, (x, y) => + { + var tmp = _frames.Get(selectedFrame); + return (int)(tmp.Usage.Values0[y] - tmp.Usage.Values0[x]); + }); + + // Add rows + var rowColor2 = Style.Current.Background * 1.4f; + for (int i = 0; i < (int)ProfilerMemory.Groups.MAX; i++) + { + var group = _groupOrder[i]; + var groupUsage = frame.Usage.Values0[group]; + if (groupUsage <= 0) + continue; + var groupPeek = frame.Peek.Values0[group]; + var groupCount = frame.Count.Values0[group]; + + Row row; + if (_tableRowsCache.Count != 0) + { + var last = _tableRowsCache.Count - 1; + row = _tableRowsCache[last]; + _tableRowsCache.RemoveAt(last); + } + else + { + row = new Row + { + Values = new object[4], + BackgroundColors = new Color[4], + }; + } + { + // Group + row.Values[0] = _groupNames[group]; + + // Usage + row.Values[1] = groupUsage; + row.BackgroundColors[1] = Color.Red.AlphaMultiplied(Mathf.Min(1, (float)groupUsage / totalUage) * 0.5f); + + // Peek + row.Values[2] = groupPeek; + row.BackgroundColors[2] = Color.Red.AlphaMultiplied(Mathf.Min(1, (float)groupPeek / totalPeek) * 0.5f); + + // Count + row.Values[3] = groupCount; + row.BackgroundColors[3] = 
Color.Red.AlphaMultiplied(Mathf.Min(1, (float)groupCount / totalCount) * 0.5f); + } + row.Width = _table.Width; + row.BackgroundColor = i % 2 == 1 ? rowColor2 : Color.Transparent; + row.Parent = _table; + + var useBackground = group != (int)ProfilerMemory.Groups.Total && + group != (int)ProfilerMemory.Groups.TotalTracked && + group != (int)ProfilerMemory.Groups.Malloc; + if (!useBackground) + { + for (int k = 1; k < row.BackgroundColors.Length; k++) + row.BackgroundColors[k] = Color.Transparent; + } + } } } } diff --git a/Source/Engine/Core/Types/StringView.h b/Source/Engine/Core/Types/StringView.h index 25d156c64..cbc221e7b 100644 --- a/Source/Engine/Core/Types/StringView.h +++ b/Source/Engine/Core/Types/StringView.h @@ -208,6 +208,14 @@ public: return StringUtils::CompareIgnoreCase(&(*this)[Length() - suffix.Length()], *suffix) == 0; return StringUtils::Compare(&(*this)[Length() - suffix.Length()], *suffix) == 0; } + + bool Contains(const T* subStr, StringSearchCase searchCase = StringSearchCase::CaseSensitive) const + { + const int32 length = Length(); + if (subStr == nullptr || length == 0) + return false; + return (searchCase == StringSearchCase::IgnoreCase ? 
StringUtils::FindIgnoreCase(_data, subStr) : StringUtils::Find(_data, subStr)) != nullptr; + } }; /// diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index 385a05554..288e0da11 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -79,6 +79,11 @@ Window* Engine::MainWindow = nullptr; int32 Engine::Main(const Char* cmdLine) { +#if COMPILE_WITH_PROFILER + extern void InitProfilerMemory(const Char*); + InitProfilerMemory(cmdLine); +#endif + PROFILE_MEM_BEGIN(Engine); EngineImpl::CommandLine = cmdLine; Globals::MainThreadID = Platform::GetCurrentThreadID(); StartupTime = DateTime::Now(); @@ -164,6 +169,7 @@ int32 Engine::Main(const Char* cmdLine) LOG_FLUSH(); Time::Synchronize(); EngineImpl::IsReady = true; + PROFILE_MEM_END(); // Main engine loop const bool useSleep = true; // TODO: this should probably be a platform setting @@ -204,6 +210,10 @@ int32 Engine::Main(const Char* cmdLine) { PROFILE_CPU_NAMED("Platform.Tick"); Platform::Tick(); +#if COMPILE_WITH_PROFILER + extern void TickProfilerMemory(); + TickProfilerMemory(); +#endif } // Update game logic diff --git a/Source/Engine/Platform/Base/PlatformBase.cpp b/Source/Engine/Platform/Base/PlatformBase.cpp index 65f995967..9ba6b7bd6 100644 --- a/Source/Engine/Platform/Base/PlatformBase.cpp +++ b/Source/Engine/Platform/Base/PlatformBase.cpp @@ -17,6 +17,7 @@ #include "Engine/Core/Utilities.h" #if COMPILE_WITH_PROFILER #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #endif #include "Engine/Threading/Threading.h" #include "Engine/Engine/CommandLine.h" @@ -218,6 +219,10 @@ void PlatformBase::OnMemoryAlloc(void* ptr, uint64 size) tracy::Profiler::MemAllocCallstack(ptr, (size_t)size, 12, false); #endif + // Register in memory profiler + if (ProfilerMemory::Enabled) + ProfilerMemory::OnMemoryAlloc(ptr, size); + // Register allocation during the current CPU event auto thread = ProfilerCPU::GetCurrentThread(); if (thread != 
nullptr && thread->Buffer.GetCount() != 0) @@ -235,6 +240,10 @@ void PlatformBase::OnMemoryFree(void* ptr) if (!ptr) return; + // Register in memory profiler + if (ProfilerMemory::Enabled) + ProfilerMemory::OnMemoryFree(ptr); + #if TRACY_ENABLE_MEMORY // Track memory allocation in Tracy tracy::Profiler::MemFree(ptr, false); @@ -372,6 +381,12 @@ void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType er LOG(Error, "External Used Physical Memory: {0} ({1}%)", Utilities::BytesToText(externalUsedPhysical), (int32)(100 * externalUsedPhysical / memoryStats.TotalPhysicalMemory)); LOG(Error, "External Used Virtual Memory: {0} ({1}%)", Utilities::BytesToText(externalUsedVirtual), (int32)(100 * externalUsedVirtual / memoryStats.TotalVirtualMemory)); } +#if COMPILE_WITH_PROFILER + if (error == FatalErrorType::OutOfMemory || error == FatalErrorType::GPUOutOfMemory) + { + ProfilerMemory::Dump(); + } +#endif } if (Log::Logger::LogFilePath.HasChars()) { diff --git a/Source/Engine/Platform/Base/PlatformBase.h b/Source/Engine/Platform/Base/PlatformBase.h index fb2c68099..ff47e77a3 100644 --- a/Source/Engine/Platform/Base/PlatformBase.h +++ b/Source/Engine/Platform/Base/PlatformBase.h @@ -286,7 +286,7 @@ public: /// /// A pointer to the first operand. This value will be replaced with the result of the operation. /// The second operand. - /// The result value of the operation. + /// The original value of the dst parameter. 
static int64 InterlockedAdd(int64 volatile* dst, int64 value) = delete; /// diff --git a/Source/Engine/Profiler/Profiler.h b/Source/Engine/Profiler/Profiler.h index f0f7aad05..b80541719 100644 --- a/Source/Engine/Profiler/Profiler.h +++ b/Source/Engine/Profiler/Profiler.h @@ -4,6 +4,7 @@ #include "ProfilerCPU.h" #include "ProfilerGPU.h" +#include "ProfilerMemory.h" #if COMPILE_WITH_PROFILER diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp new file mode 100644 index 000000000..e617d712c --- /dev/null +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -0,0 +1,413 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#if COMPILE_WITH_PROFILER + +#include "ProfilerMemory.h" +#include "Engine/Core/Log.h" +#include "Engine/Core/Utilities.h" +#include "Engine/Core/Math/Math.h" +#include "Engine/Core/Types/StringBuilder.h" +#include "Engine/Core/Collections/Sorting.h" +#include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Platform/MemoryStats.h" +#include "Engine/Platform/File.h" +#include "Engine/Scripting/Enums.h" +#include "Engine/Threading/ThreadLocal.h" +#include "Engine/Utilities/StringConverter.h" + +#define GROUPS_COUNT (int32)ProfilerMemory::Groups::MAX + +static_assert(GROUPS_COUNT <= MAX_uint8, "Fix memory profiler groups to fit a single byte."); + +// Compact name storage. +struct GroupNameBuffer +{ + Char Buffer[30]; + + template + void Set(const T* str) + { + int32 max = StringUtils::Length(str), dst = 0; + char prev = 0; + for (int32 i = 0; i < max && dst < ARRAY_COUNT(Buffer) - 2; i++) + { + char cur = str[i]; + if (StringUtils::IsUpper(cur) && StringUtils::IsLower(prev)) + Buffer[dst++] = '/'; + Buffer[dst++] = cur; + prev = cur; + } + Buffer[dst] = 0; + } +}; + +// Compact groups stack container. 
+struct GroupStackData +{ + uint8 Count : 7; + uint8 SkipRecursion : 1; + uint8 Stack[15]; + + FORCE_INLINE void Push(ProfilerMemory::Groups group) + { + if (Count < ARRAY_COUNT(Stack)) + Count++; + else + { + int a= 10; + } + Stack[Count - 1] = (uint8)group; + } + + FORCE_INLINE void Pop() + { + if (Count > 0) + Count--; + } + + FORCE_INLINE ProfilerMemory::Groups Peek() const + { + return Count > 0 ? (ProfilerMemory::Groups)Stack[Count - 1] : ProfilerMemory::Groups::Unknown; + } +}; + +template<> +struct TIsPODType +{ + enum { Value = true }; +}; + +// Memory allocation data for a specific pointer. +struct PointerData +{ + uint32 Size; + uint8 Group; +}; + +template<> +struct TIsPODType +{ + enum { Value = true }; +}; + +#define UPDATE_PEEK(group) Platform::AtomicStore(&GroupMemoryPeek[(int32)group], Math::Max(Platform::AtomicRead(&GroupMemory[(int32)group]), Platform::AtomicRead(&GroupMemoryPeek[(int32)group]))) + +namespace +{ + alignas(16) volatile int64 GroupMemory[GROUPS_COUNT] = {}; + alignas(16) volatile int64 GroupMemoryPeek[GROUPS_COUNT] = {}; + alignas(16) volatile int64 GroupMemoryCount[GROUPS_COUNT] = {}; + uint8 GroupParents[GROUPS_COUNT] = {}; + ThreadLocal GroupStack; + GroupNameBuffer GroupNames[GROUPS_COUNT]; + bool InitedNames = false; + CriticalSection PointersLocker; + Dictionary Pointers; + + void InitNames() + { + if (InitedNames) + return; + InitedNames = true; + for (int32 i = 0; i < GROUPS_COUNT; i++) + { + const char* name = ScriptingEnum::GetName((ProfilerMemory::Groups)i); + GroupNames[i].Set(name); + } + + // Init constant memory + PROFILE_MEM_INC(ProgramSize, Platform::GetMemoryStats().ProgramSizeMemory); + UPDATE_PEEK(ProfilerMemory::Groups::ProgramSize); + } + + void Dump(StringBuilder& output, const int32 maxCount) + { + InitNames(); + + // Sort groups + struct GroupInfo + { + ProfilerMemory::Groups Group; + int64 Size; + int64 Peek; + uint32 Count; + + bool operator<(const GroupInfo& other) const + { + return Size > other.Size; + 
} + }; + GroupInfo groups[GROUPS_COUNT]; + for (int32 i = 0; i < GROUPS_COUNT; i++) + { + GroupInfo& group = groups[i]; + group.Group = (ProfilerMemory::Groups)i; + group.Size = Platform::AtomicRead(&GroupMemory[i]); + group.Peek = Platform::AtomicRead(&GroupMemoryPeek[i]); + group.Count = (uint32)Platform::AtomicRead(&GroupMemoryCount[i]); + } + Sorting::QuickSort(groups, GROUPS_COUNT); + + // Print groups + output.Append(TEXT("Memory profiler summary:")).AppendLine(); + for (int32 i = 0; i < maxCount; i++) + { + const GroupInfo& group = groups[i]; + if (group.Size == 0) + break; + const Char* name = GroupNames[(int32)group.Group].Buffer; + const String size = Utilities::BytesToText(group.Size); + const String peek = Utilities::BytesToText(group.Peek); + output.AppendFormat(TEXT("{:>30}: {:>11} (peek: {}, count: {})"), name, size.Get(), peek.Get(), group.Count); + output.AppendLine(); + } + +#if 0 + // Print count of memory allocs count per group + for (int32 i = 0; i < GROUPS_COUNT; i++) + { + GroupInfo& group = groups[i]; + group.Group = (ProfilerMemory::Groups)i; + group.Size = 0; + } + PointersLocker.Lock(); + for (auto& e : Pointers) + groups[e.Value.Group].Size++; + PointersLocker.Unlock(); + Sorting::QuickSort(groups, GROUPS_COUNT); + output.Append(TEXT("Memory allocations count summary:")).AppendLine(); + for (int32 i = 0; i < maxCount; i++) + { + const GroupInfo& group = groups[i]; + if (group.Size == 0) + break; + const Char* name = GroupName[(int32)group.Group].Buffer; + output.AppendFormat(TEXT("{:>30}: {:>11}"), name, group.Size); + output.AppendLine(); + } +#endif + } + + FORCE_INLINE void AddGroupMemory(ProfilerMemory::Groups group, int64 add) + { + // Group itself + Platform::InterlockedAdd(&GroupMemory[(int32)group], add); + Platform::InterlockedIncrement(&GroupMemoryCount[(int32)group]); + UPDATE_PEEK(group); + + // Total memory + Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], add); + 
Platform::InterlockedIncrement(&GroupMemoryCount[(int32)ProfilerMemory::Groups::TotalTracked]); + UPDATE_PEEK(ProfilerMemory::Groups::TotalTracked); + + // Group hierarchy parents + uint8 parent = GroupParents[(int32)group]; + while (parent != 0) + { + Platform::InterlockedAdd(&GroupMemory[parent], add); + Platform::InterlockedIncrement(&GroupMemoryCount[parent]); + UPDATE_PEEK(parent); + parent = GroupParents[parent]; + } + } + + FORCE_INLINE void SubGroupMemory(ProfilerMemory::Groups group, int64 add) + { + // Group itself + int64 value = Platform::InterlockedAdd(&GroupMemory[(int32)group], add); + Platform::InterlockedDecrement(&GroupMemoryCount[(int32)group]); + + // Total memory + value = Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], add); + Platform::InterlockedDecrement(&GroupMemoryCount[(int32)ProfilerMemory::Groups::TotalTracked]); + + // Group hierarchy parents + uint8 parent = GroupParents[(int32)group]; + while (parent != 0) + { + value = Platform::InterlockedAdd(&GroupMemory[parent], add); + Platform::InterlockedDecrement(&GroupMemoryCount[parent]); + parent = GroupParents[parent]; + } + } +} + +void InitProfilerMemory(const Char* cmdLine) +{ + // Check for command line option (memory profiling affects performance thus not active by default) + ProfilerMemory::Enabled = StringUtils::FindIgnoreCase(cmdLine, TEXT("-mem")); + + // Init hierarchy +#define INIT_PARENT(parent, child) GroupParents[(int32)ProfilerMemory::Groups::child] = (uint8)ProfilerMemory::Groups::parent + INIT_PARENT(Graphics, GraphicsTextures); + INIT_PARENT(Graphics, GraphicsBuffers); + INIT_PARENT(Graphics, GraphicsMeshes); + INIT_PARENT(Graphics, GraphicsShaders); + INIT_PARENT(Graphics, GraphicsMaterials); + INIT_PARENT(Graphics, GraphicsCommands); + INIT_PARENT(Animations, AnimationsData); + INIT_PARENT(Content, ContentAssets); + INIT_PARENT(Content, ContentFiles); +#undef INIT_PARENT +} + +void TickProfilerMemory() +{ + // Update profiler memory 
+ PointersLocker.Lock(); + GroupMemory[(int32)ProfilerMemory::Groups::Profiler] = + sizeof(GroupMemory) + sizeof(GroupNames) + sizeof(GroupStack) + + Pointers.Capacity() * sizeof(Dictionary::Bucket); + PointersLocker.Unlock(); + + // Get total system memory and update untracked amount + auto memory = Platform::GetProcessMemoryStats(); + memory.UsedPhysicalMemory -= GroupMemory[(int32)ProfilerMemory::Groups::Profiler]; + GroupMemory[(int32)ProfilerMemory::Groups::Total] = memory.UsedPhysicalMemory; + GroupMemory[(int32)ProfilerMemory::Groups::TotalUntracked] = Math::Max(memory.UsedPhysicalMemory - GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], 0); + + // Update peeks + UPDATE_PEEK(ProfilerMemory::Groups::Profiler); + UPDATE_PEEK(ProfilerMemory::Groups::Total); + UPDATE_PEEK(ProfilerMemory::Groups::TotalUntracked); + GroupMemoryPeek[(int32)ProfilerMemory::Groups::Total] = Math::Max(GroupMemoryPeek[(int32)ProfilerMemory::Groups::Total], GroupMemoryPeek[(int32)ProfilerMemory::Groups::TotalTracked]); +} + +bool ProfilerMemory::Enabled = false; + +void ProfilerMemory::IncrementGroup(Groups group, uint64 size) +{ + AddGroupMemory(group, (int64)size); +} + +void ProfilerMemory::DecrementGroup(Groups group, uint64 size) +{ + SubGroupMemory(group, -(int64)size); +} + +void ProfilerMemory::BeginGroup(Groups group) +{ + auto& stack = GroupStack.Get(); + stack.Push(group); +} + +void ProfilerMemory::EndGroup() +{ + auto& stack = GroupStack.Get(); + stack.Pop(); +} + +void ProfilerMemory::RenameGroup(Groups group, const StringView& name) +{ + GroupNames[(int32)group].Set(name.Get()); +} + +Array ProfilerMemory::GetGroupNames() +{ + Array result; + result.Resize((int32)Groups::MAX); + InitNames(); + for (int32 i = 0; i < (int32)Groups::MAX; i++) + result[i] = GroupNames[i].Buffer; + return result; +} + +ProfilerMemory::GroupsArray ProfilerMemory::GetGroups(int32 mode) +{ + GroupsArray result; + Platform::MemoryClear(&result, sizeof(result)); + 
static_assert(ARRAY_COUNT(result.Values) >= (int32)Groups::MAX, "Update group array size."); + InitNames(); + if (mode == 0) + { + for (int32 i = 0; i < (int32)Groups::MAX; i++) + result.Values[i] = Platform::AtomicRead(&GroupMemory[i]); + } + else if (mode == 1) + { + for (int32 i = 0; i < (int32)Groups::MAX; i++) + result.Values[i] = Platform::AtomicRead(&GroupMemoryPeek[i]); + } + else if (mode == 2) + { + for (int32 i = 0; i < (int32)Groups::MAX; i++) + result.Values[i] = Platform::AtomicRead(&GroupMemoryCount[i]); + } + return result; +} + +void ProfilerMemory::Dump(const StringView& options) +{ + bool file = options.Contains(TEXT("file")); + StringBuilder output; + int32 maxCount = 20; + if (file || options.Contains(TEXT("all"))) + maxCount = MAX_int32; + ::Dump(output, maxCount); + if (file) + { + String path = String(StringUtils::GetDirectoryName(Log::Logger::LogFilePath)) / TEXT("Memory_") + DateTime::Now().ToFileNameString() + TEXT(".txt"); + File::WriteAllText(path, output, Encoding::ANSI); + LOG(Info, "Saved to {}", path); + return; + } + LOG_STR(Info, output.ToStringView()); +} + +void ProfilerMemory::OnMemoryAlloc(void* ptr, uint64 size) +{ + ASSERT_LOW_LAYER(Enabled && ptr); + auto& stack = GroupStack.Get(); + if (stack.SkipRecursion) + return; + stack.SkipRecursion = true; + + // Register pointer + PointerData ptrData; + ptrData.Size = size; + ptrData.Group = (uint8)stack.Peek(); + PointersLocker.Lock(); + Pointers[ptr] = ptrData; + PointersLocker.Unlock(); + + // Update group memory + const int64 add = (int64)size; + AddGroupMemory((Groups)ptrData.Group, add); + Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::Malloc], add); + Platform::InterlockedIncrement(&GroupMemoryCount[(int32)ProfilerMemory::Groups::Malloc]); + UPDATE_PEEK(ProfilerMemory::Groups::Malloc); + + stack.SkipRecursion = false; +} + +void ProfilerMemory::OnMemoryFree(void* ptr) +{ + ASSERT_LOW_LAYER(Enabled && ptr); + auto& stack = GroupStack.Get(); + if 
(stack.SkipRecursion) + return; + stack.SkipRecursion = true; + + // Find and remove pointer + PointerData ptrData; + PointersLocker.Lock(); + auto it = Pointers.Find(ptr); + bool found = it.IsNotEnd(); + if (found) + ptrData = it->Value; + Pointers.Remove(it); + PointersLocker.Unlock(); + + if (found) + { + // Update group memory + const int64 add = -(int64)ptrData.Size; + SubGroupMemory((Groups)ptrData.Group, add); + Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::Malloc], add); + Platform::InterlockedDecrement(&GroupMemoryCount[(int32)ProfilerMemory::Groups::Malloc]); + } + + stack.SkipRecursion = false; +} + +#endif diff --git a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h new file mode 100644 index 000000000..65ed5d9ab --- /dev/null +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -0,0 +1,258 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Platform/Platform.h" + +#if COMPILE_WITH_PROFILER + +#include "Engine/Core/Types/StringView.h" + +/// +/// Provides memory allocations collecting utilities. +/// +API_CLASS(Static) class FLAXENGINE_API ProfilerMemory +{ + DECLARE_SCRIPTING_TYPE_MINIMAL(ProfilerMemory); +public: + /// + /// List of different memory categories used to track and analyze memory allocations specific to a certain engine system. + /// + API_ENUM() enum class Groups : uint8 + { + // Not categorized. + Unknown, + // Total used system memory (reported by platform). + Total, + // Total amount of tracked memory (by ProfilerMemory tool). + TotalTracked, + // Total amount of untracked memory (gap between total system memory usage and tracked memory size). + TotalUntracked, + // Initial memory used by program upon startup (eg. executable size, static variables). + ProgramSize, + // Total memory allocated via malloc. + Malloc, + // General purpose engine memory. + Engine, + // Profiling tool memory overhead. + Profiler, + + // Total graphics memory usage. 
+ Graphics, + // Total textures memory usage. + GraphicsTextures, + // Total buffers memory usage. + GraphicsBuffers, + // Total meshes memory usage (vertex and idnex buffers allocated by models). + GraphicsMeshes, + // Totoal shaders memory usage (shaders bytecode, PSOs data). + GraphicsShaders, + // Totoal materials memory usage (constant buffers, parameters data). + GraphicsMaterials, + // Totoal command buffers memory usage (draw lists, constants uploads, ring buffer allocators). + GraphicsCommands, + + // Total Artificial Intelligence systems memory usage (eg. Behavior Trees). + AI, + + // Total animations system memory usage. + Animations, + // Total animation data memory usage (curves, events, keyframes, graphs, etc.). + AnimationsData, + + // Total autio system memory. + Audio, + + // Total content system memory usage. + Content, + // Total general purpose memory allocated by assets. + ContentAssets, + // Total memory used by content files buffers (file reading and streaming buffers). + ContentFiles, + // Total memory used by content streaming system (internals). + ContentStreaming, + + // Total memory allocated by input system. + Input, + + // Total memory allocated by scene objects. + Level, + + // Total localization system memory. + Localization, + + // Total navigation system memory. + Navigation, + + // Total networking system memory. + Networking, + + // Total particles memory (loaded assets, particles buffers and instance parameters). + Particles, + + // Total physics memory. + Physics, + + // Total scripting memory allocated by game. + Scripting, + + // Total User Interface components memory. + UI, + + // Total video system memory (video file data, frame buffers, GPU images and any audio buffers used by video playback). + Video, + + // Custom game-specific memory tracking. + CustomGame0, + // Custom game-specific memory tracking. + CustomGame1, + // Custom game-specific memory tracking. + CustomGame2, + // Custom game-specific memory tracking. 
+ CustomGame3, + // Custom game-specific memory tracking. + CustomGame4, + // Custom game-specific memory tracking. + CustomGame5, + // Custom game-specific memory tracking. + CustomGame6, + // Custom game-specific memory tracking. + CustomGame7, + // Custom game-specific memory tracking. + CustomGame8, + // Custom game-specific memory tracking. + CustomGame9, + + // Custom plugin-specific memory tracking. + CustomPlugin0, + // Custom plugin-specific memory tracking. + CustomPlugin1, + // Custom plugin-specific memory tracking. + CustomPlugin2, + // Custom plugin-specific memory tracking. + CustomPlugin3, + // Custom plugin-specific memory tracking. + CustomPlugin4, + // Custom plugin-specific memory tracking. + CustomPlugin5, + // Custom plugin-specific memory tracking. + CustomPlugin6, + // Custom plugin-specific memory tracking. + CustomPlugin7, + // Custom plugin-specific memory tracking. + CustomPlugin8, + // Custom plugin-specific memory tracking. + CustomPlugin9, + + // Total editor-specific memory. + Editor, + + MAX + }; + + /// + /// The memory groups array wraper to avoid dynamic memory allocation. + /// + API_STRUCT(NoDefault) struct GroupsArray + { + DECLARE_SCRIPTING_TYPE_MINIMAL(GroupsArray); + + // Values for each group + API_FIELD(NoArray) int64 Values[100]; + }; + +public: + /// + /// Increments memory usage by a specific group. + /// + /// The group to update. + /// The amount of memory allocated (in bytes). + API_FUNCTION() static void IncrementGroup(Groups group, uint64 size); + + /// + /// Decrements memory usage by a specific group. + /// + /// The group to update. + /// The amount of memory freed (in bytes). + API_FUNCTION() static void DecrementGroup(Groups group, uint64 size); + + /// + /// Enters a new group context scope (by the current thread). Informs the profiler about context of any memory allocations within. + /// + /// The group to enter. 
+ API_FUNCTION() static void BeginGroup(Groups group); + + /// + /// Leaves the last group context scope (by the current thread). + /// + API_FUNCTION() static void EndGroup(); + + /// + /// Renames the group. Can be used for custom game/plugin groups naming. + /// + /// The group to update. + /// The new name to set. + API_FUNCTION() static void RenameGroup(Groups group, const StringView& name); + + /// + /// Gets the names of all groups (array matches Groups enums). + /// + API_FUNCTION() static Array GetGroupNames(); + + /// + /// Gets the memory stats for all groups (array matches Groups enums). + /// + /// 0 to get current memory, 1 to get peek memory, 2 to get current count. + API_FUNCTION() static GroupsArray GetGroups(int32 mode = 0); + + /// + /// Dumps the memory allocations stats (groupped). + /// + /// 'all' to dump all groups, 'file' to dump info to a file (in Logs folder) + API_FUNCTION(Attributes="DebugCommand") static void Dump(const StringView& options = StringView::Empty); + +public: + /// + /// The profiling tools usage flag. Can be used to disable profiler. Run engine with '-mem' command line to activate it from start. + /// + static bool Enabled; + + static void OnMemoryAlloc(void* ptr, uint64 size); + static void OnMemoryFree(void* ptr); + +public: + /// + /// Helper structure used to call begin/end on group within single code block. 
+ /// + struct GroupScope + { + FORCE_INLINE GroupScope(Groups group) + { + if (ProfilerMemory::Enabled) + ProfilerMemory::BeginGroup(group); + } + + FORCE_INLINE ~GroupScope() + { + if (ProfilerMemory::Enabled) + ProfilerMemory::EndGroup(); + } + }; +}; + +#define PROFILE_MEM_INC(group, size) ProfilerMemory::IncrementGroup(ProfilerMemory::Groups::group, size) +#define PROFILE_MEM_DEC(group, size) ProfilerMemory::DecrementGroup(ProfilerMemory::Groups::group, size) +#define PROFILE_MEM(group) ProfilerMemory::GroupScope ProfileMem(ProfilerMemory::Groups::group) +#define PROFILE_MEM_BEGIN(group) if (ProfilerMemory::Enabled) ProfilerMemory::BeginGroup(ProfilerMemory::Groups::group) +#define PROFILE_MEM_END() if (ProfilerMemory::Enabled) ProfilerMemory::EndGroup() + +#else + +// Empty macros for disabled profiler +#define PROFILE_MEM_INC(group, size) +#define PROFILE_MEM_DEC(group, size) +#define PROFILE_MEM(group) +#define PROFILE_MEM_BEGIN(group) +#define PROFILE_MEM_END() + +#endif From 9215f2662f929d99e15fffa0d9514ce2ff823190 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 04:41:01 +0200 Subject: [PATCH 002/211] Add missing memory alloc profiling for virtual pages --- Source/Engine/Platform/Win32/Win32Platform.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Platform/Win32/Win32Platform.cpp b/Source/Engine/Platform/Win32/Win32Platform.cpp index a61d7c36d..6b5eaa2ff 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.cpp +++ b/Source/Engine/Platform/Win32/Win32Platform.cpp @@ -283,14 +283,23 @@ void* Win32Platform::AllocatePages(uint64 numPages, uint64 pageSize) { const uint64 numBytes = numPages * pageSize; #if PLATFORM_UWP - return VirtualAllocFromApp(nullptr, (SIZE_T)numBytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + void* ptr = VirtualAllocFromApp(nullptr, (SIZE_T)numBytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); #else - return VirtualAlloc(nullptr, (SIZE_T)numBytes, MEM_COMMIT | 
MEM_RESERVE, PAGE_READWRITE); + void* ptr = VirtualAlloc(nullptr, (SIZE_T)numBytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); #endif + if (!ptr) + OutOfMemory(); +#if COMPILE_WITH_PROFILER + OnMemoryAlloc(ptr, size); +#endif + return ptr; } void Win32Platform::FreePages(void* ptr) { +#if COMPILE_WITH_PROFILER + OnMemoryFree(ptr); +#endif VirtualFree(ptr, 0, MEM_RELEASE); } From 66dcfafa2ec69142667b4352a47b5ea09532cbff Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 04:42:01 +0200 Subject: [PATCH 003/211] Fix Vulkan descriptor sets pooling --- Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.cpp index 82bbd93d6..10a247a56 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.cpp @@ -292,7 +292,7 @@ DescriptorPoolSetContainerVulkan* DescriptorPoolsManagerVulkan::AcquirePoolSetCo ScopeLock lock(_locker); for (auto* poolSet : _poolSets) { - if (poolSet->Refs == 0 && Engine::FrameCount - poolSet->LastFrameUsed > VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT) + if (poolSet->Refs == 0 && Engine::FrameCount != poolSet->LastFrameUsed) { poolSet->LastFrameUsed = Engine::FrameCount; poolSet->Reset(); From 32bc73610fe4d40071c681d1d36128b798d9401e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 04:45:12 +0200 Subject: [PATCH 004/211] Fix debug command type detection when it's used with argument --- Source/Engine/Debug/DebugCommands.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Debug/DebugCommands.cpp b/Source/Engine/Debug/DebugCommands.cpp index 5d94cf557..fcea938aa 100644 --- a/Source/Engine/Debug/DebugCommands.cpp +++ b/Source/Engine/Debug/DebugCommands.cpp @@ -438,6 +438,8 @@ void DebugCommands::InitAsync() DebugCommands::CommandFlags 
DebugCommands::GetCommandFlags(StringView command) { CommandFlags result = CommandFlags::None; + if (command.FindLast(' ') != -1) + command = command.Left(command.Find(' ')); // TODO: fix missing string handle on 1st command execution (command gets invalid after InitCommands due to dotnet GC or dotnet interop handles flush) String commandCopy = command; command = commandCopy; From c639a3103cc4de82c2222fd781ddd45d323d3164 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 04:47:01 +0200 Subject: [PATCH 005/211] Add memory profiling events to the main areas of the engine --- Source/Editor/Cooker/GameCooker.cpp | 8 +++++++ .../CustomEditors/CustomEditorsUtil.cpp | 5 ++++ Source/Editor/Editor.cpp | 10 ++++++++ Source/Editor/ProjectInfo.cpp | 2 ++ Source/Editor/Scripting/CodeEditor.cpp | 3 +++ Source/Editor/Scripting/ScriptsBuilder.cpp | 5 ++++ .../Utilities/ViewportIconsRenderer.cpp | 2 ++ Source/Engine/AI/Behavior.cpp | 3 +++ Source/Engine/AI/BehaviorKnowledge.cpp | 2 ++ Source/Engine/AI/BehaviorTree.cpp | 2 ++ Source/Engine/Animations/Animations.cpp | 4 ++++ .../SceneAnimations/SceneAnimation.cpp | 2 ++ .../SceneAnimations/SceneAnimationPlayer.cpp | 3 +++ Source/Engine/Audio/Audio.cpp | 3 +++ Source/Engine/Audio/AudioClip.cpp | 2 ++ .../Engine/Audio/OpenAL/AudioBackendOAL.cpp | 6 +++++ Source/Engine/Content/Asset.cpp | 1 + Source/Engine/Content/Assets/Animation.cpp | 2 ++ .../Engine/Content/Assets/AnimationGraph.cpp | 3 +++ .../Content/Assets/AnimationGraphFunction.cpp | 2 ++ Source/Engine/Content/Assets/Material.cpp | 4 ++++ Source/Engine/Content/Assets/Model.cpp | 6 +++++ Source/Engine/Content/Assets/ModelBase.cpp | 4 ++++ Source/Engine/Content/Assets/SkinnedModel.cpp | 7 ++++++ Source/Engine/Content/Assets/VisualScript.cpp | 4 ++++ Source/Engine/Content/BinaryAsset.cpp | 2 ++ Source/Engine/Content/Content.cpp | 19 ++++++++++++++- Source/Engine/Content/JsonAsset.cpp | 6 +++++ .../Content/Loading/Tasks/LoadAssetTask.h | 2 ++ 
Source/Engine/Content/Storage/FlaxChunk.h | 7 +----- Source/Engine/Content/Storage/FlaxStorage.cpp | 23 ++++++++++++------- .../AssetsImportingManager.cpp | 2 ++ Source/Engine/Core/Log.cpp | 4 ++++ Source/Engine/Debug/DebugDraw.cpp | 2 ++ Source/Engine/Graphics/GPUBuffer.cpp | 5 ++++ Source/Engine/Graphics/GPUDevice.cpp | 1 + Source/Engine/Graphics/Graphics.cpp | 2 ++ .../Graphics/Materials/MaterialParams.cpp | 2 ++ .../Graphics/Materials/MaterialShader.cpp | 4 ++++ Source/Engine/Graphics/Models/Mesh.cpp | 3 +++ .../Graphics/Models/ModelInstanceEntry.cpp | 3 +++ Source/Engine/Graphics/RenderTask.cpp | 1 + Source/Engine/Graphics/Shaders/GPUShader.cpp | 7 +++++- .../Engine/Graphics/Textures/GPUTexture.cpp | 5 ++++ .../DirectX/DX11/GPUDeviceDX11.cpp | 6 +++++ .../DirectX/DX11/GPUSwapChainDX11.cpp | 4 ++++ .../DirectX/DX12/GPUDeviceDX12.cpp | 6 +++++ .../DirectX/DX12/GPUSwapChainDX12.cpp | 4 ++++ .../DirectX/DX12/UploadBufferDX12.cpp | 4 ++++ .../GraphicsDevice/Null/GPUDeviceNull.cpp | 5 ++++ .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 10 ++++++++ .../GraphicsDevice/Vulkan/GPUShaderVulkan.cpp | 3 +++ .../Vulkan/GPUSwapChainVulkan.cpp | 4 ++++ Source/Engine/Input/Input.cpp | 14 +++++++++++ Source/Engine/Level/Actor.cpp | 4 ++++ Source/Engine/Level/Level.cpp | 9 ++++++++ Source/Engine/Level/Scene/SceneRendering.cpp | 4 ++++ Source/Engine/Localization/CultureInfo.cpp | 5 ++++ Source/Engine/Localization/Localization.cpp | 4 ++++ .../Localization/LocalizedStringTable.cpp | 5 ++++ Source/Engine/Navigation/NavCrowd.cpp | 4 ++++ Source/Engine/Navigation/NavMeshData.cpp | 2 ++ Source/Engine/Navigation/NavMeshRuntime.cpp | 4 ++++ Source/Engine/Navigation/Navigation.cpp | 6 +++++ Source/Engine/Networking/NetworkManager.cpp | 8 +++++++ Source/Engine/Networking/NetworkPeer.cpp | 4 ++++ .../Engine/Networking/NetworkReplicator.cpp | 8 +++++++ Source/Engine/Networking/NetworkStream.cpp | 3 +++ Source/Engine/Particles/ParticleEffect.cpp | 5 ++++ 
Source/Engine/Particles/ParticleEmitter.cpp | 3 +++ .../Particles/ParticleEmitterFunction.cpp | 2 ++ Source/Engine/Particles/ParticleSystem.cpp | 3 +++ Source/Engine/Particles/Particles.cpp | 9 ++++++++ Source/Engine/Particles/ParticlesData.cpp | 2 ++ Source/Engine/Physics/Actors/Cloth.cpp | 8 +++++++ .../Engine/Physics/Actors/SplineRopeBody.cpp | 2 ++ Source/Engine/Physics/CollisionCooking.cpp | 2 ++ Source/Engine/Physics/CollisionData.cpp | 9 ++++++++ .../Physics/PhysX/PhysicsBackendPhysX.cpp | 6 +++++ .../PhysX/SimulationEventCallbackPhysX.cpp | 4 ++++ Source/Engine/Physics/Physics.cpp | 6 +++++ Source/Engine/Platform/Base/WindowBase.cpp | 20 ++++++++++++++++ Source/Engine/Platform/Unix/UnixThread.cpp | 6 ++++- Source/Engine/Profiler/ProfilingTools.cpp | 1 + Source/Engine/Render2D/Render2D.cpp | 2 ++ Source/Engine/Renderer/RenderList.cpp | 3 +++ Source/Engine/Renderer/Renderer.cpp | 3 +++ Source/Engine/Scripting/BinaryModule.cpp | 18 +++++++++++++++ Source/Engine/Scripting/ManagedCLR/MCore.cpp | 2 ++ .../Scripting/Plugins/PluginManager.cpp | 7 ++++++ Source/Engine/Scripting/Runtime/DotNet.cpp | 20 ++++++++++++++++ Source/Engine/Scripting/Scripting.cpp | 11 +++++++++ Source/Engine/Scripting/ScriptingObject.cpp | 4 ++++ Source/Engine/Streaming/Streaming.cpp | 5 ++++ Source/Engine/Threading/TaskGraph.cpp | 3 +++ Source/Engine/UI/TextRender.cpp | 5 ++++ Source/Engine/UI/UICanvas.cpp | 4 ++++ Source/Engine/UI/UIControl.cpp | 4 ++++ Source/Engine/Video/AV/VideoBackendAV.cpp | 4 ++++ Source/Engine/Video/MF/VideoBackendMF.cpp | 5 ++++ Source/Engine/Video/Video.cpp | 6 +++++ 101 files changed, 502 insertions(+), 17 deletions(-) diff --git a/Source/Editor/Cooker/GameCooker.cpp b/Source/Editor/Cooker/GameCooker.cpp index ea64bbae6..db8ded610 100644 --- a/Source/Editor/Cooker/GameCooker.cpp +++ b/Source/Editor/Cooker/GameCooker.cpp @@ -30,6 +30,7 @@ #include "Engine/Scripting/ManagedCLR/MAssembly.h" #include "Engine/Content/JsonAsset.h" #include 
"Engine/Content/AssetReference.h" +#include "Engine/Profiler/ProfilerMemory.h" #if PLATFORM_TOOLS_WINDOWS #include "Platform/Windows/WindowsPlatformTools.h" #include "Engine/Platform/Windows/WindowsPlatformSettings.h" @@ -380,6 +381,7 @@ bool GameCooker::IsCancelRequested() PlatformTools* GameCooker::GetTools(BuildPlatform platform) { + PROFILE_MEM(Editor); PlatformTools* result = nullptr; if (!Tools.TryGet(platform, result)) { @@ -471,6 +473,7 @@ bool GameCooker::Build(BuildPlatform platform, BuildConfiguration configuration, LOG(Error, "Build platform {0} is not supported.", ::ToString(platform)); return true; } + PROFILE_MEM(Editor); // Setup CancelFlag = 0; @@ -624,6 +627,7 @@ void GameCookerImpl::ReportProgress(const String& info, float totalProgress) void GameCookerImpl::OnCollectAssets(HashSet& assets) { + PROFILE_MEM(Editor); if (Internal_OnCollectAssets == nullptr) { auto c = GameCooker::GetStaticClass(); @@ -651,6 +655,7 @@ void GameCookerImpl::OnCollectAssets(HashSet& assets) bool GameCookerImpl::Build() { + PROFILE_MEM(Editor); CookingData& data = *Data; LOG(Info, "Starting Game Cooker..."); LOG(Info, "Platform: {0}, Configuration: {2}, Options: {1}", ::ToString(data.Platform), (int32)data.Options, ::ToString(data.Configuration)); @@ -778,6 +783,8 @@ int32 GameCookerImpl::ThreadFunction() bool GameCookerService::Init() { + PROFILE_MEM(Editor); + auto editorAssembly = ((NativeBinaryModule*)GetBinaryModuleFlaxEngine())->Assembly; editorAssembly->Unloading.Bind(OnEditorAssemblyUnloading); GameCooker::OnCollectAssets.Bind(OnCollectAssets); @@ -789,6 +796,7 @@ void GameCookerService::Update() { if (IsRunning) { + PROFILE_MEM(Editor); ScopeLock lock(ProgressLocker); if (ProgressMsg.HasChars()) diff --git a/Source/Editor/CustomEditors/CustomEditorsUtil.cpp b/Source/Editor/CustomEditors/CustomEditorsUtil.cpp index e5f2da5e4..3549cb866 100644 --- a/Source/Editor/CustomEditors/CustomEditorsUtil.cpp +++ b/Source/Editor/CustomEditors/CustomEditorsUtil.cpp @@ -6,6 
+6,8 @@ #include "Engine/Core/Types/TimeSpan.h" #include "Engine/Core/Types/Stopwatch.h" #include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Engine/EngineService.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/BinaryModule.h" @@ -69,6 +71,7 @@ MTypeObject* CustomEditorsUtil::GetCustomEditor(MTypeObject* refType) bool CustomEditorsUtilService::Init() { + PROFILE_MEM(Editor); TRACK_ASSEMBLY(((NativeBinaryModule*)GetBinaryModuleFlaxEngine())->Assembly); Scripting::BinaryModuleLoaded.Bind(&OnBinaryModuleLoaded); @@ -77,6 +80,8 @@ bool CustomEditorsUtilService::Init() void OnAssemblyLoaded(MAssembly* assembly) { + PROFILE_CPU_NAMED("CustomEditors.OnAssemblyLoaded"); + PROFILE_MEM(Editor); Stopwatch stopwatch; // Prepare FlaxEngine diff --git a/Source/Editor/Editor.cpp b/Source/Editor/Editor.cpp index 04c6eee71..99b7f1522 100644 --- a/Source/Editor/Editor.cpp +++ b/Source/Editor/Editor.cpp @@ -20,6 +20,7 @@ #include "Engine/Engine/Engine.h" #include "Engine/ShadowsOfMordor/Builder.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "FlaxEngine.Gen.h" #if PLATFORM_LINUX #include "Engine/Tools/TextureTool/TextureTool.h" @@ -47,6 +48,7 @@ void Editor::CloseSplashScreen() bool Editor::CheckProjectUpgrade() { + PROFILE_MEM(Editor); const auto versionFilePath = Globals::ProjectCacheFolder / TEXT("version"); // Load version cache file @@ -366,6 +368,8 @@ bool Editor::BackupProject() int32 Editor::LoadProduct() { + PROFILE_MEM(Editor); + // Flax Editor product Globals::ProductName = TEXT("Flax Editor"); Globals::CompanyName = TEXT("Flax"); @@ -626,6 +630,7 @@ int32 Editor::LoadProduct() Window* Editor::CreateMainWindow() { + PROFILE_MEM(Editor); Window* window = Managed->GetMainWindow(); #if PLATFORM_LINUX @@ -662,6 +667,7 @@ bool Editor::Init() return true; } PROFILE_CPU(); + PROFILE_MEM(Editor); // If during 
last lightmaps baking engine crashed we could try to restore the progress ShadowsOfMordor::Builder::Instance()->CheckIfRestoreState(); @@ -693,11 +699,13 @@ bool Editor::Init() void Editor::BeforeRun() { + PROFILE_MEM(Editor); Managed->BeforeRun(); } void Editor::BeforeExit() { + PROFILE_MEM(Editor); CloseSplashScreen(); Managed->Exit(); @@ -708,6 +716,8 @@ void Editor::BeforeExit() void EditorImpl::OnUpdate() { + PROFILE_MEM(Editor); + // Update c# editor Editor::Managed->Update(); diff --git a/Source/Editor/ProjectInfo.cpp b/Source/Editor/ProjectInfo.cpp index 6f2743fbf..bc9c1ecdb 100644 --- a/Source/Editor/ProjectInfo.cpp +++ b/Source/Editor/ProjectInfo.cpp @@ -6,6 +6,7 @@ #include "Engine/Core/Log.h" #include "Engine/Engine/Globals.h" #include "Engine/Core/Math/Quaternion.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/JsonWriters.h" #include "Engine/Serialization/JsonTools.h" #include @@ -327,6 +328,7 @@ ProjectInfo* ProjectInfo::Load(const String& path) } // Load + PROFILE_MEM(Editor); auto project = New(); if (project->LoadProject(path)) { diff --git a/Source/Editor/Scripting/CodeEditor.cpp b/Source/Editor/Scripting/CodeEditor.cpp index c78c9f48a..b372da189 100644 --- a/Source/Editor/Scripting/CodeEditor.cpp +++ b/Source/Editor/Scripting/CodeEditor.cpp @@ -13,6 +13,7 @@ #include "Engine/Engine/EngineService.h" #include "Engine/Platform/Thread.h" #include "Engine/Threading/IRunnable.h" +#include "Engine/Profiler/ProfilerMemory.h" void OnAsyncBegin(Thread* thread); void OnAsyncEnd(); @@ -232,6 +233,8 @@ void OnAsyncEnd() bool CodeEditingManagerService::Init() { + PROFILE_MEM(Editor); + // Try get editors #if USE_VISUAL_STUDIO_DTE VisualStudioEditor::FindEditors(&CodeEditors); diff --git a/Source/Editor/Scripting/ScriptsBuilder.cpp b/Source/Editor/Scripting/ScriptsBuilder.cpp index 1bc67f79e..84df897e6 100644 --- a/Source/Editor/Scripting/ScriptsBuilder.cpp +++ b/Source/Editor/Scripting/ScriptsBuilder.cpp @@ -23,6 +23,7 @@ 
#include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/Script.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Level/Level.h" #include "FlaxEngine.Gen.h" @@ -413,6 +414,7 @@ void ScriptsBuilder::GetBinariesConfiguration(const Char*& target, const Char*& bool ScriptsBuilderImpl::compileGameScriptsAsyncInner() { + PROFILE_MEM(Editor); LOG(Info, "Starting scripts compilation..."); CallEvent(EventType::CompileStarted); @@ -519,6 +521,8 @@ void ScriptsBuilderImpl::onEditorAssemblyUnloading(MAssembly* assembly) bool ScriptsBuilderImpl::compileGameScriptsAsync() { + PROFILE_MEM(Editor); + // Start { ScopeLock scopeLock(_locker); @@ -562,6 +566,7 @@ bool ScriptsBuilderService::Init() // Check flag if (_isInited) return false; + PROFILE_MEM(Editor); _isInited = true; // Link for Editor assembly unload event to clear cached Internal_OnCompilationEnd to prevent errors diff --git a/Source/Editor/Utilities/ViewportIconsRenderer.cpp b/Source/Editor/Utilities/ViewportIconsRenderer.cpp index 2562c9bf5..6bfdbb5f8 100644 --- a/Source/Editor/Utilities/ViewportIconsRenderer.cpp +++ b/Source/Editor/Utilities/ViewportIconsRenderer.cpp @@ -5,6 +5,7 @@ #include "Engine/Content/Assets/Model.h" #include "Engine/Content/Assets/MaterialInstance.h" #include "Engine/Content/Content.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Level/Level.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/Actors/PointLight.h" @@ -253,6 +254,7 @@ void ViewportIconsRendererService::DrawIcons(RenderContext& renderContext, Actor bool ViewportIconsRendererService::Init() { + PROFILE_MEM(Editor); QuadModel = Content::LoadAsyncInternal(TEXT("Engine/Models/Quad")); #define INIT(type, path) \ InstanceBuffers[static_cast(IconTypes::type)].Setup(1); \ diff --git a/Source/Engine/AI/Behavior.cpp b/Source/Engine/AI/Behavior.cpp index e70ca50a4..7e647e945 100644 --- a/Source/Engine/AI/Behavior.cpp +++ 
b/Source/Engine/AI/Behavior.cpp @@ -7,6 +7,7 @@ #include "Engine/Engine/Time.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/TaskGraph.h" class BehaviorSystem : public TaskGraphSystem @@ -38,6 +39,7 @@ TaskGraphSystem* Behavior::System = nullptr; void BehaviorSystem::Job(int32 index) { PROFILE_CPU_NAMED("Behavior.Job"); + PROFILE_MEM(AI); Behaviors[index]->UpdateAsync(); } @@ -57,6 +59,7 @@ void BehaviorSystem::Execute(TaskGraph* graph) bool BehaviorService::Init() { + PROFILE_MEM(AI); Behavior::System = New(); Engine::UpdateGraph->AddSystem(Behavior::System); return false; diff --git a/Source/Engine/AI/BehaviorKnowledge.cpp b/Source/Engine/AI/BehaviorKnowledge.cpp index c71bb0724..4d8e2eed9 100644 --- a/Source/Engine/AI/BehaviorKnowledge.cpp +++ b/Source/Engine/AI/BehaviorKnowledge.cpp @@ -4,6 +4,7 @@ #include "BehaviorTree.h" #include "BehaviorTreeNodes.h" #include "BehaviorKnowledgeSelector.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/BinaryModule.h" #include "Engine/Scripting/ManagedCLR/MProperty.h" @@ -144,6 +145,7 @@ BehaviorKnowledge::~BehaviorKnowledge() void BehaviorKnowledge::InitMemory(BehaviorTree* tree) { + PROFILE_MEM(AI); if (Tree) FreeMemory(); if (!tree) diff --git a/Source/Engine/AI/BehaviorTree.cpp b/Source/Engine/AI/BehaviorTree.cpp index 64491b10e..ae58e983a 100644 --- a/Source/Engine/AI/BehaviorTree.cpp +++ b/Source/Engine/AI/BehaviorTree.cpp @@ -10,6 +10,7 @@ #include "Engine/Serialization/JsonSerializer.h" #include "Engine/Serialization/MemoryReadStream.h" #include "Engine/Threading/Threading.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "FlaxEngine.Gen.h" #if USE_EDITOR #include "Engine/Level/Level.h" @@ -275,6 +276,7 @@ Asset::LoadResult BehaviorTree::load() if (surfaceChunk == nullptr) return LoadResult::MissingDataChunk; MemoryReadStream 
surfaceStream(surfaceChunk->Get(), surfaceChunk->Size()); + PROFILE_MEM(AI); if (Graph.Load(&surfaceStream, true)) { LOG(Warning, "Failed to load graph \'{0}\'", ToString()); diff --git a/Source/Engine/Animations/Animations.cpp b/Source/Engine/Animations/Animations.cpp index 0a5a129e1..cc3be45b0 100644 --- a/Source/Engine/Animations/Animations.cpp +++ b/Source/Engine/Animations/Animations.cpp @@ -4,6 +4,7 @@ #include "AnimEvent.h" #include "Engine/Engine/Engine.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Level/Actors/AnimatedModel.h" #include "Engine/Engine/Time.h" #include "Engine/Engine/EngineService.h" @@ -69,6 +70,7 @@ AnimContinuousEvent::AnimContinuousEvent(const SpawnParams& params) bool AnimationsService::Init() { + PROFILE_MEM(Animations); Animations::System = New(); Engine::UpdateGraph->AddSystem(Animations::System); return false; @@ -83,6 +85,7 @@ void AnimationsService::Dispose() void AnimationsSystem::Job(int32 index) { PROFILE_CPU_NAMED("Animations.Job"); + PROFILE_MEM(Animations); auto animatedModel = AnimationManagerInstance.UpdateList[index]; if (CanUpdateModel(animatedModel)) { @@ -147,6 +150,7 @@ void AnimationsSystem::PostExecute(TaskGraph* graph) if (!Active) return; PROFILE_CPU_NAMED("Animations.PostExecute"); + PROFILE_MEM(Animations); // Update gameplay for (int32 index = 0; index < AnimationManagerInstance.UpdateList.Count(); index++) diff --git a/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp b/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp index b1d51a89a..1a6c6fbee 100644 --- a/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp +++ b/Source/Engine/Animations/SceneAnimations/SceneAnimation.cpp @@ -7,6 +7,7 @@ #include "Engine/Content/Content.h" #include "Engine/Content/Deprecated.h" #include "Engine/Serialization/MemoryReadStream.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Audio/AudioClip.h" #include 
"Engine/Graphics/PostProcessSettings.h" #if USE_EDITOR @@ -249,6 +250,7 @@ bool SceneAnimation::Save(const StringView& path) Asset::LoadResult SceneAnimation::load() { TrackStatesCount = 0; + PROFILE_MEM(AnimationsData); // Get the data chunk if (LoadChunk(0)) diff --git a/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp b/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp index eb3bf966e..18234c7ef 100644 --- a/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp +++ b/Source/Engine/Animations/SceneAnimations/SceneAnimationPlayer.cpp @@ -12,6 +12,7 @@ #include "Engine/Audio/AudioSource.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Renderer/RenderList.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/Script.h" #include "Engine/Scripting/ManagedCLR/MException.h" @@ -151,6 +152,7 @@ void SceneAnimationPlayer::Tick(float dt) SceneAnimation* anim = Animation.Get(); if (!anim || !anim->IsLoaded()) return; + PROFILE_MEM(Animations); // Setup state if (_tracks.Count() != anim->TrackStatesCount) @@ -229,6 +231,7 @@ void SceneAnimationPlayer::MapTrack(const StringView& from, const Guid& to) SceneAnimation* anim = Animation.Get(); if (!anim || !anim->IsLoaded()) return; + PROFILE_MEM(Animations); for (int32 j = 0; j < anim->Tracks.Count(); j++) { const auto& track = anim->Tracks[j]; diff --git a/Source/Engine/Audio/Audio.cpp b/Source/Engine/Audio/Audio.cpp index e19efabf1..5574919c4 100644 --- a/Source/Engine/Audio/Audio.cpp +++ b/Source/Engine/Audio/Audio.cpp @@ -8,6 +8,7 @@ #include "Engine/Scripting/BinaryModule.h" #include "Engine/Level/Level.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Engine/Engine.h" #include "Engine/Engine/CommandLine.h" #include "Engine/Core/Log.h" @@ -151,6 +152,7 @@ void Audio::SetEnableHRTF(bool value) bool AudioService::Init() { PROFILE_CPU_NAMED("Audio.Init"); + 
PROFILE_MEM(Audio); const auto settings = AudioSettings::Get(); const bool mute = CommandLine::Options.Mute.IsTrue() || settings->DisableAudio; @@ -211,6 +213,7 @@ bool AudioService::Init() void AudioService::Update() { PROFILE_CPU_NAMED("Audio.Update"); + PROFILE_MEM(Audio); // Update the master volume float masterVolume = MasterVolume; diff --git a/Source/Engine/Audio/AudioClip.cpp b/Source/Engine/Audio/AudioClip.cpp index 8835cddc7..2ac9d218c 100644 --- a/Source/Engine/Audio/AudioClip.cpp +++ b/Source/Engine/Audio/AudioClip.cpp @@ -10,6 +10,7 @@ #include "Engine/Scripting/ManagedCLR/MUtils.h" #include "Engine/Streaming/StreamingGroup.h" #include "Engine/Serialization/MemoryReadStream.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Tools/AudioTool/OggVorbisDecoder.h" #include "Engine/Tools/AudioTool/AudioTool.h" #include "Engine/Threading/Threading.h" @@ -318,6 +319,7 @@ bool AudioClip::init(AssetInitData& initData) Asset::LoadResult AudioClip::load() { + PROFILE_MEM(Audio); #if !COMPILE_WITH_OGG_VORBIS if (AudioHeader.Format == AudioFormat::Vorbis) { diff --git a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp index dfdca18f6..86e828028 100644 --- a/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp +++ b/Source/Engine/Audio/OpenAL/AudioBackendOAL.cpp @@ -9,6 +9,7 @@ #include "Engine/Tools/AudioTool/AudioTool.h" #include "Engine/Engine/Units.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Audio/Audio.h" #include "Engine/Audio/AudioListener.h" #include "Engine/Audio/AudioSource.h" @@ -321,6 +322,8 @@ void AudioBackendOAL::Listener_ReinitializeAll() uint32 AudioBackendOAL::Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) { + PROFILE_MEM(Audio); + uint32 sourceID = 0; 
ALC::Source::Rebuild(sourceID, position, orientation, volume, pitch, pan, loop, spatial, attenuation, minDistance, doppler); @@ -516,6 +519,7 @@ void AudioBackendOAL::Buffer_Delete(uint32 bufferID) void AudioBackendOAL::Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) { PROFILE_CPU(); + PROFILE_MEM(Audio); // Pick the format for the audio data (it might not be supported natively) ALenum format = GetOpenALBufferFormat(info.NumChannels, info.BitDepth); @@ -625,6 +629,8 @@ AudioBackend::FeatureFlags AudioBackendOAL::Base_Features() void AudioBackendOAL::Base_OnActiveDeviceChanged() { + PROFILE_MEM(Audio); + // Cleanup Array states; states.EnsureCapacity(Audio::Sources.Count()); diff --git a/Source/Engine/Content/Asset.cpp b/Source/Engine/Content/Asset.cpp index 0d5391482..3a6ed648e 100644 --- a/Source/Engine/Content/Asset.cpp +++ b/Source/Engine/Content/Asset.cpp @@ -526,6 +526,7 @@ ContentLoadTask* Asset::createLoadingTask() void Asset::startLoading() { + PROFILE_MEM(ContentAssets); ASSERT(!IsLoaded()); ASSERT(Platform::AtomicRead(&_loadingTask) == 0); auto loadingTask = createLoadingTask(); diff --git a/Source/Engine/Content/Assets/Animation.cpp b/Source/Engine/Content/Assets/Animation.cpp index 54c4d898e..8558e601d 100644 --- a/Source/Engine/Content/Assets/Animation.cpp +++ b/Source/Engine/Content/Assets/Animation.cpp @@ -9,6 +9,7 @@ #include "Engine/Animations/Animations.h" #include "Engine/Animations/SceneAnimations/SceneAnimation.h" #include "Engine/Scripting/Scripting.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" #include "Engine/Serialization/MemoryReadStream.h" #if USE_EDITOR @@ -598,6 +599,7 @@ void Animation::OnScriptingDispose() Asset::LoadResult Animation::load() { + PROFILE_MEM(AnimationsData); ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); // Get stream with animations data diff --git a/Source/Engine/Content/Assets/AnimationGraph.cpp 
b/Source/Engine/Content/Assets/AnimationGraph.cpp index 3e4a96ea2..acab48b2f 100644 --- a/Source/Engine/Content/Assets/AnimationGraph.cpp +++ b/Source/Engine/Content/Assets/AnimationGraph.cpp @@ -9,6 +9,7 @@ #include "Engine/Core/Types/DataContainer.h" #include "Engine/Serialization/MemoryReadStream.h" #include "Engine/Serialization/MemoryWriteStream.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Content/Factories/BinaryAssetFactory.h" #include "Engine/Animations/Animations.h" #include "Engine/Threading/Threading.h" @@ -25,6 +26,7 @@ AnimationGraph::AnimationGraph(const SpawnParams& params, const AssetInfo* info) Asset::LoadResult AnimationGraph::load() { + PROFILE_MEM(AnimationsData); ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); // Get stream with graph data @@ -83,6 +85,7 @@ bool AnimationGraph::InitAsAnimation(SkinnedModel* baseModel, Animation* anim, b Log::ArgumentNullException(); return true; } + PROFILE_MEM(AnimationsData); ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); // Create Graph data diff --git a/Source/Engine/Content/Assets/AnimationGraphFunction.cpp b/Source/Engine/Content/Assets/AnimationGraphFunction.cpp index 76c84977a..3e8ce62e8 100644 --- a/Source/Engine/Content/Assets/AnimationGraphFunction.cpp +++ b/Source/Engine/Content/Assets/AnimationGraphFunction.cpp @@ -8,6 +8,7 @@ #include "Engine/Serialization/MemoryWriteStream.h" #endif #include "Engine/Animations/Animations.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Content/Factories/BinaryAssetFactory.h" #include "Engine/Threading/Threading.h" @@ -20,6 +21,7 @@ AnimationGraphFunction::AnimationGraphFunction(const SpawnParams& params, const Asset::LoadResult AnimationGraphFunction::load() { + PROFILE_MEM(AnimationsData); ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); // Get graph data from chunk diff --git a/Source/Engine/Content/Assets/Material.cpp 
b/Source/Engine/Content/Assets/Material.cpp index 85e63fc18..ce457c862 100644 --- a/Source/Engine/Content/Assets/Material.cpp +++ b/Source/Engine/Content/Assets/Material.cpp @@ -165,9 +165,13 @@ Asset::LoadResult Material::load() MaterialGenerator generator; generator.Error.Bind(&OnGeneratorError); if (_shaderHeader.Material.GraphVersion != MATERIAL_GRAPH_VERSION) + { LOG(Info, "Converting material \'{0}\', from version {1} to {2}...", name, _shaderHeader.Material.GraphVersion, MATERIAL_GRAPH_VERSION); + } else + { LOG(Info, "Updating material \'{0}\'...", name); + } // Load or create material surface MaterialLayer* layer; diff --git a/Source/Engine/Content/Assets/Model.cpp b/Source/Engine/Content/Assets/Model.cpp index 1f5768e6c..10c471f8c 100644 --- a/Source/Engine/Content/Assets/Model.cpp +++ b/Source/Engine/Content/Assets/Model.cpp @@ -18,6 +18,7 @@ #include "Engine/Graphics/Models/MeshDeformation.h" #include "Engine/Graphics/Textures/GPUTexture.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Renderer/DrawCall.h" #include "Engine/Threading/Threading.h" #include "Engine/Tools/ModelTool/ModelTool.h" @@ -304,6 +305,7 @@ bool Model::Init(const Span& meshesCountPerLod) Log::ArgumentOutOfRangeException(); return true; } + PROFILE_MEM(GraphicsMeshes); // Dispose previous data and disable streaming (will start data uploading tasks manually) StopStreaming(); @@ -343,6 +345,7 @@ bool Model::Init(const Span& meshesCountPerLod) bool Model::LoadHeader(ReadStream& stream, byte& headerVersion) { + PROFILE_MEM(GraphicsMeshes); if (ModelBase::LoadHeader(stream, headerVersion)) return true; @@ -509,6 +512,7 @@ bool Model::Save(bool withMeshDataFromGpu, Function& getChunk void Model::SetupMaterialSlots(int32 slotsCount) { + PROFILE_MEM(GraphicsMeshes); ModelBase::SetupMaterialSlots(slotsCount); // Adjust meshes indices for slots @@ -584,6 +588,8 @@ int32 Model::GetAllocatedResidency() const Asset::LoadResult Model::load() 
{ + PROFILE_MEM(GraphicsMeshes); + // Get header chunk auto chunk0 = GetChunk(0); if (chunk0 == nullptr || chunk0->IsMissing()) diff --git a/Source/Engine/Content/Assets/ModelBase.cpp b/Source/Engine/Content/Assets/ModelBase.cpp index 243e429a5..5f2a89328 100644 --- a/Source/Engine/Content/Assets/ModelBase.cpp +++ b/Source/Engine/Content/Assets/ModelBase.cpp @@ -5,6 +5,7 @@ #include "Engine/Core/Math/Transform.h" #include "Engine/Content/WeakAssetReference.h" #include "Engine/Serialization/MemoryReadStream.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Graphics/Config.h" #include "Engine/Graphics/Models/MeshBase.h" #include "Engine/Graphics/Models/MeshDeformation.h" @@ -51,6 +52,7 @@ public: AssetReference model = _model.Get(); if (model == nullptr) return true; + PROFILE_MEM(GraphicsMeshes); // Get data BytesContainer data; @@ -334,6 +336,8 @@ bool ModelBase::LoadHeader(ReadStream& stream, byte& headerVersion) bool ModelBase::LoadMesh(MemoryReadStream& stream, byte meshVersion, MeshBase* mesh, MeshData* dataIfReadOnly) { + PROFILE_MEM(GraphicsMeshes); + // Load descriptor static_assert(MODEL_MESH_VERSION == 2, "Update code"); uint32 vertices, triangles; diff --git a/Source/Engine/Content/Assets/SkinnedModel.cpp b/Source/Engine/Content/Assets/SkinnedModel.cpp index 5271df723..7c51f4332 100644 --- a/Source/Engine/Content/Assets/SkinnedModel.cpp +++ b/Source/Engine/Content/Assets/SkinnedModel.cpp @@ -18,6 +18,7 @@ #include "Engine/Content/Upgraders/SkinnedModelAssetUpgrader.h" #include "Engine/Debug/Exceptions/ArgumentOutOfRangeException.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Renderer/DrawCall.h" #if USE_EDITOR #include "Engine/Graphics/Models/ModelData.h" @@ -458,6 +459,7 @@ bool SkinnedModel::Init(const Span& meshesCountPerLod) Log::ArgumentOutOfRangeException(); return true; } + PROFILE_MEM(GraphicsMeshes); // Dispose previous data and disable streaming (will start data uploading 
tasks manually) StopStreaming(); @@ -501,6 +503,7 @@ void BlendShape::LoadHeader(ReadStream& stream, byte headerVersion) void BlendShape::Load(ReadStream& stream, byte meshVersion) { + PROFILE_MEM(GraphicsMeshes); UseNormals = stream.ReadBool(); stream.ReadUint32(&MinVertexIndex); stream.ReadUint32(&MaxVertexIndex); @@ -531,6 +534,7 @@ void BlendShape::Save(WriteStream& stream) const bool SkinnedModel::LoadMesh(MemoryReadStream& stream, byte meshVersion, MeshBase* mesh, MeshData* dataIfReadOnly) { + PROFILE_MEM(GraphicsMeshes); if (ModelBase::LoadMesh(stream, meshVersion, mesh, dataIfReadOnly)) return true; static_assert(MODEL_MESH_VERSION == 2, "Update code"); @@ -560,6 +564,7 @@ bool SkinnedModel::LoadMesh(MemoryReadStream& stream, byte meshVersion, MeshBase bool SkinnedModel::LoadHeader(ReadStream& stream, byte& headerVersion) { + PROFILE_MEM(GraphicsMeshes); if (ModelBase::LoadHeader(stream, headerVersion)) return true; static_assert(MODEL_HEADER_VERSION == 2, "Update code"); @@ -861,6 +866,7 @@ uint64 SkinnedModel::GetMemoryUsage() const void SkinnedModel::SetupMaterialSlots(int32 slotsCount) { + PROFILE_MEM(GraphicsMeshes); ModelBase::SetupMaterialSlots(slotsCount); // Adjust meshes indices for slots @@ -954,6 +960,7 @@ Asset::LoadResult SkinnedModel::load() if (chunk0 == nullptr || chunk0->IsMissing()) return LoadResult::MissingDataChunk; MemoryReadStream headerStream(chunk0->Get(), chunk0->Size()); + PROFILE_MEM(GraphicsMeshes); // Load asset data (anything but mesh contents that use streaming) byte headerVersion; diff --git a/Source/Engine/Content/Assets/VisualScript.cpp b/Source/Engine/Content/Assets/VisualScript.cpp index 66d95ca87..a6d83919c 100644 --- a/Source/Engine/Content/Assets/VisualScript.cpp +++ b/Source/Engine/Content/Assets/VisualScript.cpp @@ -1900,9 +1900,13 @@ bool VisualScriptingBinaryModule::InvokeMethod(void* method, const Variant& inst if (!instanceObject || instanceObject->GetTypeHandle() != vsMethod->Script->GetScriptingType()) { if 
(!instanceObject) + { LOG(Error, "Failed to call method '{0}.{1}' (args count: {2}) without object instance", String(vsMethod->Script->GetScriptTypeName()), String(vsMethod->Name), vsMethod->ParamNames.Count()); + } else + { LOG(Error, "Failed to call method '{0}.{1}' (args count: {2}) with invalid object instance of type '{3}'", String(vsMethod->Script->GetScriptTypeName()), String(vsMethod->Name), vsMethod->ParamNames.Count(), String(instanceObject->GetType().Fullname)); + } return true; } } diff --git a/Source/Engine/Content/BinaryAsset.cpp b/Source/Engine/Content/BinaryAsset.cpp index 76980f3d6..9e7e51113 100644 --- a/Source/Engine/Content/BinaryAsset.cpp +++ b/Source/Engine/Content/BinaryAsset.cpp @@ -10,6 +10,7 @@ #include "Engine/Serialization/JsonTools.h" #include "Engine/Debug/Exceptions/JsonParseException.h" #include "Engine/Threading/ThreadPoolTask.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_EDITOR #include "Engine/Platform/FileSystem.h" #include "Engine/Threading/Threading.h" @@ -527,6 +528,7 @@ protected: auto storage = ref->Storage; auto factory = (BinaryAssetFactoryBase*)Content::GetAssetFactory(ref->GetTypeName()); ASSERT(factory); + PROFILE_MEM(ContentAssets); // Here we should open storage and extract AssetInitData // This would also allow to convert/upgrade data diff --git a/Source/Engine/Content/Content.cpp b/Source/Engine/Content/Content.cpp index a6971e875..915f48140 100644 --- a/Source/Engine/Content/Content.cpp +++ b/Source/Engine/Content/Content.cpp @@ -28,6 +28,7 @@ #include "Engine/Engine/Globals.h" #include "Engine/Level/Types.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/ManagedCLR/MClass.h" #include "Engine/Scripting/Scripting.h" #if USE_EDITOR @@ -117,6 +118,8 @@ ContentService ContentServiceInstance; bool ContentService::Init() { + PROFILE_MEM(Content); + // Load assets registry Cache.Init(); @@ -159,6 +162,7 @@ void ContentService::Update() void 
ContentService::LateUpdate() { PROFILE_CPU(); + PROFILE_MEM(Content); // Check if need to perform an update of unloading assets const TimeSpan timeNow = Time::Update.UnscaledTime; @@ -324,6 +328,7 @@ String LoadingThread::ToString() const int32 LoadingThread::Run() { + PROFILE_MEM(Content); #if USE_EDITOR && PLATFORM_WINDOWS // Initialize COM // TODO: maybe add sth to Thread::Create to indicate that thread will use COM stuff @@ -416,6 +421,7 @@ bool Content::GetAssetInfo(const Guid& id, AssetInfo& info) if (Cache.FindAsset(id, info)) return true; PROFILE_CPU(); + PROFILE_MEM(Content); // Locking injects some stalls but we need to make it safe (only one thread can pass though it at once) ScopeLock lock(WorkspaceDiscoveryLocker); @@ -465,6 +471,7 @@ bool Content::GetAssetInfo(const StringView& path, AssetInfo& info) if (!FileSystem::FileExists(path)) return false; PROFILE_CPU(); + PROFILE_MEM(Content); const auto extension = FileSystem::GetExtension(path).ToLower(); @@ -593,6 +600,7 @@ Asset* Content::LoadAsyncInternal(const StringView& internalPath, const MClass* Asset* Content::LoadAsyncInternal(const StringView& internalPath, const ScriptingTypeHandle& type) { + PROFILE_MEM(Content); #if USE_EDITOR const String path = Globals::EngineContentFolder / internalPath + ASSET_FILES_EXTENSION_WITH_DOT; if (!FileSystem::FileExists(path)) @@ -635,6 +643,8 @@ Asset* Content::LoadAsync(const StringView& path, const MClass* type) Asset* Content::LoadAsync(const StringView& path, const ScriptingTypeHandle& type) { + PROFILE_MEM(Content); + // Ensure path is in a valid format String pathNorm(path); ContentStorageManager::FormatPath(pathNorm); @@ -687,7 +697,6 @@ Asset* Content::GetAsset(const StringView& outputPath) { if (outputPath.IsEmpty()) return nullptr; - ScopeLock lock(AssetsLocker); for (auto i = Assets.Begin(); i.IsNotEnd(); ++i) { @@ -1023,6 +1032,7 @@ Asset* Content::CreateVirtualAsset(const MClass* type) Asset* Content::CreateVirtualAsset(const ScriptingTypeHandle& 
type) { PROFILE_CPU(); + PROFILE_MEM(Content); auto& assetType = type.GetType(); // Init mock asset info @@ -1045,7 +1055,9 @@ Asset* Content::CreateVirtualAsset(const ScriptingTypeHandle& type) } // Create asset object + PROFILE_MEM_BEGIN(ContentAssets); auto asset = factory->NewVirtual(info); + PROFILE_MEM_END(); if (asset == nullptr) { LOG(Error, "Cannot create virtual asset object."); @@ -1054,7 +1066,9 @@ Asset* Content::CreateVirtualAsset(const ScriptingTypeHandle& type) asset->RegisterObject(); // Call initializer function + PROFILE_MEM_BEGIN(ContentAssets); asset->InitAsVirtual(); + PROFILE_MEM_END(); // Register asset AssetsLocker.Lock(); @@ -1209,6 +1223,7 @@ Asset* Content::LoadAsync(const Guid& id, const ScriptingTypeHandle& type) { if (!id.IsValid()) return nullptr; + PROFILE_MEM(Content); // Check if asset has been already loaded Asset* result = nullptr; @@ -1277,7 +1292,9 @@ Asset* Content::LoadAsync(const Guid& id, const ScriptingTypeHandle& type) } // Create asset object + PROFILE_MEM_BEGIN(ContentAssets); result = factory->New(assetInfo); + PROFILE_MEM_END(); if (result == nullptr) { LOG(Error, "Cannot create asset object. 
Info: {0}", assetInfo.ToString()); diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index 60f05d38e..04487eb65 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -20,6 +20,7 @@ #include "Engine/Core/Cache.h" #include "Engine/Debug/Exceptions/JsonParseException.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/ManagedCLR/MClass.h" #include "Engine/Scripting/ManagedCLR/MField.h" @@ -39,6 +40,7 @@ String JsonAssetBase::GetData() const if (Data == nullptr) return String::Empty; PROFILE_CPU_NAMED("JsonAsset.GetData"); + PROFILE_MEM(ContentAssets); rapidjson_flax::StringBuffer buffer; OnGetData(buffer); return String((const char*)buffer.GetString(), (int32)buffer.GetSize()); @@ -49,6 +51,7 @@ void JsonAssetBase::SetData(const StringView& value) if (!IsLoaded()) return; PROFILE_CPU_NAMED("JsonAsset.SetData"); + PROFILE_MEM(ContentAssets); const StringAnsi dataJson(value); ScopeLock lock(Locker); const StringView dataTypeName = DataTypeName; @@ -60,6 +63,7 @@ void JsonAssetBase::SetData(const StringView& value) bool JsonAssetBase::Init(const StringView& dataTypeName, const StringAnsiView& dataJson) { + PROFILE_MEM(ContentAssets); unload(true); DataTypeName = dataTypeName; DataEngineBuild = FLAXENGINE_VERSION_BUILD; @@ -239,6 +243,7 @@ Asset::LoadResult JsonAssetBase::loadAsset() { if (IsVirtual() || _isVirtualDocument) return LoadResult::Ok; + PROFILE_MEM(ContentAssets); // Load data (raw json file in editor, cooked asset in build game) #if USE_EDITOR @@ -453,6 +458,7 @@ bool JsonAsset::CreateInstance() ScopeLock lock(Locker); if (Instance) return false; + PROFILE_MEM(ContentAssets); // Try to scripting type for this data const StringAsANSI<> dataTypeNameAnsi(DataTypeName.Get(), DataTypeName.Length()); diff --git a/Source/Engine/Content/Loading/Tasks/LoadAssetTask.h 
b/Source/Engine/Content/Loading/Tasks/LoadAssetTask.h index 18d870616..4eae2829b 100644 --- a/Source/Engine/Content/Loading/Tasks/LoadAssetTask.h +++ b/Source/Engine/Content/Loading/Tasks/LoadAssetTask.h @@ -8,6 +8,7 @@ #include "Engine/Content/WeakAssetReference.h" #include "Engine/Core/Log.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" /// /// Asset loading task object. @@ -44,6 +45,7 @@ protected: Result run() override { PROFILE_CPU(); + PROFILE_MEM(ContentAssets); // Keep valid ref to the asset AssetReference<::Asset> ref = Asset.Get(); diff --git a/Source/Engine/Content/Storage/FlaxChunk.h b/Source/Engine/Content/Storage/FlaxChunk.h index 5121a0f0f..6e9887574 100644 --- a/Source/Engine/Content/Storage/FlaxChunk.h +++ b/Source/Engine/Content/Storage/FlaxChunk.h @@ -182,10 +182,5 @@ public: /// Clones this chunk data (doesn't copy location in file). /// /// The cloned chunk. - FlaxChunk* Clone() const - { - auto chunk = New(); - chunk->Data.Copy(Data); - return chunk; - } + FlaxChunk* Clone() const; }; diff --git a/Source/Engine/Content/Storage/FlaxStorage.cpp b/Source/Engine/Content/Storage/FlaxStorage.cpp index 9e36ad632..17daba278 100644 --- a/Source/Engine/Content/Storage/FlaxStorage.cpp +++ b/Source/Engine/Content/Storage/FlaxStorage.cpp @@ -8,6 +8,7 @@ #include "Engine/Core/Types/TimeSpan.h" #include "Engine/Platform/File.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/FileWriteStream.h" #include "Engine/Content/Asset.h" #include "Engine/Content/Content.h" @@ -63,6 +64,14 @@ void FlaxChunk::RegisterUsage() LastAccessTime = Platform::GetTimeSeconds(); } +FlaxChunk* FlaxChunk::Clone() const +{ + PROFILE_MEM(ContentFiles); + auto chunk = New(); + chunk->Data.Copy(Data); + return chunk; +} + const int32 FlaxStorage::MagicCode = 1180124739; FlaxStorage::LockData FlaxStorage::LockData::Invalid(nullptr); @@ -281,19 +290,12 @@ uint32 
FlaxStorage::GetMemoryUsage() const bool FlaxStorage::Load() { - // Check if was already loaded if (IsLoaded()) - { return false; - } - - // Prevent loading by more than one thread + PROFILE_MEM(ContentFiles); ScopeLock lock(_loadLocker); if (IsLoaded()) - { - // Other thread loaded it return false; - } ASSERT(GetEntriesCount() == 0); // Open file @@ -693,6 +695,7 @@ bool FlaxStorage::LoadAssetHeader(const Guid& id, AssetInitData& data) bool FlaxStorage::LoadAssetChunk(FlaxChunk* chunk) { + PROFILE_MEM(ContentFiles); ASSERT(IsLoaded()); ASSERT(chunk != nullptr && _chunks.Contains(chunk)); @@ -866,6 +869,7 @@ FlaxChunk* FlaxStorage::AllocateChunk() { if (AllowDataModifications()) { + PROFILE_MEM(ContentFiles); auto chunk = New(); _chunks.Add(chunk); return chunk; @@ -1125,6 +1129,7 @@ bool FlaxStorage::Save(const AssetInitData& data, bool silentMode) bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data) { + PROFILE_MEM(ContentFiles); ASSERT(IsLoaded()); auto lock = Lock(); @@ -1396,6 +1401,8 @@ FileReadStream* FlaxStorage::OpenFile() auto& stream = _file.Get(); if (stream == nullptr) { + PROFILE_MEM(ContentFiles); + // Open file auto file = File::Open(_path, FileMode::OpenExisting, FileAccess::Read, FileShare::Read); if (file == nullptr) diff --git a/Source/Engine/ContentImporters/AssetsImportingManager.cpp b/Source/Engine/ContentImporters/AssetsImportingManager.cpp index b3cf8e419..9cad287dc 100644 --- a/Source/Engine/ContentImporters/AssetsImportingManager.cpp +++ b/Source/Engine/ContentImporters/AssetsImportingManager.cpp @@ -13,6 +13,7 @@ #include "Engine/Engine/EngineService.h" #include "Engine/Platform/FileSystem.h" #include "Engine/Platform/Platform.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Engine/Globals.h" #include "ImportTexture.h" #include "ImportModel.h" @@ -151,6 +152,7 @@ bool CreateAssetContext::AllocateChunk(int32 index) } // Create new chunk + PROFILE_MEM(ContentFiles); Data.Header.Chunks[index] = New(); 
return false; } diff --git a/Source/Engine/Core/Log.cpp b/Source/Engine/Core/Log.cpp index 215f490be..85cc3cc02 100644 --- a/Source/Engine/Core/Log.cpp +++ b/Source/Engine/Core/Log.cpp @@ -8,6 +8,7 @@ #include "Engine/Engine/Globals.h" #include "Engine/Platform/FileSystem.h" #include "Engine/Platform/CriticalSection.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/FileWriteStream.h" #include "Engine/Debug/Exceptions/Exceptions.h" #if USE_EDITOR @@ -42,6 +43,7 @@ bool Log::Logger::Init() // Skip if disabled if (!IsLogEnabled()) return false; + PROFILE_MEM(Engine); // Create logs directory (if is missing) #if USE_EDITOR @@ -119,6 +121,7 @@ void Log::Logger::Write(const StringView& msg) const auto length = msg.Length(); if (length <= 0) return; + PROFILE_MEM(Engine); LogLocker.Lock(); if (IsDuringLog) @@ -258,6 +261,7 @@ void Log::Logger::Write(LogType type, const StringView& msg) { if (msg.Length() <= 0) return; + PROFILE_MEM(Engine); const bool isError = IsError(type); // Create message for the log file diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp index a026267cc..df2e41803 100644 --- a/Source/Engine/Debug/DebugDraw.cpp +++ b/Source/Engine/Debug/DebugDraw.cpp @@ -515,6 +515,7 @@ DebugDrawService DebugDrawServiceInstance; bool DebugDrawService::Init() { + PROFILE_MEM(Graphics); Context = &GlobalContext; // Init wireframe sphere cache @@ -633,6 +634,7 @@ void DebugDrawService::Update() } PROFILE_CPU(); + PROFILE_MEM(Graphics); // Update lists float deltaTime = Time::Update.DeltaTime.GetTotalSeconds(); diff --git a/Source/Engine/Graphics/GPUBuffer.cpp b/Source/Engine/Graphics/GPUBuffer.cpp index 0e71ea8b3..db7845227 100644 --- a/Source/Engine/Graphics/GPUBuffer.cpp +++ b/Source/Engine/Graphics/GPUBuffer.cpp @@ -15,6 +15,7 @@ #include "Engine/Debug/Exceptions/ArgumentNullException.h" #include "Engine/Debug/Exceptions/ArgumentOutOfRangeException.h" #include "Engine/Profiler/ProfilerCPU.h" +#include 
"Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Enums.h" #include "Engine/Threading/ThreadPoolTask.h" #include "Engine/Threading/Threading.h" @@ -188,6 +189,8 @@ bool GPUBuffer::IsDynamic() const bool GPUBuffer::Init(const GPUBufferDescription& desc) { + PROFILE_MEM(GraphicsBuffers); + // Validate description #if !BUILD_RELEASE #define GET_NAME() GetName() @@ -241,6 +244,7 @@ bool GPUBuffer::Init(const GPUBufferDescription& desc) LOG(Warning, "Cannot initialize buffer. Description: {0}", desc.ToString()); return true; } + PROFILE_MEM_INC(GraphicsBuffers, GetMemoryUsage()); return false; } @@ -476,6 +480,7 @@ GPUResourceType GPUBuffer::GetResourceType() const void GPUBuffer::OnReleaseGPU() { + PROFILE_MEM_DEC(GraphicsBuffers, GetMemoryUsage()); _desc.Clear(); _isLocked = false; } diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index e4bbc170a..1ea008913 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -650,6 +650,7 @@ GPUTasksExecutor* GPUDevice::CreateTasksExecutor() void GPUDevice::Draw() { + PROFILE_MEM(Graphics); DrawBegin(); auto context = GetMainContext(); diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 7ffba3957..43bd1a76d 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -9,6 +9,7 @@ #include "Engine/Engine/CommandLine.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerGPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Render2D/Font.h" bool Graphics::UseVSync = false; @@ -97,6 +98,7 @@ void Graphics::DisposeDevice() bool GraphicsService::Init() { ASSERT(GPUDevice::Instance == nullptr); + PROFILE_MEM(Graphics); // Create and initialize graphics device Log::Logger::WriteFloor(); diff --git a/Source/Engine/Graphics/Materials/MaterialParams.cpp b/Source/Engine/Graphics/Materials/MaterialParams.cpp index e31697f77..55726c20c 100644 
--- a/Source/Engine/Graphics/Materials/MaterialParams.cpp +++ b/Source/Engine/Graphics/Materials/MaterialParams.cpp @@ -15,6 +15,7 @@ #include "Engine/Renderer/GlobalSignDistanceFieldPass.h" #include "Engine/Scripting/Enums.h" #include "Engine/Streaming/Streaming.h" +#include "Engine/Profiler/ProfilerMemory.h" bool MaterialInfo8::operator==(const MaterialInfo8& other) const { @@ -638,6 +639,7 @@ void MaterialParams::Dispose() bool MaterialParams::Load(ReadStream* stream) { + PROFILE_MEM(GraphicsMaterials); bool result = false; // Release diff --git a/Source/Engine/Graphics/Materials/MaterialShader.cpp b/Source/Engine/Graphics/Materials/MaterialShader.cpp index 1b0b6937b..c24631d56 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShader.cpp @@ -11,6 +11,7 @@ #include "Engine/Graphics/Shaders/GPUConstantBuffer.h" #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Engine/Time.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "DecalMaterialShader.h" #include "PostFxMaterialShader.h" #include "ForwardMaterialShader.h" @@ -136,6 +137,7 @@ MaterialShader::~MaterialShader() MaterialShader* MaterialShader::Create(const StringView& name, MemoryReadStream& shaderCacheStream, const MaterialInfo& info) { + PROFILE_MEM(GraphicsMaterials); MaterialShader* material; switch (info.Domain) { @@ -199,6 +201,7 @@ protected: MaterialShader* MaterialShader::CreateDummy(MemoryReadStream& shaderCacheStream, const MaterialInfo& info) { + PROFILE_MEM(GraphicsMaterials); MaterialShader* material = New(); if (material->Load(shaderCacheStream, info)) { @@ -225,6 +228,7 @@ bool MaterialShader::IsReady() const bool MaterialShader::Load(MemoryReadStream& shaderCacheStream, const MaterialInfo& info) { + PROFILE_MEM(GraphicsMaterials); ASSERT(!_isLoaded); // Cache material info diff --git a/Source/Engine/Graphics/Models/Mesh.cpp b/Source/Engine/Graphics/Models/Mesh.cpp index 36dc18af4..32df730e8 100644 --- 
a/Source/Engine/Graphics/Models/Mesh.cpp +++ b/Source/Engine/Graphics/Models/Mesh.cpp @@ -14,6 +14,7 @@ #include "Engine/Renderer/RenderList.h" #include "Engine/Scripting/ManagedCLR/MCore.h" #include "Engine/Threading/Threading.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_EDITOR #include "Engine/Renderer/GBufferPass.h" #endif @@ -48,6 +49,7 @@ namespace { bool UpdateMesh(MeshBase* mesh, uint32 vertexCount, uint32 triangleCount, PixelFormat indexFormat, const Float3* vertices, const void* triangles, const Float3* normals, const Float3* tangents, const Float2* uvs, const Color32* colors) { + PROFILE_MEM(GraphicsMeshes); auto model = mesh->GetModelBase(); CHECK_RETURN(model && model->IsVirtual(), true); CHECK_RETURN(triangles && vertices, true); @@ -172,6 +174,7 @@ bool Mesh::UpdateMesh(uint32 vertexCount, uint32 triangleCount, const VB0Element bool Mesh::UpdateMesh(uint32 vertexCount, uint32 triangleCount, const VB0ElementType* vb0, const VB1ElementType* vb1, const VB2ElementType* vb2, const void* ib, bool use16BitIndices) { + PROFILE_MEM(GraphicsMeshes); Release(); // Setup GPU resources diff --git a/Source/Engine/Graphics/Models/ModelInstanceEntry.cpp b/Source/Engine/Graphics/Models/ModelInstanceEntry.cpp index ace033d6d..1c9440b39 100644 --- a/Source/Engine/Graphics/Models/ModelInstanceEntry.cpp +++ b/Source/Engine/Graphics/Models/ModelInstanceEntry.cpp @@ -4,6 +4,7 @@ #include "Engine/Serialization/Serialization.h" #include "Engine/Content/Assets/Model.h" #include "Engine/Content/Assets/SkinnedModel.h" +#include "Engine/Profiler/ProfilerMemory.h" bool ModelInstanceEntries::HasContentLoaded() const { @@ -41,6 +42,7 @@ void ModelInstanceEntries::Serialize(SerializeStream& stream, const void* otherO void ModelInstanceEntries::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) { + PROFILE_MEM(Graphics); const DeserializeStream& entries = stream["Entries"]; ASSERT(entries.IsArray()); Resize(entries.Size()); @@ -85,6 +87,7 @@ void 
ModelInstanceEntries::Setup(const SkinnedModel* model) void ModelInstanceEntries::Setup(int32 slotsCount) { + PROFILE_MEM(Graphics); Clear(); Resize(slotsCount); } diff --git a/Source/Engine/Graphics/RenderTask.cpp b/Source/Engine/Graphics/RenderTask.cpp index df45d981c..ecdcd572c 100644 --- a/Source/Engine/Graphics/RenderTask.cpp +++ b/Source/Engine/Graphics/RenderTask.cpp @@ -417,6 +417,7 @@ void SceneRenderTask::OnEnd(GPUContext* context) bool SceneRenderTask::Resize(int32 width, int32 height) { + PROFILE_MEM(Graphics); if (Output && Output->Resize(width, height)) return true; if (Buffers && Buffers->Init((int32)((float)width * RenderingPercentage), (int32)((float)height * RenderingPercentage))) diff --git a/Source/Engine/Graphics/Shaders/GPUShader.cpp b/Source/Engine/Graphics/Shaders/GPUShader.cpp index 2d14c1796..694b13f40 100644 --- a/Source/Engine/Graphics/Shaders/GPUShader.cpp +++ b/Source/Engine/Graphics/Shaders/GPUShader.cpp @@ -8,6 +8,7 @@ #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/Shaders/GPUVertexLayout.h" #include "Engine/Serialization/MemoryReadStream.h" +#include "Engine/Profiler/ProfilerMemory.h" static FORCE_INLINE uint32 HashPermutation(const StringAnsiView& name, int32 permutationIndex) { @@ -33,6 +34,7 @@ GPUShader::GPUShader() bool GPUShader::Create(MemoryReadStream& stream) { ReleaseGPU(); + _memoryUsage = sizeof(GPUShader); // Version int32 version; @@ -111,6 +113,7 @@ bool GPUShader::Create(MemoryReadStream& stream) const uint32 hash = HashPermutation(shader->GetName(), permutationIndex); ASSERT_LOW_LAYER(!_shaders.ContainsKey(hash)); _shaders.Add(hash, shader); + _memoryUsage += sizeof(GPUShaderProgram) + bytecodeSize; } } @@ -142,11 +145,12 @@ bool GPUShader::Create(MemoryReadStream& stream) return true; } _constantBuffers[slotIndex] = cb; + _memoryUsage += sizeof(GPUConstantBuffer); } // Don't read additional data - _memoryUsage = 1; + PROFILE_MEM_INC(GraphicsShaders, _memoryUsage); return false; } @@ -208,6 +212,7 
@@ GPUResourceType GPUShader::GetResourceType() const void GPUShader::OnReleaseGPU() { + PROFILE_MEM_DEC(GraphicsShaders, _memoryUsage); for (GPUConstantBuffer*& cb : _constantBuffers) { if (cb) diff --git a/Source/Engine/Graphics/Textures/GPUTexture.cpp b/Source/Engine/Graphics/Textures/GPUTexture.cpp index 127cf2f5b..6d138a40c 100644 --- a/Source/Engine/Graphics/Textures/GPUTexture.cpp +++ b/Source/Engine/Graphics/Textures/GPUTexture.cpp @@ -16,6 +16,7 @@ #include "Engine/Threading/ThreadPoolTask.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Enums.h" namespace @@ -353,6 +354,8 @@ int32 GPUTexture::ComputeRowPitch(int32 mipLevel, int32 rowAlign) const bool GPUTexture::Init(const GPUTextureDescription& desc) { + PROFILE_MEM(GraphicsTextures); + // Validate description const auto device = GPUDevice::Instance; if (desc.Usage == GPUResourceUsage::Dynamic) @@ -500,6 +503,7 @@ bool GPUTexture::Init(const GPUTextureDescription& desc) LOG(Warning, "Cannot initialize texture. 
Description: {0}", desc.ToString()); return true; } + PROFILE_MEM_INC(GraphicsTextures, GetMemoryUsage()); // Render targets and depth buffers doesn't support normal textures streaming and are considered to be always resident if (IsRegularTexture() == false) @@ -589,6 +593,7 @@ GPUResourceType GPUTexture::GetResourceType() const void GPUTexture::OnReleaseGPU() { + PROFILE_MEM_DEC(GraphicsTextures, GetMemoryUsage()); _desc.Clear(); _residentMipLevels = 0; } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index ad39b777d..c578fd295 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -19,6 +19,7 @@ #include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h" #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Engine/CommandLine.h" +#include "Engine/Profiler/ProfilerMemory.h" #if !USE_EDITOR && PLATFORM_WINDOWS #include "Engine/Core/Config/PlatformSettings.h" @@ -810,16 +811,19 @@ void GPUDeviceDX11::DrawEnd() GPUTexture* GPUDeviceDX11::CreateTexture(const StringView& name) { + PROFILE_MEM(GraphicsTextures); return New(this, name); } GPUShader* GPUDeviceDX11::CreateShader(const StringView& name) { + PROFILE_MEM(GraphicsShaders); return New(this, name); } GPUPipelineState* GPUDeviceDX11::CreatePipelineState() { + PROFILE_MEM(GraphicsCommands); return New(this); } @@ -830,6 +834,7 @@ GPUTimerQuery* GPUDeviceDX11::CreateTimerQuery() GPUBuffer* GPUDeviceDX11::CreateBuffer(const StringView& name) { + PROFILE_MEM(GraphicsBuffers); return New(this, name); } @@ -850,6 +855,7 @@ GPUSwapChain* GPUDeviceDX11::CreateSwapChain(Window* window) GPUConstantBuffer* GPUDeviceDX11::CreateConstantBuffer(uint32 size, const StringView& name) { + PROFILE_MEM(GraphicsShaders); ID3D11Buffer* buffer = nullptr; uint32 memorySize = 0; if (size) diff --git 
a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp index ba7226250..7a106d377 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp @@ -6,6 +6,7 @@ #include "Engine/Platform/Window.h" #include "Engine/Graphics/RenderTools.h" #include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "GPUContextDX11.h" GPUSwapChainDX11::GPUSwapChainDX11(GPUDeviceDX11* device, Window* window) @@ -60,9 +61,11 @@ void GPUSwapChainDX11::OnReleaseGPU() #endif // Release data + PROFILE_MEM_DEC(Graphics, _memoryUsage); releaseBackBuffer(); DX_SAFE_RELEASE_CHECK(_swapChain, 0); _width = _height = 0; + _memoryUsage = 0; } ID3D11Resource* GPUSwapChainDX11::GetResource() @@ -262,6 +265,7 @@ bool GPUSwapChainDX11::Resize(int32 width, int32 height) _width = width; _height = height; _memoryUsage = RenderTools::CalculateTextureMemoryUsage(_format, _width, _height, 1) * swapChainDesc.BufferCount; + PROFILE_MEM_INC(Graphics, _memoryUsage); getBackBuffer(); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index 036454589..40e081175 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -18,6 +18,7 @@ #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Core/Log.h" #include "Engine/Core/Config/PlatformSettings.h" #include "UploadBufferDX12.h" @@ -833,16 +834,19 @@ void GPUDeviceDX12::WaitForGPU() GPUTexture* GPUDeviceDX12::CreateTexture(const StringView& name) { + PROFILE_MEM(GraphicsTextures); return New(this, name); } GPUShader* 
GPUDeviceDX12::CreateShader(const StringView& name) { + PROFILE_MEM(GraphicsShaders); return New(this, name); } GPUPipelineState* GPUDeviceDX12::CreatePipelineState() { + PROFILE_MEM(GraphicsCommands); return New(this); } @@ -853,6 +857,7 @@ GPUTimerQuery* GPUDeviceDX12::CreateTimerQuery() GPUBuffer* GPUDeviceDX12::CreateBuffer(const StringView& name) { + PROFILE_MEM(GraphicsBuffers); return New(this, name); } @@ -873,6 +878,7 @@ GPUSwapChain* GPUDeviceDX12::CreateSwapChain(Window* window) GPUConstantBuffer* GPUDeviceDX12::CreateConstantBuffer(uint32 size, const StringView& name) { + PROFILE_MEM(GraphicsShaders); return New(this, size, name); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp index bfbf662b7..fa6dfa881 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp @@ -6,6 +6,7 @@ #include "GPUContextDX12.h" #include "../IncludeDirectXHeaders.h" #include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h" +#include "Engine/Profiler/ProfilerMemory.h" void BackBufferDX12::Setup(GPUSwapChainDX12* window, ID3D12Resource* backbuffer) { @@ -71,6 +72,7 @@ void GPUSwapChainDX12::OnReleaseGPU() #endif // Release data + PROFILE_MEM_DEC(Graphics, _memoryUsage); releaseBackBuffer(); _backBuffers.Resize(0); if (_swapChain) @@ -79,6 +81,7 @@ void GPUSwapChainDX12::OnReleaseGPU() _swapChain = nullptr; } _width = _height = 0; + _memoryUsage = 0; } void GPUSwapChainDX12::releaseBackBuffer() @@ -244,6 +247,7 @@ bool GPUSwapChainDX12::Resize(int32 width, int32 height) _width = width; _height = height; _memoryUsage = RenderTools::CalculateTextureMemoryUsage(_format, _width, _height, 1) * swapChainDesc.BufferCount; + PROFILE_MEM_INC(Graphics, _memoryUsage); getBackBuffer(); #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp 
b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp index 1e6613744..8fdfd5ac3 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp @@ -6,6 +6,7 @@ #include "GPUTextureDX12.h" #include "GPUContextDX12.h" #include "../RenderToolsDX.h" +#include "Engine/Profiler/ProfilerMemory.h" UploadBufferDX12::UploadBufferDX12(GPUDeviceDX12* device) : _device(device) @@ -235,6 +236,7 @@ UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size) initResource(resource, D3D12_RESOURCE_STATE_GENERIC_READ, 1); DX_SET_DEBUG_NAME(_resource, GPUResourceDX12::GetName()); _memoryUsage = size; + PROFILE_MEM_INC(GraphicsCommands, _memoryUsage); GPUAddress = _resource->GetGPUVirtualAddress(); // Map buffer @@ -243,6 +245,8 @@ UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size) void UploadBufferPageDX12::OnReleaseGPU() { + PROFILE_MEM_DEC(GraphicsCommands, _memoryUsage); + // Unmap if (_resource && CPUAddress) { diff --git a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp index 41ec9f76a..a1582102f 100644 --- a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp +++ b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp @@ -14,6 +14,7 @@ #include "GPUVertexLayoutNull.h" #include "GPUSwapChainNull.h" #include "Engine/Core/Log.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Graphics/Async/GPUTasksManager.h" GPUDeviceNull::GPUDeviceNull() @@ -145,16 +146,19 @@ void GPUDeviceNull::WaitForGPU() GPUTexture* GPUDeviceNull::CreateTexture(const StringView& name) { + PROFILE_MEM(GraphicsTextures); return New(); } GPUShader* GPUDeviceNull::CreateShader(const StringView& name) { + PROFILE_MEM(GraphicsShaders); return New(); } GPUPipelineState* GPUDeviceNull::CreatePipelineState() { + PROFILE_MEM(GraphicsCommands); return New(); } @@ -165,6 +169,7 @@ GPUTimerQuery* 
GPUDeviceNull::CreateTimerQuery() GPUBuffer* GPUDeviceNull::CreateBuffer(const StringView& name) { + PROFILE_MEM(GraphicsBuffers); return New(); } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 359ac0993..73eb90755 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -34,6 +34,7 @@ #include "Engine/Engine/CommandLine.h" #include "Engine/Utilities/StringConverter.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" #include "Engine/Scripting/Enums.h" @@ -229,9 +230,13 @@ static VKAPI_ATTR VkBool32 VKAPI_PTR DebugUtilsCallback(VkDebugUtilsMessageSever const String message(callbackData->pMessage); if (callbackData->pMessageIdName) + { LOG(Info, "[Vulkan] {0} {1}:{2}({3}) {4}", type, severity, callbackData->messageIdNumber, String(callbackData->pMessageIdName), message); + } else + { LOG(Info, "[Vulkan] {0} {1}:{2} {3}", type, severity, callbackData->messageIdNumber, message); + } #if BUILD_DEBUG if (auto* context = (GPUContextVulkan*)GPUDevice::Instance->GetMainContext()) @@ -2095,16 +2100,19 @@ void GPUDeviceVulkan::WaitForGPU() GPUTexture* GPUDeviceVulkan::CreateTexture(const StringView& name) { + PROFILE_MEM(GraphicsTextures); return New(this, name); } GPUShader* GPUDeviceVulkan::CreateShader(const StringView& name) { + PROFILE_MEM(GraphicsShaders); return New(this, name); } GPUPipelineState* GPUDeviceVulkan::CreatePipelineState() { + PROFILE_MEM(GraphicsCommands); return New(this); } @@ -2115,6 +2123,7 @@ GPUTimerQuery* GPUDeviceVulkan::CreateTimerQuery() GPUBuffer* GPUDeviceVulkan::CreateBuffer(const StringView& name) { + PROFILE_MEM(GraphicsBuffers); return New(this, name); } @@ -2135,6 +2144,7 @@ GPUSwapChain* GPUDeviceVulkan::CreateSwapChain(Window* window) GPUConstantBuffer* GPUDeviceVulkan::CreateConstantBuffer(uint32 
size, const StringView& name) { + PROFILE_MEM(GraphicsShaders); return New(this, size); } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUShaderVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUShaderVulkan.cpp index f6fbe4838..853fcf693 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUShaderVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUShaderVulkan.cpp @@ -12,6 +12,7 @@ #include "Engine/Core/Types/DataContainer.h" #include "Engine/Serialization/MemoryReadStream.h" #include "Engine/Graphics/PixelFormatExtensions.h" +#include "Engine/Profiler/ProfilerMemory.h" #if PLATFORM_DESKTOP #define VULKAN_UNIFORM_RING_BUFFER_SIZE (24 * 1024 * 1024) @@ -41,6 +42,7 @@ UniformBufferUploaderVulkan::UniformBufferUploaderVulkan(GPUDeviceVulkan* device VkResult result = vmaCreateBuffer(_device->Allocator, &bufferInfo, &allocInfo, &_buffer, &_allocation, nullptr); LOG_VULKAN_RESULT(result); _memoryUsage = bufferInfo.size; + PROFILE_MEM_INC(GraphicsCommands, _memoryUsage); // Map buffer result = vmaMapMemory(_device->Allocator, _allocation, (void**)&_mapped); @@ -87,6 +89,7 @@ void UniformBufferUploaderVulkan::OnReleaseGPU() { if (_allocation != VK_NULL_HANDLE) { + PROFILE_MEM_DEC(GraphicsCommands, _memoryUsage); if (_mapped) { vmaUnmapMemory(_device->Allocator, _allocation); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp index 18e2cd95c..801ba1fc1 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp @@ -12,6 +12,7 @@ #include "Engine/Graphics/GPULimits.h" #include "Engine/Scripting/Enums.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" void BackBufferVulkan::Setup(GPUSwapChainVulkan* window, VkImage backbuffer, PixelFormat format, VkExtent3D extent) { @@ -61,6 +62,7 @@ void GPUSwapChainVulkan::OnReleaseGPU() ReleaseBackBuffer(); // Release data + 
PROFILE_MEM_DEC(Graphics, _memoryUsage); _currentImageIndex = -1; _semaphoreIndex = 0; _acquiredImageIndex = -1; @@ -76,6 +78,7 @@ void GPUSwapChainVulkan::OnReleaseGPU() _surface = VK_NULL_HANDLE; } _width = _height = 0; + _memoryUsage = 0; } bool GPUSwapChainVulkan::IsFullscreen() @@ -412,6 +415,7 @@ bool GPUSwapChainVulkan::CreateSwapChain(int32 width, int32 height) // Estimate memory usage _memoryUsage = 1024 + RenderTools::CalculateTextureMemoryUsage(_format, _width, _height, 1) * _backBuffers.Count(); + PROFILE_MEM_INC(Graphics, _memoryUsage); return false; } diff --git a/Source/Engine/Input/Input.cpp b/Source/Engine/Input/Input.cpp index 7a4d0592c..dc5ecc236 100644 --- a/Source/Engine/Input/Input.cpp +++ b/Source/Engine/Input/Input.cpp @@ -14,6 +14,7 @@ #include "Engine/Scripting/ScriptingType.h" #include "Engine/Scripting/BinaryModule.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/JsonTools.h" struct AxisEvaluation @@ -89,12 +90,14 @@ Array Input::AxisMappings; void InputSettings::Apply() { + PROFILE_MEM(Input); Input::ActionMappings = ActionMappings; Input::AxisMappings = AxisMappings; } void InputSettings::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) { + PROFILE_MEM(Input); const auto actionMappings = stream.FindMember("ActionMappings"); if (actionMappings != stream.MemberEnd()) { @@ -615,6 +618,7 @@ float Input::GetAxisRaw(const StringView& name) void Input::SetInputMappingFromSettings(const JsonAssetReference& settings) { + PROFILE_MEM(Input); auto actionMappings = settings.GetInstance()->ActionMappings; ActionMappings.Resize(actionMappings.Count(), false); for (int i = 0; i < actionMappings.Count(); i++) @@ -634,6 +638,7 @@ void Input::SetInputMappingFromSettings(const JsonAssetReference& void Input::SetInputMappingToDefaultSettings() { + PROFILE_MEM(Input); InputSettings* settings = InputSettings::Get(); if (settings) { @@ -696,6 +701,7 @@ Array 
Input::GetAllAxisConfigsByName(const StringView& name) void Input::SetAxisConfigByName(const StringView& name, AxisConfig& config, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < AxisMappings.Count(); ++i) { auto& mapping = AxisMappings.At(i); @@ -712,6 +718,7 @@ void Input::SetAxisConfigByName(const StringView& name, AxisConfig& config, bool void Input::SetAxisConfigByName(const StringView& name, InputAxisType inputType, const KeyboardKeys positiveButton, const KeyboardKeys negativeButton, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < AxisMappings.Count(); ++i) { auto& mapping = AxisMappings.At(i); @@ -727,6 +734,7 @@ void Input::SetAxisConfigByName(const StringView& name, InputAxisType inputType, void Input::SetAxisConfigByName(const StringView& name, InputAxisType inputType, const GamepadButton positiveButton, const GamepadButton negativeButton, InputGamepadIndex gamepadIndex, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < AxisMappings.Count(); ++i) { auto& mapping = AxisMappings.At(i); @@ -742,6 +750,7 @@ void Input::SetAxisConfigByName(const StringView& name, InputAxisType inputType, void Input::SetAxisConfigByName(const StringView& name, InputAxisType inputType, const float gravity, const float deadZone, const float sensitivity, const float scale, const bool snap, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < AxisMappings.Count(); ++i) { auto& mapping = AxisMappings.At(i); @@ -760,6 +769,7 @@ void Input::SetAxisConfigByName(const StringView& name, InputAxisType inputType, void Input::SetActionConfigByName(const StringView& name, const KeyboardKeys key, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < ActionMappings.Count(); ++i) { auto& mapping = ActionMappings.At(i); @@ -774,6 +784,7 @@ void Input::SetActionConfigByName(const StringView& name, const KeyboardKeys key void Input::SetActionConfigByName(const StringView& name, const MouseButton mouseButton, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < 
ActionMappings.Count(); ++i) { auto& mapping = ActionMappings.At(i); @@ -788,6 +799,7 @@ void Input::SetActionConfigByName(const StringView& name, const MouseButton mous void Input::SetActionConfigByName(const StringView& name, const GamepadButton gamepadButton, InputGamepadIndex gamepadIndex, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < ActionMappings.Count(); ++i) { auto& mapping = ActionMappings.At(i); @@ -802,6 +814,7 @@ void Input::SetActionConfigByName(const StringView& name, const GamepadButton ga void Input::SetActionConfigByName(const StringView& name, ActionConfig& config, bool all) { + PROFILE_MEM(Input); for (int i = 0; i < ActionMappings.Count(); ++i) { auto& mapping = ActionMappings.At(i); @@ -819,6 +832,7 @@ void Input::SetActionConfigByName(const StringView& name, ActionConfig& config, void InputService::Update() { PROFILE_CPU(); + PROFILE_MEM(Input); const auto frame = Time::Update.TicksCount; const auto dt = Time::Update.UnscaledDeltaTime.GetTotalSeconds(); InputEvents.Clear(); diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 4989f2c49..12039da5e 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -1127,9 +1127,13 @@ void Actor::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) else if (!parent && parentId.IsValid()) { if (_prefabObjectID.IsValid()) + { LOG(Warning, "Missing parent actor {0} for \'{1}\', prefab object {2}", parentId, ToString(), _prefabObjectID); + } else + { LOG(Warning, "Missing parent actor {0} for \'{1}\'", parentId, ToString()); + } } } } diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index a96ba936a..5bcd98994 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -24,6 +24,7 @@ #include "Engine/Platform/File.h" #include "Engine/Platform/FileSystem.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Script.h" #include 
"Engine/Engine/Time.h" #include "Engine/Scripting/ManagedCLR/MAssembly.h" @@ -248,6 +249,7 @@ void LayersAndTagsSettings::Apply() #define TICK_LEVEL(tickingStage, name) \ PROFILE_CPU_NAMED(name); \ + PROFILE_MEM(Level); \ ScopeLock lock(Level::ScenesLock); \ auto& scenes = Level::Scenes; \ if (!Time::GetGamePaused() && Level::TickEnabled) \ @@ -504,6 +506,7 @@ public: // Note: we don't want to override original scene files PROFILE_CPU_NAMED("Level.ReloadScripts"); + PROFILE_MEM(Level); LOG(Info, "Scripts reloading start"); const auto startTime = DateTime::NowUTC(); @@ -784,6 +787,7 @@ bool LevelImpl::unloadScene(Scene* scene) const auto sceneId = scene->GetID(); PROFILE_CPU_NAMED("Level.UnloadScene"); + PROFILE_MEM(Level); // Fire event CallSceneEvent(SceneEventType::OnSceneUnloading, scene, sceneId); @@ -838,6 +842,7 @@ bool Level::loadScene(const BytesContainer& sceneData, Scene** outScene) LOG(Error, "Missing scene data."); return true; } + PROFILE_MEM(Level); // Parse scene JSON file rapidjson_flax::Document document; @@ -870,6 +875,7 @@ bool Level::loadScene(rapidjson_flax::Document& document, Scene** outScene) bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath) { PROFILE_CPU_NAMED("Level.LoadScene"); + PROFILE_MEM(Level); if (outScene) *outScene = nullptr; #if USE_EDITOR @@ -954,6 +960,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) JobSystem::Execute([&](int32 i) { + PROFILE_MEM(Level); i++; // Start from 1. 
at index [0] was scene auto& stream = data[i]; auto obj = SceneObjectsFactory::Spawn(context, stream); @@ -1165,6 +1172,7 @@ bool LevelImpl::saveScene(Scene* scene) bool LevelImpl::saveScene(Scene* scene, const String& path) { PROFILE_CPU_NAMED("Level.SaveScene"); + PROFILE_MEM(Level); ASSERT(scene && EnumHasNoneFlags(scene->Flags, ObjectFlags::WasMarkedToDelete)); auto sceneId = scene->GetID(); @@ -1208,6 +1216,7 @@ bool LevelImpl::saveScene(Scene* scene, const String& path) bool LevelImpl::saveScene(Scene* scene, rapidjson_flax::StringBuffer& outBuffer, bool prettyJson) { PROFILE_CPU_NAMED("Level.SaveScene"); + PROFILE_MEM(Level); if (prettyJson) { PrettyJsonWriter writerObj(outBuffer); diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index 445447cd1..c6f5669a5 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -9,6 +9,7 @@ #include "Engine/Threading/JobSystem.h" #include "Engine/Threading/Threading.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" ISceneRenderingListener::~ISceneRenderingListener() { @@ -41,6 +42,7 @@ FORCE_INLINE bool FrustumsListCull(const BoundingSphere& bounds, const Array_drawCategory; ScopeLock lock(Locker); auto& list = Actors[category]; @@ -214,6 +217,7 @@ void SceneRendering::RemoveActor(Actor* a, int32& key) void SceneRendering::DrawActorsJob(int32) { PROFILE_CPU(); + PROFILE_MEM(Graphics); auto& mainContext = _drawBatch->GetMainContext(); const auto& view = mainContext.View; if (view.StaticFlagsMask != StaticFlags::None) diff --git a/Source/Engine/Localization/CultureInfo.cpp b/Source/Engine/Localization/CultureInfo.cpp index 4595703af..7d53489b5 100644 --- a/Source/Engine/Localization/CultureInfo.cpp +++ b/Source/Engine/Localization/CultureInfo.cpp @@ -3,6 +3,7 @@ #include "CultureInfo.h" #include "Engine/Core/Log.h" #include "Engine/Core/Types/StringView.h" +#include 
"Engine/Profiler/ProfilerMemory.h" #include "Engine/Utilities/StringConverter.h" #include "Engine/Scripting/Types.h" #include "Engine/Scripting/ManagedCLR/MProperty.h" @@ -51,6 +52,7 @@ CultureInfo::CultureInfo(int32 lcid) _data = nullptr; if (lcid == 0) return; + PROFILE_MEM(Localization); if (lcid == 127) { _englishName = TEXT("Invariant Culture"); @@ -88,6 +90,7 @@ CultureInfo::CultureInfo(const StringView& name) CultureInfo::CultureInfo(const StringAnsiView& name) { + PROFILE_MEM(Localization); _data = nullptr; if (name.IsEmpty()) { @@ -160,6 +163,7 @@ bool CultureInfo::operator==(const CultureInfo& other) const void* MUtils::ToManaged(const CultureInfo& value) { #if USE_CSHARP + PROFILE_MEM(Localization); auto scriptingClass = Scripting::GetStaticClass(); CHECK_RETURN(scriptingClass, nullptr); auto cultureInfoToManaged = scriptingClass->GetMethod("CultureInfoToManaged", 1); @@ -182,6 +186,7 @@ CultureInfo MUtils::ToNative(void* value) if (value) lcid = static_cast(value)->lcid; #elif USE_CSHARP + PROFILE_MEM(Localization); const MClass* klass = GetBinaryModuleCorlib()->Assembly->GetClass("System.Globalization.CultureInfo"); if (value && klass) { diff --git a/Source/Engine/Localization/Localization.cpp b/Source/Engine/Localization/Localization.cpp index 00a6a8deb..d1b3a036f 100644 --- a/Source/Engine/Localization/Localization.cpp +++ b/Source/Engine/Localization/Localization.cpp @@ -9,6 +9,7 @@ #include "Engine/Engine/EngineService.h" #include "Engine/Content/Content.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/Serialization.h" #include @@ -171,6 +172,7 @@ String LocalizedString::ToStringPlural(int32 n) const void LocalizationService::OnLocalizationChanged() { PROFILE_CPU(); + PROFILE_MEM(Localization); Instance.LocalizedStringTables.Clear(); Instance.FallbackStringTables.Clear(); @@ -279,6 +281,8 @@ void LocalizationService::OnLocalizationChanged() bool LocalizationService::Init() { + 
PROFILE_MEM(Localization); + // Use system language as default CurrentLanguage = CurrentCulture = CultureInfo(Platform::GetUserLocaleName()); diff --git a/Source/Engine/Localization/LocalizedStringTable.cpp b/Source/Engine/Localization/LocalizedStringTable.cpp index 84aca852d..e99b87a27 100644 --- a/Source/Engine/Localization/LocalizedStringTable.cpp +++ b/Source/Engine/Localization/LocalizedStringTable.cpp @@ -5,6 +5,7 @@ #include "Engine/Serialization/JsonWriters.h" #include "Engine/Serialization/SerializationFwd.h" #include "Engine/Content/Factories/JsonAssetFactory.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_EDITOR #include "Engine/Threading/Threading.h" #include "Engine/Core/Log.h" @@ -20,6 +21,7 @@ LocalizedStringTable::LocalizedStringTable(const SpawnParams& params, const Asse void LocalizedStringTable::AddString(const StringView& id, const StringView& value) { + PROFILE_MEM(Localization); auto& values = Entries[id]; values.Resize(1); values[0] = value; @@ -27,6 +29,7 @@ void LocalizedStringTable::AddString(const StringView& id, const StringView& val void LocalizedStringTable::AddPluralString(const StringView& id, const StringView& value, int32 n) { + PROFILE_MEM(Localization); CHECK(n >= 0 && n < 1024); auto& values = Entries[id]; values.Resize(Math::Max(values.Count(), n + 1)); @@ -57,6 +60,8 @@ String LocalizedStringTable::GetPluralString(const String& id, int32 n) const Asset::LoadResult LocalizedStringTable::loadAsset() { + PROFILE_MEM(Localization); + // Base auto result = JsonAssetBase::loadAsset(); if (result != LoadResult::Ok || IsInternalType()) diff --git a/Source/Engine/Navigation/NavCrowd.cpp b/Source/Engine/Navigation/NavCrowd.cpp index c9a56a08a..cb2a4ebee 100644 --- a/Source/Engine/Navigation/NavCrowd.cpp +++ b/Source/Engine/Navigation/NavCrowd.cpp @@ -34,9 +34,13 @@ bool NavCrowd::Init(const NavAgentProperties& agentProperties, int32 maxAgents) if (!navMeshRuntime) { if (NavMeshRuntime::Get()) + { LOG(Error, "Cannot create crowd. 
Failed to find a navmesh that matches a given agent properties."); + } else + { LOG(Error, "Cannot create crowd. No navmesh is loaded."); + } } #endif return Init(agentProperties.Radius * 3.0f, maxAgents, navMeshRuntime); diff --git a/Source/Engine/Navigation/NavMeshData.cpp b/Source/Engine/Navigation/NavMeshData.cpp index 7dfa597a8..6fbf5e33e 100644 --- a/Source/Engine/Navigation/NavMeshData.cpp +++ b/Source/Engine/Navigation/NavMeshData.cpp @@ -4,6 +4,7 @@ #include "Engine/Core/Log.h" #include "Engine/Serialization/WriteStream.h" #include "Engine/Serialization/MemoryReadStream.h" +#include "Engine/Profiler/ProfilerMemory.h" void NavMeshData::Save(WriteStream& stream) { @@ -47,6 +48,7 @@ bool NavMeshData::Load(BytesContainer& data, bool copyData) return true; } MemoryReadStream stream(data.Get(), data.Length()); + PROFILE_MEM(Navigation); // Read header const auto header = stream.Move(); diff --git a/Source/Engine/Navigation/NavMeshRuntime.cpp b/Source/Engine/Navigation/NavMeshRuntime.cpp index baa8b0320..f90d5efd1 100644 --- a/Source/Engine/Navigation/NavMeshRuntime.cpp +++ b/Source/Engine/Navigation/NavMeshRuntime.cpp @@ -6,6 +6,7 @@ #include "Engine/Core/Log.h" #include "Engine/Core/Random.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" #include #include @@ -312,6 +313,7 @@ void NavMeshRuntime::EnsureCapacity(int32 tilesToAddCount) if (newTilesCount <= capacity) return; PROFILE_CPU_NAMED("NavMeshRuntime.EnsureCapacity"); + PROFILE_MEM(Navigation); // Navmesh tiles capacity growing rule int32 newCapacity = capacity ? 
capacity : 32; @@ -380,6 +382,7 @@ void NavMeshRuntime::AddTiles(NavMesh* navMesh) return; auto& data = navMesh->Data; PROFILE_CPU_NAMED("NavMeshRuntime.AddTiles"); + PROFILE_MEM(Navigation); ScopeLock lock(Locker); // Validate data (must match navmesh) or init navmesh to match the tiles options @@ -411,6 +414,7 @@ void NavMeshRuntime::AddTile(NavMesh* navMesh, NavMeshTileData& tileData) ASSERT(navMesh); auto& data = navMesh->Data; PROFILE_CPU_NAMED("NavMeshRuntime.AddTile"); + PROFILE_MEM(Navigation); ScopeLock lock(Locker); // Validate data (must match navmesh) or init navmesh to match the tiles options diff --git a/Source/Engine/Navigation/Navigation.cpp b/Source/Engine/Navigation/Navigation.cpp index 446464634..1d54550df 100644 --- a/Source/Engine/Navigation/Navigation.cpp +++ b/Source/Engine/Navigation/Navigation.cpp @@ -18,6 +18,7 @@ #include "Engine/Content/Deprecated.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/Serialization.h" #include #include @@ -93,6 +94,7 @@ NavMeshRuntime* NavMeshRuntime::Get(const NavMeshProperties& navMeshProperties, if (!result && createIfMissing) { // Create a new navmesh + PROFILE_MEM(Navigation); result = New(navMeshProperties); NavMeshes.Add(result); } @@ -178,16 +180,20 @@ NavigationService NavigationServiceInstance; void* dtAllocDefault(size_t size, dtAllocHint) { + PROFILE_MEM(Navigation); return Allocator::Allocate(size); } void* rcAllocDefault(size_t size, rcAllocHint) { + PROFILE_MEM(Navigation); return Allocator::Allocate(size); } NavigationSettings::NavigationSettings() { + PROFILE_MEM(Navigation); + // Init navmeshes NavMeshes.Resize(1); auto& navMesh = NavMeshes[0]; diff --git a/Source/Engine/Networking/NetworkManager.cpp b/Source/Engine/Networking/NetworkManager.cpp index af84b8d8c..3dc244bba 100644 --- a/Source/Engine/Networking/NetworkManager.cpp +++ b/Source/Engine/Networking/NetworkManager.cpp @@ 
-14,6 +14,7 @@ #include "Engine/Engine/EngineService.h" #include "Engine/Engine/Time.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Scripting.h" float NetworkManager::NetworkFPS = 60.0f; @@ -414,6 +415,7 @@ NetworkManagerService NetworkManagerServiceInstance; bool StartPeer() { PROFILE_CPU(); + PROFILE_MEM(Networking); ASSERT_LOW_LAYER(!NetworkManager::Peer); NetworkManager::State = NetworkConnectionState::Connecting; NetworkManager::StateChanged(); @@ -504,6 +506,7 @@ NetworkClient* NetworkManager::GetClient(uint32 clientId) bool NetworkManager::StartServer() { PROFILE_CPU(); + PROFILE_MEM(Networking); Stop(); LOG(Info, "Starting network manager as server"); @@ -529,6 +532,7 @@ bool NetworkManager::StartServer() bool NetworkManager::StartClient() { PROFILE_CPU(); + PROFILE_MEM(Networking); Stop(); LOG(Info, "Starting network manager as client"); @@ -553,6 +557,7 @@ bool NetworkManager::StartClient() bool NetworkManager::StartHost() { PROFILE_CPU(); + PROFILE_MEM(Networking); Stop(); LOG(Info, "Starting network manager as host"); @@ -586,6 +591,7 @@ void NetworkManager::Stop() if (Mode == NetworkManagerMode::Offline && State == NetworkConnectionState::Offline) return; PROFILE_CPU(); + PROFILE_MEM(Networking); LOG(Info, "Stopping network manager"); State = NetworkConnectionState::Disconnecting; @@ -632,6 +638,7 @@ void NetworkManager::Stop() void NetworkKeys::SendPending() { PROFILE_CPU(); + PROFILE_MEM(Networking); ScopeLock lock(Lock); // Add new keys @@ -718,6 +725,7 @@ void NetworkManagerService::Update() if (NetworkManager::Mode == NetworkManagerMode::Offline || (float)(currentTime - LastUpdateTime) < minDeltaTime || !peer) return; PROFILE_CPU(); + PROFILE_MEM(Networking); LastUpdateTime = currentTime; NetworkManager::Frame++; NetworkInternal::NetworkReplicatorPreUpdate(); diff --git a/Source/Engine/Networking/NetworkPeer.cpp b/Source/Engine/Networking/NetworkPeer.cpp index 
0f815a1ec..073d3d060 100644 --- a/Source/Engine/Networking/NetworkPeer.cpp +++ b/Source/Engine/Networking/NetworkPeer.cpp @@ -7,6 +7,7 @@ #include "Engine/Core/Math/Math.h" #include "Engine/Platform/CPUInfo.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" Array NetworkPeer::Peers; @@ -85,6 +86,7 @@ void NetworkPeer::Shutdown() void NetworkPeer::CreateMessageBuffers() { + PROFILE_MEM(Networking); ASSERT(MessageBuffer == nullptr); const uint32 pageSize = Platform::GetCPUInfo().PageSize; @@ -198,6 +200,8 @@ bool NetworkPeer::EndSendMessage(const NetworkChannelType channelType, const Net NetworkPeer* NetworkPeer::CreatePeer(const NetworkConfig& config) { + PROFILE_MEM(Networking); + // Validate the address for listen/connect if (config.Address != TEXT("any")) { diff --git a/Source/Engine/Networking/NetworkReplicator.cpp b/Source/Engine/Networking/NetworkReplicator.cpp index af478a019..fba916891 100644 --- a/Source/Engine/Networking/NetworkReplicator.cpp +++ b/Source/Engine/Networking/NetworkReplicator.cpp @@ -24,6 +24,7 @@ #include "Engine/Level/Prefabs/Prefab.h" #include "Engine/Level/Prefabs/PrefabManager.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Script.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/ScriptingObjectReference.h" @@ -1112,6 +1113,7 @@ void NetworkReplicator::AddSerializer(const ScriptingTypeHandle& typeHandle, Ser { if (!typeHandle) return; + PROFILE_MEM(Networking); const Serializer serializer{ { serialize, deserialize }, { serializeTag, deserializeTag } }; SerializersTable[typeHandle] = serializer; } @@ -1145,6 +1147,7 @@ bool NetworkReplicator::InvokeSerializer(const ScriptingTypeHandle& typeHandle, serializer.Methods[1] = INetworkSerializable_Script_Deserialize; serializer.Tags[0] = serializer.Tags[1] = nullptr; } + PROFILE_MEM(Networking); SerializersTable.Add(typeHandle, serializer); } else if (const 
ScriptingTypeHandle baseTypeHandle = typeHandle.GetType().GetBaseType()) @@ -1166,6 +1169,7 @@ void NetworkReplicator::AddObject(ScriptingObject* obj, const ScriptingObject* p { if (!obj || NetworkManager::IsOffline()) return; + PROFILE_MEM(Networking); ScopeLock lock(ObjectsLock); if (Objects.Contains(obj)) return; @@ -1235,6 +1239,7 @@ void NetworkReplicator::SpawnObject(ScriptingObject* obj, const DataContainerGetID()); if (it != Objects.End() && it->Item.Spawned) @@ -1250,6 +1255,7 @@ void NetworkReplicator::DespawnObject(ScriptingObject* obj) { if (!obj || NetworkManager::IsOffline()) return; + PROFILE_MEM(Networking); ScopeLock lock(ObjectsLock); const auto it = Objects.Find(obj->GetID()); if (it == Objects.End()) @@ -1524,6 +1530,7 @@ Dictionary NetworkRpcInfo::RPCsTable; NetworkStream* NetworkReplicator::BeginInvokeRPC() { + PROFILE_MEM(Networking); if (CachedWriteStream == nullptr) CachedWriteStream = New(); CachedWriteStream->Initialize(); @@ -1540,6 +1547,7 @@ bool NetworkReplicator::EndInvokeRPC(ScriptingObject* obj, const ScriptingTypeHa const NetworkRpcInfo* info = NetworkRpcInfo::RPCsTable.TryGet(NetworkRpcName(type, name)); if (!info || !obj || NetworkManager::IsOffline()) return false; + PROFILE_MEM(Networking); ObjectsLock.Lock(); auto& rpc = RpcQueue.AddOne(); rpc.Object = obj; diff --git a/Source/Engine/Networking/NetworkStream.cpp b/Source/Engine/Networking/NetworkStream.cpp index 5c4a367f1..1542c98bd 100644 --- a/Source/Engine/Networking/NetworkStream.cpp +++ b/Source/Engine/Networking/NetworkStream.cpp @@ -4,6 +4,7 @@ #include "INetworkSerializable.h" #include "Engine/Core/Math/Quaternion.h" #include "Engine/Core/Math/Transform.h" +#include "Engine/Profiler/ProfilerMemory.h" // Quaternion quantized for optimized network data size. 
struct NetworkQuaternion @@ -119,6 +120,7 @@ void NetworkStream::Initialize(uint32 minCapacity) Allocator::Free(_buffer); // Allocate new one + PROFILE_MEM(Networking); _buffer = (byte*)Allocator::Allocate(minCapacity); _length = minCapacity; _allocated = true; @@ -246,6 +248,7 @@ void NetworkStream::WriteBytes(const void* data, uint32 bytes) uint32 newLength = _length != 0 ? _length * 2 : 256; while (newLength < position + bytes) newLength *= 2; + PROFILE_MEM(Networking); byte* newBuf = (byte*)Allocator::Allocate(newLength); if (_buffer && _length) Platform::MemoryCopy(newBuf, _buffer, _length); diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index c1031f4ac..1359dbcf2 100644 --- a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -6,6 +6,7 @@ #include "Engine/Content/Deprecated.h" #include "Engine/Serialization/JsonTools.h" #include "Engine/Serialization/Serialization.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Level/Scene/SceneRendering.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Engine/Time.h" @@ -380,6 +381,7 @@ void ParticleEffect::Sync() Instance.ClearState(); return; } + PROFILE_MEM(Particles); Instance.Sync(system); @@ -498,6 +500,7 @@ void ParticleEffect::CacheModifiedParameters() { if (_parameters.IsEmpty()) return; + PROFILE_MEM(Particles); _parametersOverrides.Clear(); auto& parameters = GetParameters(); for (auto& param : parameters) @@ -516,6 +519,7 @@ void ParticleEffect::ApplyModifiedParameters() { if (_parametersOverrides.IsEmpty()) return; + PROFILE_MEM(Particles); // Parameters getter applies the parameters overrides if (_parameters.IsEmpty()) @@ -658,6 +662,7 @@ void ParticleEffect::Deserialize(DeserializeStream& stream, ISerializeModifier* // Base Actor::Deserialize(stream, modifier); + PROFILE_MEM(Particles); const auto overridesMember = stream.FindMember("Overrides"); if (overridesMember != stream.MemberEnd()) 
{ diff --git a/Source/Engine/Particles/ParticleEmitter.cpp b/Source/Engine/Particles/ParticleEmitter.cpp index 452d4560a..c7bc647bf 100644 --- a/Source/Engine/Particles/ParticleEmitter.cpp +++ b/Source/Engine/Particles/ParticleEmitter.cpp @@ -13,6 +13,7 @@ #include "Engine/Serialization/MemoryReadStream.h" #include "Engine/Serialization/MemoryWriteStream.h" #include "Engine/Threading/Threading.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_EDITOR #include "ParticleEmitterFunction.h" #include "Engine/ShadersCompilation/Config.h" @@ -41,6 +42,7 @@ ParticleEmitter::ParticleEmitter(const SpawnParams& params, const AssetInfo* inf ParticleEffect* ParticleEmitter::Spawn(Actor* parent, const Transform& transform, float duration, bool autoDestroy) { + PROFILE_MEM(Particles); CHECK_RETURN(!WaitForLoaded(), nullptr); auto system = Content::CreateVirtualAsset(); CHECK_RETURN(system, nullptr); @@ -72,6 +74,7 @@ namespace Asset::LoadResult ParticleEmitter::load() { + PROFILE_MEM(Particles); ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); // Load the graph diff --git a/Source/Engine/Particles/ParticleEmitterFunction.cpp b/Source/Engine/Particles/ParticleEmitterFunction.cpp index 3aa0ec115..f8aa5c62a 100644 --- a/Source/Engine/Particles/ParticleEmitterFunction.cpp +++ b/Source/Engine/Particles/ParticleEmitterFunction.cpp @@ -5,6 +5,7 @@ #include "Engine/Core/Log.h" #include "Engine/Serialization/MemoryReadStream.h" #include "Engine/Threading/Threading.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_EDITOR #include "Engine/Core/Types/DataContainer.h" #include "Engine/Serialization/MemoryWriteStream.h" @@ -41,6 +42,7 @@ ParticleEmitterFunction::ParticleEmitterFunction(const SpawnParams& params, cons Asset::LoadResult ParticleEmitterFunction::load() { + PROFILE_MEM(Particles); ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); // Load graph diff --git a/Source/Engine/Particles/ParticleSystem.cpp 
b/Source/Engine/Particles/ParticleSystem.cpp index 7eea5a08d..8354f48cb 100644 --- a/Source/Engine/Particles/ParticleSystem.cpp +++ b/Source/Engine/Particles/ParticleSystem.cpp @@ -6,6 +6,7 @@ #include "Engine/Level/Level.h" #include "Engine/Content/Deprecated.h" #include "Engine/Content/Factories/BinaryAssetFactory.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/MemoryReadStream.h" #include "Engine/Serialization/MemoryWriteStream.h" #include "Engine/Threading/Threading.h" @@ -146,6 +147,7 @@ bool ParticleSystem::SaveTimeline(const BytesContainer& data) const ParticleEffect* ParticleSystem::Spawn(Actor* parent, const Transform& transform, bool autoDestroy) { + PROFILE_MEM(Particles); CHECK_RETURN(!WaitForLoaded(), nullptr); auto effect = New(); @@ -202,6 +204,7 @@ bool ParticleSystem::Save(const StringView& path) Asset::LoadResult ParticleSystem::load() { + PROFILE_MEM(Particles); Version++; // Get the data chunk diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index e895e0b6d..3cf25eec6 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -16,6 +16,7 @@ #include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/Shaders/GPUVertexLayout.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Renderer/DrawCall.h" #include "Engine/Renderer/RenderList.h" #include "Engine/Threading/TaskGraph.h" @@ -167,6 +168,7 @@ ParticleManagerService ParticleManagerServiceInstance; void Particles::UpdateEffect(ParticleEffect* effect) { + PROFILE_MEM(Particles); UpdateList.Add(effect); } @@ -933,6 +935,7 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe const DrawPass drawModes = view.Pass & effect->DrawModes; if (drawModes == DrawPass::None || SpriteRenderer.Init()) return; + PROFILE_MEM(Particles); Matrix worlds[2]; Matrix::Translation(-renderContext.View.Origin, worlds[0]); 
// World renderContext.View.GetWorldMatrix(effect->GetTransform(), worlds[1]); // Local @@ -1073,6 +1076,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context) if (GpuUpdateList.IsEmpty()) return; PROFILE_GPU("GPU Particles"); + PROFILE_MEM(Particles); for (ParticleEffect* effect : GpuUpdateList) { @@ -1112,6 +1116,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context) ParticleBuffer* Particles::AcquireParticleBuffer(ParticleEmitter* emitter) { PROFILE_CPU(); + PROFILE_MEM(Particles); ParticleBuffer* result = nullptr; ASSERT(emitter && emitter->IsLoaded()); @@ -1161,6 +1166,7 @@ ParticleBuffer* Particles::AcquireParticleBuffer(ParticleEmitter* emitter) void Particles::RecycleParticleBuffer(ParticleBuffer* buffer) { PROFILE_CPU(); + PROFILE_MEM(Particles); if (buffer->Emitter->EnablePooling && EnableParticleBufferPooling) { // Return to pool @@ -1208,6 +1214,7 @@ void Particles::OnEmitterUnload(ParticleEmitter* emitter) bool ParticleManagerService::Init() { + PROFILE_MEM(Particles); Particles::System = New(); Particles::System->Order = 10000; Engine::UpdateGraph->AddSystem(Particles::System); @@ -1253,6 +1260,7 @@ void ParticleManagerService::Dispose() void ParticlesSystem::Job(int32 index) { PROFILE_CPU_NAMED("Particles.Job"); + PROFILE_MEM(Particles); auto effect = UpdateList[index]; auto& instance = effect->Instance; const auto particleSystem = effect->ParticleSystem.Get(); @@ -1432,6 +1440,7 @@ void ParticlesSystem::PostExecute(TaskGraph* graph) if (!Active) return; PROFILE_CPU_NAMED("Particles.PostExecute"); + PROFILE_MEM(Particles); // Cleanup Particles::SystemLocker.End(false); diff --git a/Source/Engine/Particles/ParticlesData.cpp b/Source/Engine/Particles/ParticlesData.cpp index dcdef46d7..10988fd34 100644 --- a/Source/Engine/Particles/ParticlesData.cpp +++ b/Source/Engine/Particles/ParticlesData.cpp @@ -5,6 +5,7 @@ #include "Engine/Graphics/GPUBuffer.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/DynamicBuffer.h" +#include 
"Engine/Profiler/ProfilerMemory.h" ParticleBuffer::ParticleBuffer() { @@ -23,6 +24,7 @@ ParticleBuffer::~ParticleBuffer() bool ParticleBuffer::Init(ParticleEmitter* emitter) { + PROFILE_MEM(Particles); ASSERT(emitter && emitter->IsLoaded()); Version = emitter->Graph.Version; diff --git a/Source/Engine/Physics/Actors/Cloth.cpp b/Source/Engine/Physics/Actors/Cloth.cpp index 889a0874e..037dc0000 100644 --- a/Source/Engine/Physics/Actors/Cloth.cpp +++ b/Source/Engine/Physics/Actors/Cloth.cpp @@ -11,6 +11,7 @@ #include "Engine/Physics/PhysicsBackend.h" #include "Engine/Physics/PhysicsScene.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Level/Actors/AnimatedModel.h" #include "Engine/Level/Scene/SceneRendering.h" @@ -132,6 +133,7 @@ Array Cloth::GetParticles() const if (_cloth) { PROFILE_CPU(); + PROFILE_MEM(Physics); PhysicsBackend::LockClothParticles(_cloth); const Span particles = PhysicsBackend::GetClothParticles(_cloth); result.Resize(particles.Length()); @@ -148,6 +150,7 @@ Array Cloth::GetParticles() const void Cloth::SetParticles(Span value) { PROFILE_CPU(); + PROFILE_MEM(Physics); #if USE_CLOTH_SANITY_CHECKS { // Sanity check @@ -177,6 +180,7 @@ Span Cloth::GetPaint() const void Cloth::SetPaint(Span value) { PROFILE_CPU(); + PROFILE_MEM(Physics); #if USE_CLOTH_SANITY_CHECKS { // Sanity check @@ -302,6 +306,7 @@ void Cloth::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) { Actor::Deserialize(stream, modifier); + PROFILE_MEM(Physics); DESERIALIZE_MEMBER(Mesh, _mesh); _mesh.Actor = nullptr; // Don't store this reference DESERIALIZE_MEMBER(Force, _forceSettings); @@ -536,6 +541,7 @@ bool Cloth::CreateCloth() { #if WITH_CLOTH PROFILE_CPU(); + PROFILE_MEM(Physics); // Skip if all vertices are fixed so cloth sim doesn't make sense if (_paint.HasItems()) @@ -631,6 +637,7 @@ void Cloth::CalculateInvMasses(Array& invMasses) if (_paint.IsEmpty()) 
return; PROFILE_CPU(); + PROFILE_MEM(Physics); // Get mesh data const ModelInstanceActor::MeshReference mesh = GetMesh(); @@ -918,6 +925,7 @@ void Cloth::RunClothDeformer(const MeshBase* mesh, MeshDeformationData& deformat return; #if WITH_CLOTH PROFILE_CPU_NAMED("Cloth"); + PROFILE_MEM(Physics); PhysicsBackend::LockClothParticles(_cloth); const Span particles = PhysicsBackend::GetClothParticles(_cloth); auto vbCount = (uint32)mesh->GetVertexCount(); diff --git a/Source/Engine/Physics/Actors/SplineRopeBody.cpp b/Source/Engine/Physics/Actors/SplineRopeBody.cpp index 2cf81e228..ea020b5c5 100644 --- a/Source/Engine/Physics/Actors/SplineRopeBody.cpp +++ b/Source/Engine/Physics/Actors/SplineRopeBody.cpp @@ -7,6 +7,7 @@ #include "Engine/Physics/PhysicsScene.h" #include "Engine/Engine/Time.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/Serialization.h" SplineRopeBody::SplineRopeBody(const SpawnParams& params) @@ -19,6 +20,7 @@ void SplineRopeBody::Tick() if (!_spline || _spline->GetSplinePointsCount() < 2) return; PROFILE_CPU(); + PROFILE_MEM(Physics); // Cache data const Vector3 gravity = GetPhysicsScene()->GetGravity() * GravityScale; diff --git a/Source/Engine/Physics/CollisionCooking.cpp b/Source/Engine/Physics/CollisionCooking.cpp index 4522d6862..bbf3f4f91 100644 --- a/Source/Engine/Physics/CollisionCooking.cpp +++ b/Source/Engine/Physics/CollisionCooking.cpp @@ -10,11 +10,13 @@ #include "Engine/Graphics/Models/MeshAccessor.h" #include "Engine/Threading/Threading.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Core/Log.h" bool CollisionCooking::CookCollision(const Argument& arg, CollisionData::SerializedOptions& outputOptions, BytesContainer& outputData) { PROFILE_CPU(); + PROFILE_MEM(Physics); int32 convexVertexLimit = Math::Clamp(arg.ConvexVertexLimit, CONVEX_VERTEX_MIN, CONVEX_VERTEX_MAX); if (arg.ConvexVertexLimit == 0) 
convexVertexLimit = CONVEX_VERTEX_MAX; diff --git a/Source/Engine/Physics/CollisionData.cpp b/Source/Engine/Physics/CollisionData.cpp index e7e02c464..c65ea8a6b 100644 --- a/Source/Engine/Physics/CollisionData.cpp +++ b/Source/Engine/Physics/CollisionData.cpp @@ -9,6 +9,7 @@ #include "Engine/Physics/PhysicsBackend.h" #include "Engine/Physics/CollisionCooking.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" REGISTER_BINARY_ASSET(CollisionData, "FlaxEngine.CollisionData", true); @@ -35,6 +36,7 @@ bool CollisionData::CookCollision(CollisionDataType type, ModelBase* modelObj, i return true; } PROFILE_CPU(); + PROFILE_MEM(Physics); // Prepare CollisionCooking::Argument arg; @@ -64,6 +66,7 @@ bool CollisionData::CookCollision(CollisionDataType type, ModelBase* modelObj, i bool CollisionData::CookCollision(CollisionDataType type, const Span& vertices, const Span& triangles, ConvexMeshGenerationFlags convexFlags, int32 convexVertexLimit) { PROFILE_CPU(); + PROFILE_MEM(Physics); CHECK_RETURN(vertices.Length() != 0, true); CHECK_RETURN(triangles.Length() != 0 && triangles.Length() % 3 == 0, true); ModelData modelData; @@ -78,6 +81,7 @@ bool CollisionData::CookCollision(CollisionDataType type, const Span& ve bool CollisionData::CookCollision(CollisionDataType type, const Span& vertices, const Span& triangles, ConvexMeshGenerationFlags convexFlags, int32 convexVertexLimit) { PROFILE_CPU(); + PROFILE_MEM(Physics); CHECK_RETURN(vertices.Length() != 0, true); CHECK_RETURN(triangles.Length() != 0 && triangles.Length() % 3 == 0, true); ModelData modelData; @@ -99,6 +103,7 @@ bool CollisionData::CookCollision(CollisionDataType type, ModelData* modelData, return true; } PROFILE_CPU(); + PROFILE_MEM(Physics); // Prepare CollisionCooking::Argument arg; @@ -180,6 +185,7 @@ bool CollisionData::GetModelTriangle(uint32 faceIndex, MeshBase*& mesh, uint32& void CollisionData::ExtractGeometry(Array& vertexBuffer, 
Array& indexBuffer) const { PROFILE_CPU(); + PROFILE_MEM(Physics); vertexBuffer.Clear(); indexBuffer.Clear(); @@ -197,6 +203,7 @@ const Array& CollisionData::GetDebugLines() if (_hasMissingDebugLines && IsLoaded()) { PROFILE_CPU(); + PROFILE_MEM(Physics); ScopeLock lock(Locker); _hasMissingDebugLines = false; @@ -250,6 +257,8 @@ Asset::LoadResult CollisionData::load() CollisionData::LoadResult CollisionData::load(const SerializedOptions* options, byte* dataPtr, int32 dataSize) { + PROFILE_MEM(Physics); + // Load options _options.Type = options->Type; _options.Model = options->Model; diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index e4636a568..5023b69c7 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -24,6 +24,7 @@ #include "Engine/Platform/CPUInfo.h" #include "Engine/Platform/CriticalSection.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/WriteStream.h" #include #include @@ -117,6 +118,7 @@ class AllocatorPhysX : public PxAllocatorCallback void* allocate(size_t size, const char* typeName, const char* filename, int line) override { ASSERT(size < 1024 * 1024 * 1024); // Prevent invalid allocation size + PROFILE_MEM(Physics); return Allocator::Allocate(size, 16); } @@ -725,6 +727,7 @@ void ScenePhysX::UpdateVehicles(float dt) if (WheelVehicles.IsEmpty()) return; PROFILE_CPU_NAMED("Physics.Vehicles"); + PROFILE_MEM(Physics); // Update vehicles steering WheelVehiclesCache.Clear(); @@ -1861,6 +1864,7 @@ void PhysicsBackend::DestroyScene(void* scene) void PhysicsBackend::StartSimulateScene(void* scene, float dt) { + PROFILE_MEM(Physics); auto scenePhysX = (ScenePhysX*)scene; const auto& settings = *PhysicsSettings::Get(); @@ -1895,6 +1899,7 @@ void PhysicsBackend::StartSimulateScene(void* scene, float dt) void PhysicsBackend::EndSimulateScene(void* 
scene) { + PROFILE_MEM(Physics); auto scenePhysX = (ScenePhysX*)scene; { @@ -3880,6 +3885,7 @@ void PhysicsBackend::RemoveVehicle(void* scene, WheeledVehicle* actor) void* PhysicsBackend::CreateCloth(const PhysicsClothDesc& desc) { PROFILE_CPU(); + PROFILE_MEM(Physics); #if USE_CLOTH_SANITY_CHECKS { // Sanity check diff --git a/Source/Engine/Physics/PhysX/SimulationEventCallbackPhysX.cpp b/Source/Engine/Physics/PhysX/SimulationEventCallbackPhysX.cpp index b35538711..73135c3a3 100644 --- a/Source/Engine/Physics/PhysX/SimulationEventCallbackPhysX.cpp +++ b/Source/Engine/Physics/PhysX/SimulationEventCallbackPhysX.cpp @@ -6,6 +6,7 @@ #include "Engine/Physics/Colliders/Collider.h" #include "Engine/Physics/Joints/Joint.h" #include "Engine/Physics/Actors/RigidBody.h" +#include "Engine/Profiler/ProfilerMemory.h" #include #include @@ -91,6 +92,7 @@ void SimulationEventCallback::OnJointRemoved(Joint* joint) void SimulationEventCallback::onConstraintBreak(PxConstraintInfo* constraints, PxU32 count) { + PROFILE_MEM(Physics); for (uint32 i = 0; i < count; i++) { PxJoint* joint = reinterpret_cast(constraints[i].externalReference); @@ -114,6 +116,7 @@ void SimulationEventCallback::onContact(const PxContactPairHeader& pairHeader, c // Skip sending events to removed actors if (pairHeader.flags & (PxContactPairHeaderFlag::eREMOVED_ACTOR_0 | PxContactPairHeaderFlag::eREMOVED_ACTOR_1)) return; + PROFILE_MEM(Physics); Collision c; PxContactPairExtraDataIterator j(pairHeader.extraDataStream, pairHeader.extraDataStreamSize); @@ -185,6 +188,7 @@ void SimulationEventCallback::onContact(const PxContactPairHeader& pairHeader, c void SimulationEventCallback::onTrigger(PxTriggerPair* pairs, PxU32 count) { + PROFILE_MEM(Physics); for (PxU32 i = 0; i < count; i++) { const PxTriggerPair& pair = pairs[i]; diff --git a/Source/Engine/Physics/Physics.cpp b/Source/Engine/Physics/Physics.cpp index 6b48bc157..ecd7c1093 100644 --- a/Source/Engine/Physics/Physics.cpp +++ 
b/Source/Engine/Physics/Physics.cpp @@ -10,6 +10,7 @@ #include "Engine/Engine/Time.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Threading/Threading.h" @@ -117,6 +118,8 @@ PhysicalMaterial::~PhysicalMaterial() bool PhysicsService::Init() { + PROFILE_MEM(Physics); + // Initialize backend if (PhysicsBackend::Init()) return true; @@ -153,6 +156,7 @@ void PhysicsService::Dispose() PhysicsScene* Physics::FindOrCreateScene(const StringView& name) { + PROFILE_MEM(Physics); auto scene = FindScene(name); if (scene == nullptr) { @@ -244,6 +248,7 @@ bool Physics::IsDuringSimulation() void Physics::FlushRequests() { PROFILE_CPU_NAMED("Physics.FlushRequests"); + PROFILE_MEM(Physics); for (PhysicsScene* scene : Scenes) PhysicsBackend::FlushRequests(scene->GetPhysicsScene()); PhysicsBackend::FlushRequests(); @@ -492,6 +497,7 @@ PhysicsStatistics PhysicsScene::GetStatistics() const bool PhysicsScene::Init(const StringView& name, const PhysicsSettings& settings) { + PROFILE_MEM(Physics); if (_scene) { PhysicsBackend::DestroyScene(_scene); diff --git a/Source/Engine/Platform/Base/WindowBase.cpp b/Source/Engine/Platform/Base/WindowBase.cpp index de64dfd94..3bd34a1a0 100644 --- a/Source/Engine/Platform/Base/WindowBase.cpp +++ b/Source/Engine/Platform/Base/WindowBase.cpp @@ -10,6 +10,7 @@ #include "Engine/Platform/IGuiData.h" #include "Engine/Scripting/ScriptingType.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/ManagedCLR/MException.h" #include "Engine/Scripting/ManagedCLR/MUtils.h" #include "Engine/Scripting/ManagedCLR/MMethod.h" @@ -204,6 +205,7 @@ void WindowBase::SetRenderingEnabled(bool value) void WindowBase::OnCharInput(Char c) { PROFILE_CPU_NAMED("GUI.OnCharInput"); + PROFILE_MEM(UI); CharInput(c); INVOKE_EVENT_PARAMS_1(OnCharInput, &c); } @@ -211,6 +213,7 @@ 
void WindowBase::OnCharInput(Char c) void WindowBase::OnKeyDown(KeyboardKeys key) { PROFILE_CPU_NAMED("GUI.OnKeyDown"); + PROFILE_MEM(UI); KeyDown(key); INVOKE_EVENT_PARAMS_1(OnKeyDown, &key); } @@ -218,6 +221,7 @@ void WindowBase::OnKeyDown(KeyboardKeys key) void WindowBase::OnKeyUp(KeyboardKeys key) { PROFILE_CPU_NAMED("GUI.OnKeyUp"); + PROFILE_MEM(UI); KeyUp(key); INVOKE_EVENT_PARAMS_1(OnKeyUp, &key); } @@ -225,6 +229,7 @@ void WindowBase::OnKeyUp(KeyboardKeys key) void WindowBase::OnMouseDown(const Float2& mousePosition, MouseButton button) { PROFILE_CPU_NAMED("GUI.OnMouseDown"); + PROFILE_MEM(UI); MouseDown(mousePosition, button); INVOKE_EVENT_PARAMS_2(OnMouseDown, (void*)&mousePosition, &button); } @@ -232,6 +237,7 @@ void WindowBase::OnMouseDown(const Float2& mousePosition, MouseButton button) void WindowBase::OnMouseUp(const Float2& mousePosition, MouseButton button) { PROFILE_CPU_NAMED("GUI.OnMouseUp"); + PROFILE_MEM(UI); MouseUp(mousePosition, button); INVOKE_EVENT_PARAMS_2(OnMouseUp, (void*)&mousePosition, &button); } @@ -239,6 +245,7 @@ void WindowBase::OnMouseUp(const Float2& mousePosition, MouseButton button) void WindowBase::OnMouseDoubleClick(const Float2& mousePosition, MouseButton button) { PROFILE_CPU_NAMED("GUI.OnMouseDoubleClick"); + PROFILE_MEM(UI); MouseDoubleClick(mousePosition, button); INVOKE_EVENT_PARAMS_2(OnMouseDoubleClick, (void*)&mousePosition, &button); } @@ -246,6 +253,7 @@ void WindowBase::OnMouseDoubleClick(const Float2& mousePosition, MouseButton but void WindowBase::OnMouseWheel(const Float2& mousePosition, float delta) { PROFILE_CPU_NAMED("GUI.OnMouseWheel"); + PROFILE_MEM(UI); MouseWheel(mousePosition, delta); INVOKE_EVENT_PARAMS_2(OnMouseWheel, (void*)&mousePosition, &delta); } @@ -253,6 +261,7 @@ void WindowBase::OnMouseWheel(const Float2& mousePosition, float delta) void WindowBase::OnMouseMove(const Float2& mousePosition) { PROFILE_CPU_NAMED("GUI.OnMouseMove"); + PROFILE_MEM(UI); MouseMove(mousePosition); 
INVOKE_EVENT_PARAMS_1(OnMouseMove, (void*)&mousePosition); } @@ -260,6 +269,7 @@ void WindowBase::OnMouseMove(const Float2& mousePosition) void WindowBase::OnMouseLeave() { PROFILE_CPU_NAMED("GUI.OnMouseLeave"); + PROFILE_MEM(UI); MouseLeave(); INVOKE_EVENT_PARAMS_0(OnMouseLeave); } @@ -267,6 +277,7 @@ void WindowBase::OnMouseLeave() void WindowBase::OnTouchDown(const Float2& pointerPosition, int32 pointerId) { PROFILE_CPU_NAMED("GUI.OnTouchDown"); + PROFILE_MEM(UI); TouchDown(pointerPosition, pointerId); INVOKE_EVENT_PARAMS_2(OnTouchDown, (void*)&pointerPosition, &pointerId); } @@ -274,6 +285,7 @@ void WindowBase::OnTouchDown(const Float2& pointerPosition, int32 pointerId) void WindowBase::OnTouchMove(const Float2& pointerPosition, int32 pointerId) { PROFILE_CPU_NAMED("GUI.OnTouchMove"); + PROFILE_MEM(UI); TouchMove(pointerPosition, pointerId); INVOKE_EVENT_PARAMS_2(OnTouchMove, (void*)&pointerPosition, &pointerId); } @@ -281,6 +293,7 @@ void WindowBase::OnTouchMove(const Float2& pointerPosition, int32 pointerId) void WindowBase::OnTouchUp(const Float2& pointerPosition, int32 pointerId) { PROFILE_CPU_NAMED("GUI.OnTouchUp"); + PROFILE_MEM(UI); TouchUp(pointerPosition, pointerId); INVOKE_EVENT_PARAMS_2(OnTouchUp, (void*)&pointerPosition, &pointerId); } @@ -391,6 +404,7 @@ bool WindowBase::GetMouseButtonUp(MouseButton button) const void WindowBase::OnShow() { PROFILE_CPU_NAMED("GUI.OnShow"); + PROFILE_MEM(UI); INVOKE_EVENT_PARAMS_0(OnShow); Shown(); } @@ -398,10 +412,13 @@ void WindowBase::OnShow() void WindowBase::OnResize(int32 width, int32 height) { PROFILE_CPU_NAMED("GUI.OnResize"); + PROFILE_MEM_BEGIN(Graphics); if (_swapChain) _swapChain->Resize(width, height); if (RenderTask) RenderTask->Resize(width, height); + PROFILE_MEM_END(); + PROFILE_MEM(UI); Resized({ static_cast(width), static_cast(height) }); INVOKE_EVENT_PARAMS_2(OnResize, &width, &height); } @@ -453,6 +470,7 @@ void WindowBase::OnLostFocus() void WindowBase::OnUpdate(float dt) { 
PROFILE_CPU_NAMED("GUI.OnUpdate"); + PROFILE_MEM(UI); Update(dt); INVOKE_EVENT_PARAMS_1(OnUpdate, &dt); } @@ -460,6 +478,7 @@ void WindowBase::OnUpdate(float dt) void WindowBase::OnDraw() { PROFILE_CPU_NAMED("GUI.OnDraw"); + PROFILE_MEM(UI); INVOKE_EVENT_PARAMS_0(OnDraw); Draw(); } @@ -467,6 +486,7 @@ void WindowBase::OnDraw() bool WindowBase::InitSwapChain() { // Setup swapchain + PROFILE_MEM(Graphics); if (_swapChain == nullptr) { _swapChain = GPUDevice::Instance->CreateSwapChain((Window*)this); diff --git a/Source/Engine/Platform/Unix/UnixThread.cpp b/Source/Engine/Platform/Unix/UnixThread.cpp index c58c009be..ff6e61b2a 100644 --- a/Source/Engine/Platform/Unix/UnixThread.cpp +++ b/Source/Engine/Platform/Unix/UnixThread.cpp @@ -4,6 +4,9 @@ #include "UnixThread.h" #include "Engine/Core/Log.h" +#if PLATFORM_APPLE_FAMILY +#include "Engine/Utilities/StringConverter.h" +#endif #include "Engine/Threading/IRunnable.h" #include "Engine/Threading/ThreadRegistry.h" @@ -29,7 +32,8 @@ void* UnixThread::ThreadProc(void* pThis) #if PLATFORM_APPLE_FAMILY // Apple doesn't support creating named thread so assign name here { - pthread_setname_np(StringAnsi(thread->GetName()).Get()); + const String& name = thread->GetName(); + pthread_setname_np(StringAsANSI<>(name.Get(), name.Length()).Get()); } #endif const int32 exitCode = thread->Run(); diff --git a/Source/Engine/Profiler/ProfilingTools.cpp b/Source/Engine/Profiler/ProfilingTools.cpp index ea50d2183..0e7ac1566 100644 --- a/Source/Engine/Profiler/ProfilingTools.cpp +++ b/Source/Engine/Profiler/ProfilingTools.cpp @@ -33,6 +33,7 @@ ProfilingToolsService ProfilingToolsServiceInstance; void ProfilingToolsService::Update() { ZoneScoped; + PROFILE_MEM(Profiler); // Capture stats { diff --git a/Source/Engine/Render2D/Render2D.cpp b/Source/Engine/Render2D/Render2D.cpp index 057265aae..251781c99 100644 --- a/Source/Engine/Render2D/Render2D.cpp +++ b/Source/Engine/Render2D/Render2D.cpp @@ -597,6 +597,8 @@ void OnGUIShaderReloading(Asset* 
obj) bool Render2DService::Init() { + PROFILE_MEM(UI); + // GUI Shader GUIShader = Content::LoadAsyncInternal(TEXT("Shaders/GUI")); if (GUIShader == nullptr) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 97a157355..2a6540da5 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -626,6 +626,7 @@ void RenderList::BuildObjectsBuffer() if (count == 0) return; PROFILE_CPU(); + PROFILE_MEM(GraphicsCommands); ObjectBuffer.Data.Resize(count * sizeof(ShaderObjectData)); auto* src = (const DrawCall*)DrawCalls.Get(); auto* dst = (ShaderObjectData*)ObjectBuffer.Data.Get(); @@ -648,6 +649,7 @@ void RenderList::BuildObjectsBuffer() void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawPass pass, bool stable) { PROFILE_CPU(); + PROFILE_MEM(GraphicsCommands); const auto* drawCallsData = drawCalls.Get(); const auto* listData = list.Indices.Get(); const int32 listSize = list.Indices.Count(); @@ -754,6 +756,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL if (list.IsEmpty()) return; PROFILE_GPU_CPU("Drawing"); + PROFILE_MEM(GraphicsCommands); const auto* drawCallsData = drawCallsList->DrawCalls.Get(); const auto* listData = list.Indices.Get(); const auto* batchesData = list.Batches.Get(); diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 26f3a636e..56c78600b 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -36,6 +36,7 @@ #include "Engine/Level/Scene/SceneRendering.h" #include "Engine/Core/Config/GraphicsSettings.h" #include "Engine/Threading/JobSystem.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_EDITOR #include "Editor/Editor.h" #include "Editor/QuadOverdrawPass.h" @@ -68,6 +69,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont 
bool RendererService::Init() { + PROFILE_MEM(Graphics); + // Register passes PassList.Add(GBufferPass::Instance()); PassList.Add(ShadowsPass::Instance()); diff --git a/Source/Engine/Scripting/BinaryModule.cpp b/Source/Engine/Scripting/BinaryModule.cpp index 8d61e2bb5..3030b041d 100644 --- a/Source/Engine/Scripting/BinaryModule.cpp +++ b/Source/Engine/Scripting/BinaryModule.cpp @@ -6,6 +6,7 @@ #include "Engine/Core/Utilities.h" #include "Engine/Threading/Threading.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "ManagedCLR/MAssembly.h" #include "ManagedCLR/MClass.h" #include "ManagedCLR/MMethod.h" @@ -762,6 +763,8 @@ ManagedBinaryModule* ManagedBinaryModule::GetModule(const MAssembly* assembly) ScriptingObject* ManagedBinaryModule::ManagedObjectSpawn(const ScriptingObjectSpawnParams& params) { + PROFILE_MEM(Scripting); + // Create native object ScriptingTypeHandle managedTypeHandle = params.Type; const ScriptingType* managedTypePtr = &managedTypeHandle.GetType(); @@ -932,6 +935,7 @@ void ManagedBinaryModule::OnLoaded(MAssembly* assembly) { #if !COMPILE_WITHOUT_CSHARP PROFILE_CPU(); + PROFILE_MEM(Scripting); ASSERT(ClassToTypeIndex.IsEmpty()); ScopeLock lock(Locker); @@ -1028,6 +1032,7 @@ void ManagedBinaryModule::InitType(MClass* mclass) const StringAnsi& typeName = mclass->GetFullName(); if (TypeNameToTypeIndex.ContainsKey(typeName)) return; + PROFILE_MEM(Scripting); // Find first native base C++ class of this C# class MClass* baseClass = mclass->GetBaseClass(); @@ -1057,9 +1062,13 @@ void ManagedBinaryModule::InitType(MClass* mclass) if (baseType.TypeIndex == -1 || baseType.Module == nullptr) { if (baseType.Module) + { LOG(Error, "Missing base class for managed class {0} from assembly {1}.", String(baseClass->GetFullName()), baseType.Module->GetName().ToString()); + } else + { LOG(Error, "Missing base class for managed class {0} from unknown assembly.", String(baseClass->GetFullName())); + } return; } @@ -1183,6 
+1192,7 @@ void ManagedBinaryModule::OnUnloading(MAssembly* assembly) void ManagedBinaryModule::OnUnloaded(MAssembly* assembly) { PROFILE_CPU(); + PROFILE_MEM(Scripting); // Clear managed-only types Types.Resize(_firstManagedTypeIndex); @@ -1495,9 +1505,13 @@ bool ManagedBinaryModule::GetFieldValue(void* field, const Variant& instance, Va if (!instanceObject || !MCore::Object::GetClass(instanceObject)->IsSubClassOf(parentClass)) { if (!instanceObject) + { LOG(Error, "Failed to get '{0}.{1}' without object instance", String(parentClass->GetFullName()), String(name)); + } else + { LOG(Error, "Failed to get '{0}.{1}' with invalid object instance of type '{2}'", String(parentClass->GetFullName()), String(name), String(MUtils::GetClassFullname(instanceObject))); + } return true; } } @@ -1553,9 +1567,13 @@ bool ManagedBinaryModule::SetFieldValue(void* field, const Variant& instance, Va if (!instanceObject || !MCore::Object::GetClass(instanceObject)->IsSubClassOf(parentClass)) { if (!instanceObject) + { LOG(Error, "Failed to set '{0}.{1}' without object instance", String(parentClass->GetFullName()), String(name)); + } else + { LOG(Error, "Failed to set '{0}.{1}' with invalid object instance of type '{2}'", String(parentClass->GetFullName()), String(name), String(MUtils::GetClassFullname(instanceObject))); + } return true; } } diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.cpp b/Source/Engine/Scripting/ManagedCLR/MCore.cpp index db184afd9..8761c309c 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp @@ -14,6 +14,7 @@ #include "Engine/Core/Types/TimeSpan.h" #include "Engine/Platform/FileSystem.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Debug/Exceptions/FileNotFoundException.h" #include "Engine/Debug/Exceptions/InvalidOperationException.h" @@ -80,6 +81,7 @@ bool MAssembly::Load(const String& assemblyPath, const StringView& nativePath) if 
(IsLoaded()) return false; PROFILE_CPU(); + PROFILE_MEM(Scripting); ZoneText(*assemblyPath, assemblyPath.Length()); Stopwatch stopwatch; diff --git a/Source/Engine/Scripting/Plugins/PluginManager.cpp b/Source/Engine/Scripting/Plugins/PluginManager.cpp index 7645d9bdd..c040acfbb 100644 --- a/Source/Engine/Scripting/Plugins/PluginManager.cpp +++ b/Source/Engine/Scripting/Plugins/PluginManager.cpp @@ -11,6 +11,7 @@ #include "Engine/Scripting/ManagedCLR/MUtils.h" #include "Engine/Platform/FileSystem.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Engine/EngineService.h" #include "Engine/Core/Log.h" #include "Engine/Scripting/ManagedCLR/MField.h" @@ -186,6 +187,7 @@ void PluginManagerService::InvokeDeinitialize(Plugin* plugin) void PluginManagerImpl::OnAssemblyLoaded(MAssembly* assembly) { PROFILE_CPU_NAMED("Load Assembly Plugins"); + PROFILE_MEM(Scripting); const auto gamePluginClass = GamePlugin::GetStaticClass(); if (gamePluginClass == nullptr) @@ -318,6 +320,7 @@ void PluginManagerImpl::InitializePlugins() if (EditorPlugins.Count() + GamePlugins.Count() == 0) return; PROFILE_CPU_NAMED("InitializePlugins"); + PROFILE_MEM(Scripting); auto engineAssembly = ((NativeBinaryModule*)GetBinaryModuleFlaxEngine())->Assembly; auto pluginLoadOrderAttribute = engineAssembly->GetClass("FlaxEngine.PluginLoadOrderAttribute"); @@ -345,6 +348,7 @@ void PluginManagerImpl::DeinitializePlugins() if (EditorPlugins.Count() + GamePlugins.Count() == 0) return; PROFILE_CPU_NAMED("DeinitializePlugins"); + PROFILE_MEM(Scripting); auto engineAssembly = ((NativeBinaryModule*)GetBinaryModuleFlaxEngine())->Assembly; auto pluginLoadOrderAttribute = engineAssembly->GetClass("FlaxEngine.PluginLoadOrderAttribute"); @@ -375,6 +379,7 @@ void PluginManagerImpl::DeinitializePlugins() bool PluginManagerService::Init() { Initialized = false; + PROFILE_MEM(Scripting); // Process already loaded modules for (auto module : BinaryModule::GetModules()) @@ 
-472,6 +477,7 @@ Plugin* PluginManager::GetPlugin(const ScriptingTypeHandle& type) void PluginManager::InitializeGamePlugins() { PROFILE_CPU(); + PROFILE_MEM(Scripting); auto engineAssembly = ((NativeBinaryModule*)GetBinaryModuleFlaxEngine())->Assembly; auto pluginLoadOrderAttribute = engineAssembly->GetClass("FlaxEngine.PluginLoadOrderAttribute"); @@ -488,6 +494,7 @@ void PluginManager::InitializeGamePlugins() void PluginManager::DeinitializeGamePlugins() { PROFILE_CPU(); + PROFILE_MEM(Scripting); auto engineAssembly = ((NativeBinaryModule*)GetBinaryModuleFlaxEngine())->Assembly; auto pluginLoadOrderAttribute = engineAssembly->GetClass("FlaxEngine.PluginLoadOrderAttribute"); diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 106736d6b..d56c0e5ea 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -29,6 +29,7 @@ #include "Engine/Scripting/BinaryModule.h" #include "Engine/Engine/Globals.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" #include "Engine/Debug/Exceptions/CLRInnerException.h" #if DOTNET_HOST_CORECLR @@ -281,6 +282,7 @@ void MCore::UnloadDomain(const StringAnsi& domainName) bool MCore::LoadEngine() { PROFILE_CPU(); + PROFILE_MEM(Scripting); // Initialize hostfxr if (InitHostfxr()) @@ -735,6 +737,7 @@ const MAssembly::ClassesDictionary& MAssembly::GetClasses() const if (_hasCachedClasses || !IsLoaded()) return _classes; PROFILE_CPU(); + PROFILE_MEM(Scripting); Stopwatch stopwatch; #if TRACY_ENABLE @@ -796,6 +799,7 @@ void GetAssemblyName(void* assemblyHandle, StringAnsi& name, StringAnsi& fullnam DEFINE_INTERNAL_CALL(void) NativeInterop_CreateClass(NativeClassDefinitions* managedClass, void* assemblyHandle) { + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); MAssembly* assembly = GetAssembly(assemblyHandle); if (assembly == nullptr) @@ -831,6 +835,7 @@ 
bool MAssembly::LoadCorlib() if (IsLoaded()) return false; PROFILE_CPU(); + PROFILE_MEM(Scripting); #if TRACY_ENABLE const StringAnsiView name("Corlib"); ZoneText(*name, name.Length()); @@ -1056,6 +1061,7 @@ const Array& MClass::GetMethods() const { if (_hasCachedMethods) return _methods; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedMethods) return _methods; @@ -1093,6 +1099,7 @@ const Array& MClass::GetFields() const { if (_hasCachedFields) return _fields; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedFields) return _fields; @@ -1119,6 +1126,7 @@ const Array& MClass::GetEvents() const { if (_hasCachedEvents) return _events; + PROFILE_MEM(Scripting); // TODO: implement MEvent in .NET @@ -1141,6 +1149,7 @@ const Array& MClass::GetProperties() const { if (_hasCachedProperties) return _properties; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedProperties) return _properties; @@ -1167,6 +1176,7 @@ const Array& MClass::GetInterfaces() const { if (_hasCachedInterfaces) return _interfaces; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedInterfaces) return _interfaces; @@ -1206,6 +1216,7 @@ const Array& MClass::GetAttributes() const { if (_hasCachedAttributes) return _attributes; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1388,6 +1399,7 @@ const Array& MField::GetAttributes() const { if (_hasCachedAttributes) return _attributes; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1450,6 +1462,7 @@ void MMethod::CacheSignature() const ScopeLock lock(BinaryModule::Locker); if (_hasCachedSignature) return; + PROFILE_MEM(Scripting); static void* GetMethodReturnTypePtr = GetStaticMethodPointer(TEXT("GetMethodReturnType")); static void* GetMethodParameterTypesPtr = GetStaticMethodPointer(TEXT("GetMethodParameterTypes")); @@ 
-1550,6 +1563,7 @@ const Array& MMethod::GetAttributes() const { if (_hasCachedAttributes) return _attributes; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1628,6 +1642,7 @@ const Array& MProperty::GetAttributes() const { if (_hasCachedAttributes) return _attributes; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1658,6 +1673,7 @@ MClass* GetOrCreateClass(MType* typeHandle) { if (!typeHandle) return nullptr; + PROFILE_MEM(Scripting); ScopeLock lock(BinaryModule::Locker); MClass* klass; if (!CachedClassHandles.TryGet(typeHandle, klass)) @@ -1781,9 +1797,13 @@ bool InitHostfxr() if (hostfxr == nullptr) { if (FileSystem::FileExists(path)) + { LOG(Fatal, "Failed to load hostfxr library, possible platform/architecture mismatch with the library. See log for more information. ({0})", path); + } else + { LOG(Fatal, "Failed to load hostfxr library ({0})", path); + } return true; } hostfxr_initialize_for_runtime_config = (hostfxr_initialize_for_runtime_config_fn)Platform::GetProcAddress(hostfxr, "hostfxr_initialize_for_runtime_config"); diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index 593092b8f..8c27dc09f 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -33,6 +33,7 @@ #include "Engine/Graphics/RenderTask.h" #include "Engine/Serialization/JsonTools.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" extern void registerFlaxEngineInternalCalls(); @@ -173,6 +174,7 @@ void onEngineUnloading(MAssembly* assembly); bool ScriptingService::Init() { + PROFILE_MEM(Scripting); Stopwatch stopwatch; // Initialize managed runtime @@ -254,30 +256,35 @@ void ScriptingService::Update() void ScriptingService::LateUpdate() { PROFILE_CPU_NAMED("Scripting::LateUpdate"); + PROFILE_MEM(Scripting); INVOKE_EVENT(LateUpdate); } void 
ScriptingService::FixedUpdate() { PROFILE_CPU_NAMED("Scripting::FixedUpdate"); + PROFILE_MEM(Scripting); INVOKE_EVENT(FixedUpdate); } void ScriptingService::LateFixedUpdate() { PROFILE_CPU_NAMED("Scripting::LateFixedUpdate"); + PROFILE_MEM(Scripting); INVOKE_EVENT(LateFixedUpdate); } void ScriptingService::Draw() { PROFILE_CPU_NAMED("Scripting::Draw"); + PROFILE_MEM(Scripting); INVOKE_EVENT(Draw); } void ScriptingService::BeforeExit() { PROFILE_CPU_NAMED("Scripting::BeforeExit"); + PROFILE_MEM(Scripting); INVOKE_EVENT(Exit); } @@ -306,6 +313,7 @@ void Scripting::ProcessBuildInfoPath(String& path, const String& projectFolderPa bool Scripting::LoadBinaryModules(const String& path, const String& projectFolderPath) { PROFILE_CPU_NAMED("LoadBinaryModules"); + PROFILE_MEM(Scripting); LOG(Info, "Loading binary modules from build info file {0}", path); // Read file contents @@ -482,6 +490,7 @@ bool Scripting::LoadBinaryModules(const String& path, const String& projectFolde bool Scripting::Load() { PROFILE_CPU(); + PROFILE_MEM(Scripting); // Note: this action can be called from main thread (due to Mono problems with assemblies actions from other threads) ASSERT(IsInMainThread()); ScopeLock lock(BinaryModule::Locker); @@ -1034,6 +1043,7 @@ bool Scripting::IsTypeFromGameScripts(const MClass* type) void Scripting::RegisterObject(ScriptingObject* obj) { + PROFILE_MEM(Scripting); const Guid id = obj->GetID(); ScopeLock lock(_objectsLocker); @@ -1116,6 +1126,7 @@ bool initFlaxEngine() void onEngineLoaded(MAssembly* assembly) { + PROFILE_MEM(Scripting); if (initFlaxEngine()) { LOG(Fatal, "Failed to initialize Flax Engine runtime."); diff --git a/Source/Engine/Scripting/ScriptingObject.cpp b/Source/Engine/Scripting/ScriptingObject.cpp index 624a3f0e9..eb9afdeae 100644 --- a/Source/Engine/Scripting/ScriptingObject.cpp +++ b/Source/Engine/Scripting/ScriptingObject.cpp @@ -745,9 +745,13 @@ DEFINE_INTERNAL_CALL(MObject*) ObjectInternal_FindObject(Guid* id, MTypeObject* if (!skipLog) { 
if (klass) + { LOG(Warning, "Unable to find scripting object with ID={0} of type {1}", *id, String(klass->GetFullName())); + } else + { LOG(Warning, "Unable to find scripting object with ID={0}", *id); + } LogContext::Print(LogType::Warning); } return nullptr; diff --git a/Source/Engine/Streaming/Streaming.cpp b/Source/Engine/Streaming/Streaming.cpp index 2d233edc9..5ddbd0996 100644 --- a/Source/Engine/Streaming/Streaming.cpp +++ b/Source/Engine/Streaming/Streaming.cpp @@ -7,6 +7,7 @@ #include "Engine/Engine/Engine.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" #include "Engine/Threading/TaskGraph.h" #include "Engine/Threading/Task.h" @@ -55,6 +56,7 @@ Array> Streaming::TextureGroups; void StreamingSettings::Apply() { + PROFILE_MEM(ContentStreaming); Streaming::TextureGroups = TextureGroups; SAFE_DELETE_GPU_RESOURCES(TextureGroupSamplers); TextureGroupSamplers.Resize(TextureGroups.Count(), false); @@ -91,6 +93,7 @@ void StreamableResource::StartStreaming(bool isDynamic) _isDynamic = isDynamic; if (!_isStreaming) { + PROFILE_MEM(ContentStreaming); _isStreaming = true; ResourcesLock.Lock(); Resources.Add(this); @@ -201,6 +204,7 @@ void UpdateResource(StreamableResource* resource, double currentTime) bool StreamingService::Init() { + PROFILE_MEM(ContentStreaming); System = New(); Engine::UpdateGraph->AddSystem(System); return false; @@ -217,6 +221,7 @@ void StreamingService::BeforeExit() void StreamingSystem::Job(int32 index) { PROFILE_CPU_NAMED("Streaming.Job"); + PROFILE_MEM(ContentStreaming); // TODO: use streaming settings const double ResourceUpdatesInterval = 0.1; diff --git a/Source/Engine/Threading/TaskGraph.cpp b/Source/Engine/Threading/TaskGraph.cpp index 10b6ea0c8..c016bfd40 100644 --- a/Source/Engine/Threading/TaskGraph.cpp +++ b/Source/Engine/Threading/TaskGraph.cpp @@ -4,6 +4,7 @@ #include "JobSystem.h" #include 
"Engine/Core/Collections/Sorting.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" namespace { @@ -67,6 +68,7 @@ const Array>& TaskGraph::GetSystems() co void TaskGraph::AddSystem(TaskGraphSystem* system) { + PROFILE_MEM(Engine); _systems.Add(system); } @@ -78,6 +80,7 @@ void TaskGraph::RemoveSystem(TaskGraphSystem* system) void TaskGraph::Execute() { PROFILE_CPU(); + PROFILE_MEM(Engine); for (auto system : _systems) system->PreExecute(this); diff --git a/Source/Engine/UI/TextRender.cpp b/Source/Engine/UI/TextRender.cpp index bc7b88647..951da2316 100644 --- a/Source/Engine/UI/TextRender.cpp +++ b/Source/Engine/UI/TextRender.cpp @@ -13,6 +13,8 @@ #include "Engine/Render2D/FontManager.h" #include "Engine/Render2D/FontTextureAtlas.h" #include "Engine/Renderer/RenderList.h" +#include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Content/Assets/MaterialInstance.h" #include "Engine/Content/Content.h" @@ -120,6 +122,9 @@ void TextRender::SetLayoutOptions(TextLayoutOptions& value) void TextRender::UpdateLayout() { + PROFILE_CPU(); + PROFILE_MEM(UI); + // Clear _ib.Clear(); _vb.Clear(); diff --git a/Source/Engine/UI/UICanvas.cpp b/Source/Engine/UI/UICanvas.cpp index 107a9eded..bf87fd676 100644 --- a/Source/Engine/UI/UICanvas.cpp +++ b/Source/Engine/UI/UICanvas.cpp @@ -6,6 +6,7 @@ #include "Engine/Scripting/ManagedCLR/MClass.h" #include "Engine/Scripting/ManagedCLR/MUtils.h" #include "Engine/Serialization/Serialization.h" +#include "Engine/Profiler/ProfilerMemory.h" #if COMPILE_WITHOUT_CSHARP #define UICANVAS_INVOKE(event) @@ -26,6 +27,7 @@ MMethod* UICanvas_ParentChanged = nullptr; auto* managed = GetManagedInstance(); \ if (managed) \ { \ + PROFILE_MEM(UI); \ MObject* exception = nullptr; \ UICanvas_##event->Invoke(managed, nullptr, &exception); \ if (exception) \ @@ -77,6 +79,7 @@ void UICanvas::Serialize(SerializeStream& stream, const 
void* otherObj) SERIALIZE_GET_OTHER_OBJ(UICanvas); #if !COMPILE_WITHOUT_CSHARP + PROFILE_MEM(UI); stream.JKEY("V"); void* params[1]; params[0] = other ? other->GetOrCreateManagedInstance() : nullptr; @@ -109,6 +112,7 @@ void UICanvas::Deserialize(DeserializeStream& stream, ISerializeModifier* modifi const auto dataMember = stream.FindMember("V"); if (dataMember != stream.MemberEnd()) { + PROFILE_MEM(UI); rapidjson_flax::StringBuffer buffer; rapidjson_flax::Writer writer(buffer); dataMember->value.Accept(writer); diff --git a/Source/Engine/UI/UIControl.cpp b/Source/Engine/UI/UIControl.cpp index 8321c33fc..06554542b 100644 --- a/Source/Engine/UI/UIControl.cpp +++ b/Source/Engine/UI/UIControl.cpp @@ -7,6 +7,7 @@ #include "Engine/Scripting/ManagedCLR/MClass.h" #include "Engine/Scripting/ManagedCLR/MCore.h" #include "Engine/Serialization/Serialization.h" +#include "Engine/Profiler/ProfilerMemory.h" #if COMPILE_WITHOUT_CSHARP #define UICONTROL_INVOKE(event) @@ -25,6 +26,7 @@ MMethod* UIControl_EndPlay = nullptr; auto* managed = GetManagedInstance(); \ if (managed) \ { \ + PROFILE_MEM(UI); \ MObject* exception = nullptr; \ UIControl_##event->Invoke(managed, nullptr, &exception); \ if (exception) \ @@ -78,6 +80,7 @@ void UIControl::Serialize(SerializeStream& stream, const void* otherObj) SERIALIZE_MEMBER(NavTargetRight, _navTargetRight); #if !COMPILE_WITHOUT_CSHARP + PROFILE_MEM(UI); void* params[2]; MString* controlType = nullptr; params[0] = &controlType; @@ -129,6 +132,7 @@ void UIControl::Deserialize(DeserializeStream& stream, ISerializeModifier* modif DESERIALIZE_MEMBER(NavTargetRight, _navTargetRight); #if !COMPILE_WITHOUT_CSHARP + PROFILE_MEM(UI); MTypeObject* typeObj = nullptr; const auto controlMember = stream.FindMember("Control"); if (controlMember != stream.MemberEnd()) diff --git a/Source/Engine/Video/AV/VideoBackendAV.cpp b/Source/Engine/Video/AV/VideoBackendAV.cpp index 2d73144c9..c9563d9e2 100644 --- a/Source/Engine/Video/AV/VideoBackendAV.cpp +++ 
b/Source/Engine/Video/AV/VideoBackendAV.cpp @@ -5,6 +5,7 @@ #include "VideoBackendAV.h" #include "Engine/Platform/Apple/AppleUtils.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/TaskGraph.h" #include "Engine/Core/Log.h" #include "Engine/Engine/Globals.h" @@ -39,6 +40,7 @@ namespace AV void UpdatePlayer(int32 index) { PROFILE_CPU(); + PROFILE_MEM(Video); auto& player = *Players[index]; ZoneText(player.DebugUrl, player.DebugUrlLen); auto& playerAV = player.GetBackendState(); @@ -152,6 +154,7 @@ namespace AV bool VideoBackendAV::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) { PROFILE_CPU(); + PROFILE_MEM(Video); player = VideoBackendPlayer(); auto& playerAV = player.GetBackendState(); @@ -210,6 +213,7 @@ void VideoBackendAV::Player_Destroy(VideoBackendPlayer& player) void VideoBackendAV::Player_UpdateInfo(VideoBackendPlayer& player, const VideoBackendPlayerInfo& info) { PROFILE_CPU(); + PROFILE_MEM(Video); auto& playerAV = player.GetBackendState(); playerAV.Player.actionAtItemEnd = info.Loop ? 
AVPlayerActionAtItemEndNone : AVPlayerActionAtItemEndPause; // TODO: spatial audio diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index c6af1cef8..01d6ec481 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -4,6 +4,7 @@ #include "VideoBackendMF.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/TaskGraph.h" #include "Engine/Core/Log.h" #include "Engine/Engine/Time.h" @@ -43,6 +44,7 @@ namespace MF bool Configure(VideoBackendPlayer& player, VideoPlayerMF& playerMF, DWORD streamIndex) { PROFILE_CPU_NAMED("Configure"); + PROFILE_MEM(Video); IMFMediaType *mediaType = nullptr, *nativeType = nullptr; bool result = true; @@ -367,6 +369,7 @@ namespace MF void UpdatePlayer(int32 index) { PROFILE_CPU(); + PROFILE_MEM(Video); auto& player = *Players[index]; ZoneText(player.DebugUrl, player.DebugUrlLen); auto& playerMF = player.GetBackendState(); @@ -453,6 +456,7 @@ namespace MF bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player) { PROFILE_CPU(); + PROFILE_MEM(Video); player = VideoBackendPlayer(); auto& playerMF = player.GetBackendState(); @@ -572,6 +576,7 @@ const Char* VideoBackendMF::Base_Name() bool VideoBackendMF::Base_Init() { PROFILE_CPU(); + PROFILE_MEM(Video); // Init COM HRESULT hr = CoInitializeEx(0, COINIT_MULTITHREADED); diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp index 2e76d8960..6b44102d9 100644 --- a/Source/Engine/Video/Video.cpp +++ b/Source/Engine/Video/Video.cpp @@ -7,6 +7,7 @@ #include "Engine/Core/Math/Quaternion.h" #include "Engine/Core/Math/Transform.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Engine/Engine.h" #include "Engine/Engine/EngineService.h" #include "Engine/Graphics/GPUDevice.h" @@ -70,6 +71,7 @@ protected: if (!frame->IsAllocated()) 
return Result::MissingResources; PROFILE_CPU(); + PROFILE_MEM(Video); ZoneText(_player->DebugUrl, _player->DebugUrlLen); if (PixelFormatExtensions::IsVideo(_player->Format)) @@ -159,6 +161,7 @@ public: void InitBackend(int32 index, VideoBackend* backend) { + PROFILE_MEM(Video); LOG(Info, "Video initialization... (backend: {0})", backend->Base_Name()); if (backend->Base_Init()) { @@ -177,6 +180,7 @@ TaskGraphSystem* Video::System = nullptr; void VideoSystem::Execute(TaskGraph* graph) { PROFILE_CPU_NAMED("Video.Update"); + PROFILE_MEM(Video); // Update backends for (VideoBackend*& backend : VideoServiceInstance.Backends) @@ -309,6 +313,7 @@ void VideoBackendPlayer::InitVideoFrame() void VideoBackendPlayer::UpdateVideoFrame(Span data, TimeSpan time, TimeSpan duration) { PROFILE_CPU(); + PROFILE_MEM(Video); ZoneText(DebugUrl, DebugUrlLen); VideoFrameTime = time; VideoFrameDuration = duration; @@ -356,6 +361,7 @@ void VideoBackendPlayer::UpdateVideoFrame(Span data, TimeSpan time, TimeSp void VideoBackendPlayer::UpdateAudioBuffer(Span data, TimeSpan time, TimeSpan duration) { PROFILE_CPU(); + PROFILE_MEM(Video); ZoneText(DebugUrl, DebugUrlLen); AudioBufferTime = time; AudioBufferDuration = duration; From c1b1f4afc45bde3520e591821254155d0137017a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 04:49:48 +0200 Subject: [PATCH 006/211] Add process memory stats for Apple platforms --- Source/Engine/Platform/Apple/ApplePlatform.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Platform/Apple/ApplePlatform.cpp b/Source/Engine/Platform/Apple/ApplePlatform.cpp index 5104dd5f8..ad86ed7da 100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.cpp +++ b/Source/Engine/Platform/Apple/ApplePlatform.cpp @@ -224,8 +224,11 @@ MemoryStats ApplePlatform::GetMemoryStats() ProcessMemoryStats ApplePlatform::GetProcessMemoryStats() { ProcessMemoryStats result; - result.UsedPhysicalMemory = 1024; - result.UsedVirtualMemory = 1024; + 
mach_task_basic_info_data_t taskInfo; + mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT; + task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&taskInfo, &count); + result.UsedPhysicalMemory = taskInfo.resident_size; + result.UsedVirtualMemory = result.UsedPhysicalMemory; return result; } From d24f9d1e1e5346b0ec088271b1408a3b8bb1c9df Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 05:18:56 +0200 Subject: [PATCH 007/211] Add warning when using memory profiler without enabled on startup --- Source/Editor/Windows/Profiler/Memory.cs | 15 +++++++++++++++ Source/Engine/Profiler/ProfilerMemory.cpp | 4 ++++ Source/Engine/Profiler/ProfilerMemory.h | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Windows/Profiler/Memory.cs b/Source/Editor/Windows/Profiler/Memory.cs index f33bec4cc..806bed18f 100644 --- a/Source/Editor/Windows/Profiler/Memory.cs +++ b/Source/Editor/Windows/Profiler/Memory.cs @@ -29,6 +29,7 @@ namespace FlaxEditor.Windows.Profiler private List _tableRowsCache; private string[] _groupNames; private int[] _groupOrder; + private Label _warningText; public Memory() : base("Memory") @@ -65,6 +66,18 @@ namespace FlaxEditor.Windows.Profiler }; _managedAllocationsChart.SelectedSampleChanged += OnSelectedSampleChanged; + // Warning text + if (!ProfilerMemory.Enabled) + { + _warningText = new Label + { + Text = "Detailed memory profiling is disabled. 
Run with command line: -mem", + TextColor = Color.Red, + Visible = false, + Parent = layout, + }; + } + // Table var style = Style.Current; var headerColor = style.LightBackground; @@ -204,6 +217,8 @@ namespace FlaxEditor.Windows.Profiler { if (_frames.Count == 0) return; + if (_warningText != null) + _warningText.Visible = true; var frame = _frames.Get(selectedFrame); var totalUage = frame.Usage.Values0[(int)ProfilerMemory.Groups.TotalTracked]; var totalPeek = frame.Peek.Values0[(int)ProfilerMemory.Groups.TotalTracked]; diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index e617d712c..adb244f26 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -186,6 +186,10 @@ namespace output.AppendLine(); } #endif + + // Warn that data might be missing due to inactive profiler + if (!ProfilerMemory::Enabled) + output.AppendLine(TEXT("Detailed memory profiling is disabled. Run with command line: -mem")); } FORCE_INLINE void AddGroupMemory(ProfilerMemory::Groups group, int64 add) diff --git a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h index 65ed5d9ab..206814560 100644 --- a/Source/Engine/Profiler/ProfilerMemory.h +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -215,7 +215,7 @@ public: /// /// The profiling tools usage flag. Can be used to disable profiler. Run engine with '-mem' command line to activate it from start. 
/// - static bool Enabled; + API_FIELD(ReadOnly) static bool Enabled; static void OnMemoryAlloc(void* ptr, uint64 size); static void OnMemoryFree(void* ptr); From bb855e2663a935ce7170c1554f293dfa2332067e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 22 May 2025 05:34:36 +0200 Subject: [PATCH 008/211] Add suport for Tracy profiler on Mac --- Source/Engine/Profiler/Profiler.Build.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Engine/Profiler/Profiler.Build.cs b/Source/Engine/Profiler/Profiler.Build.cs index 72ca2a9e4..f47cbcf87 100644 --- a/Source/Engine/Profiler/Profiler.Build.cs +++ b/Source/Engine/Profiler/Profiler.Build.cs @@ -35,6 +35,7 @@ public class Profiler : EngineModule case TargetPlatform.Linux: case TargetPlatform.Windows: case TargetPlatform.Switch: + case TargetPlatform.Mac: options.PublicDependencies.Add("tracy"); break; } From f9cb4ddae24cf10e9fe7b6b71e4869395eb0a9ab Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 24 May 2025 05:08:32 +0200 Subject: [PATCH 009/211] Add new Arena Allocator for optimized dynamic memory allocations with a shared lifetime --- Source/Engine/Core/Memory/Allocation.cpp | 44 +++++++++++++++++ Source/Engine/Core/Memory/ArenaAllocation.h | 55 +++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 Source/Engine/Core/Memory/Allocation.cpp create mode 100644 Source/Engine/Core/Memory/ArenaAllocation.h diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp new file mode 100644 index 000000000..87c9dbc63 --- /dev/null +++ b/Source/Engine/Core/Memory/Allocation.cpp @@ -0,0 +1,44 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+ +#include "ArenaAllocation.h" +#include "../Math/Math.h" + +void ArenaAllocator::Free() +{ + // Free all pages + Page* page = _first; _first = nullptr; // reset so calling Free() again (eg. from destructor) is safe + while (page) + { + Allocator::Free(page->Memory); + Page* next = page->Next; + Allocator::Free(page); + page = next; + } +} + +void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) +{ + // Find the first page that has some space left + Page* page = _first; + while (page && page->Offset + size + alignment > page->Size) + page = page->Next; + + // Create a new page if need to + if (!page) + { + uint64 pageSize = Math::Max(_pageSize, size); + page = (Page*)Allocator::Allocate(sizeof(Page)); + page->Memory = Allocator::Allocate(pageSize); + page->Next = _first; + page->Offset = 0; + page->Size = pageSize; + _first = page; + } + + // Allocate within a page + page->Offset = Math::AlignUp(page->Offset, (uint32)alignment); + void* mem = (byte*)page->Memory + page->Offset; + page->Offset += size; + + return mem; +} \ No newline at end of file diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h new file mode 100644 index 000000000..18915853d --- /dev/null +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -0,0 +1,55 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#include "Allocation.h" + +/// +/// Allocator that uses pages for stack-based allocs without freeing memory during its lifetime. +/// +class ArenaAllocator +{ +private: + struct Page + { + void* Memory; + Page* Next; + uint32 Offset, Size; + }; + + int32 _pageSize; + Page* _first = nullptr; + +public: + ArenaAllocator(int32 pageSizeBytes = 1024 * 1024) // 1 MB by default + : _pageSize(pageSizeBytes) + { + } + + ~ArenaAllocator() + { + Free(); + } + + // Allocates a chunk of uninitialized memory. + void* Allocate(uint64 size, uint64 alignment = 1); + + // Frees all memory allocations within allocator. + void Free(); + + // Creates a new object within the arena allocator. 
+ template + inline T* New(Args&&...args) + { + T* ptr = (T*)Allocate(sizeof(T), alignof(T)); + new(ptr) T(Forward(args)...); + return ptr; + } + + // Invokes destructor on values in a dictionary and clears it. + template + void ClearDelete(Dictionary& collection) + { + for (auto it = collection.Begin(); it.IsNotEnd(); ++it) + Memory::DestructItem(it->Value); + collection.Clear(); + } +}; From 410ec0465ce109af0d36bed89209505546e7c350 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 25 May 2025 02:04:16 +0200 Subject: [PATCH 010/211] Optimize CSharp scripting runtime to use arena allocator per-assembly --- Source/Editor/Scripting/ScriptsBuilder.cpp | 4 +- Source/Engine/Core/Memory/ArenaAllocation.h | 12 ++- Source/Engine/Core/Types/Variant.cpp | 2 +- Source/Engine/Debug/DebugCommands.cpp | 2 +- Source/Engine/Scripting/BinaryModule.cpp | 2 +- .../Engine/Scripting/ManagedCLR/MAssembly.h | 12 ++- Source/Engine/Scripting/ManagedCLR/MClass.h | 30 +++--- Source/Engine/Scripting/ManagedCLR/MCore.cpp | 15 +++ Source/Engine/Scripting/ManagedCLR/MEvent.h | 6 +- Source/Engine/Scripting/ManagedCLR/MField.h | 5 +- Source/Engine/Scripting/ManagedCLR/MMethod.h | 8 +- .../Engine/Scripting/ManagedCLR/MProperty.h | 6 +- Source/Engine/Scripting/ManagedCLR/MUtils.cpp | 6 +- Source/Engine/Scripting/ManagedCLR/MUtils.h | 2 +- Source/Engine/Scripting/Runtime/DotNet.cpp | 92 ++++++++++--------- 15 files changed, 126 insertions(+), 78 deletions(-) diff --git a/Source/Editor/Scripting/ScriptsBuilder.cpp b/Source/Editor/Scripting/ScriptsBuilder.cpp index 84df897e6..ea58f3240 100644 --- a/Source/Editor/Scripting/ScriptsBuilder.cpp +++ b/Source/Editor/Scripting/ScriptsBuilder.cpp @@ -78,7 +78,7 @@ namespace ScriptsBuilderImpl void onScriptsReloadEnd(); void onScriptsLoaded(); - void GetClassName(const StringAnsi& fullname, StringAnsi& className); + void GetClassName(const StringAnsiView fullname, StringAnsi& className); void onCodeEditorAsyncOpenBegin() { @@ -273,7 +273,7 @@ bool 
ScriptsBuilder::GenerateProject(const StringView& customArgs) return RunBuildTool(args); } -void ScriptsBuilderImpl::GetClassName(const StringAnsi& fullname, StringAnsi& className) +void ScriptsBuilderImpl::GetClassName(const StringAnsiView fullname, StringAnsi& className) { const auto lastDotIndex = fullname.FindLast('.'); if (lastDotIndex != -1) diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index 18915853d..43004ab30 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -44,9 +44,19 @@ public: return ptr; } + // Invokes destructor on values in an array and clears it. + template + static void ClearDelete(Array& collection) + { + Value* ptr = collection.Get(); + for (int32 i = 0; i < collection.Count(); i++) + Memory::DestructItem(ptr[i]); + collection.Clear(); + } + // Invokes destructor on values in a dictionary and clears it. template - void ClearDelete(Dictionary& collection) + static void ClearDelete(Dictionary& collection) { for (auto it = collection.Begin(); it.IsNotEnd(); ++it) Memory::DestructItem(it->Value); diff --git a/Source/Engine/Core/Types/Variant.cpp b/Source/Engine/Core/Types/Variant.cpp index f45856443..873aba0eb 100644 --- a/Source/Engine/Core/Types/Variant.cpp +++ b/Source/Engine/Core/Types/Variant.cpp @@ -118,7 +118,7 @@ VariantType::VariantType(Types type, const MClass* klass) #if USE_CSHARP if (klass) { - const StringAnsi& typeName = klass->GetFullName(); + const StringAnsiView typeName = klass->GetFullName(); const int32 length = typeName.Length(); TypeName = static_cast(Allocator::Allocate(length + 1)); Platform::MemoryCopy(TypeName, typeName.Get(), length); diff --git a/Source/Engine/Debug/DebugCommands.cpp b/Source/Engine/Debug/DebugCommands.cpp index fcea938aa..6927a1da9 100644 --- a/Source/Engine/Debug/DebugCommands.cpp +++ b/Source/Engine/Debug/DebugCommands.cpp @@ -215,7 +215,7 @@ namespace { if (!method->IsStatic()) 
continue; - const StringAnsi& name = method->GetName(); + const StringAnsiView name = method->GetName(); if (name.Contains("Internal_") || mclass->GetFullName().Contains(".Interop.")) continue; diff --git a/Source/Engine/Scripting/BinaryModule.cpp b/Source/Engine/Scripting/BinaryModule.cpp index 3030b041d..1a48a3846 100644 --- a/Source/Engine/Scripting/BinaryModule.cpp +++ b/Source/Engine/Scripting/BinaryModule.cpp @@ -1029,7 +1029,7 @@ void ManagedBinaryModule::InitType(MClass* mclass) { #if !COMPILE_WITHOUT_CSHARP // Skip if already initialized - const StringAnsi& typeName = mclass->GetFullName(); + const StringAnsiView typeName = mclass->GetFullName(); if (TypeNameToTypeIndex.ContainsKey(typeName)) return; PROFILE_MEM(Scripting); diff --git a/Source/Engine/Scripting/ManagedCLR/MAssembly.h b/Source/Engine/Scripting/ManagedCLR/MAssembly.h index 2e5af191a..6c0aa9579 100644 --- a/Source/Engine/Scripting/ManagedCLR/MAssembly.h +++ b/Source/Engine/Scripting/ManagedCLR/MAssembly.h @@ -7,6 +7,7 @@ #include "Engine/Core/Types/String.h" #include "Engine/Core/Collections/Array.h" #include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Core/Memory/ArenaAllocation.h" #include "Engine/Platform/CriticalSection.h" /// @@ -19,7 +20,7 @@ class FLAXENGINE_API MAssembly friend Scripting; public: - typedef Dictionary ClassesDictionary; + typedef Dictionary ClassesDictionary; private: #if USE_MONO @@ -67,6 +68,15 @@ public: /// ~MAssembly(); +public: + /// + /// Memory storage with all assembly-related data that shares its lifetime (eg. metadata). + /// + ArenaAllocator Memory; + + // Allocates the given string within a memory that has lifetime of assembly. + StringAnsiView AllocString(const char* str); + public: /// /// Managed assembly actions delegate type. 
diff --git a/Source/Engine/Scripting/ManagedCLR/MClass.h b/Source/Engine/Scripting/ManagedCLR/MClass.h index b44c446cb..a74b4f7da 100644 --- a/Source/Engine/Scripting/ManagedCLR/MClass.h +++ b/Source/Engine/Scripting/ManagedCLR/MClass.h @@ -17,14 +17,13 @@ private: mutable void* _attrInfo = nullptr; #elif USE_NETCORE void* _handle; - StringAnsi _name; - StringAnsi _namespace; + StringAnsiView _name; + StringAnsiView _namespace; + StringAnsiView _fullname; uint32 _types = 0; mutable uint32 _size = 0; #endif - const MAssembly* _assembly; - - StringAnsi _fullname; + MAssembly* _assembly; mutable Array _methods; mutable Array _fields; @@ -47,12 +46,13 @@ private: int32 _isInterface : 1; int32 _isValueType : 1; int32 _isEnum : 1; + int32 _isGeneric : 1; public: #if USE_MONO MClass(const MAssembly* parentAssembly, MonoClass* monoClass, const StringAnsi& fullname); #elif USE_NETCORE - MClass(const MAssembly* parentAssembly, void* handle, const char* name, const char* fullname, const char* namespace_, MTypeAttributes typeAttributes); + MClass(MAssembly* parentAssembly, void* handle, const char* name, const char* fullname, const char* namespace_, MTypeAttributes typeAttributes); #endif /// @@ -64,7 +64,7 @@ public: /// /// Gets the parent assembly. /// - const MAssembly* GetAssembly() const + FORCE_INLINE MAssembly* GetAssembly() const { return _assembly; } @@ -72,7 +72,7 @@ public: /// /// Gets the full name of the class (namespace and typename). /// - FORCE_INLINE const StringAnsi& GetFullName() const + FORCE_INLINE StringAnsiView GetFullName() const { return _fullname; } @@ -80,12 +80,18 @@ public: /// /// Gets the name of the class. /// - StringAnsiView GetName() const; + FORCE_INLINE StringAnsiView GetName() const + { + return _name; + } /// /// Gets the namespace of the class. 
/// - StringAnsiView GetNamespace() const; + FORCE_INLINE StringAnsiView GetNamespace() const + { + return _namespace; + } #if USE_MONO /// @@ -161,9 +167,9 @@ public: /// /// Gets if class is generic /// - bool IsGeneric() const + FORCE_INLINE bool IsGeneric() const { - return _fullname.FindLast('`') != -1; + return _isGeneric != 0; } /// diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.cpp b/Source/Engine/Scripting/ManagedCLR/MCore.cpp index 8761c309c..4ded56b52 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp @@ -71,6 +71,17 @@ MAssembly::~MAssembly() Unload(); } +StringAnsiView MAssembly::AllocString(const char* str) +{ + if (!str) + return StringAnsiView::Empty; + int32 len = StringUtils::Length(str); + char* mem = (char*)Memory.Allocate(len + 1); + Platform::MemoryCopy(mem, str, len); + mem[len] = 0; + return StringAnsiView(mem, len); +} + String MAssembly::ToString() const { return _name.ToString(); @@ -127,7 +138,11 @@ void MAssembly::Unload(bool isReloading) _isLoading = false; _isLoaded = false; _hasCachedClasses = false; +#if USE_NETCORE + ArenaAllocator::ClearDelete(_classes); +#else _classes.ClearDelete(); +#endif Unloaded(this); } diff --git a/Source/Engine/Scripting/ManagedCLR/MEvent.h b/Source/Engine/Scripting/ManagedCLR/MEvent.h index 52573e204..0aade183e 100644 --- a/Source/Engine/Scripting/ManagedCLR/MEvent.h +++ b/Source/Engine/Scripting/ManagedCLR/MEvent.h @@ -15,16 +15,16 @@ class FLAXENGINE_API MEvent protected: #if USE_MONO MonoEvent* _monoEvent; + StringAnsi _name; #elif USE_NETCORE void* _handle; + StringAnsiView _name; #endif mutable MMethod* _addMethod; mutable MMethod* _removeMethod; MClass* _parentClass; - StringAnsi _name; - mutable int32 _hasCachedAttributes : 1; mutable int32 _hasAddMonoMethod : 1; mutable int32 _hasRemoveMonoMethod : 1; @@ -42,7 +42,7 @@ public: /// /// Gets the event name. 
/// - FORCE_INLINE const StringAnsi& GetName() const + FORCE_INLINE StringAnsiView GetName() const { return _name; } diff --git a/Source/Engine/Scripting/ManagedCLR/MField.h b/Source/Engine/Scripting/ManagedCLR/MField.h index 49bce9f54..ed73db711 100644 --- a/Source/Engine/Scripting/ManagedCLR/MField.h +++ b/Source/Engine/Scripting/ManagedCLR/MField.h @@ -17,14 +17,15 @@ protected: #if USE_MONO MonoClassField* _monoField; MonoType* _monoType; + StringAnsi _name; #elif USE_NETCORE void* _handle; void* _type; int32 _fieldOffset; + StringAnsiView _name; #endif MClass* _parentClass; - StringAnsi _name; MVisibility _visibility; @@ -44,7 +45,7 @@ public: /// /// Gets field name. /// - FORCE_INLINE const StringAnsi& GetName() const + FORCE_INLINE StringAnsiView GetName() const { return _name; } diff --git a/Source/Engine/Scripting/ManagedCLR/MMethod.h b/Source/Engine/Scripting/ManagedCLR/MMethod.h index 989147754..8d24b533b 100644 --- a/Source/Engine/Scripting/ManagedCLR/MMethod.h +++ b/Source/Engine/Scripting/ManagedCLR/MMethod.h @@ -21,15 +21,16 @@ class FLAXENGINE_API MMethod protected: #if USE_MONO MonoMethod* _monoMethod; + StringAnsi _name; #elif USE_NETCORE void* _handle; + StringAnsiView _name; int32 _paramsCount; mutable void* _returnType; mutable Array> _parameterTypes; void CacheSignature() const; #endif MClass* _parentClass; - StringAnsi _name; MVisibility _visibility; #if !USE_MONO_AOT void* _cachedThunk = nullptr; @@ -48,12 +49,11 @@ public: explicit MMethod(MonoMethod* monoMethod, MClass* parentClass); explicit MMethod(MonoMethod* monoMethod, const char* name, MClass* parentClass); #elif USE_NETCORE - MMethod(MClass* parentClass, StringAnsi&& name, void* handle, int32 paramsCount, MMethodAttributes attributes); + MMethod(MClass* parentClass, StringAnsiView name, void* handle, int32 paramsCount, MMethodAttributes attributes); #endif public: #if COMPILE_WITH_PROFILER - StringAnsi ProfilerName; SourceLocationData ProfilerData; #endif @@ -109,7 +109,7 @@ 
public: /// /// Gets the method name. /// - FORCE_INLINE const StringAnsi& GetName() const + FORCE_INLINE StringAnsiView GetName() const { return _name; } diff --git a/Source/Engine/Scripting/ManagedCLR/MProperty.h b/Source/Engine/Scripting/ManagedCLR/MProperty.h index 3be94eb1a..7e426b474 100644 --- a/Source/Engine/Scripting/ManagedCLR/MProperty.h +++ b/Source/Engine/Scripting/ManagedCLR/MProperty.h @@ -17,16 +17,16 @@ class FLAXENGINE_API MProperty protected: #if USE_MONO MonoProperty* _monoProperty; + StringAnsi _name; #elif USE_NETCORE void* _handle; + StringAnsiView _name; #endif mutable MMethod* _getMethod; mutable MMethod* _setMethod; MClass* _parentClass; - StringAnsi _name; - mutable int32 _hasCachedAttributes : 1; mutable int32 _hasSetMethod : 1; mutable int32 _hasGetMethod : 1; @@ -49,7 +49,7 @@ public: /// /// Gets the property name. /// - FORCE_INLINE const StringAnsi& GetName() const + FORCE_INLINE StringAnsiView GetName() const { return _name; } diff --git a/Source/Engine/Scripting/ManagedCLR/MUtils.cpp b/Source/Engine/Scripting/ManagedCLR/MUtils.cpp index a52d55f67..fd0596ef5 100644 --- a/Source/Engine/Scripting/ManagedCLR/MUtils.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MUtils.cpp @@ -150,7 +150,7 @@ ScriptingTypeHandle MUtils::UnboxScriptingTypeHandle(MTypeObject* value) MClass* klass = GetClass(value); if (!klass) return ScriptingTypeHandle(); - const StringAnsi& typeName = klass->GetFullName(); + const StringAnsiView typeName = klass->GetFullName(); const ScriptingTypeHandle typeHandle = Scripting::FindScriptingType(typeName); if (!typeHandle) LOG(Warning, "Unknown scripting type {}", String(typeName)); @@ -821,14 +821,14 @@ MObject* MUtils::BoxVariant(const Variant& value) } } -const StringAnsi& MUtils::GetClassFullname(MObject* obj) +StringAnsiView MUtils::GetClassFullname(MObject* obj) { if (obj) { MClass* mClass = MCore::Object::GetClass(obj); return mClass->GetFullName(); } - return StringAnsi::Empty; + return StringAnsiView::Empty; } 
MClass* MUtils::GetClass(MTypeObject* type) diff --git a/Source/Engine/Scripting/ManagedCLR/MUtils.h b/Source/Engine/Scripting/ManagedCLR/MUtils.h index f3bc27172..f642681d2 100644 --- a/Source/Engine/Scripting/ManagedCLR/MUtils.h +++ b/Source/Engine/Scripting/ManagedCLR/MUtils.h @@ -400,7 +400,7 @@ struct MConverter> namespace MUtils { // Outputs the full typename for the type of the specified object. - extern FLAXENGINE_API const StringAnsi& GetClassFullname(MObject* obj); + extern FLAXENGINE_API StringAnsiView GetClassFullname(MObject* obj); // Returns the class of the provided object. extern FLAXENGINE_API MClass* GetClass(MObject* object); diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index d56c0e5ea..cd4da3f20 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -183,7 +183,7 @@ Dictionary CachedAssemblyHandles; /// /// Returns the function pointer to the managed static method in NativeInterop class. /// -void* GetStaticMethodPointer(const String& methodName); +void* GetStaticMethodPointer(StringView methodName); /// /// Calls the managed static method with given parameters. 
@@ -753,12 +753,13 @@ const MAssembly::ClassesDictionary& MAssembly::GetClasses() const static void* GetManagedClassesPtr = GetStaticMethodPointer(TEXT("GetManagedClasses")); CallStaticMethod(GetManagedClassesPtr, _handle, &managedClasses, &classCount); _classes.EnsureCapacity(classCount); + MAssembly* assembly = const_cast(this); for (int32 i = 0; i < classCount; i++) { NativeClassDefinitions& managedClass = managedClasses[i]; // Create class object - MClass* klass = New(this, managedClass.typeHandle, managedClass.name, managedClass.fullname, managedClass.namespace_, managedClass.typeAttributes); + MClass* klass = assembly->Memory.New(assembly, managedClass.typeHandle, managedClass.name, managedClass.fullname, managedClass.namespace_, managedClass.typeAttributes); _classes.Add(klass->GetFullName(), klass); managedClass.nativePointer = klass; @@ -811,7 +812,7 @@ DEFINE_INTERNAL_CALL(void) NativeInterop_CreateClass(NativeClassDefinitions* man CachedAssemblyHandles.Add(assemblyHandle, assembly); } - MClass* klass = New(assembly, managedClass->typeHandle, managedClass->name, managedClass->fullname, managedClass->namespace_, managedClass->typeAttributes); + MClass* klass = assembly->Memory.New(assembly, managedClass->typeHandle, managedClass->name, managedClass->fullname, managedClass->namespace_, managedClass->typeAttributes); if (assembly != nullptr) { auto& classes = const_cast(assembly->GetClasses()); @@ -819,7 +820,7 @@ DEFINE_INTERNAL_CALL(void) NativeInterop_CreateClass(NativeClassDefinitions* man if (classes.TryGet(klass->GetFullName(), oldKlass)) { LOG(Warning, "Class '{0}' was already added to assembly '{1}'", String(klass->GetFullName()), String(assembly->GetName())); - Delete(klass); + Memory::DestructItem(klass); klass = oldKlass; } else @@ -915,12 +916,12 @@ bool MAssembly::UnloadImage(bool isReloading) return false; } -MClass::MClass(const MAssembly* parentAssembly, void* handle, const char* name, const char* fullname, const char* namespace_, 
MTypeAttributes attributes) +MClass::MClass(MAssembly* parentAssembly, void* handle, const char* name, const char* fullname, const char* namespace_, MTypeAttributes attributes) : _handle(handle) - , _name(name) - , _namespace(namespace_) + , _name(parentAssembly->AllocString(name)) + , _namespace(parentAssembly->AllocString(namespace_)) + , _fullname(parentAssembly->AllocString(fullname)) , _assembly(parentAssembly) - , _fullname(fullname) , _hasCachedProperties(false) , _hasCachedFields(false) , _hasCachedMethods(false) @@ -967,6 +968,8 @@ MClass::MClass(const MAssembly* parentAssembly, void* handle, const char* name, static void* TypeIsEnumPtr = GetStaticMethodPointer(TEXT("TypeIsEnum")); _isEnum = CallStaticMethod(TypeIsEnumPtr, handle); + _isGeneric = _fullname.FindLast('`') != -1; + CachedClassHandles[handle] = this; } @@ -982,24 +985,14 @@ bool MAssembly::ResolveMissingFile(String& assemblyPath) const MClass::~MClass() { - _methods.ClearDelete(); - _fields.ClearDelete(); - _properties.ClearDelete(); - _events.ClearDelete(); + ArenaAllocator::ClearDelete(_methods); + ArenaAllocator::ClearDelete(_fields); + ArenaAllocator::ClearDelete(_properties); + ArenaAllocator::ClearDelete(_events); CachedClassHandles.Remove(_handle); } -StringAnsiView MClass::GetName() const -{ - return _name; -} - -StringAnsiView MClass::GetNamespace() const -{ - return _namespace; -} - MType* MClass::GetType() const { return (MType*)_handle; @@ -1071,10 +1064,11 @@ const Array& MClass::GetMethods() const static void* GetClassMethodsPtr = GetStaticMethodPointer(TEXT("GetClassMethods")); CallStaticMethod(GetClassMethodsPtr, _handle, &methods, &methodsCount); _methods.Resize(methodsCount); + MAssembly* assembly = const_cast(_assembly); for (int32 i = 0; i < methodsCount; i++) { NativeMethodDefinitions& definition = methods[i]; - MMethod* method = New(const_cast(this), StringAnsi(definition.name), definition.handle, definition.numParameters, definition.methodAttributes); + MMethod* method = 
assembly->Memory.New(const_cast(this), assembly->AllocString(definition.name), definition.handle, definition.numParameters, definition.methodAttributes); _methods[i] = method; MCore::GC::FreeMemory((void*)definition.name); } @@ -1112,7 +1106,7 @@ const Array& MClass::GetFields() const for (int32 i = 0; i < numFields; i++) { NativeFieldDefinitions& definition = fields[i]; - MField* field = New(const_cast(this), definition.fieldHandle, definition.name, definition.fieldType, definition.fieldOffset, definition.fieldAttributes); + MField* field = _assembly->Memory.New(const_cast(this), definition.fieldHandle, definition.name, definition.fieldType, definition.fieldOffset, definition.fieldAttributes); _fields[i] = field; MCore::GC::FreeMemory((void*)definition.name); } @@ -1162,7 +1156,7 @@ const Array& MClass::GetProperties() const for (int i = 0; i < numProperties; i++) { const NativePropertyDefinitions& definition = foundProperties[i]; - MProperty* property = New(const_cast(this), definition.name, definition.propertyHandle, definition.getterHandle, definition.setterHandle, definition.getterAttributes, definition.setterAttributes); + MProperty* property = _assembly->Memory.New(const_cast(this), definition.name, definition.propertyHandle, definition.getterHandle, definition.setterHandle, definition.getterAttributes, definition.setterAttributes); _properties[i] = property; MCore::GC::FreeMemory((void*)definition.name); } @@ -1241,7 +1235,7 @@ MEvent::MEvent(MClass* parentClass, void* handle, const char* name) , _addMethod(nullptr) , _removeMethod(nullptr) , _parentClass(parentClass) - , _name(name) + , _name(parentClass->GetAssembly()->AllocString(name)) , _hasCachedAttributes(false) , _hasAddMonoMethod(true) , _hasRemoveMonoMethod(true) @@ -1317,7 +1311,7 @@ MField::MField(MClass* parentClass, void* handle, const char* name, void* type, , _type(type) , _fieldOffset(fieldOffset) , _parentClass(parentClass) - , _name(name) + , 
_name(parentClass->GetAssembly()->AllocString(name)) , _hasCachedAttributes(false) { switch (attributes & MFieldAttributes::FieldAccessMask) @@ -1409,11 +1403,11 @@ const Array& MField::GetAttributes() const return _attributes; } -MMethod::MMethod(MClass* parentClass, StringAnsi&& name, void* handle, int32 paramsCount, MMethodAttributes attributes) +MMethod::MMethod(MClass* parentClass, StringAnsiView name, void* handle, int32 paramsCount, MMethodAttributes attributes) : _handle(handle) , _paramsCount(paramsCount) , _parentClass(parentClass) - , _name(MoveTemp(name)) + , _name(name) , _hasCachedAttributes(false) , _hasCachedSignature(false) { @@ -1443,13 +1437,15 @@ MMethod::MMethod(MClass* parentClass, StringAnsi&& name, void* handle, int32 par _isStatic = (attributes & MMethodAttributes::Static) == MMethodAttributes::Static; #if COMPILE_WITH_PROFILER - const StringAnsi& className = parentClass->GetFullName(); - ProfilerName.Resize(className.Length() + 2 + _name.Length()); - Platform::MemoryCopy(ProfilerName.Get(), className.Get(), className.Length()); - ProfilerName.Get()[className.Length()] = ':'; - ProfilerName.Get()[className.Length() + 1] = ':'; - Platform::MemoryCopy(ProfilerName.Get() + className.Length() + 2, _name.Get(), _name.Length()); - ProfilerData.name = ProfilerName.Get(); + // Setup Tracy profiler entry (use assembly memory) + const StringAnsiView className = parentClass->GetFullName(); + char* profilerName = (char*)parentClass->GetAssembly()->Memory.Allocate(className.Length() + _name.Length() + 3); + Platform::MemoryCopy(profilerName, className.Get(), className.Length()); + profilerName[className.Length()] = ':'; + profilerName[className.Length() + 1] = ':'; + Platform::MemoryCopy(profilerName + className.Length() + 2, _name.Get(), _name.Length()); + profilerName[className.Length() + 2 + _name.Length()] = 0; + ProfilerData.name = profilerName; ProfilerData.function = _name.Get(); ProfilerData.file = nullptr; ProfilerData.line = 0; @@ -1573,20 
+1569,30 @@ const Array& MMethod::GetAttributes() const return _attributes; } +FORCE_INLINE StringAnsiView GetPropertyMethodName(MProperty* property, StringAnsiView prefix) +{ + StringAnsiView name = property->GetName(); + char* mem = (char*)property->GetParentClass()->GetAssembly()->Memory.Allocate(name.Length() + prefix.Length() + 1); + Platform::MemoryCopy(mem, prefix.Get(), prefix.Length()); + Platform::MemoryCopy(mem + prefix.Length(), name.Get(), name.Length()); + mem[name.Length() + prefix.Length()] = 0; + return StringAnsiView(mem, name.Length() + prefix.Length()); +} + MProperty::MProperty(MClass* parentClass, const char* name, void* handle, void* getterHandle, void* setterHandle, MMethodAttributes getterAttributes, MMethodAttributes setterAttributes) : _parentClass(parentClass) - , _name(name) + , _name(parentClass->GetAssembly()->AllocString(name)) , _handle(handle) , _hasCachedAttributes(false) { _hasGetMethod = getterHandle != nullptr; if (_hasGetMethod) - _getMethod = New(parentClass, StringAnsi("get_" + _name), getterHandle, 0, getterAttributes); + _getMethod = parentClass->GetAssembly()->Memory.New(parentClass, GetPropertyMethodName(this, StringAnsiView("get_", 4)), getterHandle, 0, getterAttributes); else + _getMethod = nullptr; _hasSetMethod = setterHandle != nullptr; if (_hasSetMethod) - _setMethod = New(parentClass, StringAnsi("set_" + _name), setterHandle, 1, setterAttributes); + _setMethod = parentClass->GetAssembly()->Memory.New(parentClass, GetPropertyMethodName(this, StringAnsiView("set_", 4)), setterHandle, 1, setterAttributes); else _setMethod = nullptr; } @@ -1594,9 +1600,9 @@ MProperty::~MProperty() { if (_getMethod) - Delete(_getMethod); + Memory::DestructItem(_getMethod); if (_setMethod) - Delete(_setMethod); + Memory::DestructItem(_setMethod); } MMethod* MProperty::GetGetMethod() const @@ -1683,7 +1689,7 @@ MClass* GetOrCreateClass(MType* typeHandle) 
static void* GetManagedClassFromTypePtr = GetStaticMethodPointer(TEXT("GetManagedClassFromType")); CallStaticMethod(GetManagedClassFromTypePtr, typeHandle, &classInfo, &assemblyHandle); MAssembly* assembly = GetAssembly(assemblyHandle); - klass = New(assembly, classInfo.typeHandle, classInfo.name, classInfo.fullname, classInfo.namespace_, classInfo.typeAttributes); + klass = assembly->Memory.New(assembly, classInfo.typeHandle, classInfo.name, classInfo.fullname, classInfo.namespace_, classInfo.typeAttributes); if (assembly != nullptr) { auto& classes = const_cast(assembly->GetClasses()); @@ -1889,7 +1895,7 @@ void ShutdownHostfxr() { } -void* GetStaticMethodPointer(const String& methodName) +void* GetStaticMethodPointer(StringView methodName) { void* fun; if (CachedFunctions.TryGet(methodName, fun)) From 9aaba955d0b8942079f8374582da1b9596af9d17 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 25 May 2025 02:04:56 +0200 Subject: [PATCH 011/211] Fix profiler tables to use column headers aligned to center --- Source/Editor/GUI/ColumnDefinition.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Editor/GUI/ColumnDefinition.cs b/Source/Editor/GUI/ColumnDefinition.cs index 6c1f8050a..4ddac7802 100644 --- a/Source/Editor/GUI/ColumnDefinition.cs +++ b/Source/Editor/GUI/ColumnDefinition.cs @@ -51,7 +51,7 @@ namespace FlaxEditor.GUI /// /// The column title horizontal text alignment /// - public TextAlignment TitleAlignment = TextAlignment.Near; + public TextAlignment TitleAlignment = TextAlignment.Center; /// /// The column title margin. 
From 8c62f1120f4e5b8eae05399c85a82070e416d866 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 25 May 2025 17:39:20 +0200 Subject: [PATCH 012/211] Optimize dynamic memory allocations for managed runtime interop collections with a new Arena Allocation --- Source/Engine/Core/Collections/Array.h | 12 +++ Source/Engine/Core/Memory/Allocation.h | 34 ++++++-- Source/Engine/Core/Memory/ArenaAllocation.h | 79 +++++++++++++++++++ .../Engine/Core/Memory/SimpleHeapAllocation.h | 5 ++ Source/Engine/Scripting/BinaryModule.cpp | 4 +- Source/Engine/Scripting/ManagedCLR/MClass.h | 25 +++--- Source/Engine/Scripting/ManagedCLR/MEvent.h | 4 +- Source/Engine/Scripting/ManagedCLR/MField.h | 5 +- Source/Engine/Scripting/ManagedCLR/MMethod.h | 5 +- .../Engine/Scripting/ManagedCLR/MProperty.h | 5 +- Source/Engine/Scripting/Runtime/DotNet.cpp | 34 +++++--- Source/Engine/Scripting/Runtime/Mono.cpp | 10 +-- Source/Engine/Scripting/Runtime/None.cpp | 18 ++--- 13 files changed, 185 insertions(+), 55 deletions(-) diff --git a/Source/Engine/Core/Collections/Array.h b/Source/Engine/Core/Collections/Array.h index 5845d7f50..ab142961d 100644 --- a/Source/Engine/Core/Collections/Array.h +++ b/Source/Engine/Core/Collections/Array.h @@ -20,6 +20,7 @@ API_CLASS(InBuild) class Array public: using ItemType = T; using AllocationData = typename AllocationType::template Data; + using AllocationTag = typename AllocationType::Tag; private: int32 _count; @@ -36,6 +37,17 @@ public: { } + /// + /// Initializes an empty without reserving any space. + /// + /// The custom allocation tag. + Array(AllocationTag tag) + : _count(0) + , _capacity(0) + , _allocation(tag) + { + } + /// /// Initializes by reserving space. 
/// diff --git a/Source/Engine/Core/Memory/Allocation.h b/Source/Engine/Core/Memory/Allocation.h index 6da929d86..d6958d2c3 100644 --- a/Source/Engine/Core/Memory/Allocation.h +++ b/Source/Engine/Core/Memory/Allocation.h @@ -36,6 +36,17 @@ namespace AllocationUtils capacity++; return capacity; } + + inline int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) + { + if (capacity < minCapacity) + capacity = minCapacity; + if (capacity < 8) + capacity = 8; + else + capacity = RoundUpToPowerOf2(capacity); + return capacity; + } } /// @@ -46,6 +57,7 @@ class FixedAllocation { public: enum { HasSwap = false }; + typedef void* Tag; template class alignas(sizeof(void*)) Data @@ -58,6 +70,10 @@ public: { } + FORCE_INLINE Data(Tag tag) + { + } + FORCE_INLINE ~Data() { } @@ -106,6 +122,7 @@ class HeapAllocation { public: enum { HasSwap = true }; + typedef void* Tag; template class Data @@ -118,6 +135,10 @@ public: { } + FORCE_INLINE Data(Tag tag) + { + } + FORCE_INLINE ~Data() { Allocator::Free(_data); @@ -135,13 +156,7 @@ public: FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, const int32 minCapacity) const { - if (capacity < minCapacity) - capacity = minCapacity; - if (capacity < 8) - capacity = 8; - else - capacity = AllocationUtils::RoundUpToPowerOf2(capacity); - return capacity; + return AllocationUtils::CalculateCapacityGrow(capacity, minCapacity); } FORCE_INLINE void Allocate(const int32 capacity) @@ -184,6 +199,7 @@ class InlinedAllocation { public: enum { HasSwap = false }; + typedef void* Tag; template class alignas(sizeof(void*)) Data @@ -200,6 +216,10 @@ public: { } + FORCE_INLINE Data(Tag tag) + { + } + FORCE_INLINE ~Data() { } diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index 43004ab30..af8df2001 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -1,5 +1,7 @@ // Copyright (c) Wojciech Figat. All rights reserved. 
+#pragma once + #include "Allocation.h" /// @@ -63,3 +65,80 @@ public: collection.Clear(); } }; + +/// +/// The memory allocation policy that uses a part of shared page allocator. Allocations are performed in stack-manner, and free is no-op. +/// +class ArenaAllocation +{ +public: + enum { HasSwap = true }; + typedef ArenaAllocator* Tag; + + template + class Data + { + private: + T* _data = nullptr; + ArenaAllocator* _arena = nullptr; + + public: + FORCE_INLINE Data() + { + } + + FORCE_INLINE Data(Tag tag) + { + _arena = tag; + } + + FORCE_INLINE ~Data() + { + } + + FORCE_INLINE T* Get() + { + return _data; + } + + FORCE_INLINE const T* Get() const + { + return _data; + } + + FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, const int32 minCapacity) const + { + return AllocationUtils::CalculateCapacityGrow(capacity, minCapacity); + } + + FORCE_INLINE void Allocate(const int32 capacity) + { + ASSERT_LOW_LAYER(!_data && _arena); + _data = (T*)_arena->Allocate(capacity * sizeof(T), alignof(T)); + } + + FORCE_INLINE void Relocate(const int32 capacity, int32 oldCount, int32 newCount) + { + ASSERT_LOW_LAYER(_arena); + T* newData = capacity != 0 ? 
(T*)_arena->Allocate(capacity * sizeof(T), alignof(T)) : nullptr; + if (oldCount) + { + if (newCount > 0) + Memory::MoveItems(newData, _data, newCount); + Memory::DestructItems(_data, oldCount); + } + _data = newData; + } + + FORCE_INLINE void Free() + { + _data = nullptr; + } + + FORCE_INLINE void Swap(Data& other) + { + ::Swap(_data, other._data); + ::Swap(_arena, other._arena); + } + }; +}; diff --git a/Source/Engine/Core/Memory/SimpleHeapAllocation.h b/Source/Engine/Core/Memory/SimpleHeapAllocation.h index 4df5bb660..6afcffd64 100644 --- a/Source/Engine/Core/Memory/SimpleHeapAllocation.h +++ b/Source/Engine/Core/Memory/SimpleHeapAllocation.h @@ -11,6 +11,7 @@ class SimpleHeapAllocation { public: enum { HasSwap = true }; + typedef void* Tag; template class Data @@ -23,6 +24,10 @@ public: { } + FORCE_INLINE Data(Tag tag) + { + } + FORCE_INLINE ~Data() { if (_data) diff --git a/Source/Engine/Scripting/BinaryModule.cpp b/Source/Engine/Scripting/BinaryModule.cpp index 1a48a3846..da01a9f07 100644 --- a/Source/Engine/Scripting/BinaryModule.cpp +++ b/Source/Engine/Scripting/BinaryModule.cpp @@ -815,7 +815,7 @@ namespace { MMethod* FindMethod(MClass* mclass, const MMethod* referenceMethod) { - const Array& methods = mclass->GetMethods(); + const auto& methods = mclass->GetMethods(); for (int32 i = 0; i < methods.Count(); i++) { MMethod* method = methods[i]; @@ -1095,7 +1095,7 @@ void ManagedBinaryModule::InitType(MClass* mclass) // Initialize scripting interfaces implemented in C# int32 interfacesCount = 0; MClass* klass = mclass; - const Array& interfaceClasses = klass->GetInterfaces(); + const auto& interfaceClasses = klass->GetInterfaces(); for (const MClass* interfaceClass : interfaceClasses) { const ScriptingTypeHandle interfaceType = FindType(interfaceClass); diff --git a/Source/Engine/Scripting/ManagedCLR/MClass.h b/Source/Engine/Scripting/ManagedCLR/MClass.h index a74b4f7da..a89d75205 100644 --- a/Source/Engine/Scripting/ManagedCLR/MClass.h +++ 
b/Source/Engine/Scripting/ManagedCLR/MClass.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Core/Collections/Array.h" +#include "Engine/Core/Memory/ArenaAllocation.h" #include "MTypes.h" /// @@ -25,12 +26,12 @@ private: #endif MAssembly* _assembly; - mutable Array _methods; - mutable Array _fields; - mutable Array _properties; - mutable Array _attributes; - mutable Array _events; - mutable Array _interfaces; + mutable Array _methods; + mutable Array _fields; + mutable Array _properties; + mutable Array _attributes; + mutable Array _events; + mutable Array _interfaces; MVisibility _visibility; @@ -248,7 +249,7 @@ public: /// /// Be aware this will not include the methods of any base classes. /// The list of methods. - const Array& GetMethods() const; + const Array& GetMethods() const; /// /// Returns an object referencing a field with the specified name. @@ -263,7 +264,7 @@ public: /// /// Be aware this will not include the fields of any base classes. /// The list of fields. - const Array& GetFields() const; + const Array& GetFields() const; /// /// Returns an object referencing a event with the specified name. @@ -276,7 +277,7 @@ public: /// Returns all events belonging to this class. /// /// The list of events. - const Array& GetEvents() const; + const Array& GetEvents() const; /// /// Returns an object referencing a property with the specified name. @@ -291,14 +292,14 @@ public: /// /// Be aware this will not include the properties of any base classes. /// The list of properties. - const Array& GetProperties() const; + const Array& GetProperties() const; /// /// Returns all interfaces implemented by this class (excluding interfaces from base classes). /// /// Be aware this will not include the interfaces of any base classes. /// The list of interfaces. - const Array& GetInterfaces() const; + const Array& GetInterfaces() const; public: /// @@ -332,5 +333,5 @@ public: /// Returns an instance of all attributes connected with given class. 
Returns null if the class doesn't have any attributes. /// /// The array of attribute objects. - const Array& GetAttributes() const; + const Array& GetAttributes() const; }; diff --git a/Source/Engine/Scripting/ManagedCLR/MEvent.h b/Source/Engine/Scripting/ManagedCLR/MEvent.h index 0aade183e..9d8551774 100644 --- a/Source/Engine/Scripting/ManagedCLR/MEvent.h +++ b/Source/Engine/Scripting/ManagedCLR/MEvent.h @@ -29,7 +29,7 @@ protected: mutable int32 _hasAddMonoMethod : 1; mutable int32 _hasRemoveMonoMethod : 1; - mutable Array _attributes; + mutable Array _attributes; public: #if USE_MONO @@ -121,5 +121,5 @@ public: /// Returns an instance of all attributes connected with given event. Returns null if the event doesn't have any attributes. /// /// The array of attribute objects. - const Array& GetAttributes() const; + const Array& GetAttributes() const; }; diff --git a/Source/Engine/Scripting/ManagedCLR/MField.h b/Source/Engine/Scripting/ManagedCLR/MField.h index ed73db711..66213d6ab 100644 --- a/Source/Engine/Scripting/ManagedCLR/MField.h +++ b/Source/Engine/Scripting/ManagedCLR/MField.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Core/Collections/Array.h" +#include "Engine/Core/Memory/ArenaAllocation.h" #include "MTypes.h" /// @@ -32,7 +33,7 @@ protected: mutable int32 _hasCachedAttributes : 1; int32 _isStatic : 1; - mutable Array _attributes; + mutable Array _attributes; public: #if USE_MONO @@ -157,5 +158,5 @@ public: /// Returns an instance of all attributes connected with given field. Returns null if the field doesn't have any attributes. /// /// The array of attribute objects. 
- const Array& GetAttributes() const; + const Array& GetAttributes() const; }; diff --git a/Source/Engine/Scripting/ManagedCLR/MMethod.h b/Source/Engine/Scripting/ManagedCLR/MMethod.h index 8d24b533b..700fa9593 100644 --- a/Source/Engine/Scripting/ManagedCLR/MMethod.h +++ b/Source/Engine/Scripting/ManagedCLR/MMethod.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Core/Collections/Array.h" +#include "Engine/Core/Memory/ArenaAllocation.h" #if COMPILE_WITH_PROFILER #include "Engine/Profiler/ProfilerSrcLoc.h" #endif @@ -42,7 +43,7 @@ protected: #endif int32 _isStatic : 1; - mutable Array _attributes; + mutable Array _attributes; public: #if USE_MONO @@ -197,5 +198,5 @@ public: /// Returns an instance of all attributes connected with given method. Returns null if the method doesn't have any attributes. /// /// The array of attribute objects. - const Array& GetAttributes() const; + const Array& GetAttributes() const; }; diff --git a/Source/Engine/Scripting/ManagedCLR/MProperty.h b/Source/Engine/Scripting/ManagedCLR/MProperty.h index 7e426b474..e48d98375 100644 --- a/Source/Engine/Scripting/ManagedCLR/MProperty.h +++ b/Source/Engine/Scripting/ManagedCLR/MProperty.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Core/Collections/Array.h" +#include "Engine/Core/Memory/ArenaAllocation.h" #include "MTypes.h" /// @@ -31,7 +32,7 @@ protected: mutable int32 _hasSetMethod : 1; mutable int32 _hasGetMethod : 1; - mutable Array _attributes; + mutable Array _attributes; public: #if USE_MONO @@ -135,5 +136,5 @@ public: /// Returns an instance of all attributes connected with given property. Returns null if the property doesn't have any attributes. /// /// The array of attribute objects. 
- const Array& GetAttributes() const; + const Array& GetAttributes() const; }; diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index cd4da3f20..ce5f315f1 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -212,7 +212,7 @@ MClass* GetClass(MType* typeHandle); MClass* GetOrCreateClass(MType* typeHandle); MType* GetObjectType(MObject* obj); -void* GetCustomAttribute(const Array& attributes, const MClass* attributeClass) +void* GetCustomAttribute(const Array& attributes, const MClass* attributeClass) { for (MObject* attr : attributes) { @@ -223,7 +223,7 @@ void* GetCustomAttribute(const Array& attributes, const MClass* attrib return nullptr; } -void GetCustomAttributes(Array& result, void* handle, void* getAttributesFunc) +void GetCustomAttributes(Array& result, void* handle, void* getAttributesFunc) { MObject** attributes; int numAttributes; @@ -922,6 +922,12 @@ MClass::MClass(MAssembly* parentAssembly, void* handle, const char* name, const , _namespace(parentAssembly->AllocString(namespace_)) , _fullname(parentAssembly->AllocString(fullname)) , _assembly(parentAssembly) + , _methods(&parentAssembly->Memory) + , _fields(&parentAssembly->Memory) + , _properties(&parentAssembly->Memory) + , _attributes(&parentAssembly->Memory) + , _events(&parentAssembly->Memory) + , _interfaces(&parentAssembly->Memory) , _hasCachedProperties(false) , _hasCachedFields(false) , _hasCachedMethods(false) @@ -1050,7 +1056,7 @@ MMethod* MClass::GetMethod(const char* name, int32 numParams) const return nullptr; } -const Array& MClass::GetMethods() const +const Array& MClass::GetMethods() const { if (_hasCachedMethods) return _methods; @@ -1089,7 +1095,7 @@ MField* MClass::GetField(const char* name) const return nullptr; } -const Array& MClass::GetFields() const +const Array& MClass::GetFields() const { if (_hasCachedFields) return _fields; @@ -1116,7 +1122,7 @@ const Array& 
MClass::GetFields() const return _fields; } -const Array& MClass::GetEvents() const +const Array& MClass::GetEvents() const { if (_hasCachedEvents) return _events; @@ -1139,7 +1145,7 @@ MProperty* MClass::GetProperty(const char* name) const return nullptr; } -const Array& MClass::GetProperties() const +const Array& MClass::GetProperties() const { if (_hasCachedProperties) return _properties; @@ -1166,7 +1172,7 @@ const Array& MClass::GetProperties() const return _properties; } -const Array& MClass::GetInterfaces() const +const Array& MClass::GetInterfaces() const { if (_hasCachedInterfaces) return _interfaces; @@ -1206,7 +1212,7 @@ MObject* MClass::GetAttribute(const MClass* klass) const return (MObject*)GetCustomAttribute(GetAttributes(), klass); } -const Array& MClass::GetAttributes() const +const Array& MClass::GetAttributes() const { if (_hasCachedAttributes) return _attributes; @@ -1239,6 +1245,7 @@ MEvent::MEvent(MClass* parentClass, void* handle, const char* name) , _hasCachedAttributes(false) , _hasAddMonoMethod(true) , _hasRemoveMonoMethod(true) + , _attributes(&parentClass->GetAssembly()->Memory) { } @@ -1267,7 +1274,7 @@ MObject* MEvent::GetAttribute(const MClass* klass) const return (MObject*)GetCustomAttribute(GetAttributes(), klass); } -const Array& MEvent::GetAttributes() const +const Array& MEvent::GetAttributes() const { if (_hasCachedAttributes) return _attributes; @@ -1313,6 +1320,7 @@ MField::MField(MClass* parentClass, void* handle, const char* name, void* type, , _parentClass(parentClass) , _name(parentClass->GetAssembly()->AllocString(name)) , _hasCachedAttributes(false) + , _attributes(&parentClass->GetAssembly()->Memory) { switch (attributes & MFieldAttributes::FieldAccessMask) { @@ -1389,7 +1397,7 @@ MObject* MField::GetAttribute(const MClass* klass) const return (MObject*)GetCustomAttribute(GetAttributes(), klass); } -const Array& MField::GetAttributes() const +const Array& MField::GetAttributes() const { if (_hasCachedAttributes) return 
_attributes; @@ -1410,6 +1418,7 @@ MMethod::MMethod(MClass* parentClass, StringAnsiView name, void* handle, int32 p , _name(name) , _hasCachedAttributes(false) , _hasCachedSignature(false) + , _attributes(&parentClass->GetAssembly()->Memory) { switch (attributes & MMethodAttributes::MemberAccessMask) { @@ -1555,7 +1564,7 @@ MObject* MMethod::GetAttribute(const MClass* klass) const return (MObject*)GetCustomAttribute(GetAttributes(), klass); } -const Array& MMethod::GetAttributes() const +const Array& MMethod::GetAttributes() const { if (_hasCachedAttributes) return _attributes; @@ -1584,6 +1593,7 @@ MProperty::MProperty(MClass* parentClass, const char* name, void* handle, void* , _name(parentClass->GetAssembly()->AllocString(name)) , _handle(handle) , _hasCachedAttributes(false) + , _attributes(&parentClass->GetAssembly()->Memory) { _hasGetMethod = getterHandle != nullptr; if (_hasGetMethod) @@ -1644,7 +1654,7 @@ MObject* MProperty::GetAttribute(const MClass* klass) const return (MObject*)GetCustomAttribute(GetAttributes(), klass); } -const Array& MProperty::GetAttributes() const +const Array& MProperty::GetAttributes() const { if (_hasCachedAttributes) return _attributes; diff --git a/Source/Engine/Scripting/Runtime/Mono.cpp b/Source/Engine/Scripting/Runtime/Mono.cpp index 317f4e798..86b800e97 100644 --- a/Source/Engine/Scripting/Runtime/Mono.cpp +++ b/Source/Engine/Scripting/Runtime/Mono.cpp @@ -1539,7 +1539,7 @@ MObject* MClass::GetAttribute(const MClass* klass) const return attrInfo ? 
mono_custom_attrs_get_attr(attrInfo, klass->GetNative()) : nullptr; } -const Array& MClass::GetAttributes() const +const Array& MClass::GetAttributes() const { if (_hasCachedAttributes) return _attributes; @@ -1662,7 +1662,7 @@ MObject* MEvent::GetAttribute(const MClass* klass) const return foundAttr; } -const Array& MEvent::GetAttributes() const +const Array& MEvent::GetAttributes() const { if (_hasCachedAttributes) return _attributes; @@ -1815,7 +1815,7 @@ MObject* MField::GetAttribute(const MClass* klass) const return foundAttr; } -const Array& MField::GetAttributes() const +const Array& MField::GetAttributes() const { if (_hasCachedAttributes) return _attributes; @@ -1988,7 +1988,7 @@ MObject* MMethod::GetAttribute(const MClass* klass) const return foundAttr; } -const Array& MMethod::GetAttributes() const +const Array& MMethod::GetAttributes() const { if (_hasCachedAttributes) return _attributes; @@ -2118,7 +2118,7 @@ MObject* MProperty::GetAttribute(const MClass* klass) const return foundAttr; } -const Array& MProperty::GetAttributes() const +const Array& MProperty::GetAttributes() const { if (_hasCachedAttributes) return _attributes; diff --git a/Source/Engine/Scripting/Runtime/None.cpp b/Source/Engine/Scripting/Runtime/None.cpp index a7c921cb0..580029eef 100644 --- a/Source/Engine/Scripting/Runtime/None.cpp +++ b/Source/Engine/Scripting/Runtime/None.cpp @@ -358,7 +358,7 @@ MMethod* MClass::GetMethod(const char* name, int32 numParams) const return nullptr; } -const Array& MClass::GetMethods() const +const Array& MClass::GetMethods() const { _hasCachedMethods = true; return _methods; @@ -369,13 +369,13 @@ MField* MClass::GetField(const char* name) const return nullptr; } -const Array& MClass::GetFields() const +const Array& MClass::GetFields() const { _hasCachedFields = true; return _fields; } -const Array& MClass::GetEvents() const +const Array& MClass::GetEvents() const { _hasCachedEvents = true; return _events; @@ -386,7 +386,7 @@ MProperty* 
MClass::GetProperty(const char* name) const return nullptr; } -const Array& MClass::GetProperties() const +const Array& MClass::GetProperties() const { _hasCachedProperties = true; return _properties; @@ -407,7 +407,7 @@ MObject* MClass::GetAttribute(const MClass* klass) const return nullptr; } -const Array& MClass::GetAttributes() const +const Array& MClass::GetAttributes() const { _hasCachedAttributes = true; return _attributes; @@ -449,7 +449,7 @@ MObject* MEvent::GetAttribute(const MClass* klass) const return nullptr; } -const Array& MEvent::GetAttributes() const +const Array& MEvent::GetAttributes() const { return _attributes; } @@ -501,7 +501,7 @@ MObject* MField::GetAttribute(const MClass* klass) const return nullptr; } -const Array& MField::GetAttributes() const +const Array& MField::GetAttributes() const { return _attributes; } @@ -556,7 +556,7 @@ MObject* MMethod::GetAttribute(const MClass* klass) const return nullptr; } -const Array& MMethod::GetAttributes() const +const Array& MMethod::GetAttributes() const { return _attributes; } @@ -603,7 +603,7 @@ MObject* MProperty::GetAttribute(const MClass* klass) const return nullptr; } -const Array& MProperty::GetAttributes() const +const Array& MProperty::GetAttributes() const { return _attributes; } From 98e59450f1bd0f825e070a87db8e362cad34a2fa Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 25 May 2025 17:39:51 +0200 Subject: [PATCH 013/211] Add freeing managed assembly memory on reload/unload --- Source/Engine/Core/Memory/Allocation.cpp | 7 +++++-- Source/Engine/Scripting/ManagedCLR/MCore.cpp | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp index 87c9dbc63..59b3a8a7e 100644 --- a/Source/Engine/Core/Memory/Allocation.cpp +++ b/Source/Engine/Core/Memory/Allocation.cpp @@ -14,6 +14,9 @@ void ArenaAllocator::Free() Allocator::Free(page); page = next; } + + // Unlink + _first = nullptr; } void* 
ArenaAllocator::Allocate(uint64 size, uint64 alignment) @@ -31,14 +34,14 @@ void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) page->Memory = Allocator::Allocate(pageSize); page->Next = _first; page->Offset = 0; - page->Size = pageSize; + page->Size = (uint32)pageSize; _first = page; } // Allocate within a page page->Offset = Math::AlignUp(page->Offset, (uint32)alignment); void* mem = (byte*)page->Memory + page->Offset; - page->Offset += size; + page->Offset += (uint32)size; return mem; } \ No newline at end of file diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.cpp b/Source/Engine/Scripting/ManagedCLR/MCore.cpp index 4ded56b52..675e09ddf 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp @@ -143,6 +143,7 @@ void MAssembly::Unload(bool isReloading) #else _classes.ClearDelete(); #endif + Memory.Free(); Unloaded(this); } From 8f9fa6995eafe270a95d6a25da16d5e22d542070 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 25 May 2025 17:40:00 +0200 Subject: [PATCH 014/211] Fix compilation issues --- Source/Engine/Platform/Apple/ApplePlatform.cpp | 13 +++++++++++++ Source/Engine/Platform/Win32/Win32Platform.cpp | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Platform/Apple/ApplePlatform.cpp b/Source/Engine/Platform/Apple/ApplePlatform.cpp index ad86ed7da..bbb08ddaf 100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.cpp +++ b/Source/Engine/Platform/Apple/ApplePlatform.cpp @@ -2,6 +2,9 @@ #if PLATFORM_MAC || PLATFORM_IOS +#define PLATFORM_MAC_CACHED PLATFORM_MAC +#define PLATFORM_IOS_CACHED PLATFORM_IOS + #include "ApplePlatform.h" #include "AppleUtils.h" #include "Engine/Core/Log.h" @@ -50,6 +53,10 @@ #include #endif +// System includes break those defines +#define PLATFORM_MAC PLATFORM_MAC_CACHED +#define PLATFORM_IOS PLATFORM_IOS_CACHED + CPUInfo Cpu; String UserLocale; double SecondsPerCycle; @@ -224,10 +231,16 @@ MemoryStats 
ApplePlatform::GetMemoryStats() ProcessMemoryStats ApplePlatform::GetProcessMemoryStats() { ProcessMemoryStats result; +#if PLATFORM_IOS + rusage_info_current rusage_payload; + proc_pid_rusage(getpid(), RUSAGE_INFO_CURRENT, (rusage_info_t*)&rusage_payload); + result.UsedPhysicalMemory = rusage_payload.ri_phys_footprint; +#else mach_task_basic_info_data_t taskInfo; mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT; task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&taskInfo, &count); result.UsedPhysicalMemory = taskInfo.resident_size; +#endif result.UsedVirtualMemory = result.UsedPhysicalMemory; return result; } diff --git a/Source/Engine/Platform/Win32/Win32Platform.cpp b/Source/Engine/Platform/Win32/Win32Platform.cpp index 6b5eaa2ff..c002986c9 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.cpp +++ b/Source/Engine/Platform/Win32/Win32Platform.cpp @@ -290,7 +290,7 @@ void* Win32Platform::AllocatePages(uint64 numPages, uint64 pageSize) if (!ptr) OutOfMemory(); #if COMPILE_WITH_PROFILER - OnMemoryAlloc(ptr, size); + OnMemoryAlloc(ptr, numBytes); #endif return ptr; } From a74c5e79432e71f3e2ac0c32e13b68a6a87c7738 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 25 May 2025 18:01:30 +0200 Subject: [PATCH 015/211] Another fix for iOS build --- .../Engine/Platform/Apple/ApplePlatform.cpp | 27 ++++++++++++++++--- Source/Engine/Scripting/Runtime/DotNet.cpp | 2 +- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Platform/Apple/ApplePlatform.cpp b/Source/Engine/Platform/Apple/ApplePlatform.cpp index bbb08ddaf..c6c689195 100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.cpp +++ b/Source/Engine/Platform/Apple/ApplePlatform.cpp @@ -2,8 +2,12 @@ #if PLATFORM_MAC || PLATFORM_IOS -#define PLATFORM_MAC_CACHED PLATFORM_MAC -#define PLATFORM_IOS_CACHED PLATFORM_IOS +#if PLATFORM_MAC +#define PLATFORM_MAC_CACHED 1 +#endif +#if PLATFORM_IOS +#define PLATFORM_IOS_CACHED 1 +#endif #include "ApplePlatform.h" #include 
"AppleUtils.h" @@ -54,8 +58,23 @@ #endif // System includes break those defines -#define PLATFORM_MAC PLATFORM_MAC_CACHED -#define PLATFORM_IOS PLATFORM_IOS_CACHED +#undef PLATFORM_MAC +#if PLATFORM_MAC_CACHED +#define PLATFORM_MAC 1 +#else +#define PLATFORM_MAC 0 +#endif +#undef PLATFORM_IOS +#if PLATFORM_IOS_CACHED +#define PLATFORM_IOS 1 +#else +#define PLATFORM_IOS 0 +#endif + +#if PLATFORM_IOS +#include +extern "C" int proc_pid_rusage(int pid, int flavor, rusage_info_t *buffer) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +#endif CPUInfo Cpu; String UserLocale; diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index ce5f315f1..24e07859d 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -2272,7 +2272,7 @@ void ShutdownHostfxr() #endif } -void* GetStaticMethodPointer(const String& methodName) +void* GetStaticMethodPointer(StringView methodName) { void* fun; if (CachedFunctions.TryGet(methodName, fun)) From 9dc4dbc6d775aceb8e1971e4f6e03c9aed26bbba Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 25 May 2025 18:38:07 +0200 Subject: [PATCH 016/211] Add more memory profiler categories --- Source/Engine/Content/Assets/VisualScript.cpp | 15 +++++ Source/Engine/Core/Memory/Allocation.cpp | 7 +++ Source/Engine/Graphics/GPUBuffer.cpp | 20 ++++++- .../Engine/Graphics/Textures/GPUTexture.cpp | 24 +++++++- Source/Engine/Profiler/ProfilerMemory.cpp | 55 ++++++++----------- Source/Engine/Profiler/ProfilerMemory.h | 29 +++++++++- 6 files changed, 112 insertions(+), 38 deletions(-) diff --git a/Source/Engine/Content/Assets/VisualScript.cpp b/Source/Engine/Content/Assets/VisualScript.cpp index a6d83919c..2e6ffb735 100644 --- a/Source/Engine/Content/Assets/VisualScript.cpp +++ b/Source/Engine/Content/Assets/VisualScript.cpp @@ -18,6 +18,7 @@ #include "Engine/Serialization/Serialization.h" #include "Engine/Serialization/JsonWriter.h" #include 
"Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Utilities/StringConverter.h" #include "Engine/Threading/MainThreadTask.h" #include "Engine/Level/SceneObject.h" @@ -1340,6 +1341,8 @@ bool VisualScript::Save(const StringView& path) Asset::LoadResult VisualScript::load() { + PROFILE_MEM(ScriptingVisual); + // Build Visual Script typename that is based on asset id String typeName = _id.ToString(); StringUtils::ConvertUTF162ANSI(typeName.Get(), _typenameChars, 32); @@ -1532,6 +1535,7 @@ Asset::LoadResult VisualScript::load() void VisualScript::unload(bool isReloading) { + PROFILE_MEM(ScriptingVisual); #if USE_EDITOR if (isReloading) { @@ -1588,6 +1592,7 @@ AssetChunksFlag VisualScript::getChunksToPreload() const void VisualScript::CacheScriptingType() { + PROFILE_MEM(ScriptingVisual); ScopeLock lock(VisualScriptingBinaryModule::Locker); auto& binaryModule = VisualScriptingModule; @@ -1723,6 +1728,7 @@ ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const Scri VisualScript* visualScript = VisualScriptingModule.Scripts[params.Type.TypeIndex]; // Initialize instance data + PROFILE_MEM(ScriptingVisual); ScopeLock lock(visualScript->Locker); auto& instanceParams = visualScript->_instances[object->GetID()].Params; instanceParams.Resize(visualScript->Graph.Parameters.Count()); @@ -1747,6 +1753,8 @@ ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const Scri void VisualScriptingBinaryModule::OnScriptsReloading() { + PROFILE_MEM(ScriptingVisual); + // Clear any cached types from that module across all loaded Visual Scripts for (auto& script : Scripts) { @@ -1795,6 +1803,7 @@ void VisualScriptingBinaryModule::OnScriptsReloading() void VisualScriptingBinaryModule::OnEvent(ScriptingObject* object, Span parameters, ScriptingTypeHandle eventType, StringView eventName) { + PROFILE_MEM(ScriptingVisual); if (object) { // Object event @@ -1956,6 +1965,7 @@ bool 
VisualScriptingBinaryModule::GetFieldValue(void* field, const Variant& inst bool VisualScriptingBinaryModule::SetFieldValue(void* field, const Variant& instance, Variant& value) { + PROFILE_MEM(ScriptingVisual); const auto vsFiled = (VisualScript::Field*)field; const auto instanceObject = (ScriptingObject*)instance; if (!instanceObject) @@ -2042,6 +2052,7 @@ void VisualScriptingBinaryModule::SerializeObject(JsonWriter& stream, ScriptingO void VisualScriptingBinaryModule::DeserializeObject(ISerializable::DeserializeStream& stream, ScriptingObject* object, ISerializeModifier* modifier) { + PROFILE_MEM(ScriptingVisual); ASSERT(stream.IsObject()); Locker.Lock(); const auto asset = Scripts[object->GetTypeHandle().TypeIndex].Get(); @@ -2165,6 +2176,7 @@ const Variant& VisualScript::GetScriptInstanceParameterValue(const StringView& n void VisualScript::SetScriptInstanceParameterValue(const StringView& name, ScriptingObject* instance, const Variant& value) { + PROFILE_MEM(ScriptingVisual); CHECK(instance); for (int32 paramIndex = 0; paramIndex < Graph.Parameters.Count(); paramIndex++) { @@ -2186,6 +2198,7 @@ void VisualScript::SetScriptInstanceParameterValue(const StringView& name, Scrip void VisualScript::SetScriptInstanceParameterValue(const StringView& name, ScriptingObject* instance, Variant&& value) { + PROFILE_MEM(ScriptingVisual); CHECK(instance); for (int32 paramIndex = 0; paramIndex < Graph.Parameters.Count(); paramIndex++) { @@ -2383,6 +2396,7 @@ VisualScriptingBinaryModule* VisualScripting::GetBinaryModule() Variant VisualScripting::Invoke(VisualScript::Method* method, ScriptingObject* instance, Span parameters) { + PROFILE_MEM(ScriptingVisual); CHECK_RETURN(method && method->Script->IsLoaded(), Variant::Zero); PROFILE_CPU_SRC_LOC(method->ProfilerData); @@ -2423,6 +2437,7 @@ bool VisualScripting::Evaluate(VisualScript* script, ScriptingObject* instance, const auto box = node->GetBox(boxId); if (!box) return false; + PROFILE_MEM(ScriptingVisual); // Add to the 
calling stack ScopeContext scope; diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp index 59b3a8a7e..b2e74b8b6 100644 --- a/Source/Engine/Core/Memory/Allocation.cpp +++ b/Source/Engine/Core/Memory/Allocation.cpp @@ -2,6 +2,7 @@ #include "ArenaAllocation.h" #include "../Math/Math.h" +#include "Engine/Profiler/ProfilerMemory.h" void ArenaAllocator::Free() { @@ -9,6 +10,9 @@ void ArenaAllocator::Free() Page* page = _first; while (page) { +#if COMPILE_WITH_PROFILER + ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, -page->Size, -1); +#endif Allocator::Free(page->Memory); Page* next = page->Next; Allocator::Free(page); @@ -30,6 +34,9 @@ void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) if (!page) { uint64 pageSize = Math::Max(_pageSize, size); +#if COMPILE_WITH_PROFILER + ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, pageSize, 1); +#endif page = (Page*)Allocator::Allocate(sizeof(Page)); page->Memory = Allocator::Allocate(pageSize); page->Next = _first; diff --git a/Source/Engine/Graphics/GPUBuffer.cpp b/Source/Engine/Graphics/GPUBuffer.cpp index db7845227..f8372572e 100644 --- a/Source/Engine/Graphics/GPUBuffer.cpp +++ b/Source/Engine/Graphics/GPUBuffer.cpp @@ -244,7 +244,15 @@ bool GPUBuffer::Init(const GPUBufferDescription& desc) LOG(Warning, "Cannot initialize buffer. 
Description: {0}", desc.ToString()); return true; } - PROFILE_MEM_INC(GraphicsBuffers, GetMemoryUsage()); + +#if COMPILE_WITH_PROFILER + auto group = ProfilerMemory::Groups::GraphicsBuffers; + if (EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::VertexBuffer)) + group = ProfilerMemory::Groups::GraphicsVertexBuffers; + else if (EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::IndexBuffer)) + group = ProfilerMemory::Groups::GraphicsIndexBuffers; + ProfilerMemory::IncrementGroup(group, _memoryUsage); +#endif return false; } @@ -480,7 +488,15 @@ GPUResourceType GPUBuffer::GetResourceType() const void GPUBuffer::OnReleaseGPU() { - PROFILE_MEM_DEC(GraphicsBuffers, GetMemoryUsage()); +#if COMPILE_WITH_PROFILER + auto group = ProfilerMemory::Groups::GraphicsBuffers; + if (EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::VertexBuffer)) + group = ProfilerMemory::Groups::GraphicsVertexBuffers; + else if (EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::IndexBuffer)) + group = ProfilerMemory::Groups::GraphicsIndexBuffers; + ProfilerMemory::IncrementGroup(group, _memoryUsage); +#endif + _desc.Clear(); _isLocked = false; } diff --git a/Source/Engine/Graphics/Textures/GPUTexture.cpp b/Source/Engine/Graphics/Textures/GPUTexture.cpp index 6d138a40c..245fb1e01 100644 --- a/Source/Engine/Graphics/Textures/GPUTexture.cpp +++ b/Source/Engine/Graphics/Textures/GPUTexture.cpp @@ -503,7 +503,17 @@ bool GPUTexture::Init(const GPUTextureDescription& desc) LOG(Warning, "Cannot initialize texture. 
Description: {0}", desc.ToString()); return true; } - PROFILE_MEM_INC(GraphicsTextures, GetMemoryUsage()); + +#if COMPILE_WITH_PROFILER + auto group = ProfilerMemory::Groups::GraphicsTextures; + if (_desc.IsRenderTarget()) + group = ProfilerMemory::Groups::GraphicsRenderTargets; + else if (_desc.IsCubeMap()) + group = ProfilerMemory::Groups::GraphicsCubeMaps; + else if (_desc.IsVolume()) + group = ProfilerMemory::Groups::GraphicsVolumeTextures; + ProfilerMemory::IncrementGroup(group, _memoryUsage); +#endif // Render targets and depth buffers doesn't support normal textures streaming and are considered to be always resident if (IsRegularTexture() == false) @@ -593,7 +603,17 @@ GPUResourceType GPUTexture::GetResourceType() const void GPUTexture::OnReleaseGPU() { - PROFILE_MEM_DEC(GraphicsTextures, GetMemoryUsage()); +#if COMPILE_WITH_PROFILER + auto group = ProfilerMemory::Groups::GraphicsTextures; + if (_desc.IsRenderTarget()) + group = ProfilerMemory::Groups::GraphicsRenderTargets; + else if (_desc.IsCubeMap()) + group = ProfilerMemory::Groups::GraphicsCubeMaps; + else if (_desc.IsVolume()) + group = ProfilerMemory::Groups::GraphicsVolumeTextures; + ProfilerMemory::DecrementGroup(group, _memoryUsage); +#endif + _desc.Clear(); _residentMipLevels = 0; } diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index adb244f26..f05ac5f64 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -25,14 +25,14 @@ struct GroupNameBuffer Char Buffer[30]; template - void Set(const T* str) + void Set(const T* str, bool autoFormat = false) { int32 max = StringUtils::Length(str), dst = 0; char prev = 0; for (int32 i = 0; i < max && dst < ARRAY_COUNT(Buffer) - 2; i++) { char cur = str[i]; - if (StringUtils::IsUpper(cur) && StringUtils::IsLower(prev)) + if (autoFormat && StringUtils::IsUpper(cur) && StringUtils::IsLower(prev)) Buffer[dst++] = '/'; Buffer[dst++] = cur; prev = cur; @@ -52,10 
+52,6 @@ struct GroupStackData { if (Count < ARRAY_COUNT(Stack)) Count++; - else - { - int a= 10; - } Stack[Count - 1] = (uint8)group; } @@ -112,8 +108,15 @@ namespace for (int32 i = 0; i < GROUPS_COUNT; i++) { const char* name = ScriptingEnum::GetName((ProfilerMemory::Groups)i); - GroupNames[i].Set(name); + GroupNames[i].Set(name, true); } +#define RENAME_GROUP(group, name) GroupNames[(int32)ProfilerMemory::Groups::group].Set(name) + RENAME_GROUP(GraphicsRenderTargets, "Graphics/RenderTargets"); + RENAME_GROUP(GraphicsCubeMaps, "Graphics/CubeMaps"); + RENAME_GROUP(GraphicsVolumeTextures, "Graphics/VolumeTextures"); + RENAME_GROUP(GraphicsVertexBuffers, "Graphics/VertexBuffers"); + RENAME_GROUP(GraphicsIndexBuffers, "Graphics/IndexBuffers"); +#undef RENAME_GROUP // Init constant memory PROFILE_MEM_INC(ProgramSize, Platform::GetMemoryStats().ProgramSizeMemory); @@ -162,31 +165,6 @@ namespace output.AppendLine(); } -#if 0 - // Print count of memory allocs count per group - for (int32 i = 0; i < GROUPS_COUNT; i++) - { - GroupInfo& group = groups[i]; - group.Group = (ProfilerMemory::Groups)i; - group.Size = 0; - } - PointersLocker.Lock(); - for (auto& e : Pointers) - groups[e.Value.Group].Size++; - PointersLocker.Unlock(); - Sorting::QuickSort(groups, GROUPS_COUNT); - output.Append(TEXT("Memory allocations count summary:")).AppendLine(); - for (int32 i = 0; i < maxCount; i++) - { - const GroupInfo& group = groups[i]; - if (group.Size == 0) - break; - const Char* name = GroupName[(int32)group.Group].Buffer; - output.AppendFormat(TEXT("{:>30}: {:>11}"), name, group.Size); - output.AppendLine(); - } -#endif - // Warn that data might be missing due to inactive profiler if (!ProfilerMemory::Enabled) output.AppendLine(TEXT("Detailed memory profiling is disabled. 
Run with command line: -mem")); @@ -243,8 +221,14 @@ void InitProfilerMemory(const Char* cmdLine) // Init hierarchy #define INIT_PARENT(parent, child) GroupParents[(int32)ProfilerMemory::Groups::child] = (uint8)ProfilerMemory::Groups::parent + INIT_PARENT(Malloc, MallocArena); INIT_PARENT(Graphics, GraphicsTextures); + INIT_PARENT(Graphics, GraphicsRenderTargets); + INIT_PARENT(Graphics, GraphicsCubeMaps); + INIT_PARENT(Graphics, GraphicsVolumeTextures); INIT_PARENT(Graphics, GraphicsBuffers); + INIT_PARENT(Graphics, GraphicsVertexBuffers); + INIT_PARENT(Graphics, GraphicsIndexBuffers); INIT_PARENT(Graphics, GraphicsMeshes); INIT_PARENT(Graphics, GraphicsShaders); INIT_PARENT(Graphics, GraphicsMaterials); @@ -414,4 +398,11 @@ void ProfilerMemory::OnMemoryFree(void* ptr) stack.SkipRecursion = false; } +void ProfilerMemory::OnGroupUpdate(Groups group, int64 sizeDelta, int64 countDetla) +{ + Platform::InterlockedAdd(&GroupMemory[(int32)group], sizeDelta); + Platform::InterlockedAdd(&GroupMemoryCount[(int32)group], countDetla); + UPDATE_PEEK(group); +} + #endif diff --git a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h index 206814560..a9dedadad 100644 --- a/Source/Engine/Profiler/ProfilerMemory.h +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -30,19 +30,32 @@ public: TotalUntracked, // Initial memory used by program upon startup (eg. executable size, static variables). ProgramSize, - // Total memory allocated via malloc. - Malloc, // General purpose engine memory. Engine, // Profiling tool memory overhead. Profiler, + // Total memory allocated via dynamic memory allocations. + Malloc, + // Total memory allocated via arena allocators (all pages). + MallocArena, + // Total graphics memory usage. Graphics, // Total textures memory usage. GraphicsTextures, + // Total render targets memory usage (textures used as target image for rendering). + GraphicsRenderTargets, + // Total cubemap textures memory usage (each cubemap is 6 textures). 
+ GraphicsCubeMaps, + // Total volume textures memory usage (3D textures). + GraphicsVolumeTextures, // Total buffers memory usage. GraphicsBuffers, + // Total vertex buffers memory usage. + GraphicsVertexBuffers, + // Total index buffers memory usage. + GraphicsIndexBuffers, // Total meshes memory usage (vertex and idnex buffers allocated by models). GraphicsMeshes, // Totoal shaders memory usage (shaders bytecode, PSOs data). @@ -95,6 +108,8 @@ public: // Total scripting memory allocated by game. Scripting, + // Total Visual scripting memory allocated by game (visual script graphs, data and runtime allocations). + ScriptingVisual, // Total User Interface components memory. UI, @@ -144,6 +159,15 @@ public: // Custom plugin-specific memory tracking. CustomPlugin9, + // Custom platform-specific memory tracking. + CustomPlatform0, + // Custom platform-specific memory tracking. + CustomPlatform1, + // Custom platform-specific memory tracking. + CustomPlatform2, + // Custom platform-specific memory tracking. + CustomPlatform3, + // Total editor-specific memory. 
Editor, @@ -219,6 +243,7 @@ public: static void OnMemoryAlloc(void* ptr, uint64 size); static void OnMemoryFree(void* ptr); + static void OnGroupUpdate(Groups group, int64 sizeDelta, int64 countDetla); public: /// From 72ee80242d812b9074f0e6b0102b5ba42a55e99c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 26 May 2025 05:37:53 +0200 Subject: [PATCH 017/211] Add integration with Tracy profiler to plot main memory categories --- Source/Engine/Engine/Engine.cpp | 7 +- Source/Engine/Profiler/ProfilerMemory.cpp | 112 +++++++++++++++------- 2 files changed, 85 insertions(+), 34 deletions(-) diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index 288e0da11..17a2f377b 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -80,8 +80,8 @@ Window* Engine::MainWindow = nullptr; int32 Engine::Main(const Char* cmdLine) { #if COMPILE_WITH_PROFILER - extern void InitProfilerMemory(const Char*); - InitProfilerMemory(cmdLine); + extern void InitProfilerMemory(const Char* cmdLine, int32 stage); + InitProfilerMemory(cmdLine, 0); #endif PROFILE_MEM_BEGIN(Engine); EngineImpl::CommandLine = cmdLine; @@ -109,6 +109,9 @@ int32 Engine::Main(const Char* cmdLine) Platform::Fatal(TEXT("Cannot init platform.")); return -1; } +#if COMPILE_WITH_PROFILER + InitProfilerMemory(cmdLine, 1); +#endif Platform::SetHighDpiAwarenessEnabled(!CommandLine::Options.LowDPI.IsTrue()); Time::StartupTime = DateTime::Now(); diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index f05ac5f64..a18004fc1 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -14,8 +14,10 @@ #include "Engine/Scripting/Enums.h" #include "Engine/Threading/ThreadLocal.h" #include "Engine/Utilities/StringConverter.h" +#include #define GROUPS_COUNT (int32)ProfilerMemory::Groups::MAX +#define USE_TRACY_MEMORY_PLOTS (defined(TRACY_ENABLE)) static_assert(GROUPS_COUNT <= MAX_uint8, 
"Fix memory profiler groups to fit a single byte."); @@ -23,6 +25,7 @@ static_assert(GROUPS_COUNT <= MAX_uint8, "Fix memory profiler groups to fit a si struct GroupNameBuffer { Char Buffer[30]; + char Ansi[30]; template void Set(const T* str, bool autoFormat = false) @@ -33,11 +36,16 @@ struct GroupNameBuffer { char cur = str[i]; if (autoFormat && StringUtils::IsUpper(cur) && StringUtils::IsLower(prev)) + { + Ansi[dst] = '/'; Buffer[dst++] = '/'; + } + Ansi[dst] = cur; Buffer[dst++] = cur; prev = cur; } Buffer[dst] = 0; + Ansi[dst] = 0; } }; @@ -93,40 +101,17 @@ namespace alignas(16) volatile int64 GroupMemory[GROUPS_COUNT] = {}; alignas(16) volatile int64 GroupMemoryPeek[GROUPS_COUNT] = {}; alignas(16) volatile int64 GroupMemoryCount[GROUPS_COUNT] = {}; +#ifdef USE_TRACY_MEMORY_PLOTS + alignas(16) volatile uint32 GroupTracyPlotEnable[(GROUPS_COUNT + 31) / 32] = {}; +#endif uint8 GroupParents[GROUPS_COUNT] = {}; ThreadLocal GroupStack; GroupNameBuffer GroupNames[GROUPS_COUNT]; - bool InitedNames = false; CriticalSection PointersLocker; Dictionary Pointers; - void InitNames() - { - if (InitedNames) - return; - InitedNames = true; - for (int32 i = 0; i < GROUPS_COUNT; i++) - { - const char* name = ScriptingEnum::GetName((ProfilerMemory::Groups)i); - GroupNames[i].Set(name, true); - } -#define RENAME_GROUP(group, name) GroupNames[(int32)ProfilerMemory::Groups::group].Set(name) - RENAME_GROUP(GraphicsRenderTargets, "Graphics/RenderTargets"); - RENAME_GROUP(GraphicsCubeMaps, "Graphics/CubeMaps"); - RENAME_GROUP(GraphicsVolumeTextures, "Graphics/VolumeTextures"); - RENAME_GROUP(GraphicsVertexBuffers, "Graphics/VertexBuffers"); - RENAME_GROUP(GraphicsIndexBuffers, "Graphics/IndexBuffers"); -#undef RENAME_GROUP - - // Init constant memory - PROFILE_MEM_INC(ProgramSize, Platform::GetMemoryStats().ProgramSizeMemory); - UPDATE_PEEK(ProfilerMemory::Groups::ProgramSize); - } - void Dump(StringBuilder& output, const int32 maxCount) { - InitNames(); - // Sort groups struct 
GroupInfo { @@ -170,11 +155,26 @@ namespace output.AppendLine(TEXT("Detailed memory profiling is disabled. Run with command line: -mem")); } +#ifdef USE_TRACY_MEMORY_PLOTS + FORCE_INLINE void UpdateGroupTracyPlot(ProfilerMemory::Groups group) + { + // Track only selected groups in Tracy + uint32 bit = (uint32)(1 << ((int32)group & 31)); + if ((GroupTracyPlotEnable[(int32)group / 32] & bit) == bit) + { + TracyPlot(GroupNames[(int32)group].Ansi, GroupMemory[(int32)group]); + } + } +#else +#define UpdateGroupTracyPlot(group) +#endif + FORCE_INLINE void AddGroupMemory(ProfilerMemory::Groups group, int64 add) { // Group itself Platform::InterlockedAdd(&GroupMemory[(int32)group], add); Platform::InterlockedIncrement(&GroupMemoryCount[(int32)group]); + UpdateGroupTracyPlot(group); UPDATE_PEEK(group); // Total memory @@ -188,6 +188,7 @@ namespace { Platform::InterlockedAdd(&GroupMemory[parent], add); Platform::InterlockedIncrement(&GroupMemoryCount[parent]); + UpdateGroupTracyPlot((ProfilerMemory::Groups)parent); UPDATE_PEEK(parent); parent = GroupParents[parent]; } @@ -196,26 +197,37 @@ namespace FORCE_INLINE void SubGroupMemory(ProfilerMemory::Groups group, int64 add) { // Group itself - int64 value = Platform::InterlockedAdd(&GroupMemory[(int32)group], add); + Platform::InterlockedAdd(&GroupMemory[(int32)group], add); Platform::InterlockedDecrement(&GroupMemoryCount[(int32)group]); + UpdateGroupTracyPlot(group); // Total memory - value = Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], add); + Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], add); Platform::InterlockedDecrement(&GroupMemoryCount[(int32)ProfilerMemory::Groups::TotalTracked]); // Group hierarchy parents uint8 parent = GroupParents[(int32)group]; while (parent != 0) { - value = Platform::InterlockedAdd(&GroupMemory[parent], add); + Platform::InterlockedAdd(&GroupMemory[parent], add); 
Platform::InterlockedDecrement(&GroupMemoryCount[parent]); + UpdateGroupTracyPlot((ProfilerMemory::Groups)parent); parent = GroupParents[parent]; } } } -void InitProfilerMemory(const Char* cmdLine) +void InitProfilerMemory(const Char* cmdLine, int32 stage) { + if (stage == 1) // Post-platform init + { + // Init constant memory + PROFILE_MEM_INC(ProgramSize, Platform::GetMemoryStats().ProgramSizeMemory); + UPDATE_PEEK(ProfilerMemory::Groups::ProgramSize); + + return; + } + // Check for command line option (memory profiling affects performance thus not active by default) ProfilerMemory::Enabled = StringUtils::FindIgnoreCase(cmdLine, TEXT("-mem")); @@ -237,6 +249,44 @@ void InitProfilerMemory(const Char* cmdLine) INIT_PARENT(Content, ContentAssets); INIT_PARENT(Content, ContentFiles); #undef INIT_PARENT + + // Init group names + for (int32 i = 0; i < GROUPS_COUNT; i++) + { + const char* name = ScriptingEnum::GetName((ProfilerMemory::Groups)i); + GroupNames[i].Set(name, true); + } +#define RENAME_GROUP(group, name) GroupNames[(int32)ProfilerMemory::Groups::group].Set(name) + RENAME_GROUP(GraphicsRenderTargets, "Graphics/RenderTargets"); + RENAME_GROUP(GraphicsCubeMaps, "Graphics/CubeMaps"); + RENAME_GROUP(GraphicsVolumeTextures, "Graphics/VolumeTextures"); + RENAME_GROUP(GraphicsVertexBuffers, "Graphics/VertexBuffers"); + RENAME_GROUP(GraphicsIndexBuffers, "Graphics/IndexBuffers"); +#undef RENAME_GROUP + + // Init Tracy +#ifdef USE_TRACY_MEMORY_PLOTS + // Toggle on specific groups only for high-level overview only +#define ENABLE_GROUP(group) GroupTracyPlotEnable[(uint32)ProfilerMemory::Groups::group / 32] |= (uint32)(1 << ((int32)ProfilerMemory::Groups::group & 31)) + ENABLE_GROUP(Graphics); + ENABLE_GROUP(Audio); + ENABLE_GROUP(Content); + ENABLE_GROUP(Level); + ENABLE_GROUP(Physics); + ENABLE_GROUP(Scripting); + ENABLE_GROUP(UI); +#undef ENABLE_GROUP + + // Setup plots + for (int32 i = 0; i < GROUPS_COUNT; i++) + { + uint32 bit = (uint32)(1 << ((int32)i & 31)); + if 
((GroupTracyPlotEnable[i / 32] & bit) == bit) + { + TracyPlotConfig(GroupNames[i].Ansi, tracy::PlotFormatType::Memory, false, true, 0); + } + } +#endif } void TickProfilerMemory() @@ -294,7 +344,6 @@ Array ProfilerMemory::GetGroupNames() { Array result; result.Resize((int32)Groups::MAX); - InitNames(); for (int32 i = 0; i < (int32)Groups::MAX; i++) result[i] = GroupNames[i].Buffer; return result; @@ -305,7 +354,6 @@ ProfilerMemory::GroupsArray ProfilerMemory::GetGroups(int32 mode) GroupsArray result; Platform::MemoryClear(&result, sizeof(result)); static_assert(ARRAY_COUNT(result.Values) >= (int32)Groups::MAX, "Update group array size."); - InitNames(); if (mode == 0) { for (int32 i = 0; i < (int32)Groups::MAX; i++) From ab61ed5a375cc83da470dda8e2ac6f2db60cb820 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 28 May 2025 04:03:44 +0200 Subject: [PATCH 018/211] Add more memory profiling insights and groups --- Source/Editor/Scripting/ScriptsBuilder.cpp | 3 ++ Source/Engine/Audio/AudioClip.cpp | 1 + Source/Engine/Content/JsonAsset.cpp | 1 + .../Engine/Engine/NativeInterop.Unmanaged.cs | 8 ++++ Source/Engine/Level/Level.cpp | 1 + Source/Engine/Navigation/NavCrowd.cpp | 4 ++ Source/Engine/Profiler/ProfilerMemory.cpp | 24 +++++++++++- Source/Engine/Profiler/ProfilerMemory.h | 6 +++ .../Engine/Renderer/AtmospherePreCompute.cpp | 3 ++ Source/Engine/Scripting/BinaryModule.cpp | 8 ++-- Source/Engine/Scripting/ManagedCLR/MCore.cpp | 2 +- Source/Engine/Scripting/ManagedCLR/MCore.h | 1 + Source/Engine/Scripting/Runtime/DotNet.cpp | 38 +++++++++++-------- Source/Engine/Scripting/Runtime/Mono.cpp | 5 +++ Source/Engine/Scripting/Runtime/None.cpp | 5 +++ Source/Engine/Scripting/Scripting.cpp | 1 + 16 files changed, 89 insertions(+), 22 deletions(-) diff --git a/Source/Editor/Scripting/ScriptsBuilder.cpp b/Source/Editor/Scripting/ScriptsBuilder.cpp index ea58f3240..8f33d04d7 100644 --- a/Source/Editor/Scripting/ScriptsBuilder.cpp +++ 
b/Source/Editor/Scripting/ScriptsBuilder.cpp @@ -664,6 +664,9 @@ bool ScriptsBuilderService::Init() void ScriptsBuilderService::Update() { + PROFILE_CPU(); + PROFILE_MEM(Editor); + // Send compilation events { ScopeLock scopeLock(_compileEventsLocker); diff --git a/Source/Engine/Audio/AudioClip.cpp b/Source/Engine/Audio/AudioClip.cpp index 2ac9d218c..2ca7f3512 100644 --- a/Source/Engine/Audio/AudioClip.cpp +++ b/Source/Engine/Audio/AudioClip.cpp @@ -19,6 +19,7 @@ REGISTER_BINARY_ASSET_WITH_UPGRADER(AudioClip, "FlaxEngine.AudioClip", AudioClip bool AudioClip::StreamingTask::Run() { + PROFILE_MEM(Audio); AssetReference ref = _asset.Get(); if (ref == nullptr || AudioBackend::Instance == nullptr) return true; diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index 04487eb65..1aa434c41 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -310,6 +310,7 @@ Asset::LoadResult JsonAssetBase::loadAsset() void JsonAssetBase::unload(bool isReloading) { + PROFILE_MEM(ContentAssets); ISerializable::SerializeDocument tmp; Document.Swap(tmp); Data = nullptr; diff --git a/Source/Engine/Engine/NativeInterop.Unmanaged.cs b/Source/Engine/Engine/NativeInterop.Unmanaged.cs index 7692f375e..7c1e2e64a 100644 --- a/Source/Engine/Engine/NativeInterop.Unmanaged.cs +++ b/Source/Engine/Engine/NativeInterop.Unmanaged.cs @@ -1277,6 +1277,14 @@ namespace FlaxEngine.Interop return GC.MaxGeneration; } + [UnmanagedCallersOnly] + internal static void GCMemoryInfo(long* totalCommitted, long* heapSize) + { + GCMemoryInfo gcMemoryInfo = GC.GetGCMemoryInfo(); + *totalCommitted = gcMemoryInfo.TotalCommittedBytes; + *heapSize = gcMemoryInfo.HeapSizeBytes; + } + [UnmanagedCallersOnly] internal static void GCWaitForPendingFinalizers() { diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 5bcd98994..1233282be 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -813,6 +813,7 
@@ bool LevelImpl::unloadScene(Scene* scene) bool LevelImpl::unloadScenes() { + PROFILE_MEM(Level); auto scenes = Level::Scenes; for (int32 i = scenes.Count() - 1; i >= 0; i--) { diff --git a/Source/Engine/Navigation/NavCrowd.cpp b/Source/Engine/Navigation/NavCrowd.cpp index cb2a4ebee..ed7f3ba7f 100644 --- a/Source/Engine/Navigation/NavCrowd.cpp +++ b/Source/Engine/Navigation/NavCrowd.cpp @@ -7,12 +7,14 @@ #include "Engine/Level/Level.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" #include NavCrowd::NavCrowd(const SpawnParams& params) : ScriptingObject(params) { + PROFILE_MEM(Navigation); _crowd = dtAllocCrowd(); } @@ -51,6 +53,7 @@ bool NavCrowd::Init(float maxAgentRadius, int32 maxAgents, NavMeshRuntime* navMe if (!_crowd || !navMesh) return true; PROFILE_CPU(); + PROFILE_MEM(Navigation); // This can happen on game start when no navmesh is loaded yet (eg. navmesh tiles data is during streaming) so wait for navmesh if (navMesh->GetNavMesh() == nullptr) @@ -175,6 +178,7 @@ void NavCrowd::RemoveAgent(int32 id) void NavCrowd::Update(float dt) { PROFILE_CPU(); + PROFILE_MEM(Navigation); _crowd->update(Math::Max(dt, ZeroTolerance), nullptr); } diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index a18004fc1..61433c0e7 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -12,6 +12,7 @@ #include "Engine/Platform/MemoryStats.h" #include "Engine/Platform/File.h" #include "Engine/Scripting/Enums.h" +#include "Engine/Scripting/ManagedCLR/MCore.h" #include "Engine/Threading/ThreadLocal.h" #include "Engine/Utilities/StringConverter.h" #include @@ -24,8 +25,8 @@ static_assert(GROUPS_COUNT <= MAX_uint8, "Fix memory profiler groups to fit a si // Compact name storage. 
struct GroupNameBuffer { - Char Buffer[30]; - char Ansi[30]; + Char Buffer[40]; + char Ansi[40]; template void Set(const T* str, bool autoFormat = false) @@ -248,6 +249,10 @@ void InitProfilerMemory(const Char* cmdLine, int32 stage) INIT_PARENT(Animations, AnimationsData); INIT_PARENT(Content, ContentAssets); INIT_PARENT(Content, ContentFiles); + INIT_PARENT(Scripting, ScriptingVisual); + INIT_PARENT(Scripting, ScriptingCSharp); + INIT_PARENT(ScriptingCSharp, ScriptingCSharpGCCommitted); + INIT_PARENT(ScriptingCSharp, ScriptingCSharpGCHeap); #undef INIT_PARENT // Init group names @@ -262,6 +267,8 @@ void InitProfilerMemory(const Char* cmdLine, int32 stage) RENAME_GROUP(GraphicsVolumeTextures, "Graphics/VolumeTextures"); RENAME_GROUP(GraphicsVertexBuffers, "Graphics/VertexBuffers"); RENAME_GROUP(GraphicsIndexBuffers, "Graphics/IndexBuffers"); + RENAME_GROUP(ScriptingCSharpGCCommitted, "Scripting/CSharp/GC/Committed"); + RENAME_GROUP(ScriptingCSharpGCHeap, "Scripting/CSharp/GC/Heap"); #undef RENAME_GROUP // Init Tracy @@ -291,10 +298,23 @@ void InitProfilerMemory(const Char* cmdLine, int32 stage) void TickProfilerMemory() { + // Update .NET GC memory stats + int64 totalCommitted, heapSize; + MCore::GC::MemoryInfo(totalCommitted, heapSize); + int64 gcComittedDelta = totalCommitted - GroupMemory[(int32)ProfilerMemory::Groups::ScriptingCSharpGCCommitted]; + GroupMemory[(int32)ProfilerMemory::Groups::ScriptingCSharpGCCommitted] = totalCommitted; + GroupMemory[(int32)ProfilerMemory::Groups::ScriptingCSharpGCHeap] = heapSize; + UPDATE_PEEK(ProfilerMemory::Groups::ScriptingCSharpGCCommitted); + UPDATE_PEEK(ProfilerMemory::Groups::ScriptingCSharpGCHeap); + Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::TotalTracked], gcComittedDelta); + // Update profiler memory PointersLocker.Lock(); GroupMemory[(int32)ProfilerMemory::Groups::Profiler] = sizeof(GroupMemory) + sizeof(GroupNames) + sizeof(GroupStack) + +#ifdef USE_TRACY_MEMORY_PLOTS + 
sizeof(GroupTracyPlotEnable) + +#endif Pointers.Capacity() * sizeof(Dictionary::Bucket); PointersLocker.Unlock(); diff --git a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h index a9dedadad..1b1ad5ac3 100644 --- a/Source/Engine/Profiler/ProfilerMemory.h +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -110,6 +110,12 @@ public: Scripting, // Total Visual scripting memory allocated by game (visual script graphs, data and runtime allocations). ScriptingVisual, + // Total C# scripting memory allocated by game (runtime assemblies, managed interop and runtime allocations). + ScriptingCSharp, + // Total amount of committed virtual memory in use by the .NET GC, as observed during the latest garbage collection. + ScriptingCSharpGCCommitted, + // Total managed GC heap size (including fragmentation), as observed during the latest garbage collection. + ScriptingCSharpGCHeap, // Total User Interface components memory. UI, diff --git a/Source/Engine/Renderer/AtmospherePreCompute.cpp b/Source/Engine/Renderer/AtmospherePreCompute.cpp index 81cee0cb6..595ebcca5 100644 --- a/Source/Engine/Renderer/AtmospherePreCompute.cpp +++ b/Source/Engine/Renderer/AtmospherePreCompute.cpp @@ -342,6 +342,9 @@ void AtmospherePreComputeService::Update() } else if (_isUpdatePending && (_task == nullptr || !_task->Enabled)) { + PROFILE_CPU(); + PROFILE_MEM(Graphics); + // TODO: init but without a stalls, just wait for resources loaded and then start rendering // Init service diff --git a/Source/Engine/Scripting/BinaryModule.cpp b/Source/Engine/Scripting/BinaryModule.cpp index da01a9f07..ef76de61c 100644 --- a/Source/Engine/Scripting/BinaryModule.cpp +++ b/Source/Engine/Scripting/BinaryModule.cpp @@ -763,7 +763,7 @@ ManagedBinaryModule* ManagedBinaryModule::GetModule(const MAssembly* assembly) ScriptingObject* ManagedBinaryModule::ManagedObjectSpawn(const ScriptingObjectSpawnParams& params) { - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); // Create native 
object ScriptingTypeHandle managedTypeHandle = params.Type; @@ -935,7 +935,7 @@ void ManagedBinaryModule::OnLoaded(MAssembly* assembly) { #if !COMPILE_WITHOUT_CSHARP PROFILE_CPU(); - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ASSERT(ClassToTypeIndex.IsEmpty()); ScopeLock lock(Locker); @@ -1032,7 +1032,7 @@ void ManagedBinaryModule::InitType(MClass* mclass) const StringAnsiView typeName = mclass->GetFullName(); if (TypeNameToTypeIndex.ContainsKey(typeName)) return; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); // Find first native base C++ class of this C# class MClass* baseClass = mclass->GetBaseClass(); @@ -1192,7 +1192,7 @@ void ManagedBinaryModule::OnUnloading(MAssembly* assembly) void ManagedBinaryModule::OnUnloaded(MAssembly* assembly) { PROFILE_CPU(); - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); // Clear managed-only types Types.Resize(_firstManagedTypeIndex); diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.cpp b/Source/Engine/Scripting/ManagedCLR/MCore.cpp index 675e09ddf..0b38730e8 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp @@ -92,7 +92,7 @@ bool MAssembly::Load(const String& assemblyPath, const StringView& nativePath) if (IsLoaded()) return false; PROFILE_CPU(); - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ZoneText(*assemblyPath, assemblyPath.Length()); Stopwatch stopwatch; diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.h b/Source/Engine/Scripting/ManagedCLR/MCore.h index 549dfadf6..cedebe7b8 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.h +++ b/Source/Engine/Scripting/ManagedCLR/MCore.h @@ -122,6 +122,7 @@ public: static void Collect(int32 generation); static void Collect(int32 generation, MGCCollectionMode collectionMode, bool blocking, bool compacting); static int32 MaxGeneration(); + static void MemoryInfo(int64& totalCommitted, int64& heapSize); static void WaitForPendingFinalizers(); static void WriteRef(void* ptr, 
MObject* ref); static void WriteValue(void* dst, void* src, int32 count, const MClass* klass); diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 24e07859d..5b67670e5 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -282,7 +282,7 @@ void MCore::UnloadDomain(const StringAnsi& domainName) bool MCore::LoadEngine() { PROFILE_CPU(); - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); // Initialize hostfxr if (InitHostfxr()) @@ -550,6 +550,12 @@ int32 MCore::GC::MaxGeneration() return maxGeneration; } +void MCore::GC::MemoryInfo(int64& totalCommitted, int64& heapSize) +{ + static void* GCMemoryInfoPtr = GetStaticMethodPointer(TEXT("GCMemoryInfo")); + CallStaticMethod(GCMemoryInfoPtr, &totalCommitted, &heapSize); +} + void MCore::GC::WaitForPendingFinalizers() { PROFILE_CPU(); @@ -737,7 +743,7 @@ const MAssembly::ClassesDictionary& MAssembly::GetClasses() const if (_hasCachedClasses || !IsLoaded()) return _classes; PROFILE_CPU(); - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); Stopwatch stopwatch; #if TRACY_ENABLE @@ -800,7 +806,7 @@ void GetAssemblyName(void* assemblyHandle, StringAnsi& name, StringAnsi& fullnam DEFINE_INTERNAL_CALL(void) NativeInterop_CreateClass(NativeClassDefinitions* managedClass, void* assemblyHandle) { - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); MAssembly* assembly = GetAssembly(assemblyHandle); if (assembly == nullptr) @@ -836,7 +842,7 @@ bool MAssembly::LoadCorlib() if (IsLoaded()) return false; PROFILE_CPU(); - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); #if TRACY_ENABLE const StringAnsiView name("Corlib"); ZoneText(*name, name.Length()); @@ -1060,7 +1066,7 @@ const Array& MClass::GetMethods() const { if (_hasCachedMethods) return _methods; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if 
(_hasCachedMethods) return _methods; @@ -1099,7 +1105,7 @@ const Array& MClass::GetFields() const { if (_hasCachedFields) return _fields; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if (_hasCachedFields) return _fields; @@ -1126,7 +1132,7 @@ const Array& MClass::GetEvents() const { if (_hasCachedEvents) return _events; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); // TODO: implement MEvent in .NET @@ -1149,7 +1155,7 @@ const Array& MClass::GetProperties() const { if (_hasCachedProperties) return _properties; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if (_hasCachedProperties) return _properties; @@ -1176,7 +1182,7 @@ const Array& MClass::GetInterfaces() const { if (_hasCachedInterfaces) return _interfaces; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if (_hasCachedInterfaces) return _interfaces; @@ -1216,7 +1222,7 @@ const Array& MClass::GetAttributes() const { if (_hasCachedAttributes) return _attributes; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1401,7 +1407,7 @@ const Array& MField::GetAttributes() const { if (_hasCachedAttributes) return _attributes; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1467,7 +1473,7 @@ void MMethod::CacheSignature() const ScopeLock lock(BinaryModule::Locker); if (_hasCachedSignature) return; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); static void* GetMethodReturnTypePtr = GetStaticMethodPointer(TEXT("GetMethodReturnType")); static void* GetMethodParameterTypesPtr = GetStaticMethodPointer(TEXT("GetMethodParameterTypes")); @@ -1568,7 +1574,7 @@ const Array& MMethod::GetAttributes() const { if (_hasCachedAttributes) return _attributes; - 
PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1658,7 +1664,7 @@ const Array& MProperty::GetAttributes() const { if (_hasCachedAttributes) return _attributes; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); if (_hasCachedAttributes) return _attributes; @@ -1689,7 +1695,7 @@ MClass* GetOrCreateClass(MType* typeHandle) { if (!typeHandle) return nullptr; - PROFILE_MEM(Scripting); + PROFILE_MEM(ScriptingCSharp); ScopeLock lock(BinaryModule::Locker); MClass* klass; if (!CachedClassHandles.TryGet(typeHandle, klass)) @@ -1911,6 +1917,7 @@ void* GetStaticMethodPointer(StringView methodName) if (CachedFunctions.TryGet(methodName, fun)) return fun; PROFILE_CPU(); + PROFILE_MEM(ScriptingCSharp); const int rc = get_function_pointer(NativeInteropTypeName, FLAX_CORECLR_STRING(methodName).Get(), UNMANAGEDCALLERSONLY_METHOD, nullptr, nullptr, &fun); if (rc != 0) LOG(Fatal, "Failed to get unmanaged function pointer for method '{0}': 0x{1:x}", methodName, (unsigned int)rc); @@ -2278,6 +2285,7 @@ void* GetStaticMethodPointer(StringView methodName) if (CachedFunctions.TryGet(methodName, fun)) return fun; PROFILE_CPU(); + PROFILE_MEM(ScriptingCSharp); static MonoClass* nativeInteropClass = nullptr; if (!nativeInteropClass) diff --git a/Source/Engine/Scripting/Runtime/Mono.cpp b/Source/Engine/Scripting/Runtime/Mono.cpp index 86b800e97..06392f932 100644 --- a/Source/Engine/Scripting/Runtime/Mono.cpp +++ b/Source/Engine/Scripting/Runtime/Mono.cpp @@ -864,6 +864,11 @@ int32 MCore::GC::MaxGeneration() return mono_gc_max_generation(); } +void MCore::GC::MemoryInfo(int64& totalCommitted, int64& heapSize) +{ + totalCommitted = heapSize = 0; +} + void MCore::GC::WaitForPendingFinalizers() { PROFILE_CPU(); diff --git a/Source/Engine/Scripting/Runtime/None.cpp b/Source/Engine/Scripting/Runtime/None.cpp index 580029eef..1ddaeae8e 100644 --- 
a/Source/Engine/Scripting/Runtime/None.cpp +++ b/Source/Engine/Scripting/Runtime/None.cpp @@ -190,6 +190,11 @@ int32 MCore::GC::MaxGeneration() return 0; } +void MCore::GC::MemoryInfo(int64& totalCommitted, int64& heapSize) +{ + totalCommitted = heapSize = 0; +} + void MCore::GC::WaitForPendingFinalizers() { } diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index 8c27dc09f..3a69a7601 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -617,6 +617,7 @@ bool Scripting::Load() void Scripting::Release() { PROFILE_CPU(); + PROFILE_MEM(Scripting); // Note: this action can be called from main thread (due to Mono problems with assemblies actions from other threads) ASSERT(IsInMainThread()); From 03d52d4eb99f0daded2d28956c1fd1381972386d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 28 May 2025 04:05:12 +0200 Subject: [PATCH 019/211] Add support for building engine without logging --- Source/Editor/Analytics/EditorAnalytics.cpp | 4 ++ Source/Editor/Managed/ManagedEditor.cpp | 4 ++ Source/Engine/Content/Assets/VisualScript.cpp | 2 + Source/Engine/Core/Log.cpp | 3 ++ Source/Engine/Core/Log.h | 52 +++++++++++-------- Source/Engine/Core/LogContext.cpp | 2 + Source/Engine/Debug/Exception.cpp | 2 + Source/Engine/Engine/Engine.cpp | 8 +++ Source/Engine/Graphics/Graphics.cpp | 4 ++ .../Vulkan/RenderToolsVulkan.cpp | 2 + Source/Engine/Platform/Base/PlatformBase.cpp | 2 + Source/Engine/Platform/Mac/MacPlatform.cpp | 4 ++ Source/Engine/Profiler/ProfilerMemory.cpp | 2 + .../Internal/EngineInternalCalls.cpp | 4 ++ Source/Engine/Scripting/ManagedCLR/MCore.cpp | 2 + 15 files changed, 76 insertions(+), 21 deletions(-) diff --git a/Source/Editor/Analytics/EditorAnalytics.cpp b/Source/Editor/Analytics/EditorAnalytics.cpp index 385492789..ab4bf7f33 100644 --- a/Source/Editor/Analytics/EditorAnalytics.cpp +++ b/Source/Editor/Analytics/EditorAnalytics.cpp @@ -174,7 +174,9 @@ void 
EditorAnalytics::StartSession() // Bind events GameCooker::OnEvent.Bind(); ShadowsOfMordor::Builder::Instance()->OnBuildStarted.Bind(); +#if LOG_ENABLE Log::Logger::OnError.Bind(); +#endif } void EditorAnalytics::EndSession() @@ -187,7 +189,9 @@ void EditorAnalytics::EndSession() // Unbind events GameCooker::OnEvent.Unbind(); ShadowsOfMordor::Builder::Instance()->OnBuildStarted.Unbind(); +#if LOG_ENABLE Log::Logger::OnError.Unbind(); +#endif // End session { diff --git a/Source/Editor/Managed/ManagedEditor.cpp b/Source/Editor/Managed/ManagedEditor.cpp index d65ae6e0a..e270d08f8 100644 --- a/Source/Editor/Managed/ManagedEditor.cpp +++ b/Source/Editor/Managed/ManagedEditor.cpp @@ -156,7 +156,9 @@ ManagedEditor::ManagedEditor() lightmapsBuilder->OnBuildProgress.Bind(); lightmapsBuilder->OnBuildFinished.Bind(); CSG::Builder::OnBrushModified.Bind(); +#if LOG_ENABLE Log::Logger::OnMessage.Bind(); +#endif VisualScripting::DebugFlow.Bind(); } @@ -172,7 +174,9 @@ ManagedEditor::~ManagedEditor() lightmapsBuilder->OnBuildProgress.Unbind(); lightmapsBuilder->OnBuildFinished.Unbind(); CSG::Builder::OnBrushModified.Unbind(); +#if LOG_ENABLE Log::Logger::OnMessage.Unbind(); +#endif VisualScripting::DebugFlow.Unbind(); } diff --git a/Source/Engine/Content/Assets/VisualScript.cpp b/Source/Engine/Content/Assets/VisualScript.cpp index 2e6ffb735..329696dea 100644 --- a/Source/Engine/Content/Assets/VisualScript.cpp +++ b/Source/Engine/Content/Assets/VisualScript.cpp @@ -38,10 +38,12 @@ namespace void PrintStack(LogType type) { +#if LOG_ENABLE const String stack = VisualScripting::GetStackTrace(); Log::Logger::Write(type, TEXT("Visual Script stack trace:")); Log::Logger::Write(type, stack); Log::Logger::Write(type, TEXT("")); +#endif } bool SerializeValue(const Variant& a, const Variant& b) diff --git a/Source/Engine/Core/Log.cpp b/Source/Engine/Core/Log.cpp index 85cc3cc02..013031ced 100644 --- a/Source/Engine/Core/Log.cpp +++ b/Source/Engine/Core/Log.cpp @@ -1,6 +1,7 @@ // Copyright 
(c) Wojciech Figat. All rights reserved. #include "Log.h" +#if LOG_ENABLE #include "Engine/Engine/CommandLine.h" #include "Engine/Core/Types/DateTime.h" #include "Engine/Core/Collections/Array.h" @@ -310,3 +311,5 @@ const Char* ToString(LogType e) } return result; } + +#endif diff --git a/Source/Engine/Core/Log.h b/Source/Engine/Core/Log.h index 2db769e45..f09099d11 100644 --- a/Source/Engine/Core/Log.h +++ b/Source/Engine/Core/Log.h @@ -7,27 +7,6 @@ #include "Engine/Core/Types/String.h" #include "Engine/Core/Types/StringView.h" -// Enable/disable auto flush function -#define LOG_ENABLE_AUTO_FLUSH 1 - -/// -/// Sends a formatted message to the log file (message type - describes level of the log (see LogType enum)) -/// -#define LOG(messageType, format, ...) Log::Logger::Write(LogType::messageType, ::String::Format(TEXT(format), ##__VA_ARGS__)) - -/// -/// Sends a string message to the log file (message type - describes level of the log (see LogType enum)) -/// -#define LOG_STR(messageType, str) Log::Logger::Write(LogType::messageType, str) - -#if LOG_ENABLE_AUTO_FLUSH -// Noop as log is auto-flushed on write -#define LOG_FLUSH() -#else -// Flushes the log file buffer -#define LOG_FLUSH() Log::Logger::Flush() -#endif - /// /// The log message types. /// @@ -54,6 +33,29 @@ API_ENUM() enum class LogType Fatal = 8, }; +#if LOG_ENABLE + +// Enable/disable auto flush function +#define LOG_ENABLE_AUTO_FLUSH 1 + +/// +/// Sends a formatted message to the log file (message type - describes level of the log (see LogType enum)) +/// +#define LOG(messageType, format, ...) 
Log::Logger::Write(LogType::messageType, ::String::Format(TEXT(format), ##__VA_ARGS__)) + +/// +/// Sends a string message to the log file (message type - describes level of the log (see LogType enum)) +/// +#define LOG_STR(messageType, str) Log::Logger::Write(LogType::messageType, str) + +#if LOG_ENABLE_AUTO_FLUSH +// Noop as log is auto-flushed on write +#define LOG_FLUSH() +#else +// Flushes the log file buffer +#define LOG_FLUSH() Log::Logger::Flush() +#endif + extern const Char* ToString(LogType e); namespace Log @@ -186,3 +188,11 @@ namespace Log static void ProcessLogMessage(LogType type, const StringView& msg, fmt_flax::memory_buffer& w); }; } + +#else + +#define LOG(messageType, format, ...) +#define LOG_STR(messageType, str) +#define LOG_FLUSH() + +#endif diff --git a/Source/Engine/Core/LogContext.cpp b/Source/Engine/Core/LogContext.cpp index 8a14ad48c..5eeec5738 100644 --- a/Source/Engine/Core/LogContext.cpp +++ b/Source/Engine/Core/LogContext.cpp @@ -47,6 +47,7 @@ ThreadLocal GlobalLogContexts; void LogContext::Print(LogType verbosity) { +#if LOG_ENABLE auto& stack = GlobalLogContexts.Get(); if (stack.Count == 0) return; @@ -102,6 +103,7 @@ void LogContext::Print(LogType verbosity) // Print message Log::Logger::Write(verbosity, msg.ToStringView()); } +#endif } void LogContext::Push(const Guid& id) diff --git a/Source/Engine/Debug/Exception.cpp b/Source/Engine/Debug/Exception.cpp index d866d4867..7eb112aa1 100644 --- a/Source/Engine/Debug/Exception.cpp +++ b/Source/Engine/Debug/Exception.cpp @@ -4,6 +4,8 @@ Log::Exception::~Exception() { +#if LOG_ENABLE // Always write exception to the log Logger::Write(_level, ToString()); +#endif } diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index 17a2f377b..a20c1780b 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -150,7 +150,9 @@ int32 Engine::Main(const Char* cmdLine) { // End LOG(Warning, "Loading project cancelled. 
Closing..."); +#if LOG_ENABLE Log::Logger::Dispose(); +#endif return 0; } #endif @@ -168,8 +170,10 @@ int32 Engine::Main(const Char* cmdLine) #if !USE_EDITOR && (PLATFORM_WINDOWS || PLATFORM_LINUX || PLATFORM_MAC) EngineImpl::RunInBackground = PlatformSettings::Get()->RunInBackground; #endif +#if LOG_ENABLE Log::Logger::WriteFloor(); LOG_FLUSH(); +#endif Time::Synchronize(); EngineImpl::IsReady = true; PROFILE_MEM_END(); @@ -546,14 +550,17 @@ void Engine::OnExit() ProfilerGPU::Dispose(); #endif +#if LOG_ENABLE // Close logging service Log::Logger::Dispose(); +#endif Platform::Exit(); } void EngineImpl::InitLog() { +#if LOG_ENABLE // Initialize logger Log::Logger::Init(); @@ -607,6 +614,7 @@ void EngineImpl::InitLog() Platform::LogInfo(); LOG_FLUSH(); +#endif } void EngineImpl::InitPaths() diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 43bd1a76d..733e8c222 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -101,8 +101,10 @@ bool GraphicsService::Init() PROFILE_MEM(Graphics); // Create and initialize graphics device +#if LOG_ENABLE Log::Logger::WriteFloor(); LOG(Info, "Creating Graphics Device..."); +#endif PixelFormatExtensions::Init(); GPUDevice* device = nullptr; @@ -216,7 +218,9 @@ bool GraphicsService::Init() { return true; } +#if LOG_ENABLE Log::Logger::WriteFloor(); +#endif return false; } diff --git a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp index 961799a42..d535f6ea1 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp @@ -250,8 +250,10 @@ void RenderToolsVulkan::LogVkResult(VkResult result, const char* file, uint32 li errorType = FatalErrorType::GPUCrash; if (errorType != FatalErrorType::None) Platform::Fatal(msg, nullptr, errorType); +#if LOG_ENABLE else Log::Logger::Write(fatal ? 
LogType::Fatal : LogType::Error, msg); +#endif } bool RenderToolsVulkan::HasExtension(const Array& extensions, const char* name) diff --git a/Source/Engine/Platform/Base/PlatformBase.cpp b/Source/Engine/Platform/Base/PlatformBase.cpp index 9ba6b7bd6..7e8428e64 100644 --- a/Source/Engine/Platform/Base/PlatformBase.cpp +++ b/Source/Engine/Platform/Base/PlatformBase.cpp @@ -310,6 +310,7 @@ void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType er Engine::RequestingExit(); // Collect crash info (platform-dependant implementation that might collect stack trace and/or create memory dump) +#if LOG_ENABLE { // Log separation for crash info LOG_FLUSH(); @@ -406,6 +407,7 @@ void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType er LOG(Error, "Crash info collected."); Log::Logger::WriteFloor(); } +#endif // Show error message if (Engine::ReportCrash.IsBinded()) diff --git a/Source/Engine/Platform/Mac/MacPlatform.cpp b/Source/Engine/Platform/Mac/MacPlatform.cpp index fca66cf42..6cb8b1153 100644 --- a/Source/Engine/Platform/Mac/MacPlatform.cpp +++ b/Source/Engine/Platform/Mac/MacPlatform.cpp @@ -490,7 +490,9 @@ int32 MacPlatform::CreateProcess(CreateProcessSettings& settings) StringView lineView(line); if (line[line.Length() - 1] == '\n') lineView = StringView(line.Get(), line.Length() - 1); +#if LOG_ENABLE Log::Logger::Write(LogType::Info, lineView); +#endif } [[stdoutPipe fileHandleForReading] waitForDataInBackgroundAndNotify]; } @@ -517,7 +519,9 @@ int32 MacPlatform::CreateProcess(CreateProcessSettings& settings) StringView lineView(line); if (line[line.Length() - 1] == '\n') lineView = StringView(line.Get(), line.Length() - 1); +#if LOG_ENABLE Log::Logger::Write(LogType::Error, lineView); +#endif } [[stderrPipe fileHandleForReading] waitForDataInBackgroundAndNotify]; } diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index 61433c0e7..0aa2ba053 100644 --- 
a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -394,6 +394,7 @@ ProfilerMemory::GroupsArray ProfilerMemory::GetGroups(int32 mode) void ProfilerMemory::Dump(const StringView& options) { +#if LOG_ENABLE bool file = options.Contains(TEXT("file")); StringBuilder output; int32 maxCount = 20; @@ -408,6 +409,7 @@ void ProfilerMemory::Dump(const StringView& options) return; } LOG_STR(Info, output.ToStringView()); +#endif } void ProfilerMemory::OnMemoryAlloc(void* ptr, uint64 size) diff --git a/Source/Engine/Scripting/Internal/EngineInternalCalls.cpp b/Source/Engine/Scripting/Internal/EngineInternalCalls.cpp index 21323d2bd..a310409c0 100644 --- a/Source/Engine/Scripting/Internal/EngineInternalCalls.cpp +++ b/Source/Engine/Scripting/Internal/EngineInternalCalls.cpp @@ -52,13 +52,16 @@ DEFINE_INTERNAL_CALL(int32) PlatformInternal_MemoryCompare(const void* buf1, con DEFINE_INTERNAL_CALL(void) DebugLogHandlerInternal_LogWrite(LogType level, MString* msgObj) { +#if LOG_ENABLE StringView msg; MUtils::ToString(msgObj, msg); Log::Logger::Write(level, msg); +#endif } DEFINE_INTERNAL_CALL(void) DebugLogHandlerInternal_Log(LogType level, MString* msgObj, ScriptingObject* obj, MString* stackTrace) { +#if LOG_ENABLE if (msgObj == nullptr) return; @@ -71,6 +74,7 @@ DEFINE_INTERNAL_CALL(void) DebugLogHandlerInternal_Log(LogType level, MString* m // TODO: maybe option for build to threat warnings and errors as fatal errors? 
//const String logMessage = String::Format(TEXT("Debug:{1} {2}"), objName, *msg); Log::Logger::Write(level, msg); +#endif } DEFINE_INTERNAL_CALL(void) DebugLogHandlerInternal_LogException(MObject* exception, ScriptingObject* obj) diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.cpp b/Source/Engine/Scripting/ManagedCLR/MCore.cpp index 0b38730e8..4300434e3 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp @@ -246,6 +246,7 @@ MType* MEvent::GetType() const void MException::Log(const LogType type, const Char* target) { +#if LOG_ENABLE // Log inner exceptions chain MException* inner = InnerException; while (inner) @@ -260,6 +261,7 @@ void MException::Log(const LogType type, const Char* target) const String info = target && *target ? String::Format(TEXT("Exception has been thrown during {0}."), target) : TEXT("Exception has been thrown."); Log::Logger::Write(LogType::Warning, String::Format(TEXT("{0} {1}\nStack strace:\n{2}"), info, Message, stackTrace)); Log::Logger::Write(type, String::Format(TEXT("{0}\n{1}"), info, Message)); +#endif } MType* MProperty::GetType() const From 4fe9fdded67335d4112b29fcc836acd8b1043aba Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 28 May 2025 04:10:47 +0200 Subject: [PATCH 020/211] Optimize redundant string allocation in managed binary module unload --- Source/Engine/Scripting/BinaryModule.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Engine/Scripting/BinaryModule.cpp b/Source/Engine/Scripting/BinaryModule.cpp index ef76de61c..07feedf10 100644 --- a/Source/Engine/Scripting/BinaryModule.cpp +++ b/Source/Engine/Scripting/BinaryModule.cpp @@ -1184,8 +1184,7 @@ void ManagedBinaryModule::OnUnloading(MAssembly* assembly) for (int32 i = _firstManagedTypeIndex; i < Types.Count(); i++) { const ScriptingType& type = Types[i]; - const StringAnsi typeName(type.Fullname.Get(), type.Fullname.Length()); - TypeNameToTypeIndex.Remove(typeName); + 
TypeNameToTypeIndex.Remove(type.Fullname); } } From 8eff098850f567a48c95a6c73751499a30f6b309 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 28 May 2025 04:30:08 +0200 Subject: [PATCH 021/211] Fix Linux build --- Source/Engine/Profiler/ProfilerMemory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index 0aa2ba053..dc91dbc02 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -163,7 +163,7 @@ namespace uint32 bit = (uint32)(1 << ((int32)group & 31)); if ((GroupTracyPlotEnable[(int32)group / 32] & bit) == bit) { - TracyPlot(GroupNames[(int32)group].Ansi, GroupMemory[(int32)group]); + TracyPlot(GroupNames[(int32)group].Ansi, (int64_t)GroupMemory[(int32)group]); } } #else From 0670c0bbd3c7bd092fc279599cbda606bc794e1d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 5 Jun 2025 18:32:36 +0200 Subject: [PATCH 022/211] Fix compilation warnings --- Source/Engine/Core/Memory/Allocation.cpp | 6 +++--- Source/Engine/Profiler/ProfilerMemory.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp index b2e74b8b6..c55ab1dab 100644 --- a/Source/Engine/Core/Memory/Allocation.cpp +++ b/Source/Engine/Core/Memory/Allocation.cpp @@ -11,7 +11,7 @@ void ArenaAllocator::Free() while (page) { #if COMPILE_WITH_PROFILER - ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, -page->Size, -1); + ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, -(int64)page->Size, -1); #endif Allocator::Free(page->Memory); Page* next = page->Next; @@ -35,7 +35,7 @@ void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) { uint64 pageSize = Math::Max(_pageSize, size); #if COMPILE_WITH_PROFILER - ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, pageSize, 1); + 
ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, (int64)pageSize, 1); #endif page = (Page*)Allocator::Allocate(sizeof(Page)); page->Memory = Allocator::Allocate(pageSize); @@ -51,4 +51,4 @@ void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) page->Offset += (uint32)size; return mem; -} \ No newline at end of file +} diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index dc91dbc02..ed1a825d5 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -35,7 +35,7 @@ struct GroupNameBuffer char prev = 0; for (int32 i = 0; i < max && dst < ARRAY_COUNT(Buffer) - 2; i++) { - char cur = str[i]; + char cur = (char)str[i]; if (autoFormat && StringUtils::IsUpper(cur) && StringUtils::IsLower(prev)) { Ansi[dst] = '/'; @@ -422,7 +422,7 @@ void ProfilerMemory::OnMemoryAlloc(void* ptr, uint64 size) // Register pointer PointerData ptrData; - ptrData.Size = size; + ptrData.Size = (uint32)size; ptrData.Group = (uint8)stack.Peek(); PointersLocker.Lock(); Pointers[ptr] = ptrData; From 9d8e75caa3d09902a12f27cd19665185c15c0cec Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Jun 2025 11:19:32 +0200 Subject: [PATCH 023/211] Fix various code to improve quality --- Source/Engine/Core/Log.h | 9 ++++++--- Source/Engine/Engine/Engine.cpp | 5 ++--- Source/Engine/Graphics/Graphics.cpp | 8 ++------ .../DirectX/DX11/GPUDeviceDX11.cpp | 2 +- .../DirectX/DX12/GPUContextDX12.cpp | 2 +- .../GraphicsDevice/DirectX/RenderToolsDX.cpp | 6 +++++- .../Vulkan/RenderToolsVulkan.cpp | 4 +++- Source/Engine/Platform/Base/PlatformBase.cpp | 6 +++--- .../Engine/Platform/Linux/LinuxPlatform.cpp | 2 ++ Source/Engine/Platform/Mac/MacPlatform.cpp | 8 ++++---- .../Platform/Windows/WindowsPlatform.cpp | 2 ++ Source/Engine/Profiler/ProfilerMemory.cpp | 20 +++++++++---------- Source/Engine/Tests/TestMain.cpp | 4 ++-- 13 files changed, 43 insertions(+), 35 deletions(-) diff --git 
a/Source/Engine/Core/Log.h b/Source/Engine/Core/Log.h index f09099d11..e10fc50a0 100644 --- a/Source/Engine/Core/Log.h +++ b/Source/Engine/Core/Log.h @@ -56,6 +56,8 @@ API_ENUM() enum class LogType #define LOG_FLUSH() Log::Logger::Flush() #endif +#define LOG_FLOOR() Log::Logger::WriteFloor() + extern const Char* ToString(LogType e); namespace Log @@ -191,8 +193,9 @@ namespace Log #else -#define LOG(messageType, format, ...) -#define LOG_STR(messageType, str) -#define LOG_FLUSH() +#define LOG(messageType, format, ...) {} +#define LOG_STR(messageType, str) {} +#define LOG_FLUSH() {} +#define LOG_FLOOR() {} #endif diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index d9a4173c8..5607432c2 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -170,10 +170,8 @@ int32 Engine::Main(const Char* cmdLine) #if !USE_EDITOR && (PLATFORM_WINDOWS || PLATFORM_LINUX || PLATFORM_MAC) EngineImpl::RunInBackground = PlatformSettings::Get()->RunInBackground; #endif -#if LOG_ENABLE - Log::Logger::WriteFloor(); + LOG_FLOOR(); LOG_FLUSH(); -#endif Time::Synchronize(); EngineImpl::IsReady = true; PROFILE_MEM_END(); @@ -557,6 +555,7 @@ void Engine::OnExit() #if COMPILE_WITH_PROFILER ProfilerCPU::Dispose(); ProfilerGPU::Dispose(); + ProfilerMemory::Enabled = false; #endif #if LOG_ENABLE diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 733e8c222..bf17970ea 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -101,10 +101,8 @@ bool GraphicsService::Init() PROFILE_MEM(Graphics); // Create and initialize graphics device -#if LOG_ENABLE - Log::Logger::WriteFloor(); + LOG_FLOOR(); LOG(Info, "Creating Graphics Device..."); -#endif PixelFormatExtensions::Init(); GPUDevice* device = nullptr; @@ -218,9 +216,7 @@ bool GraphicsService::Init() { return true; } -#if LOG_ENABLE - Log::Logger::WriteFloor(); -#endif + LOG_FLOOR(); return false; } diff --git 
a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index c578fd295..411d9dd92 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -765,7 +765,7 @@ void GPUDeviceDX11::DrawEnd() { GPUDeviceDX::DrawEnd(); -#if GPU_ENABLE_DIAGNOSTICS +#if GPU_ENABLE_DIAGNOSTICS && LOG_ENABLE // Flush debug messages queue ComPtr infoQueue; VALIDATE_DIRECTX_CALL(_device->QueryInterface(IID_PPV_ARGS(&infoQueue))); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index dd2bc3da4..5f278a8ae 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -39,7 +39,7 @@ #include "Engine/Threading/Threading.h" #define DX12_ENABLE_RESOURCE_BARRIERS_BATCHING 1 -#define DX12_ENABLE_RESOURCE_BARRIERS_DEBUGGING 0 +#define DX12_ENABLE_RESOURCE_BARRIERS_DEBUGGING (0 && LOG_ENABLE) inline bool operator!=(const D3D12_VERTEX_BUFFER_VIEW& l, const D3D12_VERTEX_BUFFER_VIEW& r) { diff --git a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp index f4bf4a3df..f195b8b41 100644 --- a/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/RenderToolsDX.cpp @@ -387,10 +387,14 @@ void RenderToolsDX::LogD3DResult(HRESULT result, const char* file, uint32 line, if (removedReason == DXGI_ERROR_DEVICE_HUNG) errorType = FatalErrorType::GPUHang; } + else if (fatal) + errorType = FatalErrorType::Unknown; if (errorType != FatalErrorType::None) Platform::Fatal(msg, nullptr, errorType); +#if LOG_ENABLE else - Log::Logger::Write(fatal ? 
LogType::Fatal : LogType::Error, msg); + Log::Logger::Write(LogType::Error, msg); +#endif } LPCSTR RenderToolsDX::GetVertexInputSemantic(VertexElement::Types type, UINT& semanticIndex) diff --git a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp index d535f6ea1..604b8a612 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp @@ -248,11 +248,13 @@ void RenderToolsVulkan::LogVkResult(VkResult result, const char* file, uint32 li errorType = FatalErrorType::GPUHang; else if (result == VK_ERROR_DEVICE_LOST || result == VK_ERROR_SURFACE_LOST_KHR || result == VK_ERROR_MEMORY_MAP_FAILED) errorType = FatalErrorType::GPUCrash; + else if (fatal) + errorType = FatalErrorType::Unknown; if (errorType != FatalErrorType::None) Platform::Fatal(msg, nullptr, errorType); #if LOG_ENABLE else - Log::Logger::Write(fatal ? LogType::Fatal : LogType::Error, msg); + Log::Logger::Write(LogType::Error, msg); #endif } diff --git a/Source/Engine/Platform/Base/PlatformBase.cpp b/Source/Engine/Platform/Base/PlatformBase.cpp index 7e8428e64..0bde861c7 100644 --- a/Source/Engine/Platform/Base/PlatformBase.cpp +++ b/Source/Engine/Platform/Base/PlatformBase.cpp @@ -314,7 +314,7 @@ void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType er { // Log separation for crash info LOG_FLUSH(); - Log::Logger::WriteFloor(); + LOG_FLOOR(); LOG(Error, ""); LOG(Error, "Critical error! 
Reason: {0}", msg); LOG(Error, ""); @@ -400,12 +400,12 @@ void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType er // Capture the original log file LOG(Error, ""); - Log::Logger::WriteFloor(); + LOG_FLOOR(); LOG_FLUSH(); FileSystem::CopyFile(crashDataFolder / TEXT("Log.txt"), Log::Logger::LogFilePath); LOG(Error, "Crash info collected."); - Log::Logger::WriteFloor(); + LOG_FLOOR(); } #endif diff --git a/Source/Engine/Platform/Linux/LinuxPlatform.cpp b/Source/Engine/Platform/Linux/LinuxPlatform.cpp index 44e5a313d..4b55f8bd5 100644 --- a/Source/Engine/Platform/Linux/LinuxPlatform.cpp +++ b/Source/Engine/Platform/Linux/LinuxPlatform.cpp @@ -3029,8 +3029,10 @@ int32 LinuxPlatform::CreateProcess(CreateProcessSettings& settings) String line(lineBuffer); if (settings.SaveOutput) settings.Output.Add(line.Get(), line.Length()); +#if LOG_ENABLE if (settings.LogOutput) Log::Logger::Write(LogType::Info, line); +#endif } } int stat_loc; diff --git a/Source/Engine/Platform/Mac/MacPlatform.cpp b/Source/Engine/Platform/Mac/MacPlatform.cpp index 6cb8b1153..6c7fbf533 100644 --- a/Source/Engine/Platform/Mac/MacPlatform.cpp +++ b/Source/Engine/Platform/Mac/MacPlatform.cpp @@ -485,15 +485,15 @@ int32 MacPlatform::CreateProcess(CreateProcessSettings& settings) String line((const char*)data.bytes, data.length); if (settings.SaveOutput) settings.Output.Add(line.Get(), line.Length()); +#if LOG_ENABLE if (settings.LogOutput) { StringView lineView(line); if (line[line.Length() - 1] == '\n') lineView = StringView(line.Get(), line.Length() - 1); -#if LOG_ENABLE Log::Logger::Write(LogType::Info, lineView); -#endif } +#endif [[stdoutPipe fileHandleForReading] waitForDataInBackgroundAndNotify]; } } @@ -514,15 +514,15 @@ int32 MacPlatform::CreateProcess(CreateProcessSettings& settings) String line((const char*)data.bytes, data.length); if (settings.SaveOutput) settings.Output.Add(line.Get(), line.Length()); +#if LOG_ENABLE if (settings.LogOutput) { StringView 
lineView(line); if (line[line.Length() - 1] == '\n') lineView = StringView(line.Get(), line.Length() - 1); -#if LOG_ENABLE Log::Logger::Write(LogType::Error, lineView); -#endif } +#endif [[stderrPipe fileHandleForReading] waitForDataInBackgroundAndNotify]; } } diff --git a/Source/Engine/Platform/Windows/WindowsPlatform.cpp b/Source/Engine/Platform/Windows/WindowsPlatform.cpp index 277927af3..697174a47 100644 --- a/Source/Engine/Platform/Windows/WindowsPlatform.cpp +++ b/Source/Engine/Platform/Windows/WindowsPlatform.cpp @@ -1054,8 +1054,10 @@ void ReadPipe(HANDLE pipe, Array& rawData, Array& logData, LogType l int32 tmp; StringUtils::ConvertANSI2UTF16(rawData.Get(), logData.Get(), rawData.Count(), tmp); logData.Last() = '\0'; +#if LOG_ENABLE if (settings.LogOutput) Log::Logger::Write(logType, StringView(logData.Get(), rawData.Count())); +#endif if (settings.SaveOutput) settings.Output.Add(logData.Get(), rawData.Count()); } diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index ed1a825d5..972dd6646 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -310,12 +310,12 @@ void TickProfilerMemory() // Update profiler memory PointersLocker.Lock(); - GroupMemory[(int32)ProfilerMemory::Groups::Profiler] = - sizeof(GroupMemory) + sizeof(GroupNames) + sizeof(GroupStack) + + GroupMemory[(int32)ProfilerMemory::Groups::Profiler] = + sizeof(GroupMemory) + sizeof(GroupNames) + sizeof(GroupStack) + #ifdef USE_TRACY_MEMORY_PLOTS - sizeof(GroupTracyPlotEnable) + + sizeof(GroupTracyPlotEnable) + #endif - Pointers.Capacity() * sizeof(Dictionary::Bucket); + Pointers.Capacity() * sizeof(Dictionary::Bucket); PointersLocker.Unlock(); // Get total system memory and update untracked amount @@ -431,8 +431,8 @@ void ProfilerMemory::OnMemoryAlloc(void* ptr, uint64 size) // Update group memory const int64 add = (int64)size; AddGroupMemory((Groups)ptrData.Group, add); - 
Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::Malloc], add); - Platform::InterlockedIncrement(&GroupMemoryCount[(int32)ProfilerMemory::Groups::Malloc]); + Platform::InterlockedAdd(&GroupMemory[(int32)Groups::Malloc], add); + Platform::InterlockedIncrement(&GroupMemoryCount[(int32)Groups::Malloc]); UPDATE_PEEK(ProfilerMemory::Groups::Malloc); stack.SkipRecursion = false; @@ -453,16 +453,16 @@ void ProfilerMemory::OnMemoryFree(void* ptr) bool found = it.IsNotEnd(); if (found) ptrData = it->Value; - Pointers.Remove(it); - PointersLocker.Unlock(); + Pointers.Remove(it); + PointersLocker.Unlock(); if (found) { // Update group memory const int64 add = -(int64)ptrData.Size; SubGroupMemory((Groups)ptrData.Group, add); - Platform::InterlockedAdd(&GroupMemory[(int32)ProfilerMemory::Groups::Malloc], add); - Platform::InterlockedDecrement(&GroupMemoryCount[(int32)ProfilerMemory::Groups::Malloc]); + Platform::InterlockedAdd(&GroupMemory[(int32)Groups::Malloc], add); + Platform::InterlockedDecrement(&GroupMemoryCount[(int32)Groups::Malloc]); } stack.SkipRecursion = false; diff --git a/Source/Engine/Tests/TestMain.cpp b/Source/Engine/Tests/TestMain.cpp index 88db6144c..061f1343b 100644 --- a/Source/Engine/Tests/TestMain.cpp +++ b/Source/Engine/Tests/TestMain.cpp @@ -40,14 +40,14 @@ void TestsRunnerService::Update() return; // Runs tests - Log::Logger::WriteFloor(); + LOG_FLOOR(); LOG(Info, "Running Flax Tests..."); const int result = Catch::Session().run(); if (result == 0) LOG(Info, "Flax Tests result: {0}", result); else LOG(Error, "Flax Tests result: {0}", result); - Log::Logger::WriteFloor(); + LOG_FLOOR(); Engine::RequestExit(result); } From cd637e8a7afd5ff1c92c246759fdd2df6f79a619 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Jun 2025 14:38:22 +0200 Subject: [PATCH 024/211] Add more memory profiling coverage --- Source/Engine/Content/Storage/FlaxStorage.cpp | 1 + Source/Engine/Core/Delegate.h | 4 ++++ 
Source/Engine/Graphics/Textures/GPUTexture.cpp | 3 +++ Source/Engine/Input/Input.cpp | 9 +++++++++ Source/Engine/Profiler/ProfilerCPU.cpp | 2 ++ Source/Engine/Profiler/ProfilerGPU.cpp | 4 ++++ Source/Engine/Profiler/ProfilerMemory.cpp | 2 ++ Source/Engine/Profiler/ProfilerMemory.h | 9 +++++++-- Source/Engine/Threading/ConcurrentQueue.h | 2 ++ Source/Engine/Threading/JobSystem.cpp | 6 ++++++ Source/Engine/Threading/ThreadPool.cpp | 4 ++++ Source/Engine/Threading/ThreadRegistry.cpp | 4 +++- Source/Engine/Tools/TextureTool/TextureTool.cpp | 7 +++++++ Source/Engine/Utilities/Screenshot.cpp | 3 +++ 14 files changed, 57 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Content/Storage/FlaxStorage.cpp b/Source/Engine/Content/Storage/FlaxStorage.cpp index 17daba278..ed8b623ae 100644 --- a/Source/Engine/Content/Storage/FlaxStorage.cpp +++ b/Source/Engine/Content/Storage/FlaxStorage.cpp @@ -1425,6 +1425,7 @@ bool FlaxStorage::CloseFileHandles() return false; } PROFILE_CPU(); + PROFILE_MEM(ContentFiles); // Note: this is usually called by the content manager when this file is not used or on exit // In those situations all the async tasks using this storage should be cancelled externally diff --git a/Source/Engine/Core/Delegate.h b/Source/Engine/Core/Delegate.h index b11d9b845..8efc2beec 100644 --- a/Source/Engine/Core/Delegate.h +++ b/Source/Engine/Core/Delegate.h @@ -12,6 +12,9 @@ #include "Engine/Threading/Threading.h" #include "Engine/Core/Collections/HashSet.h" #endif +#if COMPILE_WITH_PROFILER +#include "Engine/Profiler/ProfilerMemory.h" +#endif /// /// The function object that supports binding static, member and lambda functions. @@ -457,6 +460,7 @@ public: /// The function to bind. 
void Bind(const FunctionType& f) { + PROFILE_MEM(EngineDelegate); #if DELEGATE_USE_ATOMIC const intptr size = Platform::AtomicRead(&_size); FunctionType* bindings = (FunctionType*)Platform::AtomicRead(&_ptr); diff --git a/Source/Engine/Graphics/Textures/GPUTexture.cpp b/Source/Engine/Graphics/Textures/GPUTexture.cpp index 245fb1e01..07b85bc42 100644 --- a/Source/Engine/Graphics/Textures/GPUTexture.cpp +++ b/Source/Engine/Graphics/Textures/GPUTexture.cpp @@ -628,6 +628,7 @@ GPUTask* GPUTexture::UploadMipMapAsync(const BytesContainer& data, int32 mipInde GPUTask* GPUTexture::UploadMipMapAsync(const BytesContainer& data, int32 mipIndex, int32 rowPitch, int32 slicePitch, bool copyData) { PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); ASSERT(IsAllocated()); ASSERT(mipIndex < MipLevels() && data.IsValid()); ASSERT(data.Length() >= slicePitch); @@ -720,6 +721,7 @@ bool GPUTexture::DownloadData(TextureData& result) MISSING_CODE("support volume texture data downloading."); } PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); // Use faster path for staging resources if (IsStaging()) // TODO: what about chips with unified memory? if rendering is not active then we can access GPU memory from CPU directly (eg. 
mobile, integrated GPUs and some consoles) @@ -806,6 +808,7 @@ Task* GPUTexture::DownloadDataAsync(TextureData& result) MISSING_CODE("support volume texture data downloading."); } PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); // Use faster path for staging resources if (IsStaging()) diff --git a/Source/Engine/Input/Input.cpp b/Source/Engine/Input/Input.cpp index e0fc0297f..8438977b1 100644 --- a/Source/Engine/Input/Input.cpp +++ b/Source/Engine/Input/Input.cpp @@ -169,6 +169,7 @@ void Mouse::OnMouseMoved(const Float2& newPosition) void Mouse::OnMouseDown(const Float2& position, const MouseButton button, Window* target) { + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::MouseDown; e.Target = target; @@ -187,6 +188,7 @@ bool Mouse::IsAnyButtonDown() const void Mouse::OnMouseUp(const Float2& position, const MouseButton button, Window* target) { + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::MouseUp; e.Target = target; @@ -196,6 +198,7 @@ void Mouse::OnMouseUp(const Float2& position, const MouseButton button, Window* void Mouse::OnMouseDoubleClick(const Float2& position, const MouseButton button, Window* target) { + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::MouseDoubleClick; e.Target = target; @@ -205,6 +208,7 @@ void Mouse::OnMouseDoubleClick(const Float2& position, const MouseButton button, void Mouse::OnMouseMove(const Float2& position, Window* target) { + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::MouseMove; e.Target = target; @@ -213,6 +217,7 @@ void Mouse::OnMouseMove(const Float2& position, Window* target) void Mouse::OnMouseLeave(Window* target) { + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::MouseLeave; e.Target = target; @@ -220,6 +225,7 @@ void Mouse::OnMouseLeave(Window* target) void Mouse::OnMouseWheel(const Float2& position, float delta, Window* target) { + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::MouseWheel; 
e.Target = target; @@ -316,6 +322,7 @@ void Keyboard::OnCharInput(Char c, Window* target) if (c < 32) return; + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::Char; e.Target = target; @@ -326,6 +333,7 @@ void Keyboard::OnKeyUp(KeyboardKeys key, Window* target) { if (key >= KeyboardKeys::MAX) return; + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::KeyUp; e.Target = target; @@ -336,6 +344,7 @@ void Keyboard::OnKeyDown(KeyboardKeys key, Window* target) { if (key >= KeyboardKeys::MAX) return; + PROFILE_MEM(Input); Event& e = _queue.AddOne(); e.Type = EventType::KeyDown; e.Target = target; diff --git a/Source/Engine/Profiler/ProfilerCPU.cpp b/Source/Engine/Profiler/ProfilerCPU.cpp index 9ebc39bd5..5f5141e9e 100644 --- a/Source/Engine/Profiler/ProfilerCPU.cpp +++ b/Source/Engine/Profiler/ProfilerCPU.cpp @@ -3,6 +3,7 @@ #if COMPILE_WITH_PROFILER #include "ProfilerCPU.h" +#include "ProfilerMemory.h" #include "Engine/Engine/Globals.h" #include "Engine/Threading/ThreadRegistry.h" @@ -157,6 +158,7 @@ int32 ProfilerCPU::BeginEvent() auto thread = Thread::Current; if (thread == nullptr) { + PROFILE_MEM(Profiler); const auto id = Platform::GetCurrentThreadID(); const auto t = ThreadRegistry::GetThread(id); if (t) diff --git a/Source/Engine/Profiler/ProfilerGPU.cpp b/Source/Engine/Profiler/ProfilerGPU.cpp index 168677c1b..9330663f4 100644 --- a/Source/Engine/Profiler/ProfilerGPU.cpp +++ b/Source/Engine/Profiler/ProfilerGPU.cpp @@ -3,6 +3,7 @@ #if COMPILE_WITH_PROFILER #include "ProfilerGPU.h" +#include "ProfilerMemory.h" #include "Engine/Core/Log.h" #include "Engine/Engine/Engine.h" #include "Engine/Graphics/GPUDevice.h" @@ -45,6 +46,7 @@ void ProfilerGPU::EventBuffer::TryResolve() } // Collect queries results and free them + PROFILE_MEM(Profiler); for (int32 i = 0; i < _data.Count(); i++) { auto& e = _data[i]; @@ -58,6 +60,7 @@ void ProfilerGPU::EventBuffer::TryResolve() int32 ProfilerGPU::EventBuffer::Add(const Event& e) { + 
PROFILE_MEM(Profiler); const int32 index = _data.Count(); _data.Add(e); return index; @@ -88,6 +91,7 @@ GPUTimerQuery* ProfilerGPU::GetTimerQuery() } else { + PROFILE_MEM(Profiler); result = GPUDevice::Instance->CreateTimerQuery(); _timerQueriesPool.Add(result); } diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index 972dd6646..807f397cc 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -234,6 +234,8 @@ void InitProfilerMemory(const Char* cmdLine, int32 stage) // Init hierarchy #define INIT_PARENT(parent, child) GroupParents[(int32)ProfilerMemory::Groups::child] = (uint8)ProfilerMemory::Groups::parent + INIT_PARENT(Engine, EngineThreading); + INIT_PARENT(Engine, EngineDelegate); INIT_PARENT(Malloc, MallocArena); INIT_PARENT(Graphics, GraphicsTextures); INIT_PARENT(Graphics, GraphicsRenderTargets); diff --git a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h index 1b1ad5ac3..2112a407d 100644 --- a/Source/Engine/Profiler/ProfilerMemory.h +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -30,8 +30,6 @@ public: TotalUntracked, // Initial memory used by program upon startup (eg. executable size, static variables). ProgramSize, - // General purpose engine memory. - Engine, // Profiling tool memory overhead. Profiler, @@ -40,6 +38,13 @@ public: // Total memory allocated via arena allocators (all pages). MallocArena, + // General purpose engine memory. + Engine, + // Memory used by the threads (and relevant systems such as Job System). + EngineThreading, + // Memory used by Delegate (engine events system to store all references). + EngineDelegate, + // Total graphics memory usage. Graphics, // Total textures memory usage. 
diff --git a/Source/Engine/Threading/ConcurrentQueue.h b/Source/Engine/Threading/ConcurrentQueue.h index ba60dcec6..79c819cc8 100644 --- a/Source/Engine/Threading/ConcurrentQueue.h +++ b/Source/Engine/Threading/ConcurrentQueue.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Core/Memory/Memory.h" +#include "Engine/Profiler/ProfilerMemory.h" #define MOODYCAMEL_EXCEPTIONS_ENABLED 0 #include @@ -17,6 +18,7 @@ struct ConcurrentQueueSettings : public moodycamel::ConcurrentQueueDefaultTraits // Use default engine memory allocator static inline void* malloc(size_t size) { + PROFILE_MEM(EngineThreading); return Allocator::Allocate((uint64)size); } diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 634f1a884..612584c40 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -12,6 +12,7 @@ #include "Engine/Core/Collections/RingBuffer.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_CSHARP #include "Engine/Scripting/ManagedCLR/MCore.h" #endif @@ -118,17 +119,22 @@ void* JobSystemAllocation::Allocate(uintptr size) } } if (!result) + { + PROFILE_MEM(EngineThreading); result = Platform::Allocate(size, 16); + } return result; } void JobSystemAllocation::Free(void* ptr, uintptr size) { + PROFILE_MEM(EngineThreading); MemPool.Add({ ptr, size }); } bool JobSystemService::Init() { + PROFILE_MEM(EngineThreading); ThreadsCount = Math::Min(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads)); for (int32 i = 0; i < ThreadsCount; i++) { diff --git a/Source/Engine/Threading/ThreadPool.cpp b/Source/Engine/Threading/ThreadPool.cpp index aeb4dea98..e84aa2cdd 100644 --- a/Source/Engine/Threading/ThreadPool.cpp +++ b/Source/Engine/Threading/ThreadPool.cpp @@ -14,6 +14,7 @@ #include "Engine/Platform/ConditionVariable.h" #include "Engine/Platform/CPUInfo.h" #include "Engine/Platform/Thread.h" +#include 
"Engine/Profiler/ProfilerMemory.h" FLAXENGINE_API bool IsInMainThread() { @@ -36,6 +37,7 @@ String ThreadPoolTask::ToString() const void ThreadPoolTask::Enqueue() { + PROFILE_MEM(EngineThreading); ThreadPoolImpl::Jobs.Add(this); ThreadPoolImpl::JobsSignal.NotifyOne(); } @@ -58,6 +60,8 @@ ThreadPoolService ThreadPoolServiceInstance; bool ThreadPoolService::Init() { + PROFILE_MEM(EngineThreading); + // Spawn threads const int32 numThreads = Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 2, PLATFORM_THREADS_LIMIT / 2); LOG(Info, "Spawning {0} Thread Pool workers", numThreads); diff --git a/Source/Engine/Threading/ThreadRegistry.cpp b/Source/Engine/Threading/ThreadRegistry.cpp index d793f7e56..522a5479b 100644 --- a/Source/Engine/Threading/ThreadRegistry.cpp +++ b/Source/Engine/Threading/ThreadRegistry.cpp @@ -3,10 +3,11 @@ #include "ThreadRegistry.h" #include "Engine/Core/Collections/Dictionary.h" #include "Engine/Platform/CriticalSection.h" +#include "Engine/Profiler/ProfilerMemory.h" namespace ThreadRegistryImpl { - Dictionary Registry(64); + Dictionary Registry; CriticalSection Locker; } @@ -46,6 +47,7 @@ void ThreadRegistry::KillEmAll() void ThreadRegistry::Add(Thread* thread) { + PROFILE_MEM(EngineThreading); ASSERT(thread && thread->GetID() != 0); Locker.Lock(); ASSERT(!Registry.ContainsKey(thread->GetID()) && !Registry.ContainsValue(thread)); diff --git a/Source/Engine/Tools/TextureTool/TextureTool.cpp b/Source/Engine/Tools/TextureTool/TextureTool.cpp index 75f4da774..b892385da 100644 --- a/Source/Engine/Tools/TextureTool/TextureTool.cpp +++ b/Source/Engine/Tools/TextureTool/TextureTool.cpp @@ -14,6 +14,7 @@ #include "Engine/Graphics/PixelFormatSampler.h" #include "Engine/Graphics/Textures/TextureData.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #if USE_EDITOR #include "Engine/Core/Collections/Dictionary.h" @@ -210,6 +211,7 @@ bool TextureTool::HasAlpha(const StringView& path) bool 
TextureTool::ImportTexture(const StringView& path, TextureData& textureData) { PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); LOG(Info, "Importing texture from \'{0}\'", path); const auto startTime = DateTime::NowUTC(); @@ -247,6 +249,7 @@ bool TextureTool::ImportTexture(const StringView& path, TextureData& textureData bool TextureTool::ImportTexture(const StringView& path, TextureData& textureData, Options options, String& errorMsg) { PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); LOG(Info, "Importing texture from \'{0}\'. Options: {1}", path, options.ToString()); const auto startTime = DateTime::NowUTC(); @@ -296,6 +299,7 @@ bool TextureTool::ImportTexture(const StringView& path, TextureData& textureData bool TextureTool::ExportTexture(const StringView& path, const TextureData& textureData) { PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); LOG(Info, "Exporting texture to \'{0}\'.", path); const auto startTime = DateTime::NowUTC(); ImageType type; @@ -346,6 +350,7 @@ bool TextureTool::Convert(TextureData& dst, const TextureData& src, const PixelF return true; } PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); #if COMPILE_WITH_DIRECTXTEX return ConvertDirectXTex(dst, src, dstFormat); @@ -375,6 +380,7 @@ bool TextureTool::Resize(TextureData& dst, const TextureData& src, int32 dstWidt return true; } PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); #if COMPILE_WITH_DIRECTXTEX return ResizeDirectXTex(dst, src, dstWidth, dstHeight); #elif COMPILE_WITH_STB @@ -488,6 +494,7 @@ bool TextureTool::GetImageType(const StringView& path, ImageType& type) bool TextureTool::Transform(TextureData& texture, const Function& transformation) { PROFILE_CPU(); + PROFILE_MEM(GraphicsTextures); auto sampler = PixelFormatSampler::Get(texture.Format); if (!sampler) return true; diff --git a/Source/Engine/Utilities/Screenshot.cpp b/Source/Engine/Utilities/Screenshot.cpp index 85abf19d8..9a453701d 100644 --- a/Source/Engine/Utilities/Screenshot.cpp +++ b/Source/Engine/Utilities/Screenshot.cpp 
@@ -82,6 +82,7 @@ bool CaptureScreenshot::Run() LOG(Warning, "Missing target render task."); return true; } + PROFILE_MEM(Graphics); // TODO: how about a case two or more screenshots at the same second? update counter and check files @@ -147,6 +148,7 @@ void Screenshot::Capture(GPUTexture* target, const StringView& path) LOG(Warning, "Cannot take screenshot. Graphics device is not ready."); return; } + PROFILE_MEM(Graphics); // Faster path for staging textures that contents are ready to access on a CPU if (target->IsStaging()) @@ -211,6 +213,7 @@ void Screenshot::Capture(SceneRenderTask* target, const StringView& path) LOG(Warning, "Cannot take screenshot. Graphics device is not ready."); return; } + PROFILE_MEM(Graphics); // Create tasks auto saveTask = New(target, path); From e8b60060ab5601e38e38510da8891c9faeacbbb1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Jun 2025 14:52:27 +0200 Subject: [PATCH 025/211] Fix memory profiler thread-local storage to avoid dynamic mem alloc due to recursive call --- Source/Engine/Profiler/ProfilerMemory.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index 807f397cc..49cc9649f 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -106,7 +106,13 @@ namespace alignas(16) volatile uint32 GroupTracyPlotEnable[(GROUPS_COUNT + 31) / 32] = {}; #endif uint8 GroupParents[GROUPS_COUNT] = {}; +#if 0 ThreadLocal GroupStack; +#define GetGroupStack() GroupStack.Get(); +#else + THREADLOCAL GroupStackData GroupStack; +#define GetGroupStack() GroupStack +#endif GroupNameBuffer GroupNames[GROUPS_COUNT]; CriticalSection PointersLocker; Dictionary Pointers; @@ -347,13 +353,13 @@ void ProfilerMemory::DecrementGroup(Groups group, uint64 size) void ProfilerMemory::BeginGroup(Groups group) { - auto& stack = GroupStack.Get(); + auto& stack = GetGroupStack(); 
stack.Push(group); } void ProfilerMemory::EndGroup() { - auto& stack = GroupStack.Get(); + auto& stack = GetGroupStack(); stack.Pop(); } @@ -417,7 +423,7 @@ void ProfilerMemory::Dump(const StringView& options) void ProfilerMemory::OnMemoryAlloc(void* ptr, uint64 size) { ASSERT_LOW_LAYER(Enabled && ptr); - auto& stack = GroupStack.Get(); + auto& stack = GetGroupStack(); if (stack.SkipRecursion) return; stack.SkipRecursion = true; @@ -443,7 +449,7 @@ void ProfilerMemory::OnMemoryAlloc(void* ptr, uint64 size) void ProfilerMemory::OnMemoryFree(void* ptr) { ASSERT_LOW_LAYER(Enabled && ptr); - auto& stack = GroupStack.Get(); + auto& stack = GetGroupStack(); if (stack.SkipRecursion) return; stack.SkipRecursion = true; From 091f76bbf29b2bdf429e9b83c35d45c2d3197a75 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Jun 2025 22:40:43 +0200 Subject: [PATCH 026/211] Add more improvements to usability of memory profiler --- Source/Editor/Windows/Profiler/Memory.cs | 2 +- Source/Engine/Foliage/Foliage.cpp | 6 ++++++ Source/Engine/Foliage/FoliageType.cpp | 5 +++++ Source/Engine/Profiler/ProfilerMemory.cpp | 19 +++++++++++-------- Source/Engine/Profiler/ProfilerMemory.h | 20 ++++++++++++-------- Source/Engine/Terrain/Terrain.cpp | 6 ++++++ Source/Engine/Terrain/TerrainManager.cpp | 8 +++++--- Source/Engine/Terrain/TerrainPatch.cpp | 17 +++++++++++++++++ 8 files changed, 63 insertions(+), 20 deletions(-) diff --git a/Source/Editor/Windows/Profiler/Memory.cs b/Source/Editor/Windows/Profiler/Memory.cs index 806bed18f..6958b828b 100644 --- a/Source/Editor/Windows/Profiler/Memory.cs +++ b/Source/Editor/Windows/Profiler/Memory.cs @@ -232,7 +232,7 @@ namespace FlaxEditor.Windows.Profiler Array.Sort(_groupOrder, (x, y) => { var tmp = _frames.Get(selectedFrame); - return (int)(tmp.Usage.Values0[y] - tmp.Usage.Values0[x]); + return tmp.Usage.Values0[y].CompareTo(tmp.Usage.Values0[x]); }); // Add rows diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp 
index a1cd046ae..29771c29c 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -400,6 +400,7 @@ void Foliage::DrawClusterGlobalSA(GlobalSurfaceAtlasPass* globalSA, const Vector void Foliage::DrawFoliageJob(int32 i) { PROFILE_CPU(); + PROFILE_MEM(Graphics); const FoliageType& type = FoliageTypes[i]; if (type.IsReady() && type.Model->CanBeRendered()) { @@ -551,6 +552,7 @@ FoliageType* Foliage::GetFoliageType(int32 index) void Foliage::AddFoliageType(Model* model) { PROFILE_CPU(); + PROFILE_MEM(LevelFoliage); // Ensure to have unique model CHECK(model); @@ -629,6 +631,7 @@ int32 Foliage::GetFoliageTypeInstancesCount(int32 index) const void Foliage::AddInstance(const FoliageInstance& instance) { + PROFILE_MEM(LevelFoliage); ASSERT(instance.Type >= 0 && instance.Type < FoliageTypes.Count()); auto type = &FoliageTypes[instance.Type]; @@ -705,6 +708,7 @@ void Foliage::OnFoliageTypeModelLoaded(int32 index) if (_disableFoliageTypeEvents) return; PROFILE_CPU(); + PROFILE_MEM(LevelFoliage); auto& type = FoliageTypes[index]; ASSERT(type.IsReady()); @@ -803,6 +807,7 @@ void Foliage::OnFoliageTypeModelLoaded(int32 index) void Foliage::RebuildClusters() { PROFILE_CPU(); + PROFILE_MEM(LevelFoliage); // Faster path if foliage is empty or no types is ready bool anyTypeReady = false; @@ -1328,6 +1333,7 @@ void Foliage::Deserialize(DeserializeStream& stream, ISerializeModifier* modifie Actor::Deserialize(stream, modifier); PROFILE_CPU(); + PROFILE_MEM(LevelFoliage); // Clear #if FOLIAGE_USE_SINGLE_QUAD_TREE diff --git a/Source/Engine/Foliage/FoliageType.cpp b/Source/Engine/Foliage/FoliageType.cpp index 0f1893e67..8b8c84420 100644 --- a/Source/Engine/Foliage/FoliageType.cpp +++ b/Source/Engine/Foliage/FoliageType.cpp @@ -4,6 +4,7 @@ #include "Engine/Core/Collections/ArrayExtensions.h" #include "Engine/Core/Random.h" #include "Engine/Serialization/Serialization.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Foliage.h" 
FoliageType::FoliageType() @@ -62,6 +63,7 @@ Array FoliageType::GetMaterials() const void FoliageType::SetMaterials(const Array& value) { + PROFILE_MEM(LevelFoliage); CHECK(value.Count() == Entries.Count()); for (int32 i = 0; i < value.Count(); i++) Entries[i].Material = value[i]; @@ -114,6 +116,8 @@ void FoliageType::OnModelChanged() void FoliageType::OnModelLoaded() { + PROFILE_MEM(LevelFoliage); + // Now it's ready _isReady = 1; @@ -169,6 +173,7 @@ void FoliageType::Serialize(SerializeStream& stream, const void* otherObj) void FoliageType::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) { + PROFILE_MEM(LevelFoliage); DESERIALIZE(Model); const auto member = stream.FindMember("Materials"); diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index 49cc9649f..d53e48b17 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -21,6 +21,7 @@ #define USE_TRACY_MEMORY_PLOTS (defined(TRACY_ENABLE)) static_assert(GROUPS_COUNT <= MAX_uint8, "Fix memory profiler groups to fit a single byte."); +static_assert(sizeof(ProfilerMemory::Groups) == sizeof(uint8), "Fix memory profiler groups to fit a single byte."); // Compact name storage. 
struct GroupNameBuffer @@ -32,17 +33,17 @@ struct GroupNameBuffer void Set(const T* str, bool autoFormat = false) { int32 max = StringUtils::Length(str), dst = 0; - char prev = 0; + T prev = 0; for (int32 i = 0; i < max && dst < ARRAY_COUNT(Buffer) - 2; i++) { - char cur = (char)str[i]; + T cur = (T)str[i]; if (autoFormat && StringUtils::IsUpper(cur) && StringUtils::IsLower(prev)) { Ansi[dst] = '/'; Buffer[dst++] = '/'; } - Ansi[dst] = cur; - Buffer[dst++] = cur; + Ansi[dst] = (char)cur; + Buffer[dst++] = (Char)cur; prev = cur; } Buffer[dst] = 0; @@ -257,6 +258,8 @@ void InitProfilerMemory(const Char* cmdLine, int32 stage) INIT_PARENT(Animations, AnimationsData); INIT_PARENT(Content, ContentAssets); INIT_PARENT(Content, ContentFiles); + INIT_PARENT(Level, LevelFoliage); + INIT_PARENT(Level, LevelTerrain); INIT_PARENT(Scripting, ScriptingVisual); INIT_PARENT(Scripting, ScriptingCSharp); INIT_PARENT(ScriptingCSharp, ScriptingCSharpGCCommitted); @@ -403,10 +406,10 @@ ProfilerMemory::GroupsArray ProfilerMemory::GetGroups(int32 mode) void ProfilerMemory::Dump(const StringView& options) { #if LOG_ENABLE - bool file = options.Contains(TEXT("file")); + bool file = options.Contains(TEXT("file"), StringSearchCase::IgnoreCase); StringBuilder output; int32 maxCount = 20; - if (file || options.Contains(TEXT("all"))) + if (file || options.Contains(TEXT("all"), StringSearchCase::IgnoreCase)) maxCount = MAX_int32; ::Dump(output, maxCount); if (file) @@ -476,10 +479,10 @@ void ProfilerMemory::OnMemoryFree(void* ptr) stack.SkipRecursion = false; } -void ProfilerMemory::OnGroupUpdate(Groups group, int64 sizeDelta, int64 countDetla) +void ProfilerMemory::OnGroupUpdate(Groups group, int64 sizeDelta, int64 countDelta) { Platform::InterlockedAdd(&GroupMemory[(int32)group], sizeDelta); - Platform::InterlockedAdd(&GroupMemoryCount[(int32)group], countDetla); + Platform::InterlockedAdd(&GroupMemoryCount[(int32)group], countDelta); UPDATE_PEEK(group); } diff --git 
a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h index 2112a407d..e42b8720e 100644 --- a/Source/Engine/Profiler/ProfilerMemory.h +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -61,7 +61,7 @@ public: GraphicsVertexBuffers, // Total index buffers memory usage. GraphicsIndexBuffers, - // Total meshes memory usage (vertex and idnex buffers allocated by models). + // Total meshes memory usage (vertex and index buffers allocated by models). GraphicsMeshes, // Totoal shaders memory usage (shaders bytecode, PSOs data). GraphicsShaders, @@ -78,7 +78,7 @@ public: // Total animation data memory usage (curves, events, keyframes, graphs, etc.). AnimationsData, - // Total autio system memory. + // Total audio system memory. Audio, // Total content system memory usage. @@ -90,11 +90,15 @@ public: // Total memory used by content streaming system (internals). ContentStreaming, - // Total memory allocated by input system. - Input, - // Total memory allocated by scene objects. Level, + // Total memory allocated by the foliage system (quad-tree, foliage instances data). Excluding foliage models data. + LevelFoliage, + // Total memory allocated by the terrain system (patches). + LevelTerrain, + + // Total memory allocated by input system. + Input, // Total localization system memory. Localization, @@ -148,7 +152,7 @@ public: CustomGame8, // Custom game-specific memory tracking. CustomGame9, - + // Custom plugin-specific memory tracking. CustomPlugin0, // Custom plugin-specific memory tracking. @@ -186,7 +190,7 @@ public: }; /// - /// The memory groups array wraper to avoid dynamic memory allocation. + /// The memory groups array wrapper to avoid dynamic memory allocation. 
/// API_STRUCT(NoDefault) struct GroupsArray { @@ -254,7 +258,7 @@ public: static void OnMemoryAlloc(void* ptr, uint64 size); static void OnMemoryFree(void* ptr); - static void OnGroupUpdate(Groups group, int64 sizeDelta, int64 countDetla); + static void OnGroupUpdate(Groups group, int64 sizeDelta, int64 countDelta); public: /// diff --git a/Source/Engine/Terrain/Terrain.cpp b/Source/Engine/Terrain/Terrain.cpp index ebbfd70e6..85274b8fb 100644 --- a/Source/Engine/Terrain/Terrain.cpp +++ b/Source/Engine/Terrain/Terrain.cpp @@ -16,6 +16,7 @@ #include "Engine/Graphics/Textures/GPUTexture.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Renderer/GlobalSignDistanceFieldPass.h" #include "Engine/Renderer/GI/GlobalSurfaceAtlasPass.h" @@ -290,6 +291,7 @@ void Terrain::SetCollisionLOD(int32 value) void Terrain::SetPhysicalMaterials(const Array, FixedAllocation<8>>& value) { + PROFILE_MEM(LevelTerrain); _physicalMaterials = value; _physicalMaterials.Resize(8); JsonAsset* materials[8]; @@ -431,6 +433,7 @@ void Terrain::Setup(int32 lodCount, int32 chunkSize) void Terrain::AddPatches(const Int2& numberOfPatches) { + PROFILE_MEM(LevelTerrain); if (_chunkSize == 0) Setup(); _patches.ClearDelete(); @@ -470,6 +473,7 @@ void Terrain::AddPatch(const Int2& patchCoord) LOG(Warning, "Cannot add patch at {0}x{1}. 
The patch at the given location already exists.", patchCoord.X, patchCoord.Y); return; } + PROFILE_MEM(LevelTerrain); if (_chunkSize == 0) Setup(); @@ -726,6 +730,8 @@ void Terrain::Serialize(SerializeStream& stream, const void* otherObj) void Terrain::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) { + PROFILE_MEM(LevelTerrain); + // Base Actor::Deserialize(stream, modifier); diff --git a/Source/Engine/Terrain/TerrainManager.cpp b/Source/Engine/Terrain/TerrainManager.cpp index 594cdd39b..6da020079 100644 --- a/Source/Engine/Terrain/TerrainManager.cpp +++ b/Source/Engine/Terrain/TerrainManager.cpp @@ -5,16 +5,17 @@ #include "Engine/Threading/Threading.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUBuffer.h" +#include "Engine/Graphics/Shaders/GPUVertexLayout.h" +#include "Engine/Core/Log.h" #include "Engine/Core/Math/Color32.h" #include "Engine/Core/Collections/ChunkedArray.h" #include "Engine/Core/Collections/Dictionary.h" -#include "Engine/Content/Content.h" #include "Engine/Engine/EngineService.h" +#include "Engine/Content/Content.h" #include "Engine/Content/Assets/MaterialBase.h" #include "Engine/Content/AssetReference.h" -#include "Engine/Core/Log.h" -#include "Engine/Graphics/Shaders/GPUVertexLayout.h" #include "Engine/Renderer/DrawCall.h" +#include "Engine/Profiler/ProfilerMemory.h" // Must match structure defined in Terrain.shader struct TerrainVertex @@ -94,6 +95,7 @@ bool TerrainManager::GetChunkGeometry(DrawCall& drawCall, int32 chunkSize, int32 data->GetChunkGeometry(drawCall); return false; } + PROFILE_MEM(LevelTerrain); // Prepare const int32 vertexCount = (chunkSize + 1) >> lodIndex; diff --git a/Source/Engine/Terrain/TerrainPatch.cpp b/Source/Engine/Terrain/TerrainPatch.cpp index 1c754d843..65cdace25 100644 --- a/Source/Engine/Terrain/TerrainPatch.cpp +++ b/Source/Engine/Terrain/TerrainPatch.cpp @@ -6,6 +6,7 @@ #include "Engine/Core/Log.h" #include "Engine/Core/Math/Color32.h" #include 
"Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Physics/Physics.h" #include "Engine/Physics/PhysicsScene.h" #include "Engine/Physics/PhysicsBackend.h" @@ -66,6 +67,7 @@ TerrainPatch::TerrainPatch(const SpawnParams& params) void TerrainPatch::Init(Terrain* terrain, int16 x, int16 z) { + PROFILE_MEM(LevelTerrain); ScopeLock lock(_collisionLocker); _terrain = terrain; @@ -823,6 +825,7 @@ bool ModifyCollision(TerrainDataUpdateInfo& info, TextureBase::InitData* initDat bool TerrainPatch::SetupHeightMap(int32 heightMapLength, const float* heightMap, const byte* holesMask, bool forceUseVirtualStorage) { PROFILE_CPU_NAMED("Terrain.Setup"); + PROFILE_MEM(LevelTerrain); if (heightMap == nullptr) { LOG(Warning, "Cannot create terrain without a heightmap specified."); @@ -1034,6 +1037,7 @@ bool TerrainPatch::SetupHeightMap(int32 heightMapLength, const float* heightMap, bool TerrainPatch::SetupSplatMap(int32 index, int32 splatMapLength, const Color32* splatMap, bool forceUseVirtualStorage) { PROFILE_CPU_NAMED("Terrain.SetupSplatMap"); + PROFILE_MEM(LevelTerrain); CHECK_RETURN(index >= 0 && index < TERRAIN_MAX_SPLATMAPS_COUNT, true); if (splatMap == nullptr) { @@ -1182,6 +1186,7 @@ bool TerrainPatch::SetupSplatMap(int32 index, int32 splatMapLength, const Color3 bool TerrainPatch::InitializeHeightMap() { PROFILE_CPU_NAMED("Terrain.InitializeHeightMap"); + PROFILE_MEM(LevelTerrain); const auto heightmapSize = _terrain->GetChunkSize() * Terrain::ChunksCountEdge + 1; Array heightmap; heightmap.Resize(heightmapSize * heightmapSize); @@ -1248,6 +1253,7 @@ void TerrainPatch::ClearCache() void TerrainPatch::CacheHeightData() { PROFILE_CPU_NAMED("Terrain.CacheHeightData"); + PROFILE_MEM(LevelTerrain); const TerrainDataUpdateInfo info(this); // Ensure that heightmap data is all loaded @@ -1313,6 +1319,7 @@ void TerrainPatch::CacheHeightData() void TerrainPatch::CacheSplatData() { PROFILE_CPU_NAMED("Terrain.CacheSplatData"); + 
PROFILE_MEM(LevelTerrain); const TerrainDataUpdateInfo info(this); // Cache all the splatmaps @@ -1396,6 +1403,7 @@ bool TerrainPatch::ModifyHeightMap(const float* samples, const Int2& modifiedOff return true; } PROFILE_CPU_NAMED("Terrain.ModifyHeightMap"); + PROFILE_MEM(LevelTerrain); // Check if has no heightmap if (Heightmap == nullptr) @@ -1490,6 +1498,7 @@ bool TerrainPatch::ModifyHolesMask(const byte* samples, const Int2& modifiedOffs return true; } PROFILE_CPU_NAMED("Terrain.ModifyHolesMask"); + PROFILE_MEM(LevelTerrain); // Check if has no heightmap if (Heightmap == nullptr) @@ -1567,6 +1576,7 @@ bool TerrainPatch::ModifySplatMap(int32 index, const Color32* samples, const Int return true; } PROFILE_CPU_NAMED("Terrain.ModifySplatMap"); + PROFILE_MEM(LevelTerrain); // Get the current data to modify it Color32* splatMap = GetSplatMapData(index); @@ -1738,6 +1748,7 @@ bool TerrainPatch::ModifySplatMap(int32 index, const Color32* samples, const Int bool TerrainPatch::UpdateHeightData(TerrainDataUpdateInfo& info, const Int2& modifiedOffset, const Int2& modifiedSize, bool wasHeightRangeChanged, bool wasHeightChanged) { PROFILE_CPU(); + PROFILE_MEM(LevelTerrain); float* heightMap = GetHeightmapData(); byte* holesMask = GetHolesMaskData(); ASSERT(heightMap && holesMask); @@ -2126,6 +2137,7 @@ void TerrainPatch::UpdatePostManualDeserialization() void TerrainPatch::CreateCollision() { PROFILE_CPU(); + PROFILE_MEM(LevelTerrain); ASSERT(!HasCollision()); if (CreateHeightField()) return; @@ -2241,6 +2253,7 @@ void TerrainPatch::DestroyCollision() void TerrainPatch::CacheDebugLines() { PROFILE_CPU(); + PROFILE_MEM(LevelTerrain); ASSERT(_physicsHeightField); _debugLinesDirty = false; if (!_debugLines) @@ -2322,6 +2335,7 @@ void TerrainPatch::DrawPhysicsDebug(RenderView& view) const BoundingBox bounds(_bounds.Minimum - view.Origin, _bounds.Maximum - view.Origin); if (!_physicsShape || !view.CullingFrustum.Intersects(bounds)) return; + PROFILE_MEM(LevelTerrain); if 
(view.Mode == ViewMode::PhysicsColliders) { const auto& triangles = GetCollisionTriangles(); @@ -2378,6 +2392,7 @@ const Array& TerrainPatch::GetCollisionTriangles() if (!_physicsShape || _collisionTriangles.HasItems()) return _collisionTriangles; PROFILE_CPU(); + PROFILE_MEM(LevelTerrain); int32 rows, cols; PhysicsBackend::GetHeightFieldSize(_physicsHeightField, rows, cols); @@ -2428,6 +2443,7 @@ const Array& TerrainPatch::GetCollisionTriangles() void TerrainPatch::GetCollisionTriangles(const BoundingSphere& bounds, Array& result) { PROFILE_CPU(); + PROFILE_MEM(LevelTerrain); result.Clear(); // Skip if no intersection with patch @@ -2525,6 +2541,7 @@ void TerrainPatch::GetCollisionTriangles(const BoundingSphere& bounds, Array& vertexBuffer, Array& indexBuffer) { PROFILE_CPU(); + PROFILE_MEM(LevelTerrain); vertexBuffer.Clear(); indexBuffer.Clear(); From d95cd2f0be9ddf341bbaeb641662f4a06557686c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Jun 2025 22:41:29 +0200 Subject: [PATCH 027/211] Optimize memory alloc on Animated Model init --- .../Graphics/Models/SkinnedMeshDrawData.cpp | 23 ------------------- .../Graphics/Models/SkinnedMeshDrawData.h | 7 ------ Source/Engine/Level/Actors/AnimatedModel.cpp | 22 ++++++++++-------- 3 files changed, 13 insertions(+), 39 deletions(-) diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp index 2efe92181..8470facac 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp @@ -38,29 +38,6 @@ void SkinnedMeshDrawData::Setup(int32 bonesCount) SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices); } -void SkinnedMeshDrawData::SetData(const Matrix* bones, bool dropHistory) -{ - if (!bones) - return; - ANIM_GRAPH_PROFILE_EVENT("SetSkinnedMeshData"); - - // Copy bones to the buffer - const int32 count = BonesCount; - const int32 preFetchStride = 2; - const Matrix* input = bones; - const auto output = 
(Matrix3x4*)Data.Get(); - ASSERT(Data.Count() == count * sizeof(Matrix3x4)); - for (int32 i = 0; i < count; i++) - { - Matrix3x4* bone = output + i; - Platform::Prefetch(bone + preFetchStride); - Platform::Prefetch((byte*)(bone + preFetchStride) + PLATFORM_CACHE_LINE_SIZE); - bone->SetMatrixTranspose(input[i]); - } - - OnDataChanged(dropHistory); -} - void SkinnedMeshDrawData::OnDataChanged(bool dropHistory) { // Setup previous frame bone matrices if needed diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h index e690100be..24d5ca230 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h @@ -69,13 +69,6 @@ public: /// The bones count. void Setup(int32 bonesCount); - /// - /// Sets the bone matrices data for the GPU buffer. Ensure to call Flush before rendering. - /// - /// The bones data. - /// True if drop previous update bones used for motion blur, otherwise will keep them and do the update. - void SetData(const Matrix* bones, bool dropHistory); - /// /// After bones Data has been modified externally. Updates the bone matrices data for the GPU buffer. Ensure to call Flush before rendering. 
/// diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index 7863578c6..1aad5f285 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -86,7 +86,8 @@ void AnimatedModel::PreInitSkinningData() { if (!SkinnedModel || !SkinnedModel->IsLoaded()) return; - + PROFILE_CPU(); + PROFILE_MEM(Animations); ScopeLock lock(SkinnedModel->Locker); SetupSkinningData(); @@ -96,28 +97,30 @@ void AnimatedModel::PreInitSkinningData() // Get nodes global transformations for the initial pose GraphInstance.NodesPose.Resize(nodesCount, false); + auto nodesPose = GraphInstance.NodesPose.Get(); for (int32 nodeIndex = 0; nodeIndex < nodesCount; nodeIndex++) { Matrix localTransform; skeleton.Nodes[nodeIndex].LocalTransform.GetWorld(localTransform); const int32 parentIndex = skeleton.Nodes[nodeIndex].ParentIndex; if (parentIndex != -1) - GraphInstance.NodesPose[nodeIndex] = localTransform * GraphInstance.NodesPose[parentIndex]; + nodesPose[nodeIndex] = localTransform * nodesPose[parentIndex]; else - GraphInstance.NodesPose[nodeIndex] = localTransform; + nodesPose[nodeIndex] = localTransform; } GraphInstance.Invalidate(); - GraphInstance.RootTransform = skeleton.Nodes[0].LocalTransform; + GraphInstance.RootTransform = nodesCount > 0 ? skeleton.Nodes[0].LocalTransform : Transform::Identity; // Setup bones transformations including bone offset matrix - Array identityMatrices; // TODO: use shared memory? 
- identityMatrices.Resize(bonesCount, false); + Matrix3x4* output = (Matrix3x4*)_skinningData.Data.Get(); + const SkeletonBone* bones = skeleton.Bones.Get(); for (int32 boneIndex = 0; boneIndex < bonesCount; boneIndex++) { - auto& bone = skeleton.Bones[boneIndex]; - identityMatrices.Get()[boneIndex] = bone.OffsetMatrix * GraphInstance.NodesPose[bone.NodeIndex]; + auto& bone = bones[boneIndex]; + Matrix identityMatrix = bone.OffsetMatrix * nodesPose[bone.NodeIndex]; + output[boneIndex].SetMatrixTranspose(identityMatrix); } - _skinningData.SetData(identityMatrices.Get(), true); + _skinningData.OnDataChanged(true); UpdateBounds(); UpdateSockets(); @@ -586,6 +589,7 @@ void AnimatedModel::SyncParameters() } else { + PROFILE_MEM(Animations); ScopeLock lock(AnimationGraph->Locker); // Clone the parameters From 462f75abd0c815dc0fba1813382e5397d22e7675 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Jun 2025 22:41:48 +0200 Subject: [PATCH 028/211] Optimize memory allocation when reading animated model pose by cloth --- Source/Engine/Level/Actors/AnimatedModel.cpp | 7 +++++++ Source/Engine/Level/Actors/AnimatedModel.h | 6 ++++++ Source/Engine/Physics/Actors/Cloth.cpp | 6 ++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index 1aad5f285..ee95d6233 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -140,6 +140,13 @@ void AnimatedModel::GetCurrentPose(Array& nodesTransformation, bool worl } } +void AnimatedModel::GetCurrentPose(Span& nodesTransformation) const +{ + if (GraphInstance.NodesPose.IsEmpty()) + const_cast(this)->PreInitSkinningData(); // Ensure to have valid nodes pose to return + nodesTransformation = ToSpan(GraphInstance.NodesPose); +} + void AnimatedModel::SetCurrentPose(const Array& nodesTransformation, bool worldSpace) { if (GraphInstance.NodesPose.IsEmpty()) diff --git 
a/Source/Engine/Level/Actors/AnimatedModel.h b/Source/Engine/Level/Actors/AnimatedModel.h index 89124cb87..e13e515d9 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.h +++ b/Source/Engine/Level/Actors/AnimatedModel.h @@ -213,6 +213,12 @@ public: /// True if convert matrices into world-space, otherwise returned values will be in local-space of the actor. API_FUNCTION() void GetCurrentPose(API_PARAM(Out) Array& nodesTransformation, bool worldSpace = false) const; + /// + /// Gets the per-node final transformations (skeleton pose). + /// + /// The output per-node final transformation matrices. + void GetCurrentPose(Span& nodesTransformation) const; + /// /// Sets the per-node final transformations (skeleton pose). /// diff --git a/Source/Engine/Physics/Actors/Cloth.cpp b/Source/Engine/Physics/Actors/Cloth.cpp index da7823517..a55cddd2a 100644 --- a/Source/Engine/Physics/Actors/Cloth.cpp +++ b/Source/Engine/Physics/Actors/Cloth.cpp @@ -815,8 +815,7 @@ bool Cloth::OnPreUpdate() Array particlesSkinned; particlesSkinned.Set(particles.Get(), particles.Length()); - // TODO: optimize memory allocs (eg. get pose as Span for readonly) - Array pose; + Span pose; animatedModel->GetCurrentPose(pose); const SkeletonData& skeleton = animatedModel->SkinnedModel->Skeleton; const SkeletonBone* bones = skeleton.Bones.Get(); @@ -999,8 +998,7 @@ void Cloth::RunClothDeformer(const MeshBase* mesh, MeshDeformationData& deformat return; } - // TODO: optimize memory allocs (eg. 
get pose as Span for readonly) - Array pose; + Span pose; animatedModel->GetCurrentPose(pose); const SkeletonData& skeleton = animatedModel->SkinnedModel->Skeleton; From 125a973ff2482ee5b7c68c1124d9391405115b1b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Jun 2025 22:55:14 +0200 Subject: [PATCH 029/211] Rename `Prefetch` to `MemoryPrefetch` --- Source/Engine/Platform/Android/AndroidPlatform.h | 8 ++++---- Source/Engine/Platform/Apple/ApplePlatform.h | 7 ++++--- Source/Engine/Platform/Base/PlatformBase.h | 12 ++++++------ Source/Engine/Platform/Linux/LinuxPlatform.h | 8 ++++---- Source/Engine/Platform/Win32/Win32Platform.cpp | 2 +- Source/Engine/Platform/Win32/Win32Platform.h | 2 +- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Source/Engine/Platform/Android/AndroidPlatform.h b/Source/Engine/Platform/Android/AndroidPlatform.h index 2d40c709f..4bae040cc 100644 --- a/Source/Engine/Platform/Android/AndroidPlatform.h +++ b/Source/Engine/Platform/Android/AndroidPlatform.h @@ -30,6 +30,10 @@ public: { __sync_synchronize(); } + FORCE_INLINE static void MemoryPrefetch(void const* ptr) + { + __builtin_prefetch(static_cast(ptr)); + } FORCE_INLINE static int64 InterlockedExchange(int64 volatile* dst, int64 exchange) { return __sync_lock_test_and_set(dst, exchange); @@ -74,10 +78,6 @@ public: { __atomic_store(dst, &value, __ATOMIC_RELAXED); } - FORCE_INLINE static void Prefetch(void const* ptr) - { - __builtin_prefetch(static_cast(ptr)); - } static bool Is64BitPlatform(); static String GetSystemName(); static Version GetSystemVersion(); diff --git a/Source/Engine/Platform/Apple/ApplePlatform.h b/Source/Engine/Platform/Apple/ApplePlatform.h index cf91b8e22..02f6ac347 100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.h +++ b/Source/Engine/Platform/Apple/ApplePlatform.h @@ -21,6 +21,10 @@ public: { __sync_synchronize(); } + FORCE_INLINE static void MemoryPrefetch(void const* ptr) + { + __builtin_prefetch(static_cast(ptr)); + } FORCE_INLINE 
static int64 InterlockedExchange(int64 volatile* dst, int64 exchange) { return __sync_lock_test_and_set(dst, exchange); @@ -62,9 +66,6 @@ public: __atomic_store_n((volatile int64*)dst, value, __ATOMIC_RELAXED); } FORCE_INLINE static void Prefetch(void const* ptr) - { - __builtin_prefetch(static_cast(ptr)); - } static bool Is64BitPlatform(); static String GetSystemName(); static Version GetSystemVersion(); diff --git a/Source/Engine/Platform/Base/PlatformBase.h b/Source/Engine/Platform/Base/PlatformBase.h index ff47e77a3..eeaeb879d 100644 --- a/Source/Engine/Platform/Base/PlatformBase.h +++ b/Source/Engine/Platform/Base/PlatformBase.h @@ -239,6 +239,12 @@ public: /// static void MemoryBarrier() = delete; + /// + /// Indicates to the processor that a cache line will be needed in the near future. + /// + /// The address of the cache line to be loaded. This address is not required to be on a cache line boundary. + static void Prefetch(void const* ptr) = delete; + /// /// Sets a 64-bit variable to the specified value as an atomic operation. The function prevents more than one thread from using the same variable simultaneously. /// @@ -317,12 +323,6 @@ public: /// The value to be set. static void AtomicStore(int64 volatile* dst, int64 value) = delete; - /// - /// Indicates to the processor that a cache line will be needed in the near future. - /// - /// The address of the cache line to be loaded. This address is not required to be on a cache line boundary. 
- static void Prefetch(void const* ptr) = delete; - #if COMPILE_WITH_PROFILER static void OnMemoryAlloc(void* ptr, uint64 size); static void OnMemoryFree(void* ptr); diff --git a/Source/Engine/Platform/Linux/LinuxPlatform.h b/Source/Engine/Platform/Linux/LinuxPlatform.h index 08637dc87..98d9d2976 100644 --- a/Source/Engine/Platform/Linux/LinuxPlatform.h +++ b/Source/Engine/Platform/Linux/LinuxPlatform.h @@ -45,6 +45,10 @@ public: { __sync_synchronize(); } + FORCE_INLINE static void MemoryPrefetch(void const* ptr) + { + __builtin_prefetch(static_cast(ptr)); + } FORCE_INLINE static int64 InterlockedExchange(int64 volatile* dst, int64 exchange) { return __sync_lock_test_and_set(dst, exchange); @@ -89,10 +93,6 @@ public: { __atomic_store(dst, &value, __ATOMIC_SEQ_CST); } - FORCE_INLINE static void Prefetch(void const* ptr) - { - __builtin_prefetch(static_cast(ptr)); - } static bool Is64BitPlatform(); static String GetSystemName(); static Version GetSystemVersion(); diff --git a/Source/Engine/Platform/Win32/Win32Platform.cpp b/Source/Engine/Platform/Win32/Win32Platform.cpp index c002986c9..d32cb1249 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.cpp +++ b/Source/Engine/Platform/Win32/Win32Platform.cpp @@ -251,7 +251,7 @@ void Win32Platform::MemoryBarrier() #endif } -void Win32Platform::Prefetch(void const* ptr) +void Win32Platform::MemoryPrefetch(void const* ptr) { #if _M_ARM64 __prefetch((char const*)ptr); diff --git a/Source/Engine/Platform/Win32/Win32Platform.h b/Source/Engine/Platform/Win32/Win32Platform.h index 5763641ee..36d982bf7 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.h +++ b/Source/Engine/Platform/Win32/Win32Platform.h @@ -23,6 +23,7 @@ public: static bool Init(); static void Exit(); static void MemoryBarrier(); + static void MemoryPrefetch(void const* ptr); static int64 InterlockedExchange(int64 volatile* dst, int64 exchange) { #if WIN64 @@ -83,7 +84,6 @@ public: _interlockedexchange64(dst, value); #endif } - static void Prefetch(void 
const* ptr); static void* Allocate(uint64 size, uint64 alignment); static void Free(void* ptr); static void* AllocatePages(uint64 numPages, uint64 pageSize); From bffb175a9bac1e68b74478f855fc0f7c884b21d1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 7 Jun 2025 01:25:22 +0200 Subject: [PATCH 030/211] Code fixes --- Source/Editor/Cooker/Steps/CookAssetsStep.cpp | 1 + Source/Engine/Animations/Graph/AnimGraph.cpp | 1 + Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp | 4 ++++ Source/Engine/Content/Assets/ModelBase.cpp | 1 + Source/Engine/Content/Assets/SkeletonMask.cpp | 1 + Source/Engine/Graphics/Models/MeshBase.cpp | 1 + Source/Engine/Graphics/RenderTargetPool.cpp | 1 + Source/Engine/Level/SceneObjectsFactory.h | 3 +++ Source/Engine/Navigation/NavMesh.cpp | 1 + Source/Engine/Navigation/NavMeshBuilder.cpp | 1 + Source/Engine/Networking/NetworkManager.cpp | 1 + Source/Engine/Terrain/Terrain.cpp | 1 + Source/Engine/Tools/ModelTool/ModelTool.cpp | 1 + 13 files changed, 18 insertions(+) diff --git a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp index 81bf5ef35..030c31c41 100644 --- a/Source/Editor/Cooker/Steps/CookAssetsStep.cpp +++ b/Source/Editor/Cooker/Steps/CookAssetsStep.cpp @@ -36,6 +36,7 @@ #include "Engine/Engine/Base/GameBase.h" #include "Engine/Engine/Globals.h" #include "Engine/Tools/TextureTool/TextureTool.h" +#include "Engine/Threading/Threading.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Scripting/Enums.h" #if PLATFORM_TOOLS_WINDOWS diff --git a/Source/Engine/Animations/Graph/AnimGraph.cpp b/Source/Engine/Animations/Graph/AnimGraph.cpp index 805d4747b..e99f53b8f 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.cpp @@ -6,6 +6,7 @@ #include "Engine/Content/Assets/SkinnedModel.h" #include "Engine/Graphics/Models/SkeletonData.h" #include "Engine/Scripting/Scripting.h" +#include "Engine/Threading/Threading.h" extern void 
RetargetSkeletonPose(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& mapping, const Transform* sourceNodes, Transform* targetNodes); diff --git a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp index 2dc9e85ba..9600a93fb 100644 --- a/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp +++ b/Source/Engine/Audio/XAudio2/AudioBackendXAudio2.cpp @@ -9,6 +9,7 @@ #include "Engine/Core/Log.h" #include "Engine/Audio/Audio.h" #include "Engine/Threading/Threading.h" +#include "Engine/Profiler/ProfilerMemory.h" #if PLATFORM_WINDOWS // Tweak Win ver @@ -232,6 +233,7 @@ void AudioBackendXAudio2::Listener_ReinitializeAll() uint32 AudioBackendXAudio2::Source_Add(const AudioDataInfo& format, const Vector3& position, const Quaternion& orientation, float volume, float pitch, float pan, bool loop, bool spatial, float attenuation, float minDistance, float doppler) { + PROFILE_MEM(Audio); ScopeLock lock(XAudio2::Locker); // Get first free source @@ -580,6 +582,7 @@ void AudioBackendXAudio2::Source_DequeueProcessedBuffers(uint32 sourceID) uint32 AudioBackendXAudio2::Buffer_Create() { + PROFILE_MEM(Audio); uint32 bufferID; ScopeLock lock(XAudio2::Locker); @@ -618,6 +621,7 @@ void AudioBackendXAudio2::Buffer_Delete(uint32 bufferID) void AudioBackendXAudio2::Buffer_Write(uint32 bufferID, byte* samples, const AudioDataInfo& info) { + PROFILE_MEM(Audio); CHECK(info.NumChannels <= MAX_INPUT_CHANNELS); XAudio2::Locker.Lock(); diff --git a/Source/Engine/Content/Assets/ModelBase.cpp b/Source/Engine/Content/Assets/ModelBase.cpp index 7d16639ab..2521966a2 100644 --- a/Source/Engine/Content/Assets/ModelBase.cpp +++ b/Source/Engine/Content/Assets/ModelBase.cpp @@ -10,6 +10,7 @@ #include "Engine/Graphics/Models/MeshBase.h" #include "Engine/Graphics/Models/MeshDeformation.h" #include "Engine/Graphics/Shaders/GPUVertexLayout.h" +#include "Engine/Threading/Threading.h" #if 
GPU_ENABLE_ASYNC_RESOURCES_CREATION #include "Engine/Threading/ThreadPoolTask.h" #define STREAM_TASK_BASE ThreadPoolTask diff --git a/Source/Engine/Content/Assets/SkeletonMask.cpp b/Source/Engine/Content/Assets/SkeletonMask.cpp index 51d6e15c6..2776ba02c 100644 --- a/Source/Engine/Content/Assets/SkeletonMask.cpp +++ b/Source/Engine/Content/Assets/SkeletonMask.cpp @@ -6,6 +6,7 @@ #include "Engine/Serialization/MemoryWriteStream.h" #include "Engine/Content/Factories/BinaryAssetFactory.h" #include "Engine/Content/Upgraders/SkeletonMaskUpgrader.h" +#include "Engine/Threading/Threading.h" REGISTER_BINARY_ASSET_WITH_UPGRADER(SkeletonMask, "FlaxEngine.SkeletonMask", SkeletonMaskUpgrader, true); diff --git a/Source/Engine/Graphics/Models/MeshBase.cpp b/Source/Engine/Graphics/Models/MeshBase.cpp index 62e65ee61..c62820748 100644 --- a/Source/Engine/Graphics/Models/MeshBase.cpp +++ b/Source/Engine/Graphics/Models/MeshBase.cpp @@ -16,6 +16,7 @@ #include "Engine/Scripting/ManagedCLR/MCore.h" #include "Engine/Serialization/MemoryReadStream.h" #include "Engine/Threading/Task.h" +#include "Engine/Threading/Threading.h" static_assert(MODEL_MAX_VB == 3, "Update code in mesh to match amount of vertex buffers."); diff --git a/Source/Engine/Graphics/RenderTargetPool.cpp b/Source/Engine/Graphics/RenderTargetPool.cpp index 9afd446e7..a1d243782 100644 --- a/Source/Engine/Graphics/RenderTargetPool.cpp +++ b/Source/Engine/Graphics/RenderTargetPool.cpp @@ -8,6 +8,7 @@ #include "Engine/Core/Log.h" #include "Engine/Engine/Engine.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Threading/Threading.h" struct Entry { diff --git a/Source/Engine/Level/SceneObjectsFactory.h b/Source/Engine/Level/SceneObjectsFactory.h index ab4138ea1..904bef696 100644 --- a/Source/Engine/Level/SceneObjectsFactory.h +++ b/Source/Engine/Level/SceneObjectsFactory.h @@ -4,6 +4,9 @@ #include "SceneObject.h" #include "Engine/Core/Collections/Dictionary.h" +#if USE_EDITOR +#include 
"Engine/Core/Collections/HashSet.h" +#endif #include "Engine/Platform/CriticalSection.h" #include "Engine/Threading/ThreadLocal.h" diff --git a/Source/Engine/Navigation/NavMesh.cpp b/Source/Engine/Navigation/NavMesh.cpp index 62017ad9e..6479542a1 100644 --- a/Source/Engine/Navigation/NavMesh.cpp +++ b/Source/Engine/Navigation/NavMesh.cpp @@ -4,6 +4,7 @@ #include "NavMeshRuntime.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Serialization/Serialization.h" +#include "Engine/Threading/Threading.h" #if COMPILE_WITH_ASSETS_IMPORTER #include "Engine/Core/Log.h" #include "Engine/ContentImporters/AssetsImportingManager.h" diff --git a/Source/Engine/Navigation/NavMeshBuilder.cpp b/Source/Engine/Navigation/NavMeshBuilder.cpp index d9f26522d..fdd938d9e 100644 --- a/Source/Engine/Navigation/NavMeshBuilder.cpp +++ b/Source/Engine/Navigation/NavMeshBuilder.cpp @@ -18,6 +18,7 @@ #include "Engine/Physics/Colliders/MeshCollider.h" #include "Engine/Physics/Colliders/SplineCollider.h" #include "Engine/Threading/ThreadPoolTask.h" +#include "Engine/Threading/Threading.h" #include "Engine/Terrain/TerrainPatch.h" #include "Engine/Terrain/Terrain.h" #include "Engine/Profiler/ProfilerCPU.h" diff --git a/Source/Engine/Networking/NetworkManager.cpp b/Source/Engine/Networking/NetworkManager.cpp index 3dc244bba..784bbf51e 100644 --- a/Source/Engine/Networking/NetworkManager.cpp +++ b/Source/Engine/Networking/NetworkManager.cpp @@ -16,6 +16,7 @@ #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Scripting.h" +#include "Engine/Threading/Threading.h" float NetworkManager::NetworkFPS = 60.0f; NetworkPeer* NetworkManager::Peer = nullptr; diff --git a/Source/Engine/Terrain/Terrain.cpp b/Source/Engine/Terrain/Terrain.cpp index 85274b8fb..72ed1e7e1 100644 --- a/Source/Engine/Terrain/Terrain.cpp +++ b/Source/Engine/Terrain/Terrain.cpp @@ -4,6 +4,7 @@ #include "TerrainPatch.h" #include "Engine/Core/Log.h" #include 
"Engine/Core/Math/Ray.h" +#include "Engine/Core/Collections/HashSet.h" #include "Engine/Level/Scene/SceneRendering.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Physics/Physics.h" diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index fe268c350..cdcd799b3 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -11,6 +11,7 @@ #include "Engine/Platform/ConditionVariable.h" #include "Engine/Profiler/Profiler.h" #include "Engine/Threading/JobSystem.h" +#include "Engine/Threading/Threading.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUBuffer.h" #include "Engine/Graphics/RenderTools.h" From 73c30d3d8923cbfa21f8b1239f4f615cd7a14658 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 8 Jun 2025 00:58:15 +0200 Subject: [PATCH 031/211] Optimize asset references to support direct registration to reduce `Delegate` memory allocations and overhead --- Source/Engine/AI/Behavior.cpp | 15 +- Source/Engine/AI/Behavior.h | 7 +- Source/Engine/Audio/AudioSource.cpp | 11 +- Source/Engine/Audio/AudioSource.h | 8 +- Source/Engine/Content/Asset.cpp | 192 +++++++++++------- Source/Engine/Content/Asset.h | 34 +++- Source/Engine/Content/AssetReference.h | 33 ++- Source/Engine/Content/JsonAssetReference.h | 9 + Source/Engine/Content/SoftAssetReference.h | 16 +- Source/Engine/Content/WeakAssetReference.h | 18 +- Source/Engine/Level/Actors/AnimatedModel.cpp | 27 ++- Source/Engine/Level/Actors/AnimatedModel.h | 7 +- Source/Engine/Level/Actors/Sky.cpp | 1 - Source/Engine/Level/Actors/StaticModel.cpp | 11 +- Source/Engine/Level/Actors/StaticModel.h | 9 +- Source/Engine/Navigation/NavMesh.cpp | 12 +- Source/Engine/Navigation/NavMesh.h | 8 +- Source/Engine/Particles/ParticleEffect.cpp | 12 +- Source/Engine/Particles/ParticleEffect.h | 7 +- Source/Engine/Physics/Colliders/Collider.cpp | 28 ++- Source/Engine/Physics/Colliders/Collider.h | 10 +- 
.../Engine/Physics/Colliders/MeshCollider.cpp | 24 ++- .../Engine/Physics/Colliders/MeshCollider.h | 2 + Source/Engine/Renderer/ColorGradingPass.cpp | 1 - Source/Engine/Renderer/ForwardPass.cpp | 3 +- Source/Engine/Renderer/VolumetricFogPass.cpp | 1 - 26 files changed, 365 insertions(+), 141 deletions(-) diff --git a/Source/Engine/AI/Behavior.cpp b/Source/Engine/AI/Behavior.cpp index 7e647e945..442051d73 100644 --- a/Source/Engine/AI/Behavior.cpp +++ b/Source/Engine/AI/Behavior.cpp @@ -73,9 +73,9 @@ void BehaviorService::Dispose() Behavior::Behavior(const SpawnParams& params) : Script(params) + , Tree(this) { _knowledge.Behavior = this; - Tree.Changed.Bind(this); } void Behavior::UpdateAsync() @@ -175,6 +175,19 @@ void Behavior::OnDisable() BehaviorServiceInstance.UpdateList.Remove(this); } +void Behavior::OnAssetChanged(Asset* asset, void* caller) +{ + ResetLogic(); +} + +void Behavior::OnAssetLoaded(Asset* asset, void* caller) +{ +} + +void Behavior::OnAssetUnloaded(Asset* asset, void* caller) +{ +} + #if USE_EDITOR bool Behavior::GetNodeDebugRelevancy(const BehaviorTreeNode* node, const Behavior* behavior) diff --git a/Source/Engine/AI/Behavior.h b/Source/Engine/AI/Behavior.h index 4919847fa..b1c39ffc6 100644 --- a/Source/Engine/AI/Behavior.h +++ b/Source/Engine/AI/Behavior.h @@ -11,7 +11,7 @@ /// /// Behavior instance script that runs Behavior Tree execution. 
/// -API_CLASS(Attributes="Category(\"Flax Engine\")") class FLAXENGINE_API Behavior : public Script +API_CLASS(Attributes="Category(\"Flax Engine\")") class FLAXENGINE_API Behavior : public Script, private IAssetReference { API_AUTO_SERIALIZATION(); DECLARE_SCRIPTING_TYPE(Behavior); @@ -92,6 +92,11 @@ public: void OnDisable() override; private: + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; + #if USE_EDITOR // Editor-only utilities to debug nodes state. API_FUNCTION(Internal) static bool GetNodeDebugRelevancy(const BehaviorTreeNode* node, const Behavior* behavior); diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index cff89e7e1..2a061ad48 100644 --- a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -21,9 +21,8 @@ AudioSource::AudioSource(const SpawnParams& params) , _playOnStart(false) , _startTime(0.0f) , _allowSpatialization(true) + , Clip(this) { - Clip.Changed.Bind(this); - Clip.Loaded.Bind(this); } void AudioSource::SetVolume(float value) @@ -264,7 +263,7 @@ void AudioSource::RequestStreamingBuffersUpdate() _needToUpdateStreamingBuffers = true; } -void AudioSource::OnClipChanged() +void AudioSource::OnAssetChanged(Asset* asset, void* caller) { Stop(); @@ -276,7 +275,7 @@ void AudioSource::OnClipChanged() } } -void AudioSource::OnClipLoaded() +void AudioSource::OnAssetLoaded(Asset* asset, void* caller) { if (!SourceID) return; @@ -302,6 +301,10 @@ void AudioSource::OnClipLoaded() } } +void AudioSource::OnAssetUnloaded(Asset* asset, void* caller) +{ +} + bool AudioSource::UseStreaming() const { if (Clip == nullptr || Clip->WaitForLoaded()) diff --git a/Source/Engine/Audio/AudioSource.h b/Source/Engine/Audio/AudioSource.h index b83a2b408..9d6d28ab4 100644 --- a/Source/Engine/Audio/AudioSource.h +++ b/Source/Engine/Audio/AudioSource.h @@ 
-13,7 +13,7 @@ /// Whether or not an audio source is spatial is controlled by the assigned AudioClip.The volume and the pitch of a spatial audio source is controlled by its position and the AudioListener's position/direction/velocity. /// API_CLASS(Attributes="ActorContextMenu(\"New/Audio/Audio Source\"), ActorToolbox(\"Other\")") -class FLAXENGINE_API AudioSource : public Actor +class FLAXENGINE_API AudioSource : public Actor, IAssetReference { DECLARE_SCENE_OBJECT(AudioSource); friend class AudioStreamingHandler; @@ -293,8 +293,10 @@ public: void RequestStreamingBuffersUpdate(); private: - void OnClipChanged(); - void OnClipLoaded(); + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; /// /// Plays the audio source. Should have buffer(s) binded before. diff --git a/Source/Engine/Content/Asset.cpp b/Source/Engine/Content/Asset.cpp index fd4fae421..86801b078 100644 --- a/Source/Engine/Content/Asset.cpp +++ b/Source/Engine/Content/Asset.cpp @@ -9,6 +9,7 @@ #include "Engine/Core/Log.h" #include "Engine/Core/LogContext.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/ManagedCLR/MCore.h" #include "Engine/Threading/MainThreadTask.h" #include "Engine/Threading/ThreadLocal.h" @@ -34,15 +35,18 @@ bool ContentDeprecated::Clear(bool newValue) #endif +AssetReferenceBase::AssetReferenceBase(IAssetReference* owner) + : _owner(owner) +{ +} + AssetReferenceBase::~AssetReferenceBase() { Asset* asset = _asset; if (asset) { _asset = nullptr; - asset->OnLoaded.Unbind(this); - asset->OnUnloaded.Unbind(this); - asset->RemoveReference(); + asset->RemoveReference(this); } } @@ -51,52 +55,60 @@ String AssetReferenceBase::ToString() const return _asset ? 
_asset->ToString() : TEXT(""); } +void AssetReferenceBase::OnAssetChanged(Asset* asset, void* caller) +{ + if (_owner) + _owner->OnAssetChanged(asset, this); +} + +void AssetReferenceBase::OnAssetLoaded(Asset* asset, void* caller) +{ + if (_asset != asset) + return; + Loaded(); + if (_owner) + _owner->OnAssetLoaded(asset, this); +} + +void AssetReferenceBase::OnAssetUnloaded(Asset* asset, void* caller) +{ + if (_asset != asset) + return; + Unload(); + OnSet(nullptr); + if (_owner) + _owner->OnAssetUnloaded(asset, this); +} + void AssetReferenceBase::OnSet(Asset* asset) { auto e = _asset; if (e != asset) { if (e) - { - e->OnLoaded.Unbind(this); - e->OnUnloaded.Unbind(this); - e->RemoveReference(); - } + e->RemoveReference(this); _asset = e = asset; if (e) - { - e->AddReference(); - e->OnLoaded.Bind(this); - e->OnUnloaded.Bind(this); - } + e->AddReference(this); Changed(); + if (_owner) + _owner->OnAssetChanged(asset, this); if (e && e->IsLoaded()) + { Loaded(); + if (_owner) + _owner->OnAssetLoaded(asset, this); + } } } -void AssetReferenceBase::OnLoaded(Asset* asset) -{ - if (_asset != asset) - return; - Loaded(); -} - -void AssetReferenceBase::OnUnloaded(Asset* asset) -{ - if (_asset != asset) - return; - Unload(); - OnSet(nullptr); -} - WeakAssetReferenceBase::~WeakAssetReferenceBase() { Asset* asset = _asset; if (asset) { _asset = nullptr; - asset->OnUnloaded.Unbind(this); + asset->RemoveReference(this, true); } } @@ -105,36 +117,43 @@ String WeakAssetReferenceBase::ToString() const return _asset ? 
_asset->ToString() : TEXT(""); } +void WeakAssetReferenceBase::OnAssetChanged(Asset* asset, void* caller) +{ +} + +void WeakAssetReferenceBase::OnAssetLoaded(Asset* asset, void* caller) +{ +} + +void WeakAssetReferenceBase::OnAssetUnloaded(Asset* asset, void* caller) +{ + if (_asset != asset) + return; + Unload(); + asset->RemoveReference(this, true); + _asset = nullptr; +} + void WeakAssetReferenceBase::OnSet(Asset* asset) { auto e = _asset; if (e != asset) { if (e) - e->OnUnloaded.Unbind(this); + e->RemoveReference(this, true); _asset = e = asset; if (e) - e->OnUnloaded.Bind(this); + e->AddReference(this, true); } } -void WeakAssetReferenceBase::OnUnloaded(Asset* asset) -{ - if (_asset != asset) - return; - Unload(); - asset->OnUnloaded.Unbind(this); - _asset = nullptr; -} - SoftAssetReferenceBase::~SoftAssetReferenceBase() { Asset* asset = _asset; if (asset) { _asset = nullptr; - asset->OnUnloaded.Unbind(this); - asset->RemoveReference(); + asset->RemoveReference(this); } #if !BUILD_RELEASE _id = Guid::Empty; @@ -146,22 +165,34 @@ String SoftAssetReferenceBase::ToString() const return _asset ? _asset->ToString() : (_id.IsValid() ? _id.ToString() : TEXT("")); } +void SoftAssetReferenceBase::OnAssetChanged(Asset* asset, void* caller) +{ +} + +void SoftAssetReferenceBase::OnAssetLoaded(Asset* asset, void* caller) +{ +} + +void SoftAssetReferenceBase::OnAssetUnloaded(Asset* asset, void* caller) +{ + if (_asset != asset) + return; + _asset->RemoveReference(this); + _asset = nullptr; + _id = Guid::Empty; + Changed(); +} + void SoftAssetReferenceBase::OnSet(Asset* asset) { if (_asset == asset) return; if (_asset) - { - _asset->OnUnloaded.Unbind(this); - _asset->RemoveReference(); - } + _asset->RemoveReference(this); _asset = asset; _id = asset ? 
asset->GetID() : Guid::Empty; if (asset) - { - asset->AddReference(); - asset->OnUnloaded.Bind(this); - } + asset->AddReference(this); Changed(); } @@ -170,10 +201,7 @@ void SoftAssetReferenceBase::OnSet(const Guid& id) if (_id == id) return; if (_asset) - { - _asset->OnUnloaded.Unbind(this); - _asset->RemoveReference(); - } + _asset->RemoveReference(this); _asset = nullptr; _id = id; Changed(); @@ -184,21 +212,7 @@ void SoftAssetReferenceBase::OnResolve(const ScriptingTypeHandle& type) ASSERT(!_asset); _asset = ::LoadAsset(_id, type); if (_asset) - { - _asset->OnUnloaded.Bind(this); - _asset->AddReference(); - } -} - -void SoftAssetReferenceBase::OnUnloaded(Asset* asset) -{ - if (_asset != asset) - return; - _asset->RemoveReference(); - _asset->OnUnloaded.Unbind(this); - _asset = nullptr; - _id = Guid::Empty; - Changed(); + _asset->AddReference(this); } Asset::Asset(const SpawnParams& params, const AssetInfo* info) @@ -216,6 +230,41 @@ int32 Asset::GetReferencesCount() const return (int32)Platform::AtomicRead(const_cast(&_refCount)); } +void Asset::AddReference() +{ + Platform::InterlockedIncrement(&_refCount); +} + +void Asset::AddReference(IAssetReference* ref, bool week) +{ + if (!week) + Platform::InterlockedIncrement(&_refCount); + if (ref) + { + //PROFILE_MEM(EngineDelegate); // Include references tracking memory within Delegate memory + Locker.Lock(); + _references.Add(ref); + Locker.Unlock(); + } +} + +void Asset::RemoveReference() +{ + Platform::InterlockedDecrement(&_refCount); +} + +void Asset::RemoveReference(IAssetReference* ref, bool week) +{ + if (ref) + { + Locker.Lock(); + _references.Remove(ref); + Locker.Unlock(); + } + if (!week) + Platform::InterlockedDecrement(&_refCount); +} + String Asset::ToString() const { return String::Format(TEXT("{0}, {1}, {2}"), GetTypeName(), GetID(), GetPath()); @@ -354,6 +403,7 @@ uint64 Asset::GetMemoryUsage() const if (Platform::AtomicRead(&_loadingTask)) result += sizeof(ContentLoadTask); result += 
(OnLoaded.Capacity() + OnReloading.Capacity() + OnUnloaded.Capacity()) * sizeof(EventType::FunctionType); + result += _references.Capacity() * sizeof(HashSet::Bucket); Locker.Unlock(); return result; } @@ -628,6 +678,8 @@ void Asset::onLoaded_MainThread() ASSERT(IsInMainThread()); // Send event + for (const auto& e : _references) + e.Item->OnAssetLoaded(this, this); OnLoaded(this); } @@ -641,6 +693,8 @@ void Asset::onUnload_MainThread() CancelStreaming(); // Send event + for (const auto& e : _references) + e.Item->OnAssetUnloaded(this, this); OnUnloaded(this); } diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index c16ea337e..17d8c8b5f 100644 --- a/Source/Engine/Content/Asset.h +++ b/Source/Engine/Content/Asset.h @@ -18,6 +18,18 @@ public: \ explicit type(const SpawnParams& params, const AssetInfo* info) +// Utility interface for objects that reference asset and want to get notified about asset reference changes. +class FLAXENGINE_API IAssetReference +{ +public: + // Asset reference got changed. + virtual void OnAssetChanged(Asset* asset, void* caller) = 0; + // Asset got loaded. + virtual void OnAssetLoaded(Asset* asset, void* caller) = 0; + // Asset gets unloaded. + virtual void OnAssetUnloaded(Asset* asset, void* caller) = 0; +}; + /// /// Asset objects base class. /// @@ -48,6 +60,8 @@ protected: int8 _deleteFileOnUnload : 1; // Indicates that asset source file should be removed on asset unload int8 _isVirtual : 1; // Indicates that asset is pure virtual (generated or temporary, has no storage so won't be saved) + HashSet _references; + public: /// /// Initializes a new instance of the class. @@ -88,18 +102,22 @@ public: /// /// Adds reference to that asset. /// - FORCE_INLINE void AddReference() - { - Platform::InterlockedIncrement(&_refCount); - } + void AddReference(); + + /// + /// Adds reference to that asset. + /// + void AddReference(IAssetReference* ref, bool week = false); /// /// Removes reference from that asset. 
/// - FORCE_INLINE void RemoveReference() - { - Platform::InterlockedDecrement(&_refCount); - } + void RemoveReference(); + + /// + /// Removes reference from that asset. + /// + void RemoveReference(IAssetReference* ref, bool week = false); public: /// diff --git a/Source/Engine/Content/AssetReference.h b/Source/Engine/Content/AssetReference.h index 09e637e57..cd380a39c 100644 --- a/Source/Engine/Content/AssetReference.h +++ b/Source/Engine/Content/AssetReference.h @@ -7,10 +7,11 @@ /// /// Asset reference utility. Keeps reference to the linked asset object and handles load/unload events. /// -class FLAXENGINE_API AssetReferenceBase +class FLAXENGINE_API AssetReferenceBase : public IAssetReference { protected: Asset* _asset = nullptr; + IAssetReference* _owner = nullptr; public: /// @@ -36,6 +37,12 @@ public: /// AssetReferenceBase() = default; + /// + /// Initializes a new instance of the class. + /// + /// The reference owner to keep notified about asset changes. + AssetReferenceBase(IAssetReference* owner); + /// /// Finalizes an instance of the class. /// @@ -63,10 +70,14 @@ public: /// String ToString() const; +public: + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; + protected: void OnSet(Asset* asset); - void OnLoaded(Asset* asset); - void OnUnloaded(Asset* asset); }; /// @@ -87,6 +98,13 @@ public: { } + /// + /// Initializes a new instance of the class. + /// + explicit AssetReference(decltype(__nullptr)) + { + } + /// /// Initializes a new instance of the class. /// @@ -96,6 +114,15 @@ public: OnSet((Asset*)asset); } + /// + /// Initializes a new instance of the class. + /// + /// The reference owner to keep notified about asset changes. + explicit AssetReference(IAssetReference* owner) + : AssetReferenceBase(owner) + { + } + /// /// Initializes a new instance of the class. 
/// diff --git a/Source/Engine/Content/JsonAssetReference.h b/Source/Engine/Content/JsonAssetReference.h index 7e0ef528a..4325ca79e 100644 --- a/Source/Engine/Content/JsonAssetReference.h +++ b/Source/Engine/Content/JsonAssetReference.h @@ -19,6 +19,15 @@ API_STRUCT(NoDefault, Template, MarshalAs=JsonAsset*) struct JsonAssetReference OnSet(asset); } + explicit JsonAssetReference(decltype(__nullptr)) + { + } + + explicit JsonAssetReference(IAssetReference* owner) + : AssetReference(owner) + { + } + /// /// Gets the deserialized native object instance of the given type. Returns null if asset is not loaded or loaded object has different type. /// diff --git a/Source/Engine/Content/SoftAssetReference.h b/Source/Engine/Content/SoftAssetReference.h index e02fd1a4a..ef9adcde1 100644 --- a/Source/Engine/Content/SoftAssetReference.h +++ b/Source/Engine/Content/SoftAssetReference.h @@ -7,7 +7,7 @@ /// /// The asset soft reference. Asset gets referenced (loaded) on actual use (ID reference is resolving it). /// -class FLAXENGINE_API SoftAssetReferenceBase +class FLAXENGINE_API SoftAssetReferenceBase : public IAssetReference { protected: Asset* _asset = nullptr; @@ -46,11 +46,16 @@ public: /// String ToString() const; +public: + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; + protected: void OnSet(Asset* asset); void OnSet(const Guid& id); void OnResolve(const ScriptingTypeHandle& type); - void OnUnloaded(Asset* asset); }; /// @@ -71,6 +76,13 @@ public: { } + /// + /// Initializes a new instance of the class. + /// + explicit SoftAssetReference(decltype(__nullptr)) + { + } + /// /// Initializes a new instance of the class. 
/// diff --git a/Source/Engine/Content/WeakAssetReference.h b/Source/Engine/Content/WeakAssetReference.h index c6df857d2..d67be0643 100644 --- a/Source/Engine/Content/WeakAssetReference.h +++ b/Source/Engine/Content/WeakAssetReference.h @@ -7,7 +7,7 @@ /// /// Asset reference utility that doesn't add reference to that asset. Handles asset unload event. /// -API_CLASS(InBuild) class WeakAssetReferenceBase +API_CLASS(InBuild) class WeakAssetReferenceBase : public IAssetReference { public: typedef Delegate<> EventType; @@ -56,9 +56,14 @@ public: /// String ToString() const; +public: + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; + protected: void OnSet(Asset* asset); - void OnUnloaded(Asset* asset); }; /// @@ -72,7 +77,13 @@ public: /// Initializes a new instance of the class. /// WeakAssetReference() - : WeakAssetReferenceBase() + { + } + + /// + /// Initializes a new instance of the class. + /// + explicit WeakAssetReference(decltype(__nullptr)) { } @@ -81,7 +92,6 @@ public: /// /// The asset to set. 
WeakAssetReference(T* asset) - : WeakAssetReferenceBase() { OnSet(asset); } diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index ee95d6233..c4225ef94 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -27,16 +27,13 @@ AnimatedModel::AnimatedModel(const SpawnParams& params) , _counter(0) , _lastMinDstSqr(MAX_Real) , _lastUpdateFrame(0) + , SkinnedModel(this) + , AnimationGraph(this) { _drawCategory = SceneRendering::SceneDrawAsync; GraphInstance.Object = this; _box = BoundingBox(Vector3::Zero); _sphere = BoundingSphere(Vector3::Zero, 0.0f); - - SkinnedModel.Changed.Bind(this); - SkinnedModel.Loaded.Bind(this); - AnimationGraph.Changed.Bind(this); - AnimationGraph.Loaded.Bind(this); } AnimatedModel::~AnimatedModel() @@ -889,6 +886,26 @@ void AnimatedModel::OnGraphLoaded() SyncParameters(); } +void AnimatedModel::OnAssetChanged(Asset* asset, void* caller) +{ + if (caller == &SkinnedModel) + OnSkinnedModelChanged(); + else if (caller == &AnimationGraph) + OnGraphChanged(); +} + +void AnimatedModel::OnAssetLoaded(Asset* asset, void* caller) +{ + if (caller == &SkinnedModel) + OnSkinnedModelLoaded(); + else if (caller == &AnimationGraph) + OnGraphLoaded(); +} + +void AnimatedModel::OnAssetUnloaded(Asset* asset, void* caller) +{ +} + bool AnimatedModel::HasContentLoaded() const { return (SkinnedModel == nullptr || SkinnedModel->IsLoaded()) && Entries.HasContentLoaded(); diff --git a/Source/Engine/Level/Actors/AnimatedModel.h b/Source/Engine/Level/Actors/AnimatedModel.h index e13e515d9..6f87ab9cd 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.h +++ b/Source/Engine/Level/Actors/AnimatedModel.h @@ -13,7 +13,7 @@ /// Performs an animation and renders a skinned model. 
/// API_CLASS(Attributes="ActorContextMenu(\"New/Animation/Animated Model\"), ActorToolbox(\"Visuals\")") -class FLAXENGINE_API AnimatedModel : public ModelInstanceActor +class FLAXENGINE_API AnimatedModel : public ModelInstanceActor, IAssetReference { DECLARE_SCENE_OBJECT(AnimatedModel); friend class AnimationsSystem; @@ -422,6 +422,11 @@ private: void OnGraphChanged(); void OnGraphLoaded(); + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; + public: // [ModelInstanceActor] bool HasContentLoaded() const override; diff --git a/Source/Engine/Level/Actors/Sky.cpp b/Source/Engine/Level/Actors/Sky.cpp index 4e635489f..0aa3bf070 100644 --- a/Source/Engine/Level/Actors/Sky.cpp +++ b/Source/Engine/Level/Actors/Sky.cpp @@ -30,7 +30,6 @@ GPU_CB_STRUCT(Data { Sky::Sky(const SpawnParams& params) : Actor(params) - , _shader(nullptr) , _psSky(nullptr) , _psFog(nullptr) { diff --git a/Source/Engine/Level/Actors/StaticModel.cpp b/Source/Engine/Level/Actors/StaticModel.cpp index 38c1eed90..912009b3b 100644 --- a/Source/Engine/Level/Actors/StaticModel.cpp +++ b/Source/Engine/Level/Actors/StaticModel.cpp @@ -29,10 +29,9 @@ StaticModel::StaticModel(const SpawnParams& params) , _vertexColorsDirty(false) , _vertexColorsCount(0) , _sortOrder(0) + , Model(this) { _drawCategory = SceneRendering::SceneDrawAsync; - Model.Changed.Bind(this); - Model.Loaded.Bind(this); } StaticModel::~StaticModel() @@ -224,7 +223,7 @@ void StaticModel::RemoveVertexColors() _vertexColorsDirty = false; } -void StaticModel::OnModelChanged() +void StaticModel::OnAssetChanged(Asset* asset, void* caller) { if (_residencyChangedModel) { @@ -241,7 +240,7 @@ void StaticModel::OnModelChanged() GetSceneRendering()->RemoveActor(this, _sceneRenderingKey); } -void StaticModel::OnModelLoaded() +void StaticModel::OnAssetLoaded(Asset* asset, void* caller) { 
Entries.SetupIfInvalid(Model); UpdateBounds(); @@ -316,6 +315,10 @@ void StaticModel::FlushVertexColors() RenderContext::GPULocker.Unlock(); } +void StaticModel::OnAssetUnloaded(Asset* asset, void* caller) +{ +} + bool StaticModel::HasContentLoaded() const { return (Model == nullptr || Model->IsLoaded()) && Entries.HasContentLoaded(); diff --git a/Source/Engine/Level/Actors/StaticModel.h b/Source/Engine/Level/Actors/StaticModel.h index 09553f577..e6ed701cc 100644 --- a/Source/Engine/Level/Actors/StaticModel.h +++ b/Source/Engine/Level/Actors/StaticModel.h @@ -11,7 +11,7 @@ /// Renders model on the screen. /// API_CLASS(Attributes="ActorContextMenu(\"New/Model\"), ActorToolbox(\"Visuals\")") -class FLAXENGINE_API StaticModel : public ModelInstanceActor +class FLAXENGINE_API StaticModel : public ModelInstanceActor, IAssetReference { DECLARE_SCENE_OBJECT(StaticModel); private: @@ -154,11 +154,14 @@ public: API_FUNCTION() void RemoveVertexColors(); private: - void OnModelChanged(); - void OnModelLoaded(); void OnModelResidencyChanged(); void FlushVertexColors(); + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; + public: // [ModelInstanceActor] bool HasContentLoaded() const override; diff --git a/Source/Engine/Navigation/NavMesh.cpp b/Source/Engine/Navigation/NavMesh.cpp index 6479542a1..ee3e48f3e 100644 --- a/Source/Engine/Navigation/NavMesh.cpp +++ b/Source/Engine/Navigation/NavMesh.cpp @@ -17,8 +17,8 @@ NavMesh::NavMesh(const SpawnParams& params) : Actor(params) , IsDataDirty(false) + , DataAsset(this) { - DataAsset.Loaded.Bind(this); } void NavMesh::SaveNavMesh() @@ -100,7 +100,11 @@ void NavMesh::RemoveTiles() navMesh->RemoveTiles(this); } -void NavMesh::OnDataAssetLoaded() +void NavMesh::OnAssetChanged(Asset* asset, void* caller) +{ +} + +void NavMesh::OnAssetLoaded(Asset* asset, void* caller) { // Skip if 
already has data (prevent reloading navmesh on saving) if (Data.Tiles.HasItems()) @@ -126,6 +130,10 @@ void NavMesh::OnDataAssetLoaded() } } +void NavMesh::OnAssetUnloaded(Asset* asset, void* caller) +{ +} + void NavMesh::Serialize(SerializeStream& stream, const void* otherObj) { // Base diff --git a/Source/Engine/Navigation/NavMesh.h b/Source/Engine/Navigation/NavMesh.h index 40423696d..fcfbd27e2 100644 --- a/Source/Engine/Navigation/NavMesh.h +++ b/Source/Engine/Navigation/NavMesh.h @@ -15,7 +15,7 @@ class NavMeshRuntime; /// The navigation mesh actor that holds a navigation data for a scene. /// API_CLASS(Attributes="ActorContextMenu(\"New/Navigation/Nav Mesh\")") -class FLAXENGINE_API NavMesh : public Actor +class FLAXENGINE_API NavMesh : public Actor, IAssetReference { DECLARE_SCENE_OBJECT(NavMesh); public: @@ -67,7 +67,11 @@ public: private: void AddTiles(); void RemoveTiles(); - void OnDataAssetLoaded(); + + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; private: bool _navMeshActive = false; diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index 1359dbcf2..93ccef55c 100644 --- a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -16,12 +16,10 @@ ParticleEffect::ParticleEffect(const SpawnParams& params) : Actor(params) , _lastUpdateFrame(0) , _lastMinDstSqr(MAX_Real) + , ParticleSystem(this) { _box = BoundingBox(_transform.Translation); BoundingSphere::FromBox(_box, _sphere); - - ParticleSystem.Changed.Bind(this); - ParticleSystem.Loaded.Bind(this); } void ParticleEffectParameter::Init(ParticleEffect* effect, int32 emitterIndex, int32 paramIndex) @@ -542,18 +540,22 @@ void ParticleEffect::ApplyModifiedParameters() } } -void ParticleEffect::OnParticleSystemModified() +void ParticleEffect::OnAssetChanged(Asset* 
asset, void* caller) { Instance.ClearState(); _parameters.Resize(0); _parametersVersion = 0; } -void ParticleEffect::OnParticleSystemLoaded() +void ParticleEffect::OnAssetLoaded(Asset* asset, void* caller) { ApplyModifiedParameters(); } +void ParticleEffect::OnAssetUnloaded(Asset* asset, void* caller) +{ +} + bool ParticleEffect::HasContentLoaded() const { if (ParticleSystem == nullptr) diff --git a/Source/Engine/Particles/ParticleEffect.h b/Source/Engine/Particles/ParticleEffect.h index 9be31c4c7..8529732dd 100644 --- a/Source/Engine/Particles/ParticleEffect.h +++ b/Source/Engine/Particles/ParticleEffect.h @@ -118,7 +118,7 @@ public: /// The particle system instance that plays the particles simulation in the game. /// API_CLASS(Attributes="ActorContextMenu(\"New/Visuals/Particle Effect\"), ActorToolbox(\"Visuals\")") -class FLAXENGINE_API ParticleEffect : public Actor +class FLAXENGINE_API ParticleEffect : public Actor, IAssetReference { DECLARE_SCENE_OBJECT(ParticleEffect); public: @@ -388,6 +388,11 @@ private: void OnParticleSystemModified(); void OnParticleSystemLoaded(); + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; + public: // [Actor] bool HasContentLoaded() const override; diff --git a/Source/Engine/Physics/Colliders/Collider.cpp b/Source/Engine/Physics/Colliders/Collider.cpp index 0ff51e8e6..5ebef0a9f 100644 --- a/Source/Engine/Physics/Colliders/Collider.cpp +++ b/Source/Engine/Physics/Colliders/Collider.cpp @@ -20,10 +20,8 @@ Collider::Collider(const SpawnParams& params) , _staticActor(nullptr) , _cachedScale(1.0f) , _contactOffset(2.0f) + , Material(this) { - Material.Loaded.Bind(this); - Material.Unload.Bind(this); - Material.Changed.Bind(this); } void* Collider::GetPhysicsShape() const @@ -294,13 +292,6 @@ void Collider::DrawPhysicsDebug(RenderView& view) #endif -void Collider::OnMaterialChanged() 
-{ - // Update the shape material - if (_shape) - PhysicsBackend::SetShapeMaterial(_shape, Material); -} - void Collider::BeginPlay(SceneBeginData* data) { // Check if has no shape created (it means no rigidbody requested it but also collider may be spawned at runtime) @@ -466,3 +457,20 @@ void Collider::OnPhysicsSceneChanged(PhysicsScene* previous) PhysicsBackend::AddSceneActor(scene, _staticActor); } } + +void Collider::OnAssetChanged(Asset* asset, void* caller) +{ + // Update the shape material + if (_shape && caller == &Material) + PhysicsBackend::SetShapeMaterial(_shape, Material); +} + +void Collider::OnAssetLoaded(Asset* asset, void* caller) +{ + Collider::OnAssetChanged(asset, caller); +} + +void Collider::OnAssetUnloaded(Asset* asset, void* caller) +{ + Collider::OnAssetChanged(asset, caller); +} diff --git a/Source/Engine/Physics/Colliders/Collider.h b/Source/Engine/Physics/Colliders/Collider.h index 835d89a22..17e1d7883 100644 --- a/Source/Engine/Physics/Colliders/Collider.h +++ b/Source/Engine/Physics/Colliders/Collider.h @@ -15,7 +15,7 @@ class RigidBody; /// /// /// -API_CLASS(Abstract) class FLAXENGINE_API Collider : public PhysicsColliderActor +API_CLASS(Abstract) class FLAXENGINE_API Collider : public PhysicsColliderActor, protected IAssetReference { API_AUTO_SERIALIZATION(); DECLARE_SCENE_OBJECT_ABSTRACT(Collider); @@ -154,9 +154,6 @@ protected: /// void RemoveStaticActor(); -private: - void OnMaterialChanged(); - public: // [PhysicsColliderActor] RigidBody* GetAttachedRigidBody() const override; @@ -181,4 +178,9 @@ protected: void OnLayerChanged() override; void OnStaticFlagsChanged() override; void OnPhysicsSceneChanged(PhysicsScene* previous) override; + + // [IAssetReference] + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; + void OnAssetUnloaded(Asset* asset, void* caller) override; }; diff --git a/Source/Engine/Physics/Colliders/MeshCollider.cpp 
b/Source/Engine/Physics/Colliders/MeshCollider.cpp index 0902f1106..e4e6948e3 100644 --- a/Source/Engine/Physics/Colliders/MeshCollider.cpp +++ b/Source/Engine/Physics/Colliders/MeshCollider.cpp @@ -11,9 +11,8 @@ MeshCollider::MeshCollider(const SpawnParams& params) : Collider(params) + , CollisionData(this) { - CollisionData.Changed.Bind(this); - CollisionData.Loaded.Bind(this); } void MeshCollider::OnCollisionDataChanged() @@ -33,8 +32,9 @@ void MeshCollider::OnCollisionDataChanged() void MeshCollider::OnCollisionDataLoaded() { - UpdateGeometry(); - UpdateBounds(); + // Not needed as OnCollisionDataChanged waits for it to be loaded + //UpdateGeometry(); + //UpdateBounds(); } bool MeshCollider::CanAttach(RigidBody* rigidBody) const @@ -152,3 +152,19 @@ void MeshCollider::GetGeometry(CollisionShape& collision) else collision.SetSphere(minSize); } + +void MeshCollider::OnAssetChanged(Asset* asset, void* caller) +{ + Collider::OnAssetChanged(asset, caller); + + if (caller == &CollisionData) + OnCollisionDataChanged(); +} + +void MeshCollider::OnAssetLoaded(Asset* asset, void* caller) +{ + Collider::OnAssetLoaded(asset, caller); + + if (caller == &CollisionData) + OnCollisionDataLoaded(); +} diff --git a/Source/Engine/Physics/Colliders/MeshCollider.h b/Source/Engine/Physics/Colliders/MeshCollider.h index e6b1b7a82..89f013552 100644 --- a/Source/Engine/Physics/Colliders/MeshCollider.h +++ b/Source/Engine/Physics/Colliders/MeshCollider.h @@ -42,4 +42,6 @@ protected: #endif void UpdateBounds() override; void GetGeometry(CollisionShape& collision) override; + void OnAssetChanged(Asset* asset, void* caller) override; + void OnAssetLoaded(Asset* asset, void* caller) override; }; diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp b/Source/Engine/Renderer/ColorGradingPass.cpp index 3b531b30f..43b49f091 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -39,7 +39,6 @@ GPU_CB_STRUCT(Data { 
ColorGradingPass::ColorGradingPass() : _useVolumeTexture(false) , _lutFormat() - , _shader(nullptr) { } diff --git a/Source/Engine/Renderer/ForwardPass.cpp b/Source/Engine/Renderer/ForwardPass.cpp index caf624609..c0d57b498 100644 --- a/Source/Engine/Renderer/ForwardPass.cpp +++ b/Source/Engine/Renderer/ForwardPass.cpp @@ -14,8 +14,7 @@ #include "Engine/Graphics/Shaders/GPUShader.h" ForwardPass::ForwardPass() - : _shader(nullptr) - , _psApplyDistortion(nullptr) + : _psApplyDistortion(nullptr) { } diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index 817f25eef..c56812c72 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -19,7 +19,6 @@ int32 VolumetricFogGridInjectionGroupSize = 4; int32 VolumetricFogIntegrationGroupSize = 8; VolumetricFogPass::VolumetricFogPass() - : _shader(nullptr) { } From 99841e2e8d332d466f4e8391716c83a0cf59c9db Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 8 Jun 2025 00:58:31 +0200 Subject: [PATCH 032/211] Fix crash when using invalid node index in skinned mesh --- Source/Engine/Graphics/Models/SkinnedMesh.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Graphics/Models/SkinnedMesh.cpp b/Source/Engine/Graphics/Models/SkinnedMesh.cpp index 543f3791b..8c1b98ebf 100644 --- a/Source/Engine/Graphics/Models/SkinnedMesh.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMesh.cpp @@ -158,6 +158,7 @@ void SkeletonData::Swap(SkeletonData& other) Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const { + CHECK_RETURN(Nodes.IsValidIndex(nodeIndex), Transform::Identity); const int32 parentIndex = Nodes[nodeIndex].ParentIndex; if (parentIndex == -1) { @@ -169,6 +170,7 @@ Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const void SkeletonData::SetNodeTransform(int32 nodeIndex, const Transform& value) { + CHECK(Nodes.IsValidIndex(nodeIndex)); const int32 parentIndex = Nodes[nodeIndex].ParentIndex; 
if (parentIndex == -1) { From 65ab42158d683b778264f009124c0a98fd927f8e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 8 Jun 2025 00:58:39 +0200 Subject: [PATCH 033/211] Update engine version --- Flax.flaxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index c51eb8bd6..96e09cd3d 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -2,9 +2,9 @@ "Name": "Flax", "Version": { "Major": 1, - "Minor": 10, + "Minor": 11, "Revision": 0, - "Build": 6705 + "Build": 6800 }, "Company": "Flax", "Copyright": "Copyright (c) 2012-2025 Wojciech Figat. All rights reserved.", From 907c593671b491d98e22fc5408e6133d8847187a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 8 Jun 2025 19:47:09 +0200 Subject: [PATCH 034/211] Fix typos in doc comments --- Source/Engine/Profiler/ProfilerMemory.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h index e42b8720e..5dddb912b 100644 --- a/Source/Engine/Profiler/ProfilerMemory.h +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -63,11 +63,11 @@ public: GraphicsIndexBuffers, // Total meshes memory usage (vertex and index buffers allocated by models). GraphicsMeshes, - // Totoal shaders memory usage (shaders bytecode, PSOs data). + // Total shaders memory usage (shaders bytecode, PSOs data). GraphicsShaders, - // Totoal materials memory usage (constant buffers, parameters data). + // Total materials memory usage (constant buffers, parameters data). GraphicsMaterials, - // Totoal command buffers memory usage (draw lists, constants uploads, ring buffer allocators). + // Total command buffers memory usage (draw lists, constants uploads, ring buffer allocators). GraphicsCommands, // Total Artificial Intelligence systems memory usage (eg. Behavior Trees). 
@@ -245,7 +245,7 @@ public: API_FUNCTION() static GroupsArray GetGroups(int32 mode = 0); /// - /// Dumps the memory allocations stats (groupped). + /// Dumps the memory allocations stats (grouped). /// /// 'all' to dump all groups, 'file' to dump info to a file (in Logs folder) API_FUNCTION(Attributes="DebugCommand") static void Dump(const StringView& options = StringView::Empty); From 6547e7ee9c829c118e908b1d86910e0ff8f64b42 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 8 Jun 2025 23:58:33 +0200 Subject: [PATCH 035/211] Fix compilation with Clang --- Source/Engine/Content/Asset.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index 17d8c8b5f..bb0fbe490 100644 --- a/Source/Engine/Content/Asset.h +++ b/Source/Engine/Content/Asset.h @@ -22,6 +22,8 @@ class FLAXENGINE_API IAssetReference { public: + virtual ~IAssetReference() = default; + // Asset reference got changed. virtual void OnAssetChanged(Asset* asset, void* caller) = 0; // Asset got loaded. From 7fa4efcac5500c802e45daaf65c7f56c287daef0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Jun 2025 10:17:51 +0200 Subject: [PATCH 036/211] Fix compilation in Release --- Source/Engine/Core/Delegate.h | 2 ++ Source/Engine/Level/Actors/AnimatedModel.cpp | 1 + Source/Engine/Utilities/Screenshot.cpp | 1 + 3 files changed, 4 insertions(+) diff --git a/Source/Engine/Core/Delegate.h b/Source/Engine/Core/Delegate.h index 8efc2beec..00614a9ff 100644 --- a/Source/Engine/Core/Delegate.h +++ b/Source/Engine/Core/Delegate.h @@ -460,7 +460,9 @@ public: /// The function to bind. 
void Bind(const FunctionType& f) { +#if COMPILE_WITH_PROFILER PROFILE_MEM(EngineDelegate); +#endif #if DELEGATE_USE_ATOMIC const intptr size = Platform::AtomicRead(&_size); FunctionType* bindings = (FunctionType*)Platform::AtomicRead(&_ptr); diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index c4225ef94..ec50cfd56 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -19,6 +19,7 @@ #include "Engine/Graphics/Models/MeshDeformation.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/SceneObjectsFactory.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/Serialization.h" AnimatedModel::AnimatedModel(const SpawnParams& params) diff --git a/Source/Engine/Utilities/Screenshot.cpp b/Source/Engine/Utilities/Screenshot.cpp index 9a453701d..e8e6baf8d 100644 --- a/Source/Engine/Utilities/Screenshot.cpp +++ b/Source/Engine/Utilities/Screenshot.cpp @@ -13,6 +13,7 @@ #include "Engine/Graphics/GPUSwapChain.h" #include "Engine/Threading/ThreadPoolTask.h" #include "Engine/Engine/Globals.h" +#include "Engine/Profiler/ProfilerMemory.h" #if COMPILE_WITH_TEXTURE_TOOL #include "Engine/Tools/TextureTool/TextureTool.h" #endif From 057ec9d41ed1d7e0f78b2469c85775db8182bfa6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Jun 2025 10:48:02 +0200 Subject: [PATCH 037/211] Anothher fix --- Source/Engine/Platform/Apple/ApplePlatform.h | 1 - Source/Engine/Platform/Base/PlatformBase.h | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Platform/Apple/ApplePlatform.h b/Source/Engine/Platform/Apple/ApplePlatform.h index 02f6ac347..003cec91a 100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.h +++ b/Source/Engine/Platform/Apple/ApplePlatform.h @@ -65,7 +65,6 @@ public: { __atomic_store_n((volatile int64*)dst, value, __ATOMIC_RELAXED); } - FORCE_INLINE static void Prefetch(void const* ptr) static 
bool Is64BitPlatform(); static String GetSystemName(); static Version GetSystemVersion(); diff --git a/Source/Engine/Platform/Base/PlatformBase.h b/Source/Engine/Platform/Base/PlatformBase.h index eeaeb879d..40245a3fc 100644 --- a/Source/Engine/Platform/Base/PlatformBase.h +++ b/Source/Engine/Platform/Base/PlatformBase.h @@ -186,7 +186,7 @@ DECLARE_SCRIPTING_TYPE_MINIMAL(PlatformBase); static void BeforeExit(); /// - /// Called after engine exit to shutdown platform service. + /// Called after engine exit to shut down platform service. /// static void Exit(); @@ -243,7 +243,7 @@ public: /// Indicates to the processor that a cache line will be needed in the near future. /// /// The address of the cache line to be loaded. This address is not required to be on a cache line boundary. - static void Prefetch(void const* ptr) = delete; + static void MemoryPrefetch(void const* ptr) = delete; /// /// Sets a 64-bit variable to the specified value as an atomic operation. The function prevents more than one thread from using the same variable simultaneously. @@ -256,7 +256,7 @@ public: /// /// Performs an atomic compare-and-exchange operation on the specified values. The function compares two specified 32-bit values and exchanges with another 32-bit value based on the outcome of the comparison. /// - /// The function compares the dst value with the comperand value. If the dst value is equal to the comperand value, the value value is stored in the address specified by dst. Otherwise, no operation is performed. + /// The function compares the dst value with the comperand value. If the dst value is equal to the comperand value, the value is stored in the address specified by dst. Otherwise, no operation is performed. /// A pointer to the first operand. This value will be replaced with the result of the operation. /// The value to exchange. /// The value to compare to destination. 
@@ -266,7 +266,7 @@ public: /// /// Performs an atomic compare-and-exchange operation on the specified values. The function compares two specified 64-bit values and exchanges with another 64-bit value based on the outcome of the comparison. /// - /// The function compares the dst value with the comperand value. If the dst value is equal to the comperand value, the value value is stored in the address specified by dst. Otherwise, no operation is performed. + /// The function compares the dst value with the comperand value. If the dst value is equal to the comperand value, the value is stored in the address specified by dst. Otherwise, no operation is performed. /// A pointer to the first operand. This value will be replaced with the result of the operation. /// The value to exchange. /// The value to compare to destination. From d7ff9fdadebe2566d23216c30387a88f873865df Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Jun 2025 15:23:31 +0200 Subject: [PATCH 038/211] Optimize editor profiler native allocations when capturing data --- Source/Engine/Content/Content.cpp | 17 +++++++++++++++++ Source/Engine/Content/Content.cs | 14 ++++++++++++++ Source/Engine/Content/Content.h | 7 ++++++- Source/Engine/Engine/NativeInterop.cs | 5 ++++- Source/Engine/Graphics/GPUBufferDescription.cs | 18 ++++++++++++++++++ Source/Engine/Graphics/GPUDevice.cpp | 17 +++++++++++++++++ Source/Engine/Graphics/GPUDevice.h | 8 +++++++- 7 files changed, 83 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Content/Content.cpp b/Source/Engine/Content/Content.cpp index 915f48140..4ba6cbc44 100644 --- a/Source/Engine/Content/Content.cpp +++ b/Source/Engine/Content/Content.cpp @@ -800,6 +800,23 @@ void Content::deleteFileSafety(const StringView& path, const Guid& id) #endif } +#if !COMPILE_WITHOUT_CSHARP + +#include "Engine/Scripting/ManagedCLR/MUtils.h" + +void* Content::GetAssetsInternal() +{ + AssetsLocker.Lock(); + MArray* result = MCore::Array::New(Asset::TypeInitializer.GetClass(), 
Assets.Count()); + int32 i = 0; + for (const auto& e : Assets) + MCore::GC::WriteArrayRef(result, e.Value->GetOrCreateManagedInstance(), i++); + AssetsLocker.Unlock(); + return result; +} + +#endif + #if USE_EDITOR bool Content::RenameAsset(const StringView& oldPath, const StringView& newPath) diff --git a/Source/Engine/Content/Content.cs b/Source/Engine/Content/Content.cs index 4fcc0c300..4f07e1dc6 100644 --- a/Source/Engine/Content/Content.cs +++ b/Source/Engine/Content/Content.cs @@ -1,5 +1,6 @@ // Copyright (c) Wojciech Figat. All rights reserved. +using FlaxEngine.Interop; using System; using System.Runtime.CompilerServices; @@ -7,6 +8,19 @@ namespace FlaxEngine { partial class Content { + /// + /// Gets the assets (loaded or during load). + /// + public static Asset[] Assets + { + get + { + IntPtr ptr = Internal_GetAssetsInternal(); + ManagedArray array = Unsafe.As(ManagedHandle.FromIntPtr(ptr).Target); + return NativeInterop.GCHandleArrayToManagedArray(array); + } + } + /// /// Loads asset to the Content Pool and holds it until it won't be referenced by any object. Returns null if asset is missing. Actual asset data loading is performed on a other thread in async. /// diff --git a/Source/Engine/Content/Content.h b/Source/Engine/Content/Content.h index c11a9ed11..15ace944a 100644 --- a/Source/Engine/Content/Content.h +++ b/Source/Engine/Content/Content.h @@ -122,7 +122,7 @@ public: /// Gets the assets (loaded or during load). /// /// The collection of assets. - API_PROPERTY() static Array GetAssets(); + static Array GetAssets(); /// /// Gets the raw dictionary of assets (loaded or during load). 
@@ -368,4 +368,9 @@ private: static void onAssetUnload(Asset* asset); static void onAssetChangeId(Asset* asset, const Guid& oldId, const Guid& newId); static void deleteFileSafety(const StringView& path, const Guid& id); + + // Internal bindings +#if !COMPILE_WITHOUT_CSHARP + API_FUNCTION(NoProxy) static void* GetAssetsInternal(); +#endif }; diff --git a/Source/Engine/Engine/NativeInterop.cs b/Source/Engine/Engine/NativeInterop.cs index 368c67132..8138d3604 100644 --- a/Source/Engine/Engine/NativeInterop.cs +++ b/Source/Engine/Engine/NativeInterop.cs @@ -201,7 +201,10 @@ namespace FlaxEngine.Interop Span span = ptrArray.ToSpan(); T[] managedArray = new T[ptrArray.Length]; for (int i = 0; i < managedArray.Length; i++) - managedArray[i] = span[i] != IntPtr.Zero ? (T)ManagedHandle.FromIntPtr(span[i]).Target : default; + { + IntPtr ptr = span[i]; + managedArray[i] = ptr != IntPtr.Zero ? (T)ManagedHandle.FromIntPtr(ptr).Target : default; + } return managedArray; } diff --git a/Source/Engine/Graphics/GPUBufferDescription.cs b/Source/Engine/Graphics/GPUBufferDescription.cs index 290e43a7a..9e52876fc 100644 --- a/Source/Engine/Graphics/GPUBufferDescription.cs +++ b/Source/Engine/Graphics/GPUBufferDescription.cs @@ -1,9 +1,27 @@ // Copyright (c) Wojciech Figat. All rights reserved. using System; +using System.Runtime.CompilerServices; +using FlaxEngine.Interop; namespace FlaxEngine { + partial class GPUDevice + { + /// + /// Gets the list with all active GPU resources. 
+ /// + public GPUResource[] Resources + { + get + { + IntPtr ptr = Internal_GetResourcesInternal(__unmanagedPtr); + ManagedArray array = Unsafe.As(ManagedHandle.FromIntPtr(ptr).Target); + return NativeInterop.GCHandleArrayToManagedArray(array); + } + } + } + partial struct GPUBufferDescription : IEquatable { /// diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index 1ea008913..18b9cdffc 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -648,6 +648,23 @@ GPUTasksExecutor* GPUDevice::CreateTasksExecutor() return New(); } +#if !COMPILE_WITHOUT_CSHARP + +#include "Engine/Scripting/ManagedCLR/MUtils.h" + +void* GPUDevice::GetResourcesInternal() +{ + _resourcesLock.Lock(); + MArray* result = MCore::Array::New(GPUResource::TypeInitializer.GetClass(), _resources.Count()); + int32 i = 0; + for (const auto& e : _resources) + MCore::GC::WriteArrayRef(result, e->GetOrCreateManagedInstance(), i++); + _resourcesLock.Unlock(); + return result; +} + +#endif + void GPUDevice::Draw() { PROFILE_MEM(Graphics); diff --git a/Source/Engine/Graphics/GPUDevice.h b/Source/Engine/Graphics/GPUDevice.h index c54395df8..8914085eb 100644 --- a/Source/Engine/Graphics/GPUDevice.h +++ b/Source/Engine/Graphics/GPUDevice.h @@ -236,7 +236,7 @@ public: /// /// Gets the list with all active GPU resources. /// - API_PROPERTY() Array GetResources() const; + Array GetResources() const; /// /// Gets the GPU asynchronous work manager. @@ -432,6 +432,12 @@ public: /// /// The GPU tasks executor. 
virtual GPUTasksExecutor* CreateTasksExecutor(); + +private: + // Internal bindings +#if !COMPILE_WITHOUT_CSHARP + API_FUNCTION(NoProxy) void* GetResourcesInternal(); +#endif }; /// From 89c7f4b0a3cca616169301d9d6c816e49e9fd165 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Jun 2025 17:19:36 +0200 Subject: [PATCH 039/211] Fix `ManagedDictionary` cache to be cleared on hot-reload --- .../Scripting/Internal/ManagedDictionary.cpp | 2 +- .../Scripting/Internal/ManagedDictionary.h | 17 +++++++++-------- Source/Engine/Scripting/Scripting.cpp | 2 ++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Source/Engine/Scripting/Internal/ManagedDictionary.cpp b/Source/Engine/Scripting/Internal/ManagedDictionary.cpp index d467fe47a..d2f74e054 100644 --- a/Source/Engine/Scripting/Internal/ManagedDictionary.cpp +++ b/Source/Engine/Scripting/Internal/ManagedDictionary.cpp @@ -3,7 +3,7 @@ #include "ManagedDictionary.h" #if USE_CSHARP -Dictionary ManagedDictionary::CachedDictionaryTypes; +Dictionary ManagedDictionary::CachedTypes; #if !USE_MONO_AOT ManagedDictionary::MakeGenericTypeThunk ManagedDictionary::MakeGenericType; ManagedDictionary::CreateInstanceThunk ManagedDictionary::CreateInstance; diff --git a/Source/Engine/Scripting/Internal/ManagedDictionary.h b/Source/Engine/Scripting/Internal/ManagedDictionary.h index af88172b6..5e2638af7 100644 --- a/Source/Engine/Scripting/Internal/ManagedDictionary.h +++ b/Source/Engine/Scripting/Internal/ManagedDictionary.h @@ -22,17 +22,18 @@ struct FLAXENGINE_API ManagedDictionary public: struct KeyValueType { - MType* keyType; - MType* valueType; + MType* KeyType; + MType* ValueType; bool operator==(const KeyValueType& other) const { - return keyType == other.keyType && valueType == other.valueType; + return KeyType == other.KeyType && ValueType == other.ValueType; } }; private: - static Dictionary CachedDictionaryTypes; + friend class Scripting; + static Dictionary CachedTypes; #if !USE_MONO_AOT typedef MTypeObject* 
(*MakeGenericTypeThunk)(MObject* instance, MTypeObject* genericType, MArray* genericArgs, MObject** exception); @@ -158,7 +159,7 @@ public: // Check if the generic type was generated earlier KeyValueType cacheKey = { keyType, valueType }; MTypeObject* dictionaryType; - if (CachedDictionaryTypes.TryGet(cacheKey, dictionaryType)) + if (CachedTypes.TryGet(cacheKey, dictionaryType)) return dictionaryType; MTypeObject* genericType = MUtils::GetType(StdTypesContainer::Instance()->DictionaryClass); @@ -186,7 +187,7 @@ public: ex.Log(LogType::Error, TEXT("")); return nullptr; } - CachedDictionaryTypes.Add(cacheKey, dictionaryType); + CachedTypes.Add(cacheKey, dictionaryType); return dictionaryType; } @@ -264,8 +265,8 @@ public: inline uint32 GetHash(const ManagedDictionary::KeyValueType& other) { - uint32 hash = ::GetHash((void*)other.keyType); - CombineHash(hash, ::GetHash((void*)other.valueType)); + uint32 hash = ::GetHash((void*)other.KeyType); + CombineHash(hash, ::GetHash((void*)other.ValueType)); return hash; } diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index 3a69a7601..a17de075b 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -21,6 +21,7 @@ #include "ManagedCLR/MCore.h" #include "ManagedCLR/MException.h" #include "Internal/StdTypesContainer.h" +#include "Internal/ManagedDictionary.h" #include "Engine/Core/LogContext.h" #include "Engine/Core/ObjectsRemovalService.h" #include "Engine/Core/Types/TimeSpan.h" @@ -720,6 +721,7 @@ void Scripting::Reload(bool canTriggerSceneReload) modules.Clear(); _nonNativeModules.ClearDelete(); _hasGameModulesLoaded = false; + ManagedDictionary::CachedTypes.Clear(); // Release and create a new assembly load context for user assemblies MCore::UnloadScriptingAssemblyLoadContext(); From cfd2f42b0cf3787f6f502fde7f885a369a909936 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Jun 2025 22:06:49 +0200 Subject: [PATCH 040/211] Optimize 
managed memory allocations in Editor profiler --- Source/Editor/Windows/Profiler/Assets.cs | 11 +++++++---- Source/Editor/Windows/Profiler/MemoryGPU.cs | 11 +++++++---- Source/Engine/Content/Content.cs | 14 ++++++++++++++ Source/Engine/Engine/NativeInterop.cs | 12 +++++++----- Source/Engine/Graphics/GPUBufferDescription.cs | 14 ++++++++++++++ 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/Source/Editor/Windows/Profiler/Assets.cs b/Source/Editor/Windows/Profiler/Assets.cs index 0ad47735f..e4e41c668 100644 --- a/Source/Editor/Windows/Profiler/Assets.cs +++ b/Source/Editor/Windows/Profiler/Assets.cs @@ -34,6 +34,7 @@ namespace FlaxEditor.Windows.Profiler private List _tableRowsCache; private Dictionary _resourceCache; private StringBuilder _stringBuilder; + private Asset[] _assetsCache; public Assets() : base("Assets") @@ -138,12 +139,12 @@ namespace FlaxEditor.Windows.Profiler _stringBuilder = new StringBuilder(); // Capture current assets usage info - var assets = FlaxEngine.Content.Assets; - var resources = new Resource[assets.Length]; + FlaxEngine.Content.GetAssets(ref _assetsCache, out var count); + var resources = new Resource[count]; ulong totalMemoryUsage = 0; - for (int i = 0; i < resources.Length; i++) + for (int i = 0; i < count; i++) { - var asset = assets[i]; + var asset = _assetsCache[i]; ref var resource = ref resources[i]; if (!asset) continue; @@ -179,6 +180,7 @@ namespace FlaxEditor.Windows.Profiler if (_resources == null) _resources = new SamplesBuffer(); _resources.Add(resources); + Array.Clear(_assetsCache); } /// @@ -200,6 +202,7 @@ namespace FlaxEditor.Windows.Profiler _resourceCache?.Clear(); _tableRowsCache?.Clear(); _stringBuilder?.Clear(); + _assetsCache = null; base.OnDestroy(); } diff --git a/Source/Editor/Windows/Profiler/MemoryGPU.cs b/Source/Editor/Windows/Profiler/MemoryGPU.cs index acaeca364..ce266777d 100644 --- a/Source/Editor/Windows/Profiler/MemoryGPU.cs +++ b/Source/Editor/Windows/Profiler/MemoryGPU.cs @@ -35,6 
+35,7 @@ namespace FlaxEditor.Windows.Profiler private Dictionary _assetPathToId; private Dictionary _resourceCache; private StringBuilder _stringBuilder; + private GPUResource[] _gpuResourcesCached; public MemoryGPU() : base("GPU Memory") @@ -138,12 +139,12 @@ namespace FlaxEditor.Windows.Profiler // Capture current GPU resources usage info var contentDatabase = Editor.Instance.ContentDatabase; - var gpuResources = GPUDevice.Instance.Resources; - var resources = new Resource[gpuResources.Length]; + GPUDevice.Instance.GetResources(ref _gpuResourcesCached, out var count); + var resources = new Resource[count]; var sb = _stringBuilder; - for (int i = 0; i < resources.Length; i++) + for (int i = 0; i < count; i++) { - var gpuResource = gpuResources[i]; + var gpuResource = _gpuResourcesCached[i]; ref var resource = ref resources[i]; // Try to reuse cached resource info @@ -219,6 +220,7 @@ namespace FlaxEditor.Windows.Profiler if (_resources == null) _resources = new SamplesBuffer(); _resources.Add(resources); + Array.Clear(_gpuResourcesCached); } /// @@ -255,6 +257,7 @@ namespace FlaxEditor.Windows.Profiler _assetPathToId?.Clear(); _tableRowsCache?.Clear(); _stringBuilder?.Clear(); + _gpuResourcesCached = null; base.OnDestroy(); } diff --git a/Source/Engine/Content/Content.cs b/Source/Engine/Content/Content.cs index 4f07e1dc6..010abbc56 100644 --- a/Source/Engine/Content/Content.cs +++ b/Source/Engine/Content/Content.cs @@ -21,6 +21,20 @@ namespace FlaxEngine } } + /// + /// Gets the assets (loaded or during load). + /// + /// Output buffer to fill with asset pointers. Can be provided by a user to avoid memory allocation. Buffer might be larger than actual list size. Use for actual item count.> + /// Amount of valid items inside . 
+ public static void GetAssets(ref Asset[] buffer, out int count) + { + count = 0; + IntPtr ptr = Internal_GetAssetsInternal(); + ManagedArray array = Unsafe.As(ManagedHandle.FromIntPtr(ptr).Target); + buffer = NativeInterop.GCHandleArrayToManagedArray(array, buffer); + count = buffer.Length; + } + /// /// Loads asset to the Content Pool and holds it until it won't be referenced by any object. Returns null if asset is missing. Actual asset data loading is performed on a other thread in async. /// diff --git a/Source/Engine/Engine/NativeInterop.cs b/Source/Engine/Engine/NativeInterop.cs index 8138d3604..7d16b4752 100644 --- a/Source/Engine/Engine/NativeInterop.cs +++ b/Source/Engine/Engine/NativeInterop.cs @@ -195,17 +195,19 @@ namespace FlaxEngine.Interop /// /// Array element type. /// Input array. + /// Cached memory allocation buffer to use for the result (if size fits). /// Output array. - public static T[] GCHandleArrayToManagedArray(ManagedArray ptrArray) where T : class + public static T[] GCHandleArrayToManagedArray(ManagedArray ptrArray, T[] buffer = null) where T : class { Span span = ptrArray.ToSpan(); - T[] managedArray = new T[ptrArray.Length]; - for (int i = 0; i < managedArray.Length; i++) + if (buffer == null || buffer.Length < ptrArray.Length) + buffer = new T[ptrArray.Length]; + for (int i = 0; i < ptrArray.Length; i++) { IntPtr ptr = span[i]; - managedArray[i] = ptr != IntPtr.Zero ? (T)ManagedHandle.FromIntPtr(ptr).Target : default; + buffer[i] = ptr != IntPtr.Zero ? 
(T)ManagedHandle.FromIntPtr(ptr).Target : default; } - return managedArray; + return buffer; } /// diff --git a/Source/Engine/Graphics/GPUBufferDescription.cs b/Source/Engine/Graphics/GPUBufferDescription.cs index 9e52876fc..107d17b3e 100644 --- a/Source/Engine/Graphics/GPUBufferDescription.cs +++ b/Source/Engine/Graphics/GPUBufferDescription.cs @@ -20,6 +20,20 @@ namespace FlaxEngine return NativeInterop.GCHandleArrayToManagedArray(array); } } + + /// + /// Gets the list with all active GPU resources. + /// + /// Output buffer to fill with resource pointers. Can be provided by a user to avoid memory allocation. Buffer might be larger than actual list size. Use for actual item count.> + /// Amount of valid items inside . + public void GetResources(ref GPUResource[] buffer, out int count) + { + count = 0; + IntPtr ptr = Internal_GetResourcesInternal(__unmanagedPtr); + ManagedArray array = Unsafe.As(ManagedHandle.FromIntPtr(ptr).Target); + buffer = NativeInterop.GCHandleArrayToManagedArray(array, buffer); + count = buffer.Length; + } } partial struct GPUBufferDescription : IEquatable From d6b4992991fddc61059fda3f3dca36ef2b7e2740 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 10 Jun 2025 20:08:20 +0200 Subject: [PATCH 041/211] Optimize actors registration in `SceneRendering` to track free items --- Source/Engine/Level/Scene/SceneRendering.cpp | 17 +++++++++++------ Source/Engine/Level/Scene/SceneRendering.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index c6f5669a5..fe2bc310f 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -136,6 +136,8 @@ void SceneRendering::Clear() _listeners.Clear(); for (auto& e : Actors) e.Clear(); + for (auto& e : FreeActors) + e.Clear(); #if USE_EDITOR PhysicsDebug.Clear(); #endif @@ -149,15 +151,17 @@ void SceneRendering::AddActor(Actor* a, int32& key) 
const int32 category = a->_drawCategory; ScopeLock lock(Locker); auto& list = Actors[category]; - // TODO: track removedCount and skip searching for free entry if there is none - key = 0; - for (; key < list.Count(); key++) + if (FreeActors[category].HasItems()) { - if (list.Get()[key].Actor == nullptr) - break; + // Use existing item + key = FreeActors[category].Pop(); } - if (key == list.Count()) + else + { + // Add a new item + key = list.Count(); list.AddOne(); + } auto& e = list[key]; e.Actor = a; e.LayerMask = a->GetLayerMask(); @@ -200,6 +204,7 @@ void SceneRendering::RemoveActor(Actor* a, int32& key) listener->OnSceneRenderingRemoveActor(a); e.Actor = nullptr; e.LayerMask = 0; + FreeActors[category].Add(key); } } key = -1; diff --git a/Source/Engine/Level/Scene/SceneRendering.h b/Source/Engine/Level/Scene/SceneRendering.h index 043f5079e..b24dcdfa9 100644 --- a/Source/Engine/Level/Scene/SceneRendering.h +++ b/Source/Engine/Level/Scene/SceneRendering.h @@ -100,6 +100,7 @@ public: }; Array Actors[MAX]; + Array FreeActors[MAX]; Array PostFxProviders; CriticalSection Locker; From b50f3fcb64932c3c63f26f6807b56ca56a25c621 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 11 Jun 2025 00:01:46 +0200 Subject: [PATCH 042/211] Refactor level actions to support time budget and time slicing --- Source/Engine/Core/Types/Stopwatch.h | 2 +- Source/Engine/Level/Level.cpp | 239 +++++++++++++++++---------- Source/Engine/Level/Level.h | 13 +- Source/Engine/Level/Scene/Scene.h | 1 + 4 files changed, 156 insertions(+), 99 deletions(-) diff --git a/Source/Engine/Core/Types/Stopwatch.h b/Source/Engine/Core/Types/Stopwatch.h index c909285af..d87df0f21 100644 --- a/Source/Engine/Core/Types/Stopwatch.h +++ b/Source/Engine/Core/Types/Stopwatch.h @@ -43,7 +43,7 @@ public: /// /// Gets the total number of milliseconds. 
/// - FORCE_INLINE double GetTotalMilliseconds() const + FORCE_INLINE float GetTotalMilliseconds() const { return (float)((_end - _start) * 1000.0); } diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 1233282be..d3f1ccba3 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -18,16 +18,15 @@ #include "Engine/Debug/Exceptions/ArgumentNullException.h" #include "Engine/Debug/Exceptions/InvalidOperationException.h" #include "Engine/Debug/Exceptions/JsonParseException.h" +#include "Engine/Engine/Engine.h" #include "Engine/Engine/EngineService.h" #include "Engine/Threading/Threading.h" #include "Engine/Threading/JobSystem.h" #include "Engine/Platform/File.h" -#include "Engine/Platform/FileSystem.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/Script.h" #include "Engine/Engine/Time.h" -#include "Engine/Scripting/ManagedCLR/MAssembly.h" #include "Engine/Scripting/ManagedCLR/MClass.h" #include "Engine/Scripting/ManagedCLR/MDomain.h" #include "Engine/Scripting/ManagedCLR/MException.h" @@ -78,6 +77,13 @@ enum class SceneEventType OnSceneUnloaded = 7, }; +enum class SceneResult +{ + Success, + Failed, + Wait, +}; + class SceneAction { public: @@ -85,14 +91,15 @@ public: { } - virtual bool CanDo() const + struct Context { - return true; - } + // Amount of seconds that action can take to run within a budget. + float TimeBudget = MAX_float; + }; - virtual bool Do() const + virtual SceneResult Do(Context& context) { - return true; + return SceneResult::Failed; } }; @@ -107,6 +114,33 @@ struct ScriptsReloadObject #endif +// Async map loading utility for state tracking and synchronization of various load stages. 
+class SceneLoader +{ +public: + enum Stages + { + Init, + Spawn, + SetupPrefabs, + Deserialize, + SetupTransforms, + BeginPlay, + Loaded, + } Stage = Init; + bool AsyncLoad; + bool AsyncJobs; + float TotalTime = 0.0f; + + SceneLoader(bool asyncLoad = false) + : AsyncLoad(true) + , AsyncJobs(JobSystem::GetThreadsCount() > 1) + { + } + + SceneResult Tick(rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath, float* timeBudget); +}; + namespace LevelImpl { Array _sceneActions; @@ -119,6 +153,10 @@ namespace LevelImpl void CallSceneEvent(SceneEventType eventType, Scene* scene, Guid sceneId); void flushActions(); + SceneResult loadScene(SceneLoader& loader, JsonAsset* sceneAsset); + SceneResult loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** outScene = nullptr); + SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Document& document, Scene** outScene = nullptr); + SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene = nullptr, const String* assetPath = nullptr, float* timeBudget = nullptr); bool unloadScene(Scene* scene); bool unloadScenes(); bool saveScene(Scene* scene); @@ -151,6 +189,7 @@ LevelService LevelServiceInstanceService; CriticalSection Level::ScenesLock; Array Level::Scenes; bool Level::TickEnabled = true; +float Level::StreamingFrameBudget = 0.3f; Delegate Level::ActorSpawned; Delegate Level::ActorDeleted; Delegate Level::ActorParentChanged; @@ -394,40 +433,22 @@ class LoadSceneAction : public SceneAction public: Guid SceneId; AssetReference SceneAsset; + SceneLoader Loader; - LoadSceneAction(const Guid& sceneId, JsonAsset* sceneAsset) + LoadSceneAction(const Guid& sceneId, JsonAsset* sceneAsset, bool async) + : Loader(async) { SceneId = sceneId; SceneAsset = sceneAsset; } - bool CanDo() const override + SceneResult Do(Context& context) override { - return SceneAsset == nullptr || SceneAsset->IsLoaded(); - } - - bool Do() const 
override - { - // Now to deserialize scene in a proper way we need to load scripting - if (!Scripting::IsEveryAssemblyLoaded()) - { - LOG(Error, "Scripts must be compiled without any errors in order to load a scene."); -#if USE_EDITOR - Platform::Error(TEXT("Scripts must be compiled without any errors in order to load a scene. Please fix it.")); -#endif - CallSceneEvent(SceneEventType::OnSceneLoadError, nullptr, SceneId); - return true; - } - - // Load scene - if (Level::loadScene(SceneAsset)) - { - LOG(Error, "Failed to deserialize scene {0}", SceneId); - CallSceneEvent(SceneEventType::OnSceneLoadError, nullptr, SceneId); - return true; - } - - return false; + if (SceneAsset == nullptr) + return SceneResult::Failed; + if (!SceneAsset->IsLoaded()) + return SceneResult::Wait; + return LevelImpl::loadScene(Loader, SceneAsset); } }; @@ -441,12 +462,12 @@ public: TargetScene = scene->GetID(); } - bool Do() const override + SceneResult Do(Context& context) override { auto scene = Level::FindScene(TargetScene); if (!scene) - return true; - return unloadScene(scene); + return SceneResult::Failed; + return unloadScene(scene) ? SceneResult::Failed : SceneResult::Success; } }; @@ -457,9 +478,9 @@ public: { } - bool Do() const override + SceneResult Do(Context& context) override { - return unloadScenes(); + return unloadScenes() ? SceneResult::Failed : SceneResult::Success; } }; @@ -475,14 +496,14 @@ public: PrettyJson = prettyJson; } - bool Do() const override + SceneResult Do(Context& context) override { if (saveScene(TargetScene)) { LOG(Error, "Failed to save scene {0}", TargetScene ? 
TargetScene->GetName() : String::Empty); - return true; + return SceneResult::Failed; } - return false; + return SceneResult::Success; } }; @@ -495,7 +516,7 @@ public: { } - bool Do() const override + SceneResult Do(Context& context) override { // Reloading scripts workflow: // - save scenes (to temporary files) @@ -556,7 +577,7 @@ public: { LOG(Error, "Failed to save scene '{0}' for scripts reload.", scenes[i].Name); CallSceneEvent(SceneEventType::OnSceneSaveError, scene, scene->GetID()); - return true; + return SceneResult::Failed; } CallSceneEvent(SceneEventType::OnSceneSaved, scene, scene->GetID()); } @@ -601,16 +622,17 @@ public: } if (document.HasParseError()) { - LOG(Error, "Failed to deserialize scene {0}. Result: {1}", scenes[i].Name, GetParseError_En(document.GetParseError())); - return true; + LOG(Error, "Failed to deserialize scene {0}. Result: {1}", scenes[i].Name, GetParseError_En(document.GetParseError())); + return SceneResult::Failed; } // Load scene - if (Level::loadScene(document)) + SceneLoader loader; + if (LevelImpl::loadScene(loader, document) != SceneResult::Success) { LOG(Error, "Failed to deserialize scene {0}", scenes[i].Name); CallSceneEvent(SceneEventType::OnSceneLoadError, nullptr, scenes[i].ID); - return true; + return SceneResult::Failed; } } scenes.Resize(0); @@ -619,7 +641,7 @@ public: LOG(Info, "Scripts reloading end. Total time: {0}ms", static_cast((DateTime::NowUTC() - startTime).GetTotalMilliseconds())); Level::ScriptsReloadEnd(); - return false; + return SceneResult::Success; } }; @@ -651,9 +673,9 @@ public: { } - bool Do() const override + SceneResult Do(Context& context) override { - return spawnActor(TargetActor, ParentActor); + return spawnActor(TargetActor, ParentActor) ? SceneResult::Failed : SceneResult::Success; } }; @@ -667,9 +689,9 @@ public: { } - bool Do() const override + SceneResult Do(Context& context) override { - return deleteActor(TargetActor); + return deleteActor(TargetActor) ? 
SceneResult::Failed : SceneResult::Success; } }; @@ -767,13 +789,29 @@ void Level::callActorEvent(ActorEventType eventType, Actor* a, Actor* b) void LevelImpl::flushActions() { - ScopeLock lock(_sceneActionsLocker); + // Calculate time budget for the streaming (relative to the game frame rate to scale across different devices) + SceneAction::Context context; + float targetFps = 60; + if (Time::UpdateFPS > ZeroTolerance) + targetFps = Time::UpdateFPS; + else if (Engine::GetFramesPerSecond() > 0) + targetFps = (float)Engine::GetFramesPerSecond(); + context.TimeBudget = Level::StreamingFrameBudget / targetFps; - while (_sceneActions.HasItems() && _sceneActions.First()->CanDo()) + // Runs actions in order + ScopeLock lock(_sceneActionsLocker); + for (int32 i = 0; i < _sceneActions.Count() && context.TimeBudget >= 0.0; i++) { - const auto action = _sceneActions.Dequeue(); - action->Do(); - Delete(action); + auto action = _sceneActions[i]; + Stopwatch time; + auto result = action->Do(context); + time.Stop(); + context.TimeBudget -= time.GetTotalSeconds(); + if (result != SceneResult::Wait) + { + _sceneActions.RemoveAtKeepOrder(i--); + Delete(action); + } } } @@ -823,25 +861,25 @@ bool LevelImpl::unloadScenes() return false; } -bool Level::loadScene(JsonAsset* sceneAsset) +SceneResult LevelImpl::loadScene(SceneLoader& loader, JsonAsset* sceneAsset) { // Keep reference to the asset (prevent unloading during action) AssetReference ref = sceneAsset; if (sceneAsset == nullptr || sceneAsset->WaitForLoaded()) { LOG(Error, "Cannot load scene asset."); - return true; + return SceneResult::Failed; } - return loadScene(*sceneAsset->Data, sceneAsset->DataEngineBuild, nullptr, &sceneAsset->GetPath()); + return loadScene(loader, *sceneAsset->Data, sceneAsset->DataEngineBuild, nullptr, &sceneAsset->GetPath()); } -bool Level::loadScene(const BytesContainer& sceneData, Scene** outScene) +SceneResult LevelImpl::loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** 
outScene) { if (sceneData.IsInvalid()) { LOG(Error, "Missing scene data."); - return true; + return SceneResult::Failed; } PROFILE_MEM(Level); @@ -854,34 +892,48 @@ bool Level::loadScene(const BytesContainer& sceneData, Scene** outScene) if (document.HasParseError()) { Log::JsonParseException(document.GetParseError(), document.GetErrorOffset()); - return true; + return SceneResult::Failed; } - ScopeLock lock(ScenesLock); - return loadScene(document, outScene); + ScopeLock lock(Level::ScenesLock); + return loadScene(loader, document, outScene); } -bool Level::loadScene(rapidjson_flax::Document& document, Scene** outScene) +SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Document& document, Scene** outScene) { auto data = document.FindMember("Data"); if (data == document.MemberEnd()) { LOG(Error, "Missing Data member."); - return true; + return SceneResult::Failed; } const int32 saveEngineBuild = JsonTools::GetInt(document, "EngineBuild", 0); - return loadScene(data->value, saveEngineBuild, outScene); + return loadScene(loader, data->value, saveEngineBuild, outScene); } -bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath) +SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath, float* timeBudget) { PROFILE_CPU_NAMED("Level.LoadScene"); PROFILE_MEM(Level); - if (outScene) - *outScene = nullptr; #if USE_EDITOR ContentDeprecated::Clear(); #endif + SceneResult result = SceneResult::Success; + while ((!timeBudget || *timeBudget > 0.0f) && loader.Stage != SceneLoader::Loaded && result == SceneResult::Success) + { + Stopwatch time; + result = loader.Tick(data, engineBuild, outScene, assetPath, timeBudget); + time.Stop(); + const float delta = time.GetTotalSeconds(); + loader.TotalTime += delta; + if (timeBudget) + *timeBudget -= delta; + } + return result; +} + +SceneResult 
SceneLoader::Tick(rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath, float* timeBudget) +{ LOG(Info, "Loading scene..."); Stopwatch stopwatch; _lastSceneLoadTime = DateTime::Now(); @@ -900,19 +952,19 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou MessageBox::Show(TEXT("Failed to load scripts.\n\nCannot load scene without game script modules.\n\nSee logs for more info."), TEXT("Missing game modules"), MessageBoxButtons::OK, MessageBoxIcon::Error); } #endif - return true; + return SceneResult::Failed; } // Peek meta if (engineBuild < 6000) { LOG(Error, "Invalid serialized engine build."); - return true; + return SceneResult::Failed; } if (!data.IsArray()) { LOG(Error, "Invalid Data member."); - return true; + return SceneResult::Failed; } // Peek scene node value (it's the first actor serialized) @@ -920,16 +972,16 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou if (!sceneId.IsValid()) { LOG(Error, "Invalid scene id."); - return true; + return SceneResult::Failed; } auto modifier = Cache::ISerializeModifier.Get(); modifier->EngineBuild = engineBuild; // Skip is that scene is already loaded - if (FindScene(sceneId) != nullptr) + if (Level::FindScene(sceneId) != nullptr) { LOG(Info, "Scene {0} is already loaded.", sceneId); - return false; + return SceneResult::Failed; } // Create scene actor @@ -958,7 +1010,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou SceneObject** objects = sceneObjects->Get(); if (context.Async) { - ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) + Level::ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) JobSystem::Execute([&](int32 i) { PROFILE_MEM(Level); @@ -979,7 +1031,7 @@ bool Level::loadScene(rapidjson_flax::Value& 
data, int32 engineBuild, Scene** ou else SceneObjectsFactory::HandleObjectDeserializationError(stream); }, dataCount - 1); - ScenesLock.Lock(); + Level::ScenesLock.Lock(); } else { @@ -1015,7 +1067,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou // TODO: - add _loadNoAsync flag to SceneObject or Actor to handle non-async loading for those types (eg. UIControl/UICanvas) if (context.Async) { - ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) + Level::ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) #if USE_EDITOR volatile int64 deprecated = 0; #endif @@ -1039,7 +1091,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou if (deprecated != 0) ContentDeprecated::Mark(); #endif - ScenesLock.Lock(); + Level::ScenesLock.Lock(); } else { @@ -1114,13 +1166,15 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou { PROFILE_CPU_NAMED("BeginPlay"); - ScopeLock lock(ScenesLock); - Scenes.Add(scene); + ScopeLock lock(Level::ScenesLock); + Level::Scenes.Add(scene); SceneBeginData beginData; scene->BeginPlay(&beginData); beginData.OnDone(); } + Stage = Loaded; + // Fire event CallSceneEvent(SceneEventType::OnSceneLoaded, scene, sceneId); @@ -1150,7 +1204,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou } #endif - return false; + return SceneResult::Success; } bool LevelImpl::saveScene(Scene* scene) @@ -1271,7 +1325,8 @@ bool LevelImpl::saveScene(Scene* scene, rapidjson_flax::StringBuffer& outBuffer, bool Level::SaveScene(Scene* scene, bool prettyJson) { ScopeLock lock(_sceneActionsLocker); - return SaveSceneAction(scene, prettyJson).Do(); + SceneAction::Context context; + return SaveSceneAction(scene, prettyJson).Do(context) != SceneResult::Success; } bool 
Level::SaveSceneToBytes(Scene* scene, rapidjson_flax::StringBuffer& outData, bool prettyJson) @@ -1317,9 +1372,10 @@ void Level::SaveSceneAsync(Scene* scene) bool Level::SaveAllScenes() { ScopeLock lock(_sceneActionsLocker); + SceneAction::Context context; for (int32 i = 0; i < Scenes.Count(); i++) { - if (SaveSceneAction(Scenes[i]).Do()) + if (SaveSceneAction(Scenes[i]).Do(context) != SceneResult::Success) return true; } return false; @@ -1369,7 +1425,8 @@ bool Level::LoadScene(const Guid& id) // Load scene ScopeLock lock(ScenesLock); - if (loadScene(sceneAsset)) + SceneLoader loader; + if (loadScene(loader, sceneAsset) != SceneResult::Success) { LOG(Error, "Failed to deserialize scene {0}", id); CallSceneEvent(SceneEventType::OnSceneLoadError, nullptr, id); @@ -1381,7 +1438,8 @@ bool Level::LoadScene(const Guid& id) Scene* Level::LoadSceneFromBytes(const BytesContainer& data) { Scene* scene = nullptr; - if (loadScene(data, &scene)) + SceneLoader loader; + if (loadScene(loader, data, &scene) != SceneResult::Success) { LOG(Error, "Failed to deserialize scene from bytes"); CallSceneEvent(SceneEventType::OnSceneLoadError, nullptr, Guid::Empty); @@ -1391,7 +1449,6 @@ Scene* Level::LoadSceneFromBytes(const BytesContainer& data) bool Level::LoadSceneAsync(const Guid& id) { - // Check ID if (!id.IsValid()) { Log::ArgumentException(); @@ -1407,7 +1464,7 @@ bool Level::LoadSceneAsync(const Guid& id) } ScopeLock lock(_sceneActionsLocker); - _sceneActions.Enqueue(New(id, sceneAsset)); + _sceneActions.Enqueue(New(id, sceneAsset, true)); return false; } diff --git a/Source/Engine/Level/Level.h b/Source/Engine/Level/Level.h index f07a20c51..597bc0a87 100644 --- a/Source/Engine/Level/Level.h +++ b/Source/Engine/Level/Level.h @@ -48,7 +48,12 @@ public: /// /// True if game objects (actors and scripts) can receive a tick during engine Update/LateUpdate/FixedUpdate events. Can be used to temporarily disable gameplay logic updating. 
/// - API_FIELD() static bool TickEnabled; + API_FIELD(Attributes="DebugCommand") static bool TickEnabled; + + /// + /// Fraction of the frame budget to limit time spent on levels streaming. For example, value of 0.3 means that 30% of frame time can be spent on levels loading within a single frame (eg. 0.3 at 60fps is 4.8ms budget). + /// + API_FIELD(Attributes="DebugCommand") static float StreamingFrameBudget; public: /// @@ -547,10 +552,4 @@ private: }; static void callActorEvent(ActorEventType eventType, Actor* a, Actor* b); - - // All loadScene assume that ScenesLock has been taken by the calling thread - static bool loadScene(JsonAsset* sceneAsset); - static bool loadScene(const BytesContainer& sceneData, Scene** outScene = nullptr); - static bool loadScene(rapidjson_flax::Document& document, Scene** outScene = nullptr); - static bool loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene = nullptr, const String* assetPath = nullptr); }; diff --git a/Source/Engine/Level/Scene/Scene.h b/Source/Engine/Level/Scene/Scene.h index a34ebd592..f8f40b05a 100644 --- a/Source/Engine/Level/Scene/Scene.h +++ b/Source/Engine/Level/Scene/Scene.h @@ -19,6 +19,7 @@ API_CLASS() class FLAXENGINE_API Scene : public Actor { friend class Level; friend class ReloadScriptsAction; + friend class SceneLoader; DECLARE_SCENE_OBJECT(Scene); /// From d6eb647d5991d14fa5a4c56da52f84ffee753c5c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 11 Jun 2025 14:33:47 +0200 Subject: [PATCH 043/211] Optimize async scene loading to run in separate stages with time-slicing --- Source/Engine/Core/Cache.cpp | 2 +- Source/Engine/Core/Cache.h | 3 +- Source/Engine/Level/Level.cpp | 504 ++++++++++++++++++++++------------ 3 files changed, 328 insertions(+), 181 deletions(-) diff --git a/Source/Engine/Core/Cache.cpp b/Source/Engine/Core/Cache.cpp index b562465c0..315a93775 100644 --- a/Source/Engine/Core/Cache.cpp +++ b/Source/Engine/Core/Cache.cpp @@ -3,7 +3,7 @@ #include "Cache.h" 
#include "FlaxEngine.Gen.h" -CollectionPoolCache Cache::ISerializeModifier; +Cache::ISerializeModifierCache Cache::ISerializeModifier; void Cache::ISerializeModifierClearCallback(::ISerializeModifier* obj) { diff --git a/Source/Engine/Core/Cache.h b/Source/Engine/Core/Cache.h index c0a8e3ac4..1b8d95910 100644 --- a/Source/Engine/Core/Cache.h +++ b/Source/Engine/Core/Cache.h @@ -15,11 +15,12 @@ public: static void ISerializeModifierClearCallback(ISerializeModifier* obj); public: + typedef CollectionPoolCache ISerializeModifierCache; /// /// Gets the ISerializeModifier lookup cache. Safe allocation, per thread, uses caching. /// - static CollectionPoolCache ISerializeModifier; + static ISerializeModifierCache ISerializeModifier; public: diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index d3f1ccba3..881277c5e 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -118,27 +118,79 @@ struct ScriptsReloadObject class SceneLoader { public: - enum Stages + struct Args { - Init, + rapidjson_flax::Value& Data; + const String* AssetPath; + int32 EngineBuild; + float TimeBudget; + }; + + enum class Stages + { + Begin, Spawn, SetupPrefabs, + SyncNewPrefabs, Deserialize, + SyncPrefabs, SetupTransforms, + Initialize, BeginPlay, + End, Loaded, - } Stage = Init; + } Stage = Stages::Begin; + bool AsyncLoad; bool AsyncJobs; + Guid SceneId = Guid::Empty; + Scene* Scene = nullptr; float TotalTime = 0.0f; + uint64 StartFrame; + + // Cache data + ISerializeModifier* Modifier = nullptr; + ActorsCache::SceneObjectsListType* SceneObjects = nullptr; + Array InjectedSceneChildren; + SceneObjectsFactory::Context Context; + SceneObjectsFactory::PrefabSyncData* PrefabSyncData = nullptr; SceneLoader(bool asyncLoad = false) - : AsyncLoad(true) + : AsyncLoad(asyncLoad) , AsyncJobs(JobSystem::GetThreadsCount() > 1) + , Modifier(Cache::ISerializeModifier.GetUnscoped()) + , Context(Modifier) { } - SceneResult Tick(rapidjson_flax::Value& data, 
int32 engineBuild, Scene** outScene, const String* assetPath, float* timeBudget); + ~SceneLoader() + { + if (PrefabSyncData) + Delete(PrefabSyncData); + if (SceneObjects) + ActorsCache::SceneObjectsListCache.Put(SceneObjects); + if (Modifier) + Cache::ISerializeModifier.Put(Modifier); + } + + NON_COPYABLE(SceneLoader); + + FORCE_INLINE void NextStage() + { + Stage = (Stages)((uint8)Stage + 1); + } + + SceneResult Tick(Args& args); + SceneResult OnBegin(Args& args); + SceneResult OnSpawn(Args& args); + SceneResult OnSetupPrefabs(Args& args); + SceneResult OnSyncNewPrefabs(Args& args); + SceneResult OnDeserialize(Args& args); + SceneResult OnSyncPrefabs(Args& args); + SceneResult OnSetupTransforms(Args& args); + SceneResult OnInitialize(Args& args); + SceneResult OnBeginPlay(Args& args); + SceneResult OnEnd(Args& args); }; namespace LevelImpl @@ -153,9 +205,9 @@ namespace LevelImpl void CallSceneEvent(SceneEventType eventType, Scene* scene, Guid sceneId); void flushActions(); - SceneResult loadScene(SceneLoader& loader, JsonAsset* sceneAsset); - SceneResult loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** outScene = nullptr); - SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Document& document, Scene** outScene = nullptr); + SceneResult loadScene(SceneLoader& loader, JsonAsset* sceneAsset, float* timeBudget = nullptr); + SceneResult loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** outScene = nullptr, float* timeBudget = nullptr); + SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Document& document, Scene** outScene = nullptr, float* timeBudget = nullptr); SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene = nullptr, const String* assetPath = nullptr, float* timeBudget = nullptr); bool unloadScene(Scene* scene); bool unloadScenes(); @@ -448,7 +500,7 @@ public: return SceneResult::Failed; if (!SceneAsset->IsLoaded()) return SceneResult::Wait; - 
return LevelImpl::loadScene(Loader, SceneAsset); + return LevelImpl::loadScene(Loader, SceneAsset, &context.TimeBudget); } }; @@ -797,6 +849,16 @@ void LevelImpl::flushActions() else if (Engine::GetFramesPerSecond() > 0) targetFps = (float)Engine::GetFramesPerSecond(); context.TimeBudget = Level::StreamingFrameBudget / targetFps; +#if USE_EDITOR + // Throttle up in Editor + context.TimeBudget *= Editor::IsPlayMode ? 1.2f : 2.0f; +#endif +#if BUILD_DEBUG + // Throttle up in Debug + context.TimeBudget *= 1.2f; +#endif + if (context.TimeBudget <= ZeroTolerance) + context.TimeBudget = MAX_float; // Runs actions in order ScopeLock lock(_sceneActionsLocker); @@ -861,7 +923,7 @@ bool LevelImpl::unloadScenes() return false; } -SceneResult LevelImpl::loadScene(SceneLoader& loader, JsonAsset* sceneAsset) +SceneResult LevelImpl::loadScene(SceneLoader& loader, JsonAsset* sceneAsset, float* timeBudget) { // Keep reference to the asset (prevent unloading during action) AssetReference ref = sceneAsset; @@ -871,10 +933,10 @@ SceneResult LevelImpl::loadScene(SceneLoader& loader, JsonAsset* sceneAsset) return SceneResult::Failed; } - return loadScene(loader, *sceneAsset->Data, sceneAsset->DataEngineBuild, nullptr, &sceneAsset->GetPath()); + return loadScene(loader, *sceneAsset->Data, sceneAsset->DataEngineBuild, nullptr, &sceneAsset->GetPath(), timeBudget); } -SceneResult LevelImpl::loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** outScene) +SceneResult LevelImpl::loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** outScene, float* timeBudget) { if (sceneData.IsInvalid()) { @@ -896,10 +958,10 @@ SceneResult LevelImpl::loadScene(SceneLoader& loader, const BytesContainer& scen } ScopeLock lock(Level::ScenesLock); - return loadScene(loader, document, outScene); + return loadScene(loader, document, outScene, timeBudget); } -SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Document& document, Scene** outScene) +SceneResult 
LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Document& document, Scene** outScene, float* timeBudget) { auto data = document.FindMember("Data"); if (data == document.MemberEnd()) @@ -908,7 +970,7 @@ SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Document& return SceneResult::Failed; } const int32 saveEngineBuild = JsonTools::GetInt(document, "EngineBuild", 0); - return loadScene(loader, data->value, saveEngineBuild, outScene); + return loadScene(loader, data->value, saveEngineBuild, outScene, nullptr, timeBudget); } SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath, float* timeBudget) @@ -919,27 +981,64 @@ SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Value& dat ContentDeprecated::Clear(); #endif SceneResult result = SceneResult::Success; - while ((!timeBudget || *timeBudget > 0.0f) && loader.Stage != SceneLoader::Loaded && result == SceneResult::Success) + float timeLeft = timeBudget ? 
*timeBudget : MAX_float; + SceneLoader::Args args = { data, assetPath, engineBuild, timeLeft }; + while (timeLeft > 0.0f && loader.Stage != SceneLoader::Stages::Loaded) { Stopwatch time; - result = loader.Tick(data, engineBuild, outScene, assetPath, timeBudget); + result = loader.Tick(args); time.Stop(); const float delta = time.GetTotalSeconds(); loader.TotalTime += delta; - if (timeBudget) - *timeBudget -= delta; + timeLeft -= delta; + if (timeLeft < 0.0f && result == SceneResult::Success) + { + result = SceneResult::Wait; + break; + } } + if (outScene) + *outScene = loader.Scene; return result; } -SceneResult SceneLoader::Tick(rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath, float* timeBudget) +SceneResult SceneLoader::Tick(Args& args) { - LOG(Info, "Loading scene..."); - Stopwatch stopwatch; - _lastSceneLoadTime = DateTime::Now(); + switch (Stage) + { + case Stages::Begin: + return OnBegin(args); + case Stages::Spawn: + return OnSpawn(args); + case Stages::SetupPrefabs: + return OnSetupPrefabs(args); + case Stages::SyncNewPrefabs: + return OnSyncNewPrefabs(args); + case Stages::Deserialize: + return OnDeserialize(args); + case Stages::SyncPrefabs: + return OnSyncPrefabs(args); + case Stages::Initialize: + return OnInitialize(args); + case Stages::SetupTransforms: + return OnSetupTransforms(args); + case Stages::BeginPlay: + return OnBeginPlay(args); + case Stages::End: + return OnEnd(args); + default: + return SceneResult::Failed; + } +} - // Here whole scripting backend should be loaded for current project - // Later scripts will setup attached scripts and restore initial vars +SceneResult SceneLoader::OnBegin(Args& args) +{ + PROFILE_CPU_NAMED("Begin"); + LOG(Info, "Loading scene..."); + _lastSceneLoadTime = DateTime::Now(); + StartFrame = Engine::UpdateCount; + + // Scripting backend should be loaded for the current project before loading scene if (!Scripting::HasGameModulesLoaded()) { LOG(Error, "Cannot load scene 
without game modules loaded."); @@ -956,163 +1055,186 @@ SceneResult SceneLoader::Tick(rapidjson_flax::Value& data, int32 engineBuild, Sc } // Peek meta - if (engineBuild < 6000) + if (args.EngineBuild < 6000) { LOG(Error, "Invalid serialized engine build."); return SceneResult::Failed; } - if (!data.IsArray()) + if (!args.Data.IsArray()) { LOG(Error, "Invalid Data member."); return SceneResult::Failed; } + Modifier->EngineBuild = args.EngineBuild; // Peek scene node value (it's the first actor serialized) - auto sceneId = JsonTools::GetGuid(data[0], "ID"); - if (!sceneId.IsValid()) + SceneId = JsonTools::GetGuid(args.Data[0], "ID"); + if (!SceneId.IsValid()) { LOG(Error, "Invalid scene id."); return SceneResult::Failed; } - auto modifier = Cache::ISerializeModifier.Get(); - modifier->EngineBuild = engineBuild; // Skip is that scene is already loaded - if (Level::FindScene(sceneId) != nullptr) + if (Level::FindScene(SceneId) != nullptr) { - LOG(Info, "Scene {0} is already loaded.", sceneId); + LOG(Info, "Scene {0} is already loaded.", SceneId); return SceneResult::Failed; } // Create scene actor - // Note: the first object in the scene file data is a Scene Actor - auto scene = New(ScriptingObjectSpawnParams(sceneId, Scene::TypeInitializer)); - scene->RegisterObject(); - scene->Deserialize(data[0], modifier.Value); + Scene = New<::Scene>(ScriptingObjectSpawnParams(SceneId, Scene::TypeInitializer)); + Scene->RegisterObject(); + Scene->Deserialize(args.Data[0], Modifier); // Fire event - CallSceneEvent(SceneEventType::OnSceneLoading, scene, sceneId); + CallSceneEvent(SceneEventType::OnSceneLoading, Scene, SceneId); + + NextStage(); + return SceneResult::Success; +} + +SceneResult SceneLoader::OnSpawn(Args& args) +{ + PROFILE_CPU_NAMED("Spawn"); // Get any injected children of the scene. 
- Array injectedSceneChildren = scene->Children; + InjectedSceneChildren = Scene->Children; - // Loaded scene objects list - CollectionPoolCache::ScopeCache sceneObjects = ActorsCache::SceneObjectsListCache.Get(); - const int32 dataCount = (int32)data.Size(); - sceneObjects->Resize(dataCount); - sceneObjects->At(0) = scene; + // Allocate scene objects list + SceneObjects = ActorsCache::SceneObjectsListCache.GetUnscoped(); + const int32 dataCount = (int32)args.Data.Size(); + SceneObjects->Resize(dataCount); + SceneObjects->At(0) = Scene; + AsyncJobs &= dataCount > 10; // Spawn all scene objects - SceneObjectsFactory::Context context(modifier.Value); - context.Async = JobSystem::GetThreadsCount() > 1 && dataCount > 10; + Context.Async = AsyncJobs; + SceneObject** objects = SceneObjects->Get(); + if (Context.Async) { - PROFILE_CPU_NAMED("Spawn"); - SceneObject** objects = sceneObjects->Get(); - if (context.Async) + Level::ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) + JobSystem::Execute([&](int32 i) { - Level::ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) - JobSystem::Execute([&](int32 i) + PROFILE_MEM(Level); + i++; // Start from 1. at index [0] was scene + auto& stream = args.Data[i]; + auto obj = SceneObjectsFactory::Spawn(Context, stream); + objects[i] = obj; + if (obj) { - PROFILE_MEM(Level); - i++; // Start from 1. 
at index [0] was scene - auto& stream = data[i]; - auto obj = SceneObjectsFactory::Spawn(context, stream); - objects[i] = obj; - if (obj) - { - if (!obj->IsRegistered()) - obj->RegisterObject(); -#if USE_EDITOR - // Auto-create C# objects for all actors in Editor during scene load when running in async (so main thread already has all of them) - if (!obj->GetManagedInstance()) - obj->CreateManaged(); -#endif - } - else - SceneObjectsFactory::HandleObjectDeserializationError(stream); - }, dataCount - 1); - Level::ScenesLock.Lock(); - } - else - { - for (int32 i = 1; i < dataCount; i++) // start from 1. at index [0] was scene - { - auto& stream = data[i]; - auto obj = SceneObjectsFactory::Spawn(context, stream); - sceneObjects->At(i) = obj; - if (obj) + if (!obj->IsRegistered()) obj->RegisterObject(); - else - SceneObjectsFactory::HandleObjectDeserializationError(stream); +#if USE_EDITOR + // Auto-create C# objects for all actors in Editor during scene load when running in async (so main thread already has all of them) + if (!obj->GetManagedInstance()) + obj->CreateManaged(); +#endif } + else + SceneObjectsFactory::HandleObjectDeserializationError(stream); + }, dataCount - 1); + Level::ScenesLock.Lock(); + } + else + { + for (int32 i = 1; i < dataCount; i++) // start from 1. at index [0] was scene + { + auto& stream = args.Data[i]; + auto obj = SceneObjectsFactory::Spawn(Context, stream); + objects[i] = obj; + if (obj) + obj->RegisterObject(); + else + SceneObjectsFactory::HandleObjectDeserializationError(stream); } } - // Capture prefab instances in a scene to restore any missing objects (eg. 
newly added objects to prefab that are missing in scene file) - SceneObjectsFactory::PrefabSyncData prefabSyncData(*sceneObjects.Value, data, modifier.Value); - SceneObjectsFactory::SetupPrefabInstances(context, prefabSyncData); - // TODO: resave and force sync scenes during game cooking so this step could be skipped in game - SceneObjectsFactory::SynchronizeNewPrefabInstances(context, prefabSyncData); + NextStage(); + return SceneResult::Success; +} - // /\ all above this has to be done on an any thread - // \/ all below this has to be done on multiple threads at once +SceneResult SceneLoader::OnSetupPrefabs(Args& args) +{ + // Capture prefab instances in a scene to restore any missing objects (eg. newly added objects to prefab that are missing in scene file) + PrefabSyncData = New(*SceneObjects, args.Data, Modifier); + SceneObjectsFactory::SetupPrefabInstances(Context, *PrefabSyncData); + + NextStage(); + return SceneResult::Success; +} + +SceneResult SceneLoader::OnSyncNewPrefabs(Args& args) +{ + // Sync the new prefab instances by spawning missing objects that were added to prefab but were not saved in a scene + // TODO: resave and force sync scenes during game cooking so this step could be skipped in game + SceneObjectsFactory::SynchronizeNewPrefabInstances(Context, *PrefabSyncData); + + NextStage(); + return SceneResult::Success; +} + +SceneResult SceneLoader::OnDeserialize(Args& args) +{ + PROFILE_CPU_NAMED("Deserialize"); + const int32 dataCount = (int32)args.Data.Size(); + SceneObject** objects = SceneObjects->Get(); + bool wasAsync = Context.Async; + Context.Async = false; // TODO: before doing full async for scene objects fix: + // TODO: - fix Actor's Scripts and Children order when loading objects data out of order via async jobs + // TODO: - add _loadNoAsync flag to SceneObject or Actor to handle non-async loading for those types (eg. 
UIControl/UICanvas) // Load all scene objects + if (Context.Async) { - PROFILE_CPU_NAMED("Deserialize"); - SceneObject** objects = sceneObjects->Get(); - bool wasAsync = context.Async; - context.Async = false; // TODO: before doing full async for scene objects fix: - // TODO: - fix Actor's Scripts and Children order when loading objects data out of order via async jobs - // TODO: - add _loadNoAsync flag to SceneObject or Actor to handle non-async loading for those types (eg. UIControl/UICanvas) - if (context.Async) + Level::ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) +#if USE_EDITOR + volatile int64 deprecated = 0; +#endif + JobSystem::Execute([&](int32 i) { - Level::ScenesLock.Unlock(); // Unlock scenes from Main Thread so Job Threads can use it to safely setup actors hierarchy (see Actor::Deserialize) -#if USE_EDITOR - volatile int64 deprecated = 0; -#endif - JobSystem::Execute([&](int32 i) + i++; // Start from 1. at index [0] was scene + auto obj = objects[i]; + if (obj) { - i++; // Start from 1. at index [0] was scene - auto obj = objects[i]; - if (obj) - { - auto& idMapping = Scripting::ObjectsLookupIdMapping.Get(); - idMapping = &context.GetModifier()->IdsMapping; - SceneObjectsFactory::Deserialize(context, obj, data[i]); + auto& idMapping = Scripting::ObjectsLookupIdMapping.Get(); + idMapping = &Context.GetModifier()->IdsMapping; + SceneObjectsFactory::Deserialize(Context, obj, args.Data[i]); #if USE_EDITOR - if (ContentDeprecated::Clear()) - Platform::InterlockedIncrement(&deprecated); + if (ContentDeprecated::Clear()) + Platform::InterlockedIncrement(&deprecated); #endif - idMapping = nullptr; - } - }, dataCount - 1); -#if USE_EDITOR - if (deprecated != 0) - ContentDeprecated::Mark(); -#endif - Level::ScenesLock.Lock(); - } - else - { - Scripting::ObjectsLookupIdMapping.Set(&modifier.Value->IdsMapping); - for (int32 i = 1; i < dataCount; i++) // start from 1. 
at index [0] was scene - { - auto& objData = data[i]; - auto obj = objects[i]; - if (obj) - SceneObjectsFactory::Deserialize(context, obj, objData); + idMapping = nullptr; } - Scripting::ObjectsLookupIdMapping.Set(nullptr); - } - context.Async = wasAsync; + }, dataCount - 1); +#if USE_EDITOR + if (deprecated != 0) + ContentDeprecated::Mark(); +#endif + Level::ScenesLock.Lock(); } + else + { + Scripting::ObjectsLookupIdMapping.Set(&Modifier->IdsMapping); + for (int32 i = 1; i < dataCount; i++) // start from 1. at index [0] was scene + { + auto& objData = args.Data[i]; + auto obj = objects[i]; + if (obj) + SceneObjectsFactory::Deserialize(Context, obj, objData); + } + Scripting::ObjectsLookupIdMapping.Set(nullptr); + } + Context.Async = wasAsync; - // /\ all above this has to be done on multiple threads at once - // \/ all below this has to be done on an any thread + NextStage(); + return SceneResult::Success; +} +SceneResult SceneLoader::OnSyncPrefabs(Args& args) +{ // Add injected children of scene (via OnSceneLoading) into sceneObjects to be initialized - for (auto child : injectedSceneChildren) + for (auto child : InjectedSceneChildren) { Array injectedSceneObjects; injectedSceneObjects.Add(child); @@ -1121,71 +1243,93 @@ SceneResult SceneLoader::Tick(rapidjson_flax::Value& data, int32 engineBuild, Sc { if (!o->IsRegistered()) o->RegisterObject(); - sceneObjects->Add(o); + SceneObjects->Add(o); } } // Synchronize prefab instances (prefab may have objects removed or reordered so deserialized instances need to synchronize with it) // TODO: resave and force sync scenes during game cooking so this step could be skipped in game - SceneObjectsFactory::SynchronizePrefabInstances(context, prefabSyncData); + SceneObjectsFactory::SynchronizePrefabInstances(Context, *PrefabSyncData); - // Cache transformations - { - PROFILE_CPU_NAMED("Cache Transform"); + NextStage(); + return SceneResult::Success; +} - scene->OnTransformChanged(); - } +SceneResult 
SceneLoader::OnSetupTransforms(Args& args) +{ + // Cache actor transformations + PROFILE_CPU_NAMED("SetupTransforms"); + Scene->OnTransformChanged(); + NextStage(); + return SceneResult::Success; +} + +SceneResult SceneLoader::OnInitialize(Args& args) +{ // Initialize scene objects + PROFILE_CPU_NAMED("Initialize"); + ASSERT_LOW_LAYER(IsInMainThread()); + SceneObject** objects = SceneObjects->Get(); + for (int32 i = 0; i < SceneObjects->Count(); i++) { - PROFILE_CPU_NAMED("Initialize"); - - SceneObject** objects = sceneObjects->Get(); - for (int32 i = 0; i < sceneObjects->Count(); i++) + SceneObject* obj = objects[i]; + if (obj) { - SceneObject* obj = objects[i]; - if (obj) - { - obj->Initialize(); + obj->Initialize(); - // Delete objects without parent - if (i != 0 && obj->GetParent() == nullptr) - { - LOG(Warning, "Scene object {0} {1} has missing parent object after load. Removing it.", obj->GetID(), obj->ToString()); - obj->DeleteObject(); - } + // Delete objects without parent + if (i != 0 && obj->GetParent() == nullptr) + { + LOG(Warning, "Scene object {0} {1} has missing parent object after load. 
Removing it.", obj->GetID(), obj->ToString()); + obj->DeleteObject(); } } - prefabSyncData.InitNewObjects(); } + PrefabSyncData->InitNewObjects(); - // /\ all above this has to be done on an any thread - // \/ all below this has to be done on a main thread + NextStage(); + return SceneResult::Success; +} - // Link scene and call init - { - PROFILE_CPU_NAMED("BeginPlay"); +SceneResult SceneLoader::OnBeginPlay(Args& args) +{ + PROFILE_CPU_NAMED("BeginPlay"); + ASSERT_LOW_LAYER(IsInMainThread()); - ScopeLock lock(Level::ScenesLock); - Level::Scenes.Add(scene); - SceneBeginData beginData; - scene->BeginPlay(&beginData); - beginData.OnDone(); - } + // Link scene + ScopeLock lock(Level::ScenesLock); + Level::Scenes.Add(Scene); - Stage = Loaded; + // TODO: prototype time-slicing with load-balancing for Begin Play: + // TODO: - collect all actors to enable + // TODO: - invoke in order OnBeginPlay -> Child Actors Begin -> Child Scripts Begin -> OnEnable for each actor + // TODO: - consider not drawing level until it's fully loaded (other engine systems should respect this too?) + // TODO: - consider refactoring Joints creation maybe? 
to get rid of SceneBeginData + + // Start the game for scene objects + SceneBeginData beginData; + Scene->BeginPlay(&beginData); + beginData.OnDone(); + + NextStage(); + return SceneResult::Success; +} + +SceneResult SceneLoader::OnEnd(Args& args) +{ + PROFILE_CPU_NAMED("End"); + Stopwatch time; // Fire event - CallSceneEvent(SceneEventType::OnSceneLoaded, scene, sceneId); + CallSceneEvent(SceneEventType::OnSceneLoaded, Scene, SceneId); - stopwatch.Stop(); - LOG(Info, "Scene loaded in {0}ms", stopwatch.GetMilliseconds()); - if (outScene) - *outScene = scene; + time.Stop(); + LOG(Info, "Scene loaded in {}ms ({} frames)", (int32)((TotalTime + time.GetTotalSeconds()) * 1000.0), Engine::UpdateCount - StartFrame); #if USE_EDITOR // Resave assets that use deprecated data format - for (auto& e : context.DeprecatedPrefabs) + for (auto& e : Context.DeprecatedPrefabs) { AssetReference prefab = e.Item; LOG(Info, "Resaving asset '{}' that uses deprecated data format", prefab->GetPath()); @@ -1194,16 +1338,17 @@ SceneResult SceneLoader::Tick(rapidjson_flax::Value& data, int32 engineBuild, Sc LOG(Error, "Failed to resave asset '{}'", prefab->GetPath()); } } - if (ContentDeprecated::Clear() && assetPath) + if (ContentDeprecated::Clear() && args.AssetPath) { - LOG(Info, "Resaving asset '{}' that uses deprecated data format", *assetPath); - if (saveScene(scene, *assetPath)) + LOG(Info, "Resaving asset '{}' that uses deprecated data format", *args.AssetPath); + if (saveScene(Scene, *args.AssetPath)) { - LOG(Error, "Failed to resave asset '{}'", *assetPath); + LOG(Error, "Failed to resave asset '{}'", *args.AssetPath); } } #endif + NextStage(); return SceneResult::Success; } @@ -1732,8 +1877,9 @@ Array Level::GetScripts(const MClass* type, Actor* root) const bool isInterface = type->IsInterface(); if (root) ::GetScripts(type, isInterface, root, result); - else for (int32 i = 0; i < Scenes.Count(); i++) - ::GetScripts(type, isInterface, Scenes[i], result); + else + for (int32 i = 0; i 
< Scenes.Count(); i++) + ::GetScripts(type, isInterface, Scenes[i], result); return result; } From e9835766bc3efb25db9b27beda2160ba9b139b21 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 11 Jun 2025 14:56:43 +0200 Subject: [PATCH 044/211] Add red color to Tracy profiler zones that cause CPU waiting to improve profiling --- Source/Engine/Content/Asset.cpp | 3 +++ Source/Engine/Content/Content.cpp | 2 ++ .../Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp | 6 +++++- .../Engine/GraphicsDevice/DirectX/DX12/CommandQueueDX12.cpp | 5 +++-- .../Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp | 3 +++ Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 1 + Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp | 1 + Source/Engine/Profiler/ProfilerCPU.h | 3 +++ Source/Engine/Threading/JobSystem.cpp | 1 + Source/Engine/Threading/Task.cpp | 2 ++ 10 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Content/Asset.cpp b/Source/Engine/Content/Asset.cpp index 86801b078..5a9e62993 100644 --- a/Source/Engine/Content/Asset.cpp +++ b/Source/Engine/Content/Asset.cpp @@ -494,6 +494,9 @@ bool Asset::WaitForLoaded(double timeoutInMilliseconds) const } PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); + const StringView path(GetPath()); + ZoneText(*path, path.Length()); Content::WaitForTask(loadingTask, timeoutInMilliseconds); diff --git a/Source/Engine/Content/Content.cpp b/Source/Engine/Content/Content.cpp index 4ba6cbc44..2178579cc 100644 --- a/Source/Engine/Content/Content.cpp +++ b/Source/Engine/Content/Content.cpp @@ -1128,6 +1128,8 @@ void Content::WaitForTask(ContentLoadTask* loadingTask, double timeoutInMillisec localQueue.Clear(); } + PROFILE_CPU_NAMED("Inline"); + ZoneColor(0xffaaaaaa); thread->Run(tmp); } else diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp index 7a106d377..3856a7cd0 100644 --- 
a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUSwapChainDX11.cpp @@ -3,11 +3,12 @@ #if GRAPHICS_API_DIRECTX11 #include "GPUSwapChainDX11.h" +#include "GPUContextDX11.h" #include "Engine/Platform/Window.h" #include "Engine/Graphics/RenderTools.h" #include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h" +#include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" -#include "GPUContextDX11.h" GPUSwapChainDX11::GPUSwapChainDX11(GPUDeviceDX11* device, Window* window) : GPUResourceDX11(device, StringView::Empty) @@ -140,6 +141,9 @@ GPUTextureView* GPUSwapChainDX11::GetBackBufferView() void GPUSwapChainDX11::Present(bool vsync) { + PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); + // Present frame ASSERT(_swapChain); UINT presentFlags = 0; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/CommandQueueDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/CommandQueueDX12.cpp index 2488480af..81fef4965 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/CommandQueueDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/CommandQueueDX12.cpp @@ -6,6 +6,7 @@ #include "GPUDeviceDX12.h" #include "Engine/Threading/Threading.h" #include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h" +#include "Engine/Profiler/ProfilerCPU.h" FenceDX12::FenceDX12(GPUDeviceDX12* device) : _currentValue(1) @@ -64,12 +65,12 @@ void FenceDX12::WaitCPU(uint64 value) { if (IsFenceComplete(value)) return; - + PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); ScopeLock lock(_locker); _fence->SetEventOnCompletion(value, _event); WaitForSingleObject(_event, INFINITE); - _lastCompletedValue = _fence->GetCompletedValue(); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp index fa6dfa881..83ecf020d 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp +++ 
b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUSwapChainDX12.cpp @@ -6,6 +6,7 @@ #include "GPUContextDX12.h" #include "../IncludeDirectXHeaders.h" #include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h" +#include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" void BackBufferDX12::Setup(GPUSwapChainDX12* window, ID3D12Resource* backbuffer) @@ -364,6 +365,8 @@ void GPUSwapChainDX12::End(RenderTask* task) void GPUSwapChainDX12::Present(bool vsync) { + PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); #if PLATFORM_XBOX_SCARLETT || PLATFORM_XBOX_ONE ID3D12Resource* backBuffer = _backBuffers[_currentFrameIndex].GetResource(); D3D12XBOX_PRESENT_PLANE_PARAMETERS planeParameters = {}; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 73eb90755..f2d0aad7d 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -2094,6 +2094,7 @@ void GPUDeviceVulkan::WaitForGPU() if (Device != VK_NULL_HANDLE) { PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); VALIDATE_VULKAN_RESULT(vkDeviceWaitIdle(Device)); } } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp index 801ba1fc1..21971c5ca 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp @@ -553,6 +553,7 @@ void GPUSwapChainVulkan::Present(bool vsync) if (_acquiredImageIndex == -1) return; PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); // Ensure that backbuffer has been acquired before presenting it to the window const auto backBuffer = (GPUTextureViewVulkan*)GetBackBufferView(); diff --git a/Source/Engine/Profiler/ProfilerCPU.h b/Source/Engine/Profiler/ProfilerCPU.h index e8d0523f4..77ecfe7b2 100644 --- a/Source/Engine/Profiler/ProfilerCPU.h +++ b/Source/Engine/Profiler/ProfilerCPU.h @@ 
-412,3 +412,6 @@ struct TIsPODType #define PROFILE_CPU_ACTOR(actor) #endif + +// CPU-wait zones can be marked with red color for better readability +#define TracyWaitZoneColor 0xba1904 diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 612584c40..e90a2e847 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -385,6 +385,7 @@ void JobSystem::Wait(int64 label) { #if JOB_SYSTEM_ENABLED PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); while (Platform::AtomicRead(&ExitFlag) == 0) { diff --git a/Source/Engine/Threading/Task.cpp b/Source/Engine/Threading/Task.cpp index 601079e85..911e1e3e8 100644 --- a/Source/Engine/Threading/Task.cpp +++ b/Source/Engine/Threading/Task.cpp @@ -40,6 +40,7 @@ void Task::Cancel() bool Task::Wait(double timeoutMilliseconds) const { PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); const double startTime = Platform::GetTimeSeconds(); // TODO: no active waiting! use a semaphore! @@ -76,6 +77,7 @@ bool Task::Wait(double timeoutMilliseconds) const bool Task::WaitAll(const Span& tasks, double timeoutMilliseconds) { PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); for (int32 i = 0; i < tasks.Length(); i++) { if (tasks[i]->Wait()) From 5b6859a66f55fb4472a795e2c267dca844f760e1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 11 Jun 2025 18:40:06 +0200 Subject: [PATCH 045/211] Add time slicing to Deserialization stage of async scenes loading to avoid hitching #3261 --- Source/Engine/Level/Level.cpp | 61 ++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 881277c5e..678eada1f 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -114,6 +114,19 @@ struct ScriptsReloadObject #endif +// Small utility for dividing the iterative work over data set that can run in equal slicer limited by time. 
+struct TimeSlicer +{ + int32 Index = -1; + int32 Count = 0; + double TimeBudget; + double StartTime; + + void BeginSync(float timeBudget, int32 count, int32 startIndex = 0); + bool StepSync(); + SceneResult End(); +}; + // Async map loading utility for state tracking and synchronization of various load stages. class SceneLoader { @@ -154,6 +167,7 @@ public: Array InjectedSceneChildren; SceneObjectsFactory::Context Context; SceneObjectsFactory::PrefabSyncData* PrefabSyncData = nullptr; + TimeSlicer StageSlicer; SceneLoader(bool asyncLoad = false) : AsyncLoad(asyncLoad) @@ -1002,6 +1016,38 @@ SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Value& dat return result; } +void TimeSlicer::BeginSync(float timeBudget, int32 count, int32 startIndex) +{ + if (Index == -1) + { + // Starting + Index = startIndex; + Count = count; + } + TimeBudget = (double)timeBudget; + StartTime = Platform::GetTimeSeconds(); +} + +bool TimeSlicer::StepSync() +{ + Index++; + double time = Platform::GetTimeSeconds(); + double dt = time - StartTime; + return dt >= TimeBudget; +} + +SceneResult TimeSlicer::End() +{ + if (Index >= Count) + { + // Finished + *this = TimeSlicer(); + return SceneResult::Success; + } + + return SceneResult::Wait; +} + SceneResult SceneLoader::Tick(Args& args) { switch (Stage) @@ -1216,19 +1262,24 @@ SceneResult SceneLoader::OnDeserialize(Args& args) else { Scripting::ObjectsLookupIdMapping.Set(&Modifier->IdsMapping); - for (int32 i = 1; i < dataCount; i++) // start from 1. at index [0] was scene + StageSlicer.BeginSync(args.TimeBudget, dataCount, 1); // start from 1. 
at index [0] was scene + while (StageSlicer.Index < StageSlicer.Count) { - auto& objData = args.Data[i]; - auto obj = objects[i]; + auto& objData = args.Data[StageSlicer.Index]; + auto obj = objects[StageSlicer.Index]; if (obj) SceneObjectsFactory::Deserialize(Context, obj, objData); + if (StageSlicer.StepSync()) + break; } Scripting::ObjectsLookupIdMapping.Set(nullptr); } Context.Async = wasAsync; - NextStage(); - return SceneResult::Success; + auto result = StageSlicer.End(); + if (result != SceneResult::Wait) + NextStage(); + return result; } SceneResult SceneLoader::OnSyncPrefabs(Args& args) From 8ec138399af7e5a703a1e9b8b946f26dcd0c8473 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 11 Jun 2025 18:40:35 +0200 Subject: [PATCH 046/211] Add higher level streaming time budget in frame based on idle time --- Source/Engine/Engine/Engine.cpp | 5 +++++ Source/Engine/Level/Level.cpp | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index 5607432c2..4ea18a85c 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -76,6 +76,7 @@ FatalErrorType Engine::FatalError = FatalErrorType::None; bool Engine::IsRequestingExit = false; int32 Engine::ExitCode = 0; Window* Engine::MainWindow = nullptr; +double EngineIdleTime = 0; int32 Engine::Main(const Char* cmdLine) { @@ -190,7 +191,10 @@ int32 Engine::Main(const Char* cmdLine) if (timeToTick > 0.002) { PROFILE_CPU_NAMED("Idle"); + auto sleepStart = Platform::GetTimeSeconds(); Platform::Sleep(1); + auto sleepEnd = Platform::GetTimeSeconds(); + EngineIdleTime += sleepEnd - sleepStart; } } @@ -227,6 +231,7 @@ int32 Engine::Main(const Char* cmdLine) OnUpdate(); OnLateUpdate(); Time::OnEndUpdate(); + EngineIdleTime = 0; } // Start physics simulation diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 678eada1f..5516e6d53 100644 --- a/Source/Engine/Level/Level.cpp +++ 
b/Source/Engine/Level/Level.cpp @@ -251,6 +251,7 @@ public: }; LevelService LevelServiceInstanceService; +extern double EngineIdleTime; CriticalSection Level::ScenesLock; Array Level::Scenes; @@ -863,6 +864,8 @@ void LevelImpl::flushActions() else if (Engine::GetFramesPerSecond() > 0) targetFps = (float)Engine::GetFramesPerSecond(); context.TimeBudget = Level::StreamingFrameBudget / targetFps; + if (EngineIdleTime > 0.001) + context.TimeBudget += (float)(EngineIdleTime * 0.5); // Increase time budget if engine has some idle time for spare #if USE_EDITOR // Throttle up in Editor context.TimeBudget *= Editor::IsPlayMode ? 1.2f : 2.0f; @@ -871,8 +874,9 @@ void LevelImpl::flushActions() // Throttle up in Debug context.TimeBudget *= 1.2f; #endif - if (context.TimeBudget <= ZeroTolerance) - context.TimeBudget = MAX_float; + if (context.TimeBudget <= 0.0f) + context.TimeBudget = MAX_float; // Unlimited if 0 + context.TimeBudget = Math::Max(context.TimeBudget, 0.001f); // Minimum 1ms // Runs actions in order ScopeLock lock(_sceneActionsLocker); From 0fa53f860a3afc2edfc9eae4ff1a1bc080d78054 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 11 Jun 2025 23:35:03 +0200 Subject: [PATCH 047/211] Add `UseLogInRelease` to engine config to disable logging in Release builds --- Source/Engine/Core/Config.h | 4 ++++ Source/Tools/Flax.Build/Build/ProjectTarget.cs | 4 ++++ Source/Tools/Flax.Build/Configuration.cs | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/Source/Engine/Core/Config.h b/Source/Engine/Core/Config.h index 014ebb0c2..810217050 100644 --- a/Source/Engine/Core/Config.h +++ b/Source/Engine/Core/Config.h @@ -30,13 +30,17 @@ #endif // Enable logging service (saving log to file, can be disabled using -nolog command line) +#ifndef LOG_ENABLE #define LOG_ENABLE 1 +#endif // Enable crash reporting service (stack trace and crash dump collecting) #define CRASH_LOG_ENABLE (!BUILD_RELEASE) // Enable/disable assertion +#ifndef ENABLE_ASSERTION #define ENABLE_ASSERTION 
(!BUILD_RELEASE) +#endif // Enable/disable assertion for Engine low layers #define ENABLE_ASSERTION_LOW_LAYERS ENABLE_ASSERTION && (BUILD_DEBUG || FLAX_TESTS) diff --git a/Source/Tools/Flax.Build/Build/ProjectTarget.cs b/Source/Tools/Flax.Build/Build/ProjectTarget.cs index 77b7cfe94..e408452a0 100644 --- a/Source/Tools/Flax.Build/Build/ProjectTarget.cs +++ b/Source/Tools/Flax.Build/Build/ProjectTarget.cs @@ -79,6 +79,10 @@ namespace Flax.Build options.CompileEnv.PreprocessorDefinitions.Add("USE_LARGE_WORLDS"); options.ScriptingAPI.Defines.Add("USE_LARGE_WORLDS"); } + if (!EngineConfiguration.UseLogInRelease && !IsEditor) + { + options.CompileEnv.PreprocessorDefinitions.Add("LOG_ENABLE=0"); + } // Add include paths for this and all referenced projects sources foreach (var project in Project.GetAllProjects()) diff --git a/Source/Tools/Flax.Build/Configuration.cs b/Source/Tools/Flax.Build/Configuration.cs index f6ad3b3a5..3bb84760e 100644 --- a/Source/Tools/Flax.Build/Configuration.cs +++ b/Source/Tools/Flax.Build/Configuration.cs @@ -276,6 +276,12 @@ namespace Flax.Build [CommandLine("useDotNet", "1 to enable .NET support in build, 0 to enable Mono support in build")] public static bool UseDotNet = true; + /// + /// True if enable logging in Release game builds. 
+ /// + [CommandLine("useLogInRelease", "Can be used to disable logging in Release game builds")] + public static bool UseLogInRelease = true; + public static bool WithCSharp(NativeCpp.BuildOptions options) { return UseCSharp || options.Target.IsEditor; From 4240646ec78b812b9be49471cbbef87a71425db1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 12 Jun 2025 08:31:32 +0200 Subject: [PATCH 048/211] Update minimum Windows version to `10` (to match .NET 8) --- .../Engine/Platform/Windows/WindowsPlatform.cpp | 16 +++++----------- Source/Engine/Video/MF/VideoBackendMF.cpp | 2 +- .../Platforms/Windows/WindowsToolchain.cs | 2 +- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/Source/Engine/Platform/Windows/WindowsPlatform.cpp b/Source/Engine/Platform/Windows/WindowsPlatform.cpp index 697174a47..272d38cf7 100644 --- a/Source/Engine/Platform/Windows/WindowsPlatform.cpp +++ b/Source/Engine/Platform/Windows/WindowsPlatform.cpp @@ -556,14 +556,8 @@ void WindowsPlatform::PreInit(void* hInstance) FlaxDbgHelpUnlock(); #endif + // Get system version GetWindowsVersion(WindowsName, VersionMajor, VersionMinor, VersionBuild); - - // Validate platform - if (VersionMajor < 6) - { - Error(TEXT("Not supported operating system version.")); - exit(-1); - } } bool WindowsPlatform::IsWindows10() @@ -640,25 +634,25 @@ bool WindowsPlatform::Init() // Check if can run Engine on current platform #if WINVER >= 0x0A00 - if (!IsWindows10OrGreater() && !IsWindowsServer()) + if (VersionMajor < 10 && !IsWindowsServer()) { Platform::Fatal(TEXT("Flax Engine requires Windows 10 or higher.")); return true; } #elif WINVER >= 0x0603 - if (!IsWindows8Point1OrGreater() && !IsWindowsServer()) + if ((VersionMajor < 8 || (VersionMajor == 8 && VersionMinor == 0)) && !IsWindowsServer()) { Platform::Fatal(TEXT("Flax Engine requires Windows 8.1 or higher.")); return true; } #elif WINVER >= 0x0602 - if (!IsWindows8OrGreater() && !IsWindowsServer()) + if (VersionMajor < 8 && !IsWindowsServer()) { 
Platform::Fatal(TEXT("Flax Engine requires Windows 8 or higher.")); return true; } #else - if (!IsWindows7OrGreater() && !IsWindowsServer()) + if (VersionMajor < 7 && !IsWindowsServer()) { Platform::Fatal(TEXT("Flax Engine requires Windows 7 or higher.")); return true; diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp index 01d6ec481..75950f3aa 100644 --- a/Source/Engine/Video/MF/VideoBackendMF.cpp +++ b/Source/Engine/Video/MF/VideoBackendMF.cpp @@ -17,7 +17,7 @@ // Fix compilation for Windows 8.1 on the latest Windows SDK typedef enum _MFVideoSphericalFormat { } MFVideoSphericalFormat; #endif -#if !defined(MF_SOURCE_READER_CURRENT_TYPE_INDEX) && !defined(PLATFORM_GDK) +#if !defined(MF_SOURCE_READER_CURRENT_TYPE_INDEX) && !defined(PLATFORM_GDK) && WINVER < _WIN32_WINNT_WIN10 // Fix compilation for Windows 7 on the latest Windows SDK #define MF_SOURCE_READER_CURRENT_TYPE_INDEX 0xFFFFFFFF #endif diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs index b33ca5e25..428cc2437 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs @@ -14,7 +14,7 @@ namespace Flax.Build /// Specifies the minimum Windows version to use (eg. 10). /// [CommandLine("winMinVer", "", "Specifies the minimum Windows version to use (eg. 
10).")] - public static string WindowsMinVer = "7"; + public static string WindowsMinVer = "10"; } } From 7606c9ac1227bbadd8ee3dbce3b999ae799fc0f2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 12 Jun 2025 17:03:19 +0200 Subject: [PATCH 049/211] Update minimum CPU arch requirement on Windows to AVX2 with SSE4.2 94.48% support on PC according to Steam Hardware & Software Survey: May 2025 (https://store.steampowered.com/hwsurvey/) --- Source/Engine/Platform/Defines.h | 18 ++-- .../Platform/Windows/WindowsPlatform.cpp | 101 +++++++++++++++--- .../Build/NativeCpp/CompileEnvironment.cs | 47 ++++++++ Source/Tools/Flax.Build/Build/Target.cs | 19 ++++ Source/Tools/Flax.Build/Build/Toolchain.cs | 5 + .../Platforms/Windows/WindowsToolchain.cs | 27 ++++- 6 files changed, 188 insertions(+), 29 deletions(-) diff --git a/Source/Engine/Platform/Defines.h b/Source/Engine/Platform/Defines.h index b5b3274e2..29f64052d 100644 --- a/Source/Engine/Platform/Defines.h +++ b/Source/Engine/Platform/Defines.h @@ -207,28 +207,22 @@ API_ENUM() enum class ArchitectureType #define PLATFORM_UNIX_FAMILY (PLATFORM_LINUX || PLATFORM_ANDROID || PLATFORM_PS4 || PLATFORM_PS5 || PLATFORM_APPLE_FAMILY) // SIMD defines -#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__) +#if !defined(PLATFORM_SIMD_SSE2) && (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__)) #define PLATFORM_SIMD_SSE2 1 -#if defined(__SSE3__) +#if !defined(PLATFORM_SIMD_SSE3) && (defined(__SSE3__)) #define PLATFORM_SIMD_SSE3 1 #endif -#if defined(__SSE4__) -#define PLATFORM_SIMD_SSE4 1 -#endif -#if defined(__SSE4_1__) +#if !defined(PLATFORM_SIMD_SSE4_1) && (defined(__SSE4_1__)) #define PLATFORM_SIMD_SSE4_1 1 #endif -#if defined(__SSE4_2__) +#if !defined(PLATFORM_SIMD_SSE4_2) && (defined(__SSE4_2__)) #define PLATFORM_SIMD_SSE4_2 1 #endif #endif -#if defined(_M_ARM) || defined(__ARM_NEON__) || defined(__ARM_NEON) +#if 
!defined(PLATFORM_SIMD_NEON) && (defined(_M_ARM) || defined(__ARM_NEON__) || defined(__ARM_NEON)) #define PLATFORM_SIMD_NEON 1 #endif -#if defined(_M_PPC) || defined(__CELLOS_LV2__) -#define PLATFORM_SIMD_VMX 1 -#endif -#define PLATFORM_SIMD (PLATFORM_SIMD_SSE2 || PLATFORM_SIMD_SSE3 || PLATFORM_SIMD_SSE4 || PLATFORM_SIMD_NEON || PLATFORM_SIMD_VMX) +#define PLATFORM_SIMD (PLATFORM_SIMD_SSE2 || PLATFORM_SIMD_SSE3 || PLATFORM_SIMD_SSE4_1 || PLATFORM_SIMD_SSE4_2 || PLATFORM_SIMD_NEON) // Unicode text macro #if !defined(TEXT) diff --git a/Source/Engine/Platform/Windows/WindowsPlatform.cpp b/Source/Engine/Platform/Windows/WindowsPlatform.cpp index 272d38cf7..116cb3deb 100644 --- a/Source/Engine/Platform/Windows/WindowsPlatform.cpp +++ b/Source/Engine/Platform/Windows/WindowsPlatform.cpp @@ -257,6 +257,37 @@ void GetWindowsVersion(String& windowsName, int32& versionMajor, int32& versionM RegCloseKey(hKey); } +#if PLATFORM_ARCH_X86 || PLATFORM_ARCH_X64 + +struct CPUBrand +{ + char Buffer[0x40]; + + CPUBrand() + { + Buffer[0] = 0; + int32 cpuInfo[4]; + __cpuid(cpuInfo, 0x80000000); + if (cpuInfo[0] >= 0x80000004) + { + // Get name + for (uint32 i = 0; i < 3; i++) + { + __cpuid(cpuInfo, 0x80000002 + i); + memcpy(Buffer + i * sizeof(cpuInfo), cpuInfo, sizeof(cpuInfo)); + } + + // Trim ending whitespaces + int32 size = StringUtils::Length(Buffer); + while (size > 1 && Buffer[size - 1] == ' ') + size--; + Buffer[size] = 0; + } + } +}; + +#endif + LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) { // Find window to process that message @@ -517,6 +548,60 @@ void WindowsPlatform::PreInit(void* hInstance) ASSERT(hInstance); Instance = hInstance; +#if PLATFORM_ARCH_X86 || PLATFORM_ARCH_X64 + // Check the minimum vector instruction set support + int32 cpuInfo[4] = { -1 }; + __cpuid(cpuInfo, 0); + int32 cpuInfoSize = cpuInfo[0]; + __cpuid(cpuInfo, 1); + bool SSE2 = cpuInfo[3] & (1u << 26); + bool SSE3 = cpuInfo[2] & (1u << 0); + bool SSE41 = cpuInfo[2] & (1u 
<< 19); + bool SSE42 = cpuInfo[2] & (1u << 20); + bool AVX = cpuInfo[2] & (1u << 28); + bool POPCNT = cpuInfo[2] & (1u << 23); + bool AVX2 = false; + if (cpuInfoSize >= 7) + { + __cpuid(cpuInfo, 7); + AVX2 = cpuInfo[1] & (1u << 5) && (_xgetbv(0) & 6) == 6; + } + const Char* missingFeature = nullptr; +#if defined(__AVX__) + if (!AVX) + missingFeature = TEXT("AVX"); +#endif +#if defined(__AVX2__) + if (!AVX2) + missingFeature = TEXT("AVX2"); +#endif +#if PLATFORM_SIMD_SSE2 + if (!SSE2) + missingFeature = TEXT("SSE2"); +#endif +#if PLATFORM_SIMD_SSE3 + if (!SSE3) + missingFeature = TEXT("SSE3"); +#endif +#if PLATFORM_SIMD_SSE4_1 + if (!SSE41) + missingFeature = TEXT("SSE4.1"); +#endif +#if PLATFORM_SIMD_SSE4_2 + if (!SSE42) + missingFeature = TEXT("SSE4.2"); + if (!POPCNT) + missingFeature = TEXT("POPCNT"); +#endif + if (missingFeature) + { + // Not supported CPU + CPUBrand cpu; + Error(String::Format(TEXT("Cannot start program due to lack of CPU feature {}.\n\n{}"), missingFeature, String(cpu.Buffer))); + exit(-1); + } +#endif + // Disable the process from being showing "ghosted" while not responding messages during slow tasks DisableProcessWindowsGhosting(); @@ -707,20 +792,8 @@ void WindowsPlatform::LogInfo() #if PLATFORM_ARCH_X86 || PLATFORM_ARCH_X64 // Log CPU brand - { - char brandBuffer[0x40] = {}; - int32 cpuInfo[4] = { -1 }; - __cpuid(cpuInfo, 0x80000000); - if (cpuInfo[0] >= 0x80000004) - { - for (uint32 i = 0; i < 3; i++) - { - __cpuid(cpuInfo, 0x80000002 + i); - memcpy(brandBuffer + i * sizeof(cpuInfo), cpuInfo, sizeof(cpuInfo)); - } - } - LOG(Info, "CPU: {0}", String(brandBuffer)); - } + CPUBrand cpu; + LOG(Info, "CPU: {0}", String(cpu.Buffer)); #endif LOG(Info, "Microsoft {0} {1}-bit ({2}.{3}.{4})", WindowsName, Platform::Is64BitPlatform() ? 
TEXT("64") : TEXT("32"), VersionMajor, VersionMinor, VersionBuild); diff --git a/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs b/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs index 81c620592..7da495b46 100644 --- a/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs +++ b/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs @@ -84,6 +84,47 @@ namespace Flax.Build.NativeCpp Latest, } + /// + /// The SIMD architecture to use for code generation. + /// + public enum CpuArchitecture + { + /// + /// No specific architecture set. + /// + None, + + /// + /// Intel Advanced Vector Extensions. + /// + AVX, + + /// + /// Enables Intel Advanced Vector Extensions 2. + /// + AVX2, + + /// + /// Intel Advanced Vector Extensions 512. + /// + AVX512, + + /// + /// Intel Streaming SIMD Extensions 2. + /// + SSE2, + + /// + /// Intel Streaming SIMD Extensions 4.2. + /// + SSE4_2, + + /// + /// ARM Neon. + /// + NEON, + } + /// /// The C++ compilation environment required to build source files in the native modules. /// @@ -104,6 +145,11 @@ namespace Flax.Build.NativeCpp /// public Sanitizer Sanitizers = Sanitizer.None; + /// + /// SIMD architecture to use. + /// + public CpuArchitecture CpuArchitecture = CpuArchitecture.None; + /// /// Enables exceptions support. 
/// @@ -222,6 +268,7 @@ namespace Flax.Build.NativeCpp CppVersion = CppVersion, FavorSizeOrSpeed = FavorSizeOrSpeed, Sanitizers = Sanitizers, + CpuArchitecture = CpuArchitecture, EnableExceptions = EnableExceptions, RuntimeTypeInfo = RuntimeTypeInfo, Inlining = Inlining, diff --git a/Source/Tools/Flax.Build/Build/Target.cs b/Source/Tools/Flax.Build/Build/Target.cs index 4deecb414..fe3a4e075 100644 --- a/Source/Tools/Flax.Build/Build/Target.cs +++ b/Source/Tools/Flax.Build/Build/Target.cs @@ -248,6 +248,25 @@ namespace Flax.Build Modules.Add("Main"); } + switch (options.CompileEnv.CpuArchitecture) + { + case CpuArchitecture.AVX: + case CpuArchitecture.SSE2: + // Basic SSE2 + options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_SIMD_SSE2=1"); break; + case CpuArchitecture.AVX2: + case CpuArchitecture.SSE4_2: + // Assume full support of SSE4.2 and older + options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_SIMD_SSE2=1"); + options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_SIMD_SSE3=1"); + options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_SIMD_SSE4_1=1"); + options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_SIMD_SSE4_2=1"); + break; + case CpuArchitecture.NEON: + options.CompileEnv.PreprocessorDefinitions.Add("PLATFORM_SIMD_NEON=1"); + break; + } + options.CompileEnv.EnableExceptions = true; // TODO: try to disable this!
options.CompileEnv.Sanitizers = Configuration.Sanitizers; switch (options.Configuration) diff --git a/Source/Tools/Flax.Build/Build/Toolchain.cs b/Source/Tools/Flax.Build/Build/Toolchain.cs index ca05cecc4..d67766cf8 100644 --- a/Source/Tools/Flax.Build/Build/Toolchain.cs +++ b/Source/Tools/Flax.Build/Build/Toolchain.cs @@ -102,6 +102,11 @@ namespace Flax.Build { options.CompileEnv.IncludePaths.AddRange(SystemIncludePaths); options.LinkEnv.LibraryPaths.AddRange(SystemLibraryPaths); + + if (options.Architecture == TargetArchitecture.x64 || options.Architecture == TargetArchitecture.x86) + options.CompileEnv.CpuArchitecture = CpuArchitecture.AVX; + else if (options.Architecture == TargetArchitecture.ARM64 || options.Architecture == TargetArchitecture.ARM) + options.CompileEnv.CpuArchitecture = CpuArchitecture.NEON; } /// diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs index 428cc2437..7537cdecf 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs @@ -15,6 +15,12 @@ namespace Flax.Build /// [CommandLine("winMinVer", "", "Specifies the minimum Windows version to use (eg. 10).")] public static string WindowsMinVer = "10"; + + /// + /// Specifies the minimum CPU architecture type to support (on x86/x64). 
+ /// + [CommandLine("winCpuArch", "", "Specifies the minimum CPU architecture type to support (on x86/x64).")] + public static CpuArchitecture WindowsCpuArch = CpuArchitecture.AVX2; // 94.48% support on PC according to Steam Hardware & Software Survey: May 2025 (https://store.steampowered.com/hwsurvey/) } } @@ -80,6 +86,18 @@ namespace Flax.Build.Platforms options.CompileEnv.PreprocessorDefinitions.Add("USE_SOFT_INTRINSICS"); options.LinkEnv.InputLibraries.Add("softintrin.lib"); } + + options.CompileEnv.CpuArchitecture = Configuration.WindowsCpuArch; + if (_minVersion.Major <= 7 && options.CompileEnv.CpuArchitecture == CpuArchitecture.AVX2) + { + // Old Windows had lower support ratio for latest CPU features + options.CompileEnv.CpuArchitecture = CpuArchitecture.AVX; + } + if (_minVersion.Major >= 11 && options.CompileEnv.CpuArchitecture == CpuArchitecture.AVX) + { + // Windows 11 has hard requirement on SSE4.2 + options.CompileEnv.CpuArchitecture = CpuArchitecture.SSE4_2; + } } /// @@ -87,10 +105,13 @@ namespace Flax.Build.Platforms { base.SetupCompileCppFilesArgs(graph, options, args); - if (Toolset >= WindowsPlatformToolset.v142 && _minVersion.Major >= 11) + switch (options.CompileEnv.CpuArchitecture) { - // Windows 11 requires SSE4.2 - args.Add("/d2archSSE42"); + case CpuArchitecture.AVX: args.Add("/arch:AVX"); break; + case CpuArchitecture.AVX2: args.Add("/arch:AVX2"); break; + case CpuArchitecture.AVX512: args.Add("/arch:AVX512"); break; + case CpuArchitecture.SSE2: args.Add("/arch:SSE2"); break; + case CpuArchitecture.SSE4_2: args.Add("/arch:SSE4.2"); break; } }
files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp b/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp index 69f32f588..1d447027b 100644 --- a/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp +++ b/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp @@ -186,7 +186,7 @@ bool MacPlatformTools::OnPostProcess(CookingData& data) ADD_ENTRY("CFBundlePackageType", "APPL"); ADD_ENTRY("NSPrincipalClass", "NSApplication"); ADD_ENTRY("LSApplicationCategoryType", "public.app-category.games"); - ADD_ENTRY("LSMinimumSystemVersion", "10.15"); + ADD_ENTRY("LSMinimumSystemVersion", "13"); ADD_ENTRY("CFBundleIconFile", "icon.icns"); ADD_ENTRY_STR("CFBundleExecutable", executableName); ADD_ENTRY_STR("CFBundleIdentifier", appIdentifier); diff --git a/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs b/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs index a9210274f..1d5315626 100644 --- a/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Mac/MacToolchain.cs @@ -11,7 +11,7 @@ namespace Flax.Build /// Specifies the minimum Mac OSX version to use (eg. 10.14). /// [CommandLine("macOSXMinVer", "", "Specifies the minimum Mac OSX version to use (eg. 10.14).")] - public static string MacOSXMinVer = "10.15"; + public static string MacOSXMinVer = "13"; } } diff --git a/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs b/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs index 58bf44b77..44647a607 100644 --- a/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/iOS/iOSToolchain.cs @@ -12,7 +12,7 @@ namespace Flax.Build /// Specifies the minimum iOS version to use (eg. 14). /// [CommandLine("iOSMinVer", "", "Specifies the minimum iOS version to use (eg. 
14).")] - public static string iOSMinVer = "14"; + public static string iOSMinVer = "15"; } } From eadb4411ffd53290814657bfaa009695f4c45982 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 12 Jun 2025 17:35:02 +0200 Subject: [PATCH 051/211] Fix crash in GPU Memory profiler if resource went null --- Source/Editor/Windows/Profiler/MemoryGPU.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Editor/Windows/Profiler/MemoryGPU.cs b/Source/Editor/Windows/Profiler/MemoryGPU.cs index ce266777d..74f14b584 100644 --- a/Source/Editor/Windows/Profiler/MemoryGPU.cs +++ b/Source/Editor/Windows/Profiler/MemoryGPU.cs @@ -146,6 +146,8 @@ namespace FlaxEditor.Windows.Profiler { var gpuResource = _gpuResourcesCached[i]; ref var resource = ref resources[i]; + if (!gpuResource) + continue; // Try to reuse cached resource info var gpuResourceId = gpuResource.ID; From e2d9452994a3fc1ec9c8d1ee02fd39a770e1fa67 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 12 Jun 2025 18:05:01 +0200 Subject: [PATCH 052/211] Add unified min Clang version `13` for Linux --- Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs | 6 +++--- Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs | 4 ++-- Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs | 5 ++--- Source/Tools/Flax.Build/Deps/Dependencies/curl.cs | 4 ++-- .../Tools/Flax.Build/Deps/Dependencies/freetype.cs | 4 ++-- Source/Tools/Flax.Build/Deps/Dependencies/mono.cs | 4 ++-- Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs | 4 ++-- .../Flax.Build/Platforms/Linux/LinuxToolchain.cs | 12 ++++++++++++ 8 files changed, 27 insertions(+), 16 deletions(-) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs index 50235aa10..f72928bf8 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs @@ -124,9 +124,9 @@ namespace Flax.Deps.Dependencies { var envVars = new Dictionary { - { "CC", "clang-13" }, - { 
"CC_FOR_BUILD", "clang-13" }, - { "CXX", "clang++-13" }, + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, + { "CXX", "clang++-" + Configuration.LinuxClangMinVer }, }; // Build for Linux diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs index 6a783d73f..7b42486e2 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs @@ -121,8 +121,8 @@ namespace Flax.Deps.Dependencies }; var envVars = new Dictionary { - { "CC", "clang-7" }, - { "CC_FOR_BUILD", "clang-7" } + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer } }; var config = "-DALSOFT_REQUIRE_ALSA=ON -DALSOFT_REQUIRE_OSS=ON -DALSOFT_REQUIRE_PORTAUDIO=ON -DALSOFT_REQUIRE_PULSEAUDIO=ON -DALSOFT_REQUIRE_JACK=ON -DALSOFT_EMBED_HRTF_DATA=YES"; diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs index a82b1f59b..32e2d2f38 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs @@ -7,7 +7,6 @@ using System.Linq; using System.Xml; using Flax.Build; using Flax.Build.Platforms; -using Flax.Build.Projects.VisualStudio; using Flax.Deploy; namespace Flax.Deps.Dependencies @@ -237,8 +236,8 @@ namespace Flax.Deps.Dependencies break; } case TargetPlatform.Linux: - envVars.Add("CC", "clang-7"); - envVars.Add("CC_FOR_BUILD", "clang-7"); + envVars.Add("CC", "clang-" + Configuration.LinuxClangMinVer); + envVars.Add("CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer); break; case TargetPlatform.Mac: break; default: throw new InvalidPlatformException(BuildPlatform); diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs index 4eaf8df4b..a474b9566 100644 --- 
a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs @@ -105,8 +105,8 @@ namespace Flax.Deps.Dependencies }; var envVars = new Dictionary { - { "CC", "clang-7" }, - { "CC_FOR_BUILD", "clang-7" }, + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, }; var buildDir = Path.Combine(root, "build"); SetupDirectory(buildDir, true); diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs index 6d104d563..ac0079401 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs @@ -116,8 +116,8 @@ namespace Flax.Deps.Dependencies { var envVars = new Dictionary { - { "CC", "clang-7" }, - { "CC_FOR_BUILD", "clang-7" } + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer } }; // Fix scripts diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs b/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs index dd52114c2..ddf1cc15d 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs @@ -546,8 +546,8 @@ namespace Flax.Deps.Dependencies { var envVars = new Dictionary { - { "CC", "clang-7" }, - { "CXX", "clang++-7" } + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CXX", "clang++-" + Configuration.LinuxClangMinVer } }; var monoOptions = new[] { diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs index d841e281c..195c0d8cb 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs @@ -365,8 +365,8 @@ namespace Flax.Deps.Dependencies var envVars = new Dictionary { - { "CC", "clang-7" }, - { "CC_FOR_BUILD", "clang-7" } + { "CC", "clang-" + 
Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer } }; var buildDir = Path.Combine(root, "build"); diff --git a/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs b/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs index ebbdc5b46..f4e7f06cf 100644 --- a/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs @@ -5,6 +5,18 @@ using System.IO; using Flax.Build.Graph; using Flax.Build.NativeCpp; +namespace Flax.Build +{ + partial class Configuration + { + /// + /// Specifies the minimum Clang compiler version to use on Linux (eg. 10). + /// + [CommandLine("linuxClangMinVer", "", "Specifies the minimum Clang compiler version to use on Linux (eg. 10).")] + public static string LinuxClangMinVer = "13"; + } +} + namespace Flax.Build.Platforms { /// From 766091045b1be027321b8cc33f74d50254ffe6c1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 12 Jun 2025 18:21:12 +0200 Subject: [PATCH 053/211] Improve version parsing to share code --- .../Flax.Build/Platforms/Linux/LinuxToolchain.cs | 9 +++++++-- .../Flax.Build/Platforms/Windows/WindowsToolchain.cs | 9 ++------- Source/Tools/Flax.Build/Utilities/Utilities.cs | 12 ++++++++++++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs b/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs index f4e7f06cf..174b2bcee 100644 --- a/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Linux/LinuxToolchain.cs @@ -1,9 +1,10 @@ // Copyright (c) Wojciech Figat. All rights reserved. 
-using System.Collections.Generic; -using System.IO; using Flax.Build.Graph; using Flax.Build.NativeCpp; +using System; +using System.Collections.Generic; +using System.IO; namespace Flax.Build { @@ -34,6 +35,10 @@ namespace Flax.Build.Platforms public LinuxToolchain(LinuxPlatform platform, TargetArchitecture architecture) : base(platform, architecture, platform.ToolchainRoot, platform.Compiler) { + // Check version + if (Utilities.ParseVersion(Configuration.LinuxClangMinVer, out var minClangVer) && ClangVersion < minClangVer) + Log.Error($"Old Clang version {ClangVersion}. Minimum supported is {minClangVer}."); + // Setup system paths var includePath = Path.Combine(ToolsetRoot, "usr", "include"); if (Directory.Exists(includePath)) diff --git a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs index 7537cdecf..5154cdbf9 100644 --- a/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Windows/WindowsToolchain.cs @@ -44,13 +44,8 @@ namespace Flax.Build.Platforms : base(platform, architecture, WindowsPlatformToolset.Latest, WindowsPlatformSDK.Latest) { // Select minimum Windows version - if (!Version.TryParse(Configuration.WindowsMinVer, out _minVersion)) - { - if (int.TryParse(Configuration.WindowsMinVer, out var winMinVerMajor)) - _minVersion = new Version(winMinVerMajor, 0); - else - _minVersion = new Version(7, 0); - } + if (!Utilities.ParseVersion(Configuration.WindowsMinVer, out _minVersion)) + _minVersion = new Version(7, 0); } /// diff --git a/Source/Tools/Flax.Build/Utilities/Utilities.cs b/Source/Tools/Flax.Build/Utilities/Utilities.cs index 2b971f153..85f791d09 100644 --- a/Source/Tools/Flax.Build/Utilities/Utilities.cs +++ b/Source/Tools/Flax.Build/Utilities/Utilities.cs @@ -818,5 +818,17 @@ namespace Flax.Build return 0; }); } + + internal static bool ParseVersion(string text, out Version ver) + { + if (Version.TryParse(text, out 
ver)) + return true; + if (int.TryParse(text, out var major)) + { + ver = new Version(major, 0); + return true; + } + return false; + } } } From bd2add7edd4b7ecf9ea5e1cce6987ce950ce07a2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 16 Jun 2025 23:15:58 +0200 Subject: [PATCH 054/211] Tweak memory command tip --- Source/Editor/Windows/Profiler/Memory.cs | 2 +- Source/Engine/Profiler/ProfilerMemory.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Editor/Windows/Profiler/Memory.cs b/Source/Editor/Windows/Profiler/Memory.cs index 6958b828b..a74472a8c 100644 --- a/Source/Editor/Windows/Profiler/Memory.cs +++ b/Source/Editor/Windows/Profiler/Memory.cs @@ -71,7 +71,7 @@ namespace FlaxEditor.Windows.Profiler { _warningText = new Label { - Text = "Detailed memory profiling is disabled. Run with command line: -mem", + Text = "Detailed memory profiling is disabled. Run with command line '-mem'", TextColor = Color.Red, Visible = false, Parent = layout, diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index d53e48b17..c936ff5b2 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -160,7 +160,7 @@ namespace // Warn that data might be missing due to inactive profiler if (!ProfilerMemory::Enabled) - output.AppendLine(TEXT("Detailed memory profiling is disabled. Run with command line: -mem")); + output.AppendLine(TEXT("Detailed memory profiling is disabled. 
Run with command line '-mem'")); } #ifdef USE_TRACY_MEMORY_PLOTS From 62e329ac6e491e8cb0eea6acea81de6eef4c6d3b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 18 Jun 2025 23:00:43 +0200 Subject: [PATCH 055/211] Add more memory tags for Navigation --- Source/Engine/Navigation/NavMesh.cpp | 1 + Source/Engine/Navigation/NavMeshBuilder.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Navigation/NavMesh.cpp b/Source/Engine/Navigation/NavMesh.cpp index ee3e48f3e..b48bf26c7 100644 --- a/Source/Engine/Navigation/NavMesh.cpp +++ b/Source/Engine/Navigation/NavMesh.cpp @@ -110,6 +110,7 @@ void NavMesh::OnAssetLoaded(Asset* asset, void* caller) if (Data.Tiles.HasItems()) return; ScopeLock lock(DataAsset->Locker); + PROFILE_MEM(Navigation); // Remove added tiles if (_navMeshActive) diff --git a/Source/Engine/Navigation/NavMeshBuilder.cpp b/Source/Engine/Navigation/NavMeshBuilder.cpp index dbfecefcb..e5fdec5da 100644 --- a/Source/Engine/Navigation/NavMeshBuilder.cpp +++ b/Source/Engine/Navigation/NavMeshBuilder.cpp @@ -732,6 +732,7 @@ public: bool Run() override { PROFILE_CPU_NAMED("BuildNavMeshTile"); + PROFILE_MEM(Navigation); const auto navMesh = NavMesh.Get(); if (!navMesh) return false; @@ -1154,6 +1155,7 @@ void ClearNavigation(Scene* scene) void NavMeshBuilder::Update() { + PROFILE_MEM(Navigation); ScopeLock lock(NavBuildQueueLocker); // Process nav mesh building requests and kick the tasks @@ -1204,7 +1206,7 @@ void NavMeshBuilder::Build(Scene* scene, float timeoutMs) } PROFILE_CPU_NAMED("NavMeshBuilder"); - + PROFILE_MEM(Navigation); ScopeLock lock(NavBuildQueueLocker); BuildRequest req; @@ -1241,7 +1243,7 @@ void NavMeshBuilder::Build(Scene* scene, const BoundingBox& dirtyBounds, float t } PROFILE_CPU_NAMED("NavMeshBuilder"); - + PROFILE_MEM(Navigation); ScopeLock lock(NavBuildQueueLocker); BuildRequest req; From edb68849426fbd527392bdc1c6f74876a306972a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 19 Jun 2025 
08:24:26 +0200 Subject: [PATCH 056/211] Optimize PhysX work dispatcher to be shared by all scenes --- .../Engine/Physics/PhysX/PhysicsBackendPhysX.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index 768788f90..b15b22a9b 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -82,7 +82,6 @@ struct ActionDataPhysX struct ScenePhysX { PxScene* Scene = nullptr; - PxCpuDispatcher* CpuDispatcher = nullptr; PxControllerManager* ControllerManager = nullptr; void* ScratchMemory = nullptr; Vector3 Origin = Vector3::Zero; @@ -542,6 +541,7 @@ namespace { PxFoundation* Foundation = nullptr; PxPhysics* PhysX = nullptr; + PxDefaultCpuDispatcher* CpuDispatcher = nullptr; #if WITH_PVD PxPvd* PVD = nullptr; #endif @@ -1734,6 +1734,7 @@ void PhysicsBackend::Shutdown() #if WITH_PVD RELEASE_PHYSX(PVD); #endif + RELEASE_PHYSX(CpuDispatcher); RELEASE_PHYSX(Foundation); SceneOrigins.Clear(); } @@ -1791,9 +1792,13 @@ void* PhysicsBackend::CreateScene(const PhysicsSettings& settings) } if (sceneDesc.cpuDispatcher == nullptr) { - scenePhysX->CpuDispatcher = PxDefaultCpuDispatcherCreate(Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 1, 4)); - CHECK_INIT(scenePhysX->CpuDispatcher, "PxDefaultCpuDispatcherCreate failed!"); - sceneDesc.cpuDispatcher = scenePhysX->CpuDispatcher; + if (CpuDispatcher == nullptr) + { + uint32 threads = Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 1, 4); + CpuDispatcher = PxDefaultCpuDispatcherCreate(threads); + CHECK_INIT(CpuDispatcher, "PxDefaultCpuDispatcherCreate failed!"); + } + sceneDesc.cpuDispatcher = CpuDispatcher; } switch (settings.BroadPhaseType) { @@ -1855,7 +1860,6 @@ void PhysicsBackend::DestroyScene(void* scene) } #endif RELEASE_PHYSX(scenePhysX->ControllerManager); - SAFE_DELETE(scenePhysX->CpuDispatcher); 
Allocator::Free(scenePhysX->ScratchMemory); scenePhysX->Scene->release(); From 6144f6c74e16961992cb8599fb62b29772fa8002 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 19 Jun 2025 09:50:07 +0200 Subject: [PATCH 057/211] Optimize physics simulation with higher limit of `8` threads --- Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index b15b22a9b..673b1e96d 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -1794,7 +1794,7 @@ void* PhysicsBackend::CreateScene(const PhysicsSettings& settings) { if (CpuDispatcher == nullptr) { - uint32 threads = Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 1, 4); + uint32 threads = Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 1, 8); CpuDispatcher = PxDefaultCpuDispatcherCreate(threads); CHECK_INIT(CpuDispatcher, "PxDefaultCpuDispatcherCreate failed!"); } From 4ac870f7012964f699daa94b2f66e075ab371d8c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 19 Jun 2025 13:57:50 +0200 Subject: [PATCH 058/211] Optimize physics transformation updates propagation in async via Job System --- Source/Engine/Physics/Actors/RigidBody.cpp | 8 +++++ .../Physics/PhysX/PhysicsBackendPhysX.cpp | 31 +++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Physics/Actors/RigidBody.cpp b/Source/Engine/Physics/Actors/RigidBody.cpp index a58911dfb..565bd9fbd 100644 --- a/Source/Engine/Physics/Actors/RigidBody.cpp +++ b/Source/Engine/Physics/Actors/RigidBody.cpp @@ -468,6 +468,14 @@ void RigidBody::OnActiveTransformChanged() void RigidBody::BeginPlay(SceneBeginData* data) { +#if USE_EDITOR || !BUILD_RELEASE + // FlushActiveTransforms runs in async for each separate actor thus we don't support two rigidbodies that transformations depend 
on each other + if (Cast(GetParent())) + { + LOG(Warning, "Rigid Body '{0}' is attached to other Rigid Body which is not supported and might cause physical simulation instability.", GetNamePath()); + } +#endif + // Create rigid body ASSERT(_actor == nullptr); void* scene = GetPhysicsScene()->GetPhysicsScene(); diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index 673b1e96d..99b54e8fc 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -1901,6 +1901,23 @@ void PhysicsBackend::StartSimulateScene(void* scene, float dt) scenePhysX->Stepper.renderDone(); } +PxActor** CachedActiveActors; +int64 CachedActiveActorsCount; +volatile int64 CachedActiveActorIndex; + +void FlushActiveTransforms(int32 i) +{ + PROFILE_CPU(); + int64 index; + while ((index = Platform::InterlockedIncrement(&CachedActiveActorIndex)) < CachedActiveActorsCount) + { + const auto pxActor = (PxRigidActor*)CachedActiveActors[index]; + auto actor = static_cast(pxActor->userData); + if (actor) + actor->OnActiveTransformChanged(); + } +} + void PhysicsBackend::EndSimulateScene(void* scene) { PROFILE_MEM(Physics); @@ -1919,10 +1936,18 @@ void PhysicsBackend::EndSimulateScene(void* scene) // Gather change info PxU32 activeActorsCount; PxActor** activeActors = scenePhysX->Scene->getActiveActors(activeActorsCount); - if (activeActorsCount > 0) + + // Update changed transformations + if (activeActorsCount > 50 && JobSystem::GetThreadsCount() > 1) + { + // Run in async via job system + CachedActiveActors = activeActors; + CachedActiveActorsCount = activeActorsCount; + CachedActiveActorIndex = -1; + JobSystem::Execute(FlushActiveTransforms, JobSystem::GetThreadsCount()); + } + else { - // Update changed transformations - // TODO: use jobs system if amount if huge + for (uint32 i = 0; i < activeActorsCount; i++) { const auto pxActor = (PxRigidActor*)*activeActors++; From
2e10d776e9bc4e80323a3bb82f62337e21b36971 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 19 Jun 2025 14:04:06 +0200 Subject: [PATCH 059/211] Optimize updating actor rendering entry with better thread locking that support async writes on actor update --- Source/Engine/Level/Scene/SceneRendering.cpp | 14 +++++++------- Source/Engine/Level/Scene/SceneRendering.h | 4 ++-- Source/Engine/Threading/ConcurrentSystemLocker.cpp | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index fe2bc310f..4a88703ae 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -43,7 +43,7 @@ FORCE_INLINE bool FrustumsListCull(const BoundingSphere& bounds, const ArrayScenes.Add(this); // Add additional lock during scene rendering (prevents any Actors cache modifications on content streaming threads - eg. when model residency changes) - Locker.Lock(); + Locker.Begin(false); } else if (category == PostRender) { // Release additional lock - Locker.Unlock(); + Locker.End(false); } auto& view = renderContextBatch.GetMainContext().View; auto& list = Actors[(int32)category]; @@ -127,7 +127,7 @@ void SceneRendering::CollectPostFxVolumes(RenderContext& renderContext) void SceneRendering::Clear() { - ScopeLock lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker); for (auto* listener : _listeners) { listener->OnSceneRenderingClear(this); @@ -149,7 +149,7 @@ void SceneRendering::AddActor(Actor* a, int32& key) return; PROFILE_MEM(Graphics); const int32 category = a->_drawCategory; - ScopeLock lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker); auto& list = Actors[category]; if (FreeActors[category].HasItems()) { @@ -174,7 +174,7 @@ void SceneRendering::AddActor(Actor* a, int32& key) void SceneRendering::UpdateActor(Actor* a, int32& key, ISceneRenderingListener::UpdateFlags flags) { const int32 category = 
a->_drawCategory; - ScopeLock lock(Locker); + ConcurrentSystemLocker::ReadScope lock(Locker); // Read-access only as list doesn't get resized (like Add/Remove do) so allow updating actors from different threads at once auto& list = Actors[category]; if (list.Count() <= key) // Ignore invalid key softly return; @@ -193,7 +193,7 @@ void SceneRendering::UpdateActor(Actor* a, int32& key, ISceneRenderingListener:: void SceneRendering::RemoveActor(Actor* a, int32& key) { const int32 category = a->_drawCategory; - ScopeLock lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker); auto& list = Actors[category]; if (list.Count() > key) // Ignore invalid key softly (eg. list after batch clear during scene unload) { diff --git a/Source/Engine/Level/Scene/SceneRendering.h b/Source/Engine/Level/Scene/SceneRendering.h index b24dcdfa9..59f997f6b 100644 --- a/Source/Engine/Level/Scene/SceneRendering.h +++ b/Source/Engine/Level/Scene/SceneRendering.h @@ -7,7 +7,7 @@ #include "Engine/Core/Math/BoundingSphere.h" #include "Engine/Core/Math/BoundingFrustum.h" #include "Engine/Level/Actor.h" -#include "Engine/Platform/CriticalSection.h" +#include "Engine/Threading/ConcurrentSystemLocker.h" class SceneRenderTask; class SceneRendering; @@ -102,7 +102,7 @@ public: Array Actors[MAX]; Array FreeActors[MAX]; Array PostFxProviders; - CriticalSection Locker; + ConcurrentSystemLocker Locker; private: #if USE_EDITOR diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.cpp b/Source/Engine/Threading/ConcurrentSystemLocker.cpp index c8569b119..f8eab96d9 100644 --- a/Source/Engine/Threading/ConcurrentSystemLocker.cpp +++ b/Source/Engine/Threading/ConcurrentSystemLocker.cpp @@ -18,7 +18,7 @@ RETRY: { // Someone else is doing opposite operation so wait for it's end // TODO: use ConditionVariable+CriticalSection to prevent active-waiting - Platform::Sleep(1); + Platform::Sleep(0); goto RETRY; } From d3a50cdacb9349487fde4ba9951bc214f85f5fba Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: 
Fri, 20 Jun 2025 09:05:25 +0200 Subject: [PATCH 060/211] Optimize `Actor::DestroyChildren` --- Source/Editor/Modules/SceneEditingModule.cs | 4 ++ Source/Editor/Modules/SceneModule.cs | 45 +++++++++++++++ Source/Editor/SceneGraph/SceneGraphNode.cs | 2 +- Source/Engine/Level/Actor.cpp | 63 ++++++++++++++++++++- Source/Engine/Level/Level.cpp | 8 +++ Source/Engine/Level/Level.h | 6 ++ 6 files changed, 124 insertions(+), 4 deletions(-) diff --git a/Source/Editor/Modules/SceneEditingModule.cs b/Source/Editor/Modules/SceneEditingModule.cs index 3c7130615..11ab2fcb3 100644 --- a/Source/Editor/Modules/SceneEditingModule.cs +++ b/Source/Editor/Modules/SceneEditingModule.cs @@ -711,7 +711,11 @@ namespace FlaxEditor.Modules private void OnActorChildNodesDispose(ActorNode node) { + if (Selection.Count == 0) + return; + // TODO: cache if selection contains any actor child node and skip this loop if no need to iterate + // TODO: or build a hash set with selected nodes for quick O(1) checks (cached until selection changes) // Deselect child nodes for (int i = 0; i < node.ChildNodes.Count; i++) diff --git a/Source/Editor/Modules/SceneModule.cs b/Source/Editor/Modules/SceneModule.cs index 56c420964..9ca92ddce 100644 --- a/Source/Editor/Modules/SceneModule.cs +++ b/Source/Editor/Modules/SceneModule.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using FlaxEditor.SceneGraph; using FlaxEditor.SceneGraph.Actors; using FlaxEngine; @@ -658,6 +659,48 @@ namespace FlaxEditor.Modules //node?.TreeNode.OnActiveChanged(); } + private void OnActorDestroyChildren(Actor actor) + { + // Instead of doing OnActorParentChanged for every child lets remove all of them at once from that actor + ActorNode node = GetActorNode(actor); + if (node != null) + { + if (Editor.SceneEditing.HasSthSelected) + { + // Clear selection if one of the removed actors is selected + var selection = new HashSet(); + foreach (var e in Editor.SceneEditing.Selection) + { + 
if (e is ActorNode q && q.Actor) + selection.Add(q.Actor); + } + var count = actor.ChildrenCount; + for (int i = 0; i < count; i++) + { + var child = actor.GetChild(i); + if (selection.Contains(child)) + { + Editor.SceneEditing.Deselect(); + break; + } + } + } + + // Remove all child nodes (upfront remove all nodes to run faster) + for (int i = 0; i < node.ChildNodes.Count; i++) + { + if (node.ChildNodes[i] is ActorNode child) + child.parentNode = null; + } + node.TreeNode.DisposeChildren(); + for (int i = 0; i < node.ChildNodes.Count; i++) + { + node.ChildNodes[i].Dispose(); + } + node.ChildNodes.Clear(); + } + } + /// /// Gets the actor node. /// @@ -709,6 +752,7 @@ namespace FlaxEditor.Modules Level.ActorOrderInParentChanged += OnActorOrderInParentChanged; Level.ActorNameChanged += OnActorNameChanged; Level.ActorActiveChanged += OnActorActiveChanged; + Level.ActorDestroyChildren += OnActorDestroyChildren; } /// @@ -726,6 +770,7 @@ namespace FlaxEditor.Modules Level.ActorOrderInParentChanged -= OnActorOrderInParentChanged; Level.ActorNameChanged -= OnActorNameChanged; Level.ActorActiveChanged -= OnActorActiveChanged; + Level.ActorDestroyChildren -= OnActorDestroyChildren; // Cleanup graph Root.Dispose(); diff --git a/Source/Editor/SceneGraph/SceneGraphNode.cs b/Source/Editor/SceneGraph/SceneGraphNode.cs index b6cbdb135..20ac3a6a5 100644 --- a/Source/Editor/SceneGraph/SceneGraphNode.cs +++ b/Source/Editor/SceneGraph/SceneGraphNode.cs @@ -27,7 +27,7 @@ namespace FlaxEditor.SceneGraph /// /// The parent node. /// - protected SceneGraphNode parentNode; + internal SceneGraphNode parentNode; /// /// Gets the children list. 
diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 02210d910..24ca6b139 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -466,12 +466,71 @@ Array Actor::GetChildren(const MClass* type) const void Actor::DestroyChildren(float timeLeft) { + if (Children.IsEmpty()) + return; PROFILE_CPU(); + + // Actors system doesn't support editing scene hierarchy from multiple threads + if (!IsInMainThread() && IsDuringPlay()) + { + LOG(Error, "Editing scene hierarchy is only allowed on a main thread."); + return; + } + + // Get all actors Array children = Children; + + // Inform Editor beforehand + Level::callActorEvent(Level::ActorEventType::OnActorDestroyChildren, this, nullptr); + + if (_scene && IsActiveInHierarchy()) + { + // Disable children + for (Actor* child : children) + { + if (child->IsActiveInHierarchy()) + { + child->OnDisableInHierarchy(); + } + } + } + + Level::ScenesLock.Lock(); + + // Remove children all at once + Children.Clear(); + _isHierarchyDirty = true; + + // Unlink children from scene hierarchy + for (Actor* child : children) + { + child->_parent = nullptr; + if (!_isActiveInHierarchy) + child->_isActive = false; // Force keep children deactivated to reduce overhead during destruction + if (_scene) + child->SetSceneInHierarchy(nullptr); + } + + Level::ScenesLock.Unlock(); + + // Inform actors about this + for (Actor* child : children) + { + child->OnParentChanged(); + } + + // Unlink children for hierarchy + for (Actor* child : children) + { + //child->EndPlay(); + + //child->SetParent(nullptr, false, false); + } + + // Delete objects const bool useGameTime = timeLeft > ZeroTolerance; for (Actor* child : children) { - child->SetParent(nullptr, false, false); child->DeleteObject(timeLeft, useGameTime); } } @@ -1280,7 +1339,6 @@ void Actor::OnActiveChanged() if (wasActiveInTree != IsActiveInHierarchy()) OnActiveInTreeChanged(); - //if (GetScene()) 
Level::callActorEvent(Level::ActorEventType::OnActorActiveChanged, this, nullptr); } @@ -1311,7 +1369,6 @@ void Actor::OnActiveInTreeChanged() void Actor::OnOrderInParentChanged() { - //if (GetScene()) Level::callActorEvent(Level::ActorEventType::OnActorOrderInParentChanged, this, nullptr); } diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 5516e6d53..49bef81c3 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -263,6 +263,9 @@ Delegate Level::ActorParentChanged; Delegate Level::ActorOrderInParentChanged; Delegate Level::ActorNameChanged; Delegate Level::ActorActiveChanged; +#if USE_EDITOR +Delegate Level::ActorDestroyChildren; +#endif Delegate Level::SceneSaving; Delegate Level::SceneSaved; Delegate Level::SceneSaveError; @@ -851,6 +854,11 @@ void Level::callActorEvent(ActorEventType eventType, Actor* a, Actor* b) case ActorEventType::OnActorActiveChanged: ActorActiveChanged(a); break; +#if USE_EDITOR + case ActorEventType::OnActorDestroyChildren: + ActorDestroyChildren(a); + break; +#endif } } diff --git a/Source/Engine/Level/Level.h b/Source/Engine/Level/Level.h index 597bc0a87..484ba35b8 100644 --- a/Source/Engine/Level/Level.h +++ b/Source/Engine/Level/Level.h @@ -549,7 +549,13 @@ private: OnActorOrderInParentChanged = 3, OnActorNameChanged = 4, OnActorActiveChanged = 5, +#if USE_EDITOR + OnActorDestroyChildren = 6, +#endif }; static void callActorEvent(ActorEventType eventType, Actor* a, Actor* b); +#if USE_EDITOR + API_EVENT(Internal) static Delegate ActorDestroyChildren; +#endif }; From d7df403e5e5237f125123fe79b7737c4c63e45e2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 20 Jun 2025 09:05:41 +0200 Subject: [PATCH 061/211] Optimize `ContainerControl.DisposeChildren` --- Source/Engine/UI/GUI/ContainerControl.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/UI/GUI/ContainerControl.cs b/Source/Engine/UI/GUI/ContainerControl.cs index 32b03af0d..017b8ee5c 
100644 --- a/Source/Engine/UI/GUI/ContainerControl.cs +++ b/Source/Engine/UI/GUI/ContainerControl.cs @@ -173,7 +173,7 @@ namespace FlaxEngine.GUI // Delete children while (_children.Count > 0) { - _children[0].Dispose(); + _children[^1].Dispose(); } _isLayoutLocked = wasLayoutLocked; From ef5d45874a96452fe6ea8350cf0e41f4290da73c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 22 Jun 2025 12:12:42 +0200 Subject: [PATCH 062/211] Fix compilation regression --- Source/Engine/Level/Actor.cpp | 2 ++ Source/Engine/Navigation/NavMesh.cpp | 1 + Source/Engine/Navigation/NavMeshBuilder.cpp | 1 + 3 files changed, 4 insertions(+) diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 24ca6b139..551f6fba9 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -480,8 +480,10 @@ void Actor::DestroyChildren(float timeLeft) // Get all actors Array children = Children; +#if USE_EDITOR // Inform Editor beforehand Level::callActorEvent(Level::ActorEventType::OnActorDestroyChildren, this, nullptr); +#endif if (_scene && IsActiveInHierarchy()) { diff --git a/Source/Engine/Navigation/NavMesh.cpp b/Source/Engine/Navigation/NavMesh.cpp index b48bf26c7..5593d732a 100644 --- a/Source/Engine/Navigation/NavMesh.cpp +++ b/Source/Engine/Navigation/NavMesh.cpp @@ -5,6 +5,7 @@ #include "Engine/Level/Scene/Scene.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Threading/Threading.h" +#include "Engine/Profiler/ProfilerMemory.h" #if COMPILE_WITH_ASSETS_IMPORTER #include "Engine/Core/Log.h" #include "Engine/ContentImporters/AssetsImportingManager.h" diff --git a/Source/Engine/Navigation/NavMeshBuilder.cpp b/Source/Engine/Navigation/NavMeshBuilder.cpp index e5fdec5da..e92173846 100644 --- a/Source/Engine/Navigation/NavMeshBuilder.cpp +++ b/Source/Engine/Navigation/NavMeshBuilder.cpp @@ -23,6 +23,7 @@ #include "Engine/Terrain/TerrainPatch.h" #include "Engine/Terrain/Terrain.h" #include "Engine/Profiler/ProfilerCPU.h" +#include 
"Engine/Profiler/ProfilerMemory.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/Level.h" #include From 674fda7375f211655ea81b6644fea082f85ed3c9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 26 Jun 2025 19:50:04 +0200 Subject: [PATCH 063/211] Add resizing to Custom Code nodes in Materials --- Source/Editor/Surface/Archetypes/Material.cs | 150 ++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Surface/Archetypes/Material.cs b/Source/Editor/Surface/Archetypes/Material.cs index e46b1c6fb..bd084e286 100644 --- a/Source/Editor/Surface/Archetypes/Material.cs +++ b/Source/Editor/Surface/Archetypes/Material.cs @@ -1,11 +1,13 @@ // Copyright (c) Wojciech Figat. All rights reserved. using System; +using System.Linq; using FlaxEditor.Content.Settings; using FlaxEditor.Scripting; using FlaxEditor.Surface.Elements; using FlaxEditor.Windows.Assets; using FlaxEngine; +using FlaxEngine.GUI; namespace FlaxEditor.Surface.Archetypes { @@ -260,6 +262,148 @@ namespace FlaxEditor.Surface.Archetypes } } + internal sealed class CustomCodeNode : SurfaceNode + { + private Rectangle _resizeButtonRect; + private Float2 _startResizingSize; + private Float2 _startResizingCornerOffset; + private bool _isResizing; + + private int SizeValueIndex => Archetype.TypeID == 8 ? 
1 : 3; // Index of the Size stored in Values array + + private Float2 SizeValue + { + get => (Float2)Values[SizeValueIndex]; + set => SetValue(SizeValueIndex, value, false); + } + + public CustomCodeNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) + : base(id, context, nodeArch, groupArch) + { + } + + public override bool CanSelect(ref Float2 location) + { + return base.CanSelect(ref location) && !_resizeButtonRect.MakeOffsetted(Location).Contains(ref location); + } + + public override void OnSurfaceLoaded(SurfaceNodeActions action) + { + base.OnSurfaceLoaded(action); + + var textBox = (TextBox)Children.First(x => x is TextBox); + textBox.AnchorMax = Float2.One; + + var size = SizeValue; + if (Surface != null && Surface.GridSnappingEnabled) + size = Surface.SnapToGrid(size, true); + Resize(size.X, size.Y); + } + + public override void OnValuesChanged() + { + base.OnValuesChanged(); + + var size = SizeValue; + Resize(size.X, size.Y); + } + + protected override void UpdateRectangles() + { + base.UpdateRectangles(); + + const float buttonMargin = FlaxEditor.Surface.Constants.NodeCloseButtonMargin; + const float buttonSize = FlaxEditor.Surface.Constants.NodeCloseButtonSize; + _resizeButtonRect = new Rectangle(_closeButtonRect.Left, Height - buttonSize - buttonMargin - 4, buttonSize, buttonSize); + } + + public override void Draw() + { + base.Draw(); + + var style = Style.Current; + if (_isResizing) + { + Render2D.FillRectangle(_resizeButtonRect, style.Selection); + Render2D.DrawRectangle(_resizeButtonRect, style.SelectionBorder); + } + Render2D.DrawSprite(style.Scale, _resizeButtonRect, _resizeButtonRect.Contains(_mousePosition) && Surface.CanEdit ? 
style.Foreground : style.ForegroundGrey); + } + + public override void OnLostFocus() + { + if (_isResizing) + EndResizing(); + + base.OnLostFocus(); + } + + public override void OnEndMouseCapture() + { + if (_isResizing) + EndResizing(); + + base.OnEndMouseCapture(); + } + + public override bool OnMouseDown(Float2 location, MouseButton button) + { + if (base.OnMouseDown(location, button)) + return true; + + if (button == MouseButton.Left && _resizeButtonRect.Contains(ref location) && Surface.CanEdit) + { + // Start sliding + _isResizing = true; + _startResizingSize = Size; + _startResizingCornerOffset = Size - location; + StartMouseCapture(); + Cursor = CursorType.SizeNWSE; + return true; + } + + return false; + } + + public override void OnMouseMove(Float2 location) + { + if (_isResizing) + { + var emptySize = CalculateNodeSize(0, 0); + var size = Float2.Max(location - emptySize + _startResizingCornerOffset, new Float2(240, 160)); + Resize(size.X, size.Y); + } + else + { + base.OnMouseMove(location); + } + } + + public override bool OnMouseUp(Float2 location, MouseButton button) + { + if (button == MouseButton.Left && _isResizing) + { + EndResizing(); + return true; + } + + return base.OnMouseUp(location, button); + } + + private void EndResizing() + { + Cursor = CursorType.Default; + EndMouseCapture(); + _isResizing = false; + if (_startResizingSize != Size) + { + var emptySize = CalculateNodeSize(0, 0); + SizeValue = Size - emptySize; + Surface.MarkAsEdited(false); + } + } + } + internal enum MaterialTemplateInputsMapping { /// @@ -410,13 +554,15 @@ namespace FlaxEditor.Surface.Archetypes new NodeArchetype { TypeID = 8, + Create = (id, context, arch, groupArch) => new CustomCodeNode(id, context, arch, groupArch), Title = "Custom Code", Description = "Custom HLSL shader code expression", Flags = NodeFlags.MaterialGraph, Size = new Float2(300, 200), DefaultValues = new object[] { - "// Here you can add HLSL code\nOutput0 = Input0;" + "// Here you can add HLSL 
code\nOutput0 = Input0;", + new Float2(300, 200), }, Elements = new[] { @@ -874,6 +1020,7 @@ namespace FlaxEditor.Surface.Archetypes new NodeArchetype { TypeID = 38, + Create = (id, context, arch, groupArch) => new CustomCodeNode(id, context, arch, groupArch), Title = "Custom Global Code", Description = "Custom global HLSL shader code expression (placed before material shader code). Can contain includes to shader utilities or declare functions to reuse later.", Flags = NodeFlags.MaterialGraph, @@ -883,6 +1030,7 @@ namespace FlaxEditor.Surface.Archetypes "// Here you can add HLSL code\nfloat4 GetCustomColor()\n{\n\treturn float4(1, 0, 0, 1);\n}", true, (int)MaterialTemplateInputsMapping.Utilities, + new Float2(300, 240), }, Elements = new[] { From 5c37584eca692cb113896c4d3cd6a7d994978226 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 26 Jun 2025 19:50:42 +0200 Subject: [PATCH 064/211] Minor adjustment for alignment of perf-critical variables in rendering --- Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp | 2 +- Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 77cec978d..22780e3ec 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -267,7 +267,7 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, const Span(depthBuffer); ID3D11DepthStencilView* dsv = depthBufferDX11 ? 
depthBufferDX11->DSV() : nullptr; - ID3D11RenderTargetView* rtvs[GPU_MAX_RT_BINDED]; + __declspec(align(16)) ID3D11RenderTargetView* rtvs[GPU_MAX_RT_BINDED]; for (int32 i = 0; i < rts.Length(); i++) { auto rtDX11 = reinterpret_cast(rts[i]); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 5f278a8ae..9cb285ac1 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -977,7 +977,7 @@ void GPUContextDX12::BindVB(const Span& vertexBuffers, const uint32* { ASSERT(vertexBuffers.Length() >= 0 && vertexBuffers.Length() <= GPU_MAX_VB_BINDED); bool vbEdited = _vbCount != vertexBuffers.Length(); - D3D12_VERTEX_BUFFER_VIEW views[GPU_MAX_VB_BINDED]; + __declspec(align(16)) D3D12_VERTEX_BUFFER_VIEW views[GPU_MAX_VB_BINDED]; for (int32 i = 0; i < vertexBuffers.Length(); i++) { const auto vbDX12 = static_cast(vertexBuffers[i]); From 45e82d21f4e15234ce8b7cf39d005fa929f535c8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 26 Jun 2025 19:51:06 +0200 Subject: [PATCH 065/211] Fix `ConcurrentSystemLocker` to guard for a single writer at once --- Source/Engine/Threading/ConcurrentSystemLocker.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.cpp b/Source/Engine/Threading/ConcurrentSystemLocker.cpp index f8eab96d9..c8debb561 100644 --- a/Source/Engine/Threading/ConcurrentSystemLocker.cpp +++ b/Source/Engine/Threading/ConcurrentSystemLocker.cpp @@ -22,6 +22,14 @@ RETRY: goto RETRY; } + // Writers also have to check themselves (only one writer at a time - just like a mutex) + if (write && Platform::AtomicRead(thisCounter) != 0) + { + // Another writer is already inside so wait for it to end + Platform::Sleep(0); + goto RETRY; + } + // Mark that we entered this section Platform::InterlockedIncrement(thisCounter); From
1b40775d628202e31dff559a8917e1e44b1f4332 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 27 Jun 2025 11:56:09 +0200 Subject: [PATCH 066/211] Fix deadloop in `HtmlParser` when parsing text with incorrect tags --- Source/Engine/Utilities/HtmlParser.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Utilities/HtmlParser.cs b/Source/Engine/Utilities/HtmlParser.cs index 37b614b80..175b0247a 100644 --- a/Source/Engine/Utilities/HtmlParser.cs +++ b/Source/Engine/Utilities/HtmlParser.cs @@ -177,6 +177,8 @@ namespace FlaxEngine.Utilities // Get name of this tag int start = _pos; string s = ParseTagName(); + if (s == string.Empty) + return false; // Special handling bool doctype = _scriptBegin = false; From 8cdec15fa64c365260660039373080762dbefe0d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 27 Jun 2025 15:41:48 +0200 Subject: [PATCH 067/211] Fix `GlobalSignDistanceFieldCustomBuffer` to be thread-safe (scene rendering events are not guarded via mutex anymore) --- Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index a46568f97..56dd196a5 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -197,6 +197,7 @@ public: GPUTexture* Texture = nullptr; GPUTexture* TextureMip = nullptr; Vector3 Origin = Vector3::Zero; + ConcurrentSystemLocker Locker; Array> Cascades; HashSet ObjectTypes; HashSet SDFTextures; @@ -395,6 +396,7 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { + ConcurrentSystemLocker::WriteScope lock(Locker); OnSceneRenderingDirty(a->GetBox()); } } @@ -403,6 +405,7 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { + ConcurrentSystemLocker::WriteScope lock(Locker); 
OnSceneRenderingDirty(BoundingBox::FromSphere(prevBounds)); OnSceneRenderingDirty(a->GetBox()); } @@ -412,6 +415,7 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { + ConcurrentSystemLocker::WriteScope lock(Locker); OnSceneRenderingDirty(a->GetBox()); } } From 185151b0250bb708c3d67e183b05ee9a542e2c82 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 27 Jun 2025 18:52:25 +0200 Subject: [PATCH 068/211] Minor fixes --- Source/Editor/Surface/Archetypes/Material.cs | 71 ++++++++++++++++++-- Source/Engine/Core/ObjectsRemovalService.cpp | 2 +- Source/Engine/UI/GUI/Common/RichTextBox.cs | 2 +- 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/Source/Editor/Surface/Archetypes/Material.cs b/Source/Editor/Surface/Archetypes/Material.cs index bd084e286..c9066eaa5 100644 --- a/Source/Editor/Surface/Archetypes/Material.cs +++ b/Source/Editor/Surface/Archetypes/Material.cs @@ -1,7 +1,6 @@ // Copyright (c) Wojciech Figat. All rights reserved. 
using System; -using System.Linq; using FlaxEditor.Content.Settings; using FlaxEditor.Scripting; using FlaxEditor.Surface.Elements; @@ -262,12 +261,55 @@ namespace FlaxEditor.Surface.Archetypes } } +#if false // TODO: finish code editor based on RichTextBoxBase with text block parsing for custom styling + internal sealed class CustomCodeTextBox : RichTextBoxBase + { + protected override void OnParseTextBlocks() + { + base.OnParseTextBlocks(); + + // Single block for a whole text + // TODO: implement code parsing with HLSL syntax + var font = Style.Current.FontMedium; + var style = new TextBlockStyle + { + Font = new FontReference(font), + Color = Style.Current.Foreground, + BackgroundSelectedBrush = new SolidColorBrush(Style.Current.BackgroundSelected), + }; + _textBlocks.Clear(); + _textBlocks.Add(new TextBlock + { + Range = new TextRange + { + StartIndex = 0, + EndIndex = TextLength, + }, + Style = style, + Bounds = new Rectangle(Float2.Zero, font.MeasureText(Text)), + }); + } +#else + internal sealed class CustomCodeTextBox : TextBox + { +#endif + public override void Draw() + { + base.Draw(); + + // Draw border + if (!IsFocused) + Render2D.DrawRectangle(new Rectangle(Float2.Zero, Size), Style.Current.BorderNormal); + } + } + internal sealed class CustomCodeNode : SurfaceNode { private Rectangle _resizeButtonRect; private Float2 _startResizingSize; private Float2 _startResizingCornerOffset; private bool _isResizing; + private CustomCodeTextBox _textBox; private int SizeValueIndex => Archetype.TypeID == 8 ? 
1 : 3; // Index of the Size stored in Values array @@ -280,6 +322,26 @@ namespace FlaxEditor.Surface.Archetypes public CustomCodeNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) : base(id, context, nodeArch, groupArch) { + Float2 pos = new Float2(FlaxEditor.Surface.Constants.NodeMarginX, FlaxEditor.Surface.Constants.NodeMarginY + FlaxEditor.Surface.Constants.NodeHeaderSize), size; + if (nodeArch.TypeID == 8) + { + pos += new Float2(60, 0); + size = new Float2(172, 200); + } + else + { + pos += new Float2(0, 40); + size = new Float2(300, 200); + } + _textBox = new CustomCodeTextBox + { + IsMultiline = true, + Location = pos, + Size = size, + Parent = this, + AnchorMax = Float2.One, + }; + _textBox.EditEnd += () => SetValue(0, _textBox.Text); } public override bool CanSelect(ref Float2 location) @@ -291,8 +353,7 @@ namespace FlaxEditor.Surface.Archetypes { base.OnSurfaceLoaded(action); - var textBox = (TextBox)Children.First(x => x is TextBox); - textBox.AnchorMax = Float2.One; + _textBox.Text = (string)Values[0]; var size = SizeValue; if (Surface != null && Surface.GridSnappingEnabled) @@ -306,6 +367,7 @@ namespace FlaxEditor.Surface.Archetypes var size = SizeValue; Resize(size.X, size.Y); + _textBox.Text = (string)Values[0]; } protected override void UpdateRectangles() @@ -579,8 +641,6 @@ namespace FlaxEditor.Surface.Archetypes NodeElementArchetype.Factory.Output(1, "Output1", typeof(Float4), 9), NodeElementArchetype.Factory.Output(2, "Output2", typeof(Float4), 10), NodeElementArchetype.Factory.Output(3, "Output3", typeof(Float4), 11), - - NodeElementArchetype.Factory.TextBox(60, 0, 175, 200, 0), } }, new NodeArchetype @@ -1038,7 +1098,6 @@ namespace FlaxEditor.Surface.Archetypes NodeElementArchetype.Factory.Text(20, 0, "Enabled"), NodeElementArchetype.Factory.Text(0, 20, "Location"), NodeElementArchetype.Factory.Enum(50, 20, 120, 2, typeof(MaterialTemplateInputsMapping)), - NodeElementArchetype.Factory.TextBox(0, 40, 
300, 200, 0), } }, new NodeArchetype diff --git a/Source/Engine/Core/ObjectsRemovalService.cpp b/Source/Engine/Core/ObjectsRemovalService.cpp index 4d9159ea9..6d40c3f78 100644 --- a/Source/Engine/Core/ObjectsRemovalService.cpp +++ b/Source/Engine/Core/ObjectsRemovalService.cpp @@ -156,7 +156,7 @@ Object::~Object() { #if BUILD_DEBUG // Prevent removing object that is still reverenced by the removal service - ASSERT(!ObjectsRemovalService::IsInPool(this)); + //ASSERT(!ObjectsRemovalService::IsInPool(this)); #endif } diff --git a/Source/Engine/UI/GUI/Common/RichTextBox.cs b/Source/Engine/UI/GUI/Common/RichTextBox.cs index b417854d7..f7726bf56 100644 --- a/Source/Engine/UI/GUI/Common/RichTextBox.cs +++ b/Source/Engine/UI/GUI/Common/RichTextBox.cs @@ -57,7 +57,7 @@ namespace FlaxEngine.GUI { base.OnSizeChanged(); - // Refresh textblocks since thos emight depend on control size (eg. align right) + // Refresh textblocks since those might depend on control size (eg. align right) UpdateTextBlocks(); } } From 3dc7546dd4907b69a206c20c07166295c659a35e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 27 Jun 2025 19:06:25 +0200 Subject: [PATCH 069/211] Fix crash when constant buffer is unused by shader but still exists --- Source/Engine/Renderer/AmbientOcclusionPass.cpp | 10 +--------- Source/Engine/Renderer/AntiAliasing/FXAA.cpp | 6 +----- Source/Engine/Renderer/AntiAliasing/SMAA.cpp | 8 +------- Source/Engine/Renderer/AntiAliasing/TAA.cpp | 6 +----- Source/Engine/Renderer/AtmospherePreCompute.cpp | 6 +----- Source/Engine/Renderer/ColorGradingPass.cpp | 8 +------- .../Renderer/ContrastAdaptiveSharpeningPass.cpp | 8 +------- Source/Engine/Renderer/DepthOfFieldPass.cpp | 8 +------- Source/Engine/Renderer/EyeAdaptationPass.cpp | 8 +------- Source/Engine/Renderer/HistogramPass.cpp | 8 +------- Source/Engine/Renderer/LightPass.cpp | 14 ++------------ Source/Engine/Renderer/MotionBlurPass.cpp | 10 +--------- Source/Engine/Renderer/PostProcessingPass.cpp | 14 ++------------ 
Source/Engine/Renderer/ProbesRenderer.cpp | 6 +----- Source/Engine/Renderer/ReflectionsPass.cpp | 8 +------- Source/Engine/Renderer/RendererPass.h | 1 + .../Engine/Renderer/ScreenSpaceReflectionsPass.cpp | 8 +------- Source/Engine/Renderer/ShadowsPass.cpp | 8 +------- Source/Engine/Renderer/Utils/BitonicSort.cpp | 8 +------- Source/Engine/Renderer/Utils/MultiScaler.cpp | 8 +------- Source/Engine/Renderer/VolumetricFogPass.cpp | 14 ++------------ 21 files changed, 24 insertions(+), 151 deletions(-) diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.cpp b/Source/Engine/Renderer/AmbientOcclusionPass.cpp index 5f181ab97..08a8cefcb 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.cpp +++ b/Source/Engine/Renderer/AmbientOcclusionPass.cpp @@ -91,17 +91,9 @@ bool AmbientOcclusionPass::setupResources() { // Check shader if (!_shader->IsLoaded()) - { return true; - } const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(ASSAOConstants)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, ASSAOConstants); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, ASSAOConstants); // Create pipeline states GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp index 41926e2d6..00dfb0cdd 100644 --- a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp @@ -36,11 +36,7 @@ bool FXAA::setupResources() return true; } const auto shader = _shader->GetShader(); - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); GPUPipelineState::Description psDesc; if (!_psFXAA.IsValid()) diff --git a/Source/Engine/Renderer/AntiAliasing/SMAA.cpp 
b/Source/Engine/Renderer/AntiAliasing/SMAA.cpp index 25007ad7d..2414118d3 100644 --- a/Source/Engine/Renderer/AntiAliasing/SMAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/SMAA.cpp @@ -45,13 +45,7 @@ bool SMAA::setupResources() return true; } const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline state GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/AntiAliasing/TAA.cpp b/Source/Engine/Renderer/AntiAliasing/TAA.cpp index 8bbb6ba81..b772eb45e 100644 --- a/Source/Engine/Renderer/AntiAliasing/TAA.cpp +++ b/Source/Engine/Renderer/AntiAliasing/TAA.cpp @@ -37,11 +37,7 @@ bool TAA::setupResources() if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); if (!_psTAA) _psTAA = GPUDevice::Instance->CreatePipelineState(); GPUPipelineState::Description psDesc; diff --git a/Source/Engine/Renderer/AtmospherePreCompute.cpp b/Source/Engine/Renderer/AtmospherePreCompute.cpp index 595ebcca5..b796e828b 100644 --- a/Source/Engine/Renderer/AtmospherePreCompute.cpp +++ b/Source/Engine/Renderer/AtmospherePreCompute.cpp @@ -166,11 +166,7 @@ bool init() } auto shader = _shader->GetShader(); ASSERT(shader->GetCB(0) != nullptr); - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages _psTransmittance = GPUDevice::Instance->CreatePipelineState(); diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp 
b/Source/Engine/Renderer/ColorGradingPass.cpp index 43b49f091..322e7d591 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -89,13 +89,7 @@ bool ColorGradingPass::setupResources() if (!_shader || !_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp index 6a6fce521..3231c32f8 100644 --- a/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp +++ b/Source/Engine/Renderer/ContrastAdaptiveSharpeningPass.cpp @@ -48,13 +48,7 @@ bool ContrastAdaptiveSharpeningPass::setupResources() if (!_shader || !_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stage auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/DepthOfFieldPass.cpp b/Source/Engine/Renderer/DepthOfFieldPass.cpp index 25d9ea94f..2c3dc36f8 100644 --- a/Source/Engine/Renderer/DepthOfFieldPass.cpp +++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp @@ -117,13 +117,7 @@ bool DepthOfFieldPass::setupResources() if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - 
return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/EyeAdaptationPass.cpp b/Source/Engine/Renderer/EyeAdaptationPass.cpp index 84e826e36..3cf44af1b 100644 --- a/Source/Engine/Renderer/EyeAdaptationPass.cpp +++ b/Source/Engine/Renderer/EyeAdaptationPass.cpp @@ -258,13 +258,7 @@ bool EyeAdaptationPass::setupResources() if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(EyeAdaptationData)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, EyeAdaptationData); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, EyeAdaptationData); // Create pipeline stages GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/HistogramPass.cpp b/Source/Engine/Renderer/HistogramPass.cpp index 96f0cca73..9b6e71a77 100644 --- a/Source/Engine/Renderer/HistogramPass.cpp +++ b/Source/Engine/Renderer/HistogramPass.cpp @@ -113,13 +113,7 @@ bool HistogramPass::setupResources() if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(HistogramData)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, HistogramData); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, HistogramData); _csClearHistogram = shader->GetCS("CS_ClearHistogram"); _csGenerateHistogram = shader->GetCS("CS_GenerateHistogram"); diff --git a/Source/Engine/Renderer/LightPass.cpp b/Source/Engine/Renderer/LightPass.cpp index 1371a53ee..ee38ee2ac 100644 --- a/Source/Engine/Renderer/LightPass.cpp +++ b/Source/Engine/Renderer/LightPass.cpp @@ -65,18 +65,8 @@ bool LightPass::setupResources() if (!_sphereModel->CanBeRendered() 
|| !_shader->IsLoaded()) return true; auto shader = _shader->GetShader(); - - // Validate shader constant buffers sizes - if (shader->GetCB(0)->GetSize() != sizeof(PerLight)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, PerLight); - return true; - } - if (shader->GetCB(1)->GetSize() != sizeof(PerFrame)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 1, PerFrame); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, PerLight); + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 1, PerFrame); // Create pipeline stages GPUPipelineState::Description psDesc; diff --git a/Source/Engine/Renderer/MotionBlurPass.cpp b/Source/Engine/Renderer/MotionBlurPass.cpp index d8ce05de1..3077e4cdb 100644 --- a/Source/Engine/Renderer/MotionBlurPass.cpp +++ b/Source/Engine/Renderer/MotionBlurPass.cpp @@ -80,17 +80,9 @@ bool MotionBlurPass::setupResources() { // Check shader if (!_shader->IsLoaded()) - { return true; - } const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline state GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/PostProcessingPass.cpp b/Source/Engine/Renderer/PostProcessingPass.cpp index 9192c8ca4..bc7a1b820 100644 --- a/Source/Engine/Renderer/PostProcessingPass.cpp +++ b/Source/Engine/Renderer/PostProcessingPass.cpp @@ -98,18 +98,8 @@ bool PostProcessingPass::setupResources() if (!_shader->IsLoaded()) return true; auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } - if (shader->GetCB(1)->GetSize() != sizeof(GaussianBlurData)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 1, 
GaussianBlurData); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 1, GaussianBlurData); // Create pipeline stages GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index 70705b8a8..ee72afe72 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -231,11 +231,7 @@ bool ProbesRenderer::Init() if (!_shader->IsLoaded()) return false; const auto shader = _shader->GetShader(); - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages _psFilterFace = GPUDevice::Instance->CreatePipelineState(); diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index 631543010..5aa8404ab 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -281,13 +281,7 @@ bool ReflectionsPass::setupResources() if (!_sphereModel->CanBeRendered() || !_preIntegratedGF->IsLoaded() || !_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages GPUPipelineState::Description psDesc; diff --git a/Source/Engine/Renderer/RendererPass.h b/Source/Engine/Renderer/RendererPass.h index 25d887883..32d3b86b9 100644 --- a/Source/Engine/Renderer/RendererPass.h +++ b/Source/Engine/Renderer/RendererPass.h @@ -113,3 +113,4 @@ class RendererPass : public Singleton, public RendererPassBase }; #define REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, index, 
dataType) LOG(Fatal, "Shader {0} has incorrect constant buffer {1} size: {2} bytes. Expected: {3} bytes", shader->ToString(), index, shader->GetCB(index)->GetSize(), sizeof(dataType)); +#define CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, index, dataType) if (shader->GetCB(index)->GetSize() != sizeof(dataType) && shader->GetCB(index)->GetSize() != 0) { REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, index, dataType); return true; } diff --git a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp index 55c6d79f2..454540eec 100644 --- a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp +++ b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp @@ -89,13 +89,7 @@ bool ScreenSpaceReflectionsPass::setupResources() if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 3937f8554..5cf90876a 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -507,13 +507,7 @@ bool ShadowsPass::setupResources() if (!_sphereModel->CanBeRendered() || !_shader->IsLoaded()) return true; auto shader = _shader->GetShader(); - - // Validate shader constant buffers sizes - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages GPUPipelineState::Description psDesc; diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index 
73a310832..babc058e2 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -59,14 +59,8 @@ bool BitonicSort::setupResources() if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size _cb = shader->GetCB(0); - if (_cb->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Cache compute shaders _indirectArgsCS = shader->GetCS("CS_IndirectArgs"); diff --git a/Source/Engine/Renderer/Utils/MultiScaler.cpp b/Source/Engine/Renderer/Utils/MultiScaler.cpp index 3f812ea77..ae5633834 100644 --- a/Source/Engine/Renderer/Utils/MultiScaler.cpp +++ b/Source/Engine/Renderer/Utils/MultiScaler.cpp @@ -41,13 +41,7 @@ bool MultiScaler::setupResources() if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); - - // Validate shader constant buffer size - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline states GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index c56812c72..b7e57c2bb 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -53,19 +53,9 @@ bool VolumetricFogPass::setupResources() if (!_shader->IsLoaded()) return true; auto shader = _shader->GetShader(); - - // Validate shader constant buffers sizes - if (shader->GetCB(0)->GetSize() != sizeof(Data)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // CB1 is used for per-draw info (ObjectIndex) - if (shader->GetCB(2)->GetSize() != 
sizeof(PerLight)) - { - REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 2, PerLight); - return true; - } + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 2, PerLight); // Cache compute shaders _csInitialize = shader->GetCS("CS_Initialize"); From bdd7bae4591360f430758fc820166000c796fb2b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 29 Jun 2025 13:51:59 +0200 Subject: [PATCH 070/211] Add new Custom Lit shading model for custom lighting in materials (eg. Cel Shading) --- .../Features/ForwardShading.hlsl | 10 ++++++-- Source/Editor/Surface/Archetypes/Material.cs | 7 +++--- Source/Engine/Content/Assets/Material.cpp | 10 ++++---- .../Materials/DeferredMaterialShader.cpp | 2 ++ .../Engine/Graphics/Materials/MaterialInfo.h | 5 ++++ .../MaterialGenerator/MaterialGenerator.cpp | 23 ++++++++++--------- Source/Shaders/LightingCommon.hlsl | 18 +++++++++++++-- 7 files changed, 53 insertions(+), 22 deletions(-) diff --git a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl index 2db55111b..263859075 100644 --- a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl +++ b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl @@ -28,6 +28,13 @@ TextureCube SkyLightTexture : register(t__SRV__); Buffer ShadowsBuffer : register(t__SRV__); Texture2D ShadowMap : register(t__SRV__); @4// Forward Shading: Utilities +// Public accessors for lighting data, use them as data binding might change but those methods will remain. 
+LightData GetDirectionalLight() { return DirectionalLight; } +LightData GetSkyLight() { return SkyLight; } +ProbeData GetEnvironmentProbe() { return EnvironmentProbe; } +ExponentialHeightFogData GetExponentialHeightFog() { return ExponentialHeightFog; } +uint GetLocalLightsCount() { return LocalLightsCount; } +LightData GetLocalLight(uint i) { return LocalLights[i]; } @5// Forward Shading: Shaders // Pixel Shader function for Forward Pass @@ -76,9 +83,8 @@ void PS_Forward( gBuffer.ShadingModel = MATERIAL_SHADING_MODEL; // Calculate lighting from a single directional light - float4 shadowMask = 1.0f; ShadowSample shadow = SampleDirectionalLightShadow(DirectionalLight, ShadowsBuffer, ShadowMap, gBuffer); - shadowMask = GetShadowMask(shadow); + float4 shadowMask = GetShadowMask(shadow); float4 light = GetLighting(ViewPos, DirectionalLight, gBuffer, shadowMask, false, false); // Calculate lighting from sky light diff --git a/Source/Editor/Surface/Archetypes/Material.cs b/Source/Editor/Surface/Archetypes/Material.cs index c9066eaa5..e46038639 100644 --- a/Source/Editor/Surface/Archetypes/Material.cs +++ b/Source/Editor/Surface/Archetypes/Material.cs @@ -124,7 +124,8 @@ namespace FlaxEditor.Surface.Archetypes case MaterialDomain.Particle: case MaterialDomain.Deformable: { - bool isNotUnlit = info.ShadingModel != MaterialShadingModel.Unlit; + bool isNotUnlit = info.ShadingModel != MaterialShadingModel.Unlit && info.ShadingModel != MaterialShadingModel.CustomLit; + bool isOpaque = info.BlendMode == MaterialBlendMode.Opaque; bool withTess = info.TessellationMode != TessellationMethod.None; GetBox(MaterialNodeBoxes.Color).IsActive = isNotUnlit; @@ -135,8 +136,8 @@ namespace FlaxEditor.Surface.Archetypes GetBox(MaterialNodeBoxes.Roughness).IsActive = isNotUnlit; GetBox(MaterialNodeBoxes.AmbientOcclusion).IsActive = isNotUnlit; GetBox(MaterialNodeBoxes.Normal).IsActive = isNotUnlit; - GetBox(MaterialNodeBoxes.Opacity).IsActive = info.ShadingModel == 
MaterialShadingModel.Subsurface || info.ShadingModel == MaterialShadingModel.Foliage || info.BlendMode != MaterialBlendMode.Opaque; - GetBox(MaterialNodeBoxes.Refraction).IsActive = info.BlendMode != MaterialBlendMode.Opaque; + GetBox(MaterialNodeBoxes.Opacity).IsActive = info.ShadingModel == MaterialShadingModel.Subsurface || info.ShadingModel == MaterialShadingModel.Foliage || !isOpaque; + GetBox(MaterialNodeBoxes.Refraction).IsActive = !isOpaque; GetBox(MaterialNodeBoxes.PositionOffset).IsActive = true; GetBox(MaterialNodeBoxes.TessellationMultiplier).IsActive = withTess; GetBox(MaterialNodeBoxes.WorldDisplacement).IsActive = withTess; diff --git a/Source/Engine/Content/Assets/Material.cpp b/Source/Engine/Content/Assets/Material.cpp index ce457c862..1e36b36ae 100644 --- a/Source/Engine/Content/Assets/Material.cpp +++ b/Source/Engine/Content/Assets/Material.cpp @@ -414,16 +414,18 @@ void Material::InitCompilationOptions(ShaderCompilationOptions& options) // Prepare auto& info = _shaderHeader.Material.Info; const bool isSurfaceOrTerrainOrDeformable = info.Domain == MaterialDomain::Surface || info.Domain == MaterialDomain::Terrain || info.Domain == MaterialDomain::Deformable; + const bool isOpaque = info.BlendMode == MaterialBlendMode::Opaque; const bool useCustomData = info.ShadingModel == MaterialShadingModel::Subsurface || info.ShadingModel == MaterialShadingModel::Foliage; - const bool useForward = ((info.Domain == MaterialDomain::Surface || info.Domain == MaterialDomain::Deformable) && info.BlendMode != MaterialBlendMode::Opaque) || info.Domain == MaterialDomain::Particle; + const bool useForward = ((info.Domain == MaterialDomain::Surface || info.Domain == MaterialDomain::Deformable) && !isOpaque) || info.Domain == MaterialDomain::Particle; const bool useTess = info.TessellationMode != TessellationMethod::None && RenderTools::CanSupportTessellation(options.Profile) && isSurfaceOrTerrainOrDeformable; const bool useDistortion = (info.Domain == 
MaterialDomain::Surface || info.Domain == MaterialDomain::Deformable || info.Domain == MaterialDomain::Particle) && - info.BlendMode != MaterialBlendMode::Opaque && + !isOpaque && EnumHasAnyFlags(info.UsageFlags, MaterialUsageFlags::UseRefraction) && (info.FeaturesFlags & MaterialFeaturesFlags::DisableDistortion) == MaterialFeaturesFlags::None; + const MaterialShadingModel shadingModel = info.ShadingModel == MaterialShadingModel::CustomLit ? MaterialShadingModel::Unlit : info.ShadingModel; // @formatter:off static const char* Numbers[] = @@ -435,7 +437,7 @@ void Material::InitCompilationOptions(ShaderCompilationOptions& options) // Setup shader macros options.Macros.Add({ "MATERIAL_DOMAIN", Numbers[(int32)info.Domain] }); options.Macros.Add({ "MATERIAL_BLEND", Numbers[(int32)info.BlendMode] }); - options.Macros.Add({ "MATERIAL_SHADING_MODEL", Numbers[(int32)info.ShadingModel] }); + options.Macros.Add({ "MATERIAL_SHADING_MODEL", Numbers[(int32)shadingModel] }); options.Macros.Add({ "MATERIAL_MASKED", Numbers[EnumHasAnyFlags(info.UsageFlags, MaterialUsageFlags::UseMask) ? 1 : 0] }); options.Macros.Add({ "DECAL_BLEND_MODE", Numbers[(int32)info.DecalBlendingMode] }); options.Macros.Add({ "USE_EMISSIVE", Numbers[EnumHasAnyFlags(info.UsageFlags, MaterialUsageFlags::UseEmissive) ? 1 : 0] }); @@ -492,7 +494,7 @@ void Material::InitCompilationOptions(ShaderCompilationOptions& options) options.Macros.Add({ "IS_PARTICLE", Numbers[info.Domain == MaterialDomain::Particle ? 1 : 0] }); options.Macros.Add({ "IS_DEFORMABLE", Numbers[info.Domain == MaterialDomain::Deformable ? 1 : 0] }); options.Macros.Add({ "USE_FORWARD", Numbers[useForward ? 1 : 0] }); - options.Macros.Add({ "USE_DEFERRED", Numbers[isSurfaceOrTerrainOrDeformable && info.BlendMode == MaterialBlendMode::Opaque ? 1 : 0] }); + options.Macros.Add({ "USE_DEFERRED", Numbers[isSurfaceOrTerrainOrDeformable && isOpaque ? 1 : 0] }); options.Macros.Add({ "USE_DISTORTION", Numbers[useDistortion ? 
1 : 0] }); #endif } diff --git a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp index c15ff5ef2..4e9622a01 100644 --- a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp @@ -42,6 +42,8 @@ void DeferredMaterialShader::Bind(BindParameters& params) // Setup features const bool useLightmap = _info.BlendMode == MaterialBlendMode::Opaque && LightmapFeature::Bind(params, cb, srv); + if (_info.ShadingModel == MaterialShadingModel::CustomLit) + ForwardShadingFeature::Bind(params, cb, srv); // Setup parameters MaterialParameter::BindMeta bindMeta; diff --git a/Source/Engine/Graphics/Materials/MaterialInfo.h b/Source/Engine/Graphics/Materials/MaterialInfo.h index 2a57b5f3d..69a9bd0a6 100644 --- a/Source/Engine/Graphics/Materials/MaterialInfo.h +++ b/Source/Engine/Graphics/Materials/MaterialInfo.h @@ -103,6 +103,11 @@ API_ENUM() enum class MaterialShadingModel : byte /// The foliage material. Intended for foliage materials like leaves and grass that need light scattering to transport simulation through the thin object. /// Foliage = 3, + + /// + /// The custom lit shader that calculates own lighting such as Cel Shading. It has access to the scene lights data during both GBuffer and Forward pass rendering. 
+ /// + CustomLit = 5, }; /// diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp index 5950676a3..2aea40c94 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.cpp @@ -184,28 +184,29 @@ bool MaterialGenerator::Generate(WriteStream& source, MaterialInfo& materialInfo return true; \ } \ } + const bool isOpaque = materialInfo.BlendMode == MaterialBlendMode::Opaque; switch (baseLayer->Domain) { case MaterialDomain::Surface: if (materialInfo.TessellationMode != TessellationMethod::None) ADD_FEATURE(TessellationFeature); - if (materialInfo.BlendMode == MaterialBlendMode::Opaque) + if (isOpaque) ADD_FEATURE(MotionVectorsFeature); - if (materialInfo.BlendMode == MaterialBlendMode::Opaque) + if (isOpaque) ADD_FEATURE(LightmapFeature); - if (materialInfo.BlendMode == MaterialBlendMode::Opaque) + if (isOpaque) ADD_FEATURE(DeferredShadingFeature); - if (materialInfo.BlendMode != MaterialBlendMode::Opaque && (materialInfo.FeaturesFlags & MaterialFeaturesFlags::DisableDistortion) == MaterialFeaturesFlags::None) + if (!isOpaque && (materialInfo.FeaturesFlags & MaterialFeaturesFlags::DisableDistortion) == MaterialFeaturesFlags::None) ADD_FEATURE(DistortionFeature); - if (materialInfo.BlendMode != MaterialBlendMode::Opaque && EnumHasAnyFlags(materialInfo.FeaturesFlags, MaterialFeaturesFlags::GlobalIllumination)) + if (!isOpaque && EnumHasAnyFlags(materialInfo.FeaturesFlags, MaterialFeaturesFlags::GlobalIllumination)) { ADD_FEATURE(GlobalIlluminationFeature); - // SDF Reflections is only valid when both GI and SSR is enabled - if (materialInfo.BlendMode != MaterialBlendMode::Opaque && EnumHasAnyFlags(materialInfo.FeaturesFlags, MaterialFeaturesFlags::ScreenSpaceReflections)) + // SDF Reflections is only valid when both GI and SSR are enabled + if (EnumHasAnyFlags(materialInfo.FeaturesFlags, 
MaterialFeaturesFlags::ScreenSpaceReflections)) ADD_FEATURE(SDFReflectionsFeature); } - if (materialInfo.BlendMode != MaterialBlendMode::Opaque) + if (materialInfo.BlendMode != MaterialBlendMode::Opaque || materialInfo.ShadingModel == MaterialShadingModel::CustomLit) ADD_FEATURE(ForwardShadingFeature); break; case MaterialDomain::Terrain: @@ -215,16 +216,16 @@ bool MaterialGenerator::Generate(WriteStream& source, MaterialInfo& materialInfo ADD_FEATURE(DeferredShadingFeature); break; case MaterialDomain::Particle: - if (materialInfo.BlendMode != MaterialBlendMode::Opaque && (materialInfo.FeaturesFlags & MaterialFeaturesFlags::DisableDistortion) == MaterialFeaturesFlags::None) + if (!isOpaque && (materialInfo.FeaturesFlags & MaterialFeaturesFlags::DisableDistortion) == MaterialFeaturesFlags::None) ADD_FEATURE(DistortionFeature); - if (materialInfo.BlendMode != MaterialBlendMode::Opaque && EnumHasAnyFlags(materialInfo.FeaturesFlags, MaterialFeaturesFlags::GlobalIllumination)) + if (!isOpaque && EnumHasAnyFlags(materialInfo.FeaturesFlags, MaterialFeaturesFlags::GlobalIllumination)) ADD_FEATURE(GlobalIlluminationFeature); ADD_FEATURE(ForwardShadingFeature); break; case MaterialDomain::Deformable: if (materialInfo.TessellationMode != TessellationMethod::None) ADD_FEATURE(TessellationFeature); - if (materialInfo.BlendMode == MaterialBlendMode::Opaque) + if (isOpaque) ADD_FEATURE(DeferredShadingFeature); if (materialInfo.BlendMode != MaterialBlendMode::Opaque) ADD_FEATURE(ForwardShadingFeature); diff --git a/Source/Shaders/LightingCommon.hlsl b/Source/Shaders/LightingCommon.hlsl index 807d6a71d..f09572310 100644 --- a/Source/Shaders/LightingCommon.hlsl +++ b/Source/Shaders/LightingCommon.hlsl @@ -62,8 +62,8 @@ void GetRadialLightAttenuation( float distanceBiasSqr, float3 toLight, float3 L, - inout float NoL, - inout float attenuation) + out float NoL, + out float attenuation) { // Distance attenuation if (lightData.InverseSquared) @@ -104,6 +104,20 @@ void 
GetRadialLightAttenuation( } } +// Calculates radial light (point or spot) attenuation factors (distance, spot and radius mask) +void GetRadialLightAttenuation( + LightData lightData, + bool isSpotLight, + float3 toLight, + float3 N, + out float NoL, + out float attenuation) +{ + float distanceSqr = dot(toLight, toLight); + float3 L = toLight * rsqrt(distanceSqr); + GetRadialLightAttenuation(lightData, isSpotLight, N, distanceSqr, 1, toLight, L, NoL, attenuation); +} + // Find representative incoming light direction and energy modification float AreaLightSpecular(LightData lightData, float roughness, inout float3 toLight, inout float3 L, float3 V, half3 N) { From f126a83b797c1fd3dda98f3438079c81d33dcde3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 29 Jun 2025 13:52:29 +0200 Subject: [PATCH 071/211] Fix graphical issues when batching materials that use Forward Shading for instancing --- Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp | 5 ++++- Source/Engine/Graphics/Materials/DeferredMaterialShader.h | 1 + Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp index 4e9622a01..5f0abad33 100644 --- a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp @@ -29,7 +29,7 @@ bool DeferredMaterialShader::CanUseLightmap() const bool DeferredMaterialShader::CanUseInstancing(InstancingHandler& handler) const { handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, }; - return true; + return _instanced; } void DeferredMaterialShader::Bind(BindParameters& params) @@ -114,6 +114,9 @@ void DeferredMaterialShader::Unload() bool DeferredMaterialShader::Load() { + // TODO: support instancing when using ForwardShadingFeature + _instanced = _info.BlendMode == 
MaterialBlendMode::Opaque && _info.ShadingModel != MaterialShadingModel::CustomLit; + bool failed = false; auto psDesc = GPUPipelineState::Description::Default; psDesc.DepthWriteEnable = (_info.FeaturesFlags & MaterialFeaturesFlags::DisableDepthWrite) == MaterialFeaturesFlags::None; diff --git a/Source/Engine/Graphics/Materials/DeferredMaterialShader.h b/Source/Engine/Graphics/Materials/DeferredMaterialShader.h index 4d01c4d4a..ebfd54ecb 100644 --- a/Source/Engine/Graphics/Materials/DeferredMaterialShader.h +++ b/Source/Engine/Graphics/Materials/DeferredMaterialShader.h @@ -65,6 +65,7 @@ private: private: Cache _cache; Cache _cacheInstanced; + bool _instanced; public: DeferredMaterialShader(const StringView& name) diff --git a/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp b/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp index 4ed8e6b86..a966507d8 100644 --- a/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp @@ -25,7 +25,7 @@ DrawPass ForwardMaterialShader::GetDrawModes() const bool ForwardMaterialShader::CanUseInstancing(InstancingHandler& handler) const { handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, }; - return true; + return false; // TODO: support instancing when using ForwardShadingFeature } void ForwardMaterialShader::Bind(BindParameters& params) From 43d11264f82ac2ffb3b4f94d77da427f32a05ac2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 29 Jun 2025 19:16:23 +0200 Subject: [PATCH 072/211] Fix asset references to use separate lightweight locking instead of full asset mutex --- Source/Engine/Content/Asset.cpp | 8 ++++---- Source/Engine/Content/Asset.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Content/Asset.cpp b/Source/Engine/Content/Asset.cpp index 5a9e62993..559a673da 100644 --- a/Source/Engine/Content/Asset.cpp +++ b/Source/Engine/Content/Asset.cpp @@ -242,9 +242,8 @@ void 
Asset::AddReference(IAssetReference* ref, bool week) if (ref) { //PROFILE_MEM(EngineDelegate); // Include references tracking memory within Delegate memory - Locker.Lock(); + ScopeLock lock(_referencesLocker); _references.Add(ref); - Locker.Unlock(); } } @@ -257,9 +256,8 @@ void Asset::RemoveReference(IAssetReference* ref, bool week) { if (ref) { - Locker.Lock(); + ScopeLock lock(_referencesLocker); _references.Remove(ref); - Locker.Unlock(); } if (!week) Platform::InterlockedDecrement(&_refCount); @@ -681,6 +679,7 @@ void Asset::onLoaded_MainThread() ASSERT(IsInMainThread()); // Send event + ScopeLock lock(_referencesLocker); for (const auto& e : _references) e.Item->OnAssetLoaded(this, this); OnLoaded(this); @@ -696,6 +695,7 @@ void Asset::onUnload_MainThread() CancelStreaming(); // Send event + ScopeLock lock(_referencesLocker); for (const auto& e : _references) e.Item->OnAssetUnloaded(this, this); OnUnloaded(this); diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index bb0fbe490..c838eddf8 100644 --- a/Source/Engine/Content/Asset.h +++ b/Source/Engine/Content/Asset.h @@ -7,6 +7,7 @@ #include "Engine/Core/Types/String.h" #include "Engine/Platform/CriticalSection.h" #include "Engine/Scripting/ScriptingObject.h" +#include "Engine/Threading/ConcurrentSystemLocker.h" #include "Config.h" #include "Types.h" @@ -63,6 +64,7 @@ protected: int8 _isVirtual : 1; // Indicates that asset is pure virtual (generated or temporary, has no storage so won't be saved) HashSet _references; + CriticalSection _referencesLocker; // TODO: convert into a single interlocked exchange for the current thread owning lock public: /// From 78d519cb9a71ab42a506c0f5018b6d417f2bf426 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 29 Jun 2025 19:16:41 +0200 Subject: [PATCH 073/211] Fix `ConcurrentSystemLocker` to have exclusive lock as an option --- Source/Engine/Level/Scene/SceneRendering.cpp | 6 +++--- Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 8 
+++++--- Source/Engine/Threading/ConcurrentSystemLocker.cpp | 6 +++--- Source/Engine/Threading/ConcurrentSystemLocker.h | 6 +++--- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index 4a88703ae..e55dbd43f 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -127,7 +127,7 @@ void SceneRendering::CollectPostFxVolumes(RenderContext& renderContext) void SceneRendering::Clear() { - ConcurrentSystemLocker::WriteScope lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker, true); for (auto* listener : _listeners) { listener->OnSceneRenderingClear(this); @@ -149,7 +149,7 @@ void SceneRendering::AddActor(Actor* a, int32& key) return; PROFILE_MEM(Graphics); const int32 category = a->_drawCategory; - ConcurrentSystemLocker::WriteScope lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker, true); auto& list = Actors[category]; if (FreeActors[category].HasItems()) { @@ -193,7 +193,7 @@ void SceneRendering::UpdateActor(Actor* a, int32& key, ISceneRenderingListener:: void SceneRendering::RemoveActor(Actor* a, int32& key) { const int32 category = a->_drawCategory; - ConcurrentSystemLocker::WriteScope lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker, true); auto& list = Actors[category]; if (list.Count() > key) // Ignore invalid key softly (eg. 
list after batch clear during scene unload) { diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 56dd196a5..e0d227b6b 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -396,7 +396,7 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { - ConcurrentSystemLocker::WriteScope lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker, true); OnSceneRenderingDirty(a->GetBox()); } } @@ -405,7 +405,7 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { - ConcurrentSystemLocker::WriteScope lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker, true); OnSceneRenderingDirty(BoundingBox::FromSphere(prevBounds)); OnSceneRenderingDirty(a->GetBox()); } @@ -415,13 +415,14 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { - ConcurrentSystemLocker::WriteScope lock(Locker); + ConcurrentSystemLocker::WriteScope lock(Locker, true); OnSceneRenderingDirty(a->GetBox()); } } void OnSceneRenderingClear(SceneRendering* scene) override { + ConcurrentSystemLocker::WriteScope lock(Locker, true); for (auto& cascade : Cascades) cascade.StaticChunks.Clear(); } @@ -719,6 +720,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } sdfData.LastFrameUsed = currentFrame; PROFILE_GPU_CPU("Global SDF"); + ConcurrentSystemLocker::WriteScope lock(sdfData.Locker); // Setup options int32 resolution, cascadesCount, resolutionMip; diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.cpp b/Source/Engine/Threading/ConcurrentSystemLocker.cpp index c8debb561..d936f8307 100644 --- a/Source/Engine/Threading/ConcurrentSystemLocker.cpp +++ b/Source/Engine/Threading/ConcurrentSystemLocker.cpp @@ -8,7 +8,7 @@ ConcurrentSystemLocker::ConcurrentSystemLocker() _counters[0] = _counters[1] 
= 0; } -void ConcurrentSystemLocker::Begin(bool write) +void ConcurrentSystemLocker::Begin(bool write, bool exclusively) { volatile int64* thisCounter = &_counters[write]; volatile int64* otherCounter = &_counters[!write]; @@ -22,8 +22,8 @@ RETRY: goto RETRY; } - // Writers have to check themselves to (one write at the same time - just like a mutex) - if (write && Platform::AtomicRead(thisCounter) != 0) + // Writers might want to check themselves for a single writer at the same time - just like a mutex + if (exclusively && Platform::AtomicRead(thisCounter) != 0) { // Someone else is doing opposite operation so wait for it's end Platform::Sleep(0); diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.h b/Source/Engine/Threading/ConcurrentSystemLocker.h index dd214a308..031b7e685 100644 --- a/Source/Engine/Threading/ConcurrentSystemLocker.h +++ b/Source/Engine/Threading/ConcurrentSystemLocker.h @@ -17,7 +17,7 @@ public: NON_COPYABLE(ConcurrentSystemLocker); ConcurrentSystemLocker(); - void Begin(bool write); + void Begin(bool write, bool exclusively = false); void End(bool write); public: @@ -26,10 +26,10 @@ public: { NON_COPYABLE(Scope); - Scope(ConcurrentSystemLocker& locker) + Scope(ConcurrentSystemLocker& locker, bool exclusively = false) : _locker(locker) { - _locker.Begin(Write); + _locker.Begin(Write, exclusively); } ~Scope() From 448eb48c230dd1c7c2591798d5ce4a993257ddfe Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 29 Jun 2025 20:02:24 +0200 Subject: [PATCH 074/211] Fix fog to draw Fog Cutoff Distance via a plane, not sphere test Add support for negative Fog Cutoff Distance on fog to draw it in front of the camera Far Plane, no matter the setup. Fix hot-reloading Fog shader in Editor. 
--- .../Features/ForwardShading.hlsl | 2 +- Content/Editor/Particles/Smoke Material.flax | 4 +- Content/Shaders/Fog.flax | 4 +- .../Level/Actors/ExponentialHeightFog.cpp | 8 +-- .../Level/Actors/ExponentialHeightFog.h | 4 +- Source/Shaders/ExponentialHeightFog.hlsl | 9 ++- Source/Shaders/Fog.shader | 55 +++++++------------ 7 files changed, 39 insertions(+), 47 deletions(-) diff --git a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl index 263859075..625a78e36 100644 --- a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl +++ b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl @@ -151,7 +151,7 @@ void PS_Forward( #if USE_FOG // Calculate exponential height fog - float4 fog = GetExponentialHeightFog(ExponentialHeightFog, materialInput.WorldPosition, ViewPos, 0); + float4 fog = GetExponentialHeightFog(ExponentialHeightFog, materialInput.WorldPosition, ViewPos, 0, gBuffer.ViewPos.z); // Apply fog to the output color #if MATERIAL_BLEND == MATERIAL_BLEND_OPAQUE diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index bd8c1c0e9..d5b8cb872 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6097a8ca31dbe7a985b5c512d2049d2d22c73175551965c75d6b360323505491 -size 38427 +oid sha256:a16a3fa5bed3bc8030c40fbe0e946f2bdec28745542bf08db1d7b4a43180f785 +size 38900 diff --git a/Content/Shaders/Fog.flax b/Content/Shaders/Fog.flax index 75590f84d..3f934412c 100644 --- a/Content/Shaders/Fog.flax +++ b/Content/Shaders/Fog.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7735a770a87483d4df5e4e653373067c26469de8088f071ca092ed3e797bf461 -size 2785 +oid sha256:e83f9dbbcf84550de09e7c63bbdd3acc6591cf6ba1bcce2a2699772122ae07f4 +size 2633 diff --git 
a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp index d62aecac4..efb5351e7 100644 --- a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp +++ b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp @@ -41,11 +41,10 @@ void ExponentialHeightFog::Draw(RenderContext& renderContext) && _shader->IsLoaded() && renderContext.View.IsPerspectiveProjection()) { - // Prepare if (_psFog.States[0] == nullptr) - { - // Create pipeline states _psFog.CreatePipelineStates(); + if (!_psFog.States[0]->IsValid()) + { GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; psDesc.DepthWriteEnable = false; psDesc.BlendMode.BlendEnable = true; @@ -59,6 +58,7 @@ void ExponentialHeightFog::Draw(RenderContext& renderContext) if (_psFog.Create(psDesc, _shader->GetShader(), "PS_Fog")) { LOG(Warning, "Cannot create graphics pipeline state object for '{0}'.", ToString()); + return; } } @@ -160,7 +160,7 @@ void ExponentialHeightFog::GetExponentialHeightFogData(const RenderView& view, S result.FogAtViewPosition = density * Math::Pow(2.0f, Math::Clamp(-heightFalloff * (viewHeight - height), -125.f, 126.f)); result.StartDistance = StartDistance; result.FogMinOpacity = 1.0f - FogMaxOpacity; - result.FogCutoffDistance = FogCutoffDistance; + result.FogCutoffDistance = FogCutoffDistance >= 0 ? FogCutoffDistance : view.Far + FogCutoffDistance; if (useDirectionalLightInscattering) { result.InscatteringLightDirection = -DirectionalInscatteringLight->GetDirection(); diff --git a/Source/Engine/Level/Actors/ExponentialHeightFog.h b/Source/Engine/Level/Actors/ExponentialHeightFog.h index 0b442ba9f..c0e5407d2 100644 --- a/Source/Engine/Level/Actors/ExponentialHeightFog.h +++ b/Source/Engine/Level/Actors/ExponentialHeightFog.h @@ -55,9 +55,9 @@ public: float StartDistance = 0.0f; /// - /// Scene elements past this distance will not have fog applied. 
This is useful for excluding skyboxes which already have fog baked in. Setting this value to 0 disables it. + /// Scene elements past this distance will not have fog applied. This is useful for excluding skyboxes which already have fog baked in. Setting this value to 0 disables it. Negative value sets the cutoff distance relative to the far plane of the camera. /// - API_FIELD(Attributes="EditorOrder(60), DefaultValue(0.0f), Limit(0), EditorDisplay(\"Exponential Height Fog\")") + API_FIELD(Attributes="EditorOrder(60), DefaultValue(0.0f), EditorDisplay(\"Exponential Height Fog\")") float FogCutoffDistance = 0.0f; public: diff --git a/Source/Shaders/ExponentialHeightFog.hlsl b/Source/Shaders/ExponentialHeightFog.hlsl index 2e34936eb..f6fb918f5 100644 --- a/Source/Shaders/ExponentialHeightFog.hlsl +++ b/Source/Shaders/ExponentialHeightFog.hlsl @@ -29,7 +29,7 @@ struct ExponentialHeightFogData float StartDistance; }; -float4 GetExponentialHeightFog(ExponentialHeightFogData exponentialHeightFog, float3 posWS, float3 camWS, float skipDistance) +float4 GetExponentialHeightFog(ExponentialHeightFogData exponentialHeightFog, float3 posWS, float3 camWS, float skipDistance, float sceneDistance) { float3 cameraToPos = posWS - camWS; float cameraToPosSqr = dot(cameraToPos, cameraToPos); @@ -78,7 +78,7 @@ float4 GetExponentialHeightFog(ExponentialHeightFogData exponentialHeightFog, fl // Disable fog after a certain distance FLATTEN - if (exponentialHeightFog.FogCutoffDistance > 0 && cameraToPosLen > exponentialHeightFog.FogCutoffDistance) + if (exponentialHeightFog.FogCutoffDistance > 0 && sceneDistance > exponentialHeightFog.FogCutoffDistance) { expFogFactor = 1; directionalInscattering = 0; @@ -87,4 +87,9 @@ float4 GetExponentialHeightFog(ExponentialHeightFogData exponentialHeightFog, fl return float4(inscatteringColor * (1.0f - expFogFactor) + directionalInscattering, expFogFactor); } +float4 GetExponentialHeightFog(ExponentialHeightFogData exponentialHeightFog, float3 posWS, 
float3 camWS, float skipDistance) +{ + return GetExponentialHeightFog(exponentialHeightFog, posWS, camWS, skipDistance, distance(posWS, camWS)); +} + #endif diff --git a/Source/Shaders/Fog.shader b/Source/Shaders/Fog.shader index 7dcc679ec..dfea921cc 100644 --- a/Source/Shaders/Fog.shader +++ b/Source/Shaders/Fog.shader @@ -24,41 +24,17 @@ Texture2D Depth : register(t0); Texture3D IntegratedLightScattering : register(t1); #endif -// Get world space position at given pixel coordinate -float3 GetWorldPos(float2 uv) -{ - float depth = SAMPLE_RT(Depth, uv).r; - GBufferData gBufferData = GetGBufferData(); - float3 viewPos = GetViewPos(gBufferData, uv, depth); - return mul(float4(viewPos, 1), gBufferData.InvViewMatrix).xyz; -} - -float4 CalculateCombinedFog(float3 posWS, float sceneDepth, float3 volumeUV) -{ - float skipDistance = 0; - -#if VOLUMETRIC_FOG - skipDistance = max(ExponentialHeightFog.VolumetricFogMaxDistance - 100, 0); -#endif - - float4 fog = GetExponentialHeightFog(ExponentialHeightFog, posWS, GBuffer.ViewPos, skipDistance); - -#if VOLUMETRIC_FOG - float4 volumetricFog = IntegratedLightScattering.SampleLevel(SamplerLinearClamp, volumeUV, 0); - fog = float4(volumetricFog.rgb + fog.rgb * volumetricFog.a, volumetricFog.a * fog.a); -#endif - - return fog; -} - META_PS(true, FEATURE_LEVEL_ES2) META_PERMUTATION_1(VOLUMETRIC_FOG=0) META_PERMUTATION_1(VOLUMETRIC_FOG=1) float4 PS_Fog(Quad_VS2PS input) : SV_Target0 { - // Calculate pixel world space position - float3 posWS = GetWorldPos(input.TexCoord); - float3 viewVector = posWS - GBuffer.ViewPos; + // Get world space position at given pixel coordinate + float rawDepth = SAMPLE_RT(Depth, input.TexCoord).r; + GBufferData gBufferData = GetGBufferData(); + float3 viewPos = GetViewPos(gBufferData, input.TexCoord, rawDepth); + float3 worldPos = mul(float4(viewPos, 1), gBufferData.InvViewMatrix).xyz; + float3 viewVector = worldPos - GBuffer.ViewPos; float sceneDepth = length(viewVector); // Calculate volumetric fog 
coordinates @@ -67,17 +43,28 @@ float4 PS_Fog(Quad_VS2PS input) : SV_Target0 // Debug code #if VOLUMETRIC_FOG && 0 - volumeUV = posWS / 1000; + volumeUV = worldPos / 1000; if (!all(volumeUV >= 0 && volumeUV <= 1)) return 0; return float4(IntegratedLightScattering.SampleLevel(SamplerLinearClamp, volumeUV, 0).rgb, 1); //return float4(volumeUV, 1); - //return float4(posWS / 100, 1); + //return float4(worldPos / 100, 1); #endif - // Calculate fog color - float4 fog = CalculateCombinedFog(posWS, sceneDepth, volumeUV); + float skipDistance = 0; +#if VOLUMETRIC_FOG + skipDistance = max(ExponentialHeightFog.VolumetricFogMaxDistance - 100, 0); +#endif + + // Calculate exponential fog color + float4 fog = GetExponentialHeightFog(ExponentialHeightFog, worldPos, GBuffer.ViewPos, skipDistance, viewPos.z); + +#if VOLUMETRIC_FOG + // Sample volumetric fog and mix it in + float4 volumetricFog = IntegratedLightScattering.SampleLevel(SamplerLinearClamp, volumeUV, 0); + fog = float4(volumetricFog.rgb + fog.rgb * volumetricFog.a, volumetricFog.a * fog.a); +#endif return fog; } From 094a6562b87ce661fc549a5b53efe5abc5b40545 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 3 Jul 2025 10:18:51 +0200 Subject: [PATCH 075/211] Refactor `ProbesRenderer` --- Source/Editor/Managed/ManagedEditor.cpp | 9 +- Source/Engine/Renderer/ProbesRenderer.cpp | 338 ++++++++++------------ Source/Engine/Renderer/ProbesRenderer.h | 70 +---- 3 files changed, 173 insertions(+), 244 deletions(-) diff --git a/Source/Editor/Managed/ManagedEditor.cpp b/Source/Editor/Managed/ManagedEditor.cpp index e270d08f8..c020fe7ce 100644 --- a/Source/Editor/Managed/ManagedEditor.cpp +++ b/Source/Editor/Managed/ManagedEditor.cpp @@ -13,6 +13,7 @@ #include "Engine/Scripting/Internal/MainThreadManagedInvokeAction.h" #include "Engine/Content/Assets/VisualScript.h" #include "Engine/Content/Content.h" +#include "Engine/Level/Actor.h" #include "Engine/CSG/CSGBuilder.h" #include "Engine/Engine/CommandLine.h" #include 
"Engine/Renderer/ProbesRenderer.h" @@ -74,7 +75,7 @@ void OnLightmapsBuildFinished(bool failed) OnLightmapsBake(ShadowsOfMordor::BuildProgressStep::GenerateLightmapCharts, 0, 0, false); } -void OnBakeEvent(bool started, const ProbesRenderer::Entry& e) +void OnBakeEvent(bool started, Actor* e) { if (Internal_EnvProbeBake == nullptr) { @@ -82,7 +83,7 @@ void OnBakeEvent(bool started, const ProbesRenderer::Entry& e) ASSERT(Internal_EnvProbeBake); } - MObject* probeObj = e.Actor ? e.Actor->GetManagedInstance() : nullptr; + MObject* probeObj = e ? e->GetManagedInstance() : nullptr; MainThreadManagedInvokeAction::ParamsBuilder params; params.AddParam(started); @@ -90,12 +91,12 @@ void OnBakeEvent(bool started, const ProbesRenderer::Entry& e) MainThreadManagedInvokeAction::Invoke(Internal_EnvProbeBake, params); } -void OnRegisterBake(const ProbesRenderer::Entry& e) +void OnRegisterBake(Actor* e) { OnBakeEvent(true, e); } -void OnFinishBake(const ProbesRenderer::Entry& e) +void OnFinishBake(Actor* e) { OnBakeEvent(false, e); } diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index ee72afe72..ac19cd309 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -4,8 +4,8 @@ #include "Renderer.h" #include "ReflectionsPass.h" #include "Engine/Core/Config/GraphicsSettings.h" -#include "Engine/Threading/ThreadPoolTask.h" -#include "Engine/Content/Content.h" +#include "Engine/Engine/Time.h" +#include "Engine/Engine/Engine.h" #include "Engine/Engine/EngineService.h" #include "Engine/Level/Actors/PointLight.h" #include "Engine/Level/Actors/EnvironmentProbe.h" @@ -14,28 +14,49 @@ #include "Engine/Level/LargeWorlds.h" #include "Engine/ContentExporters/AssetExporters.h" #include "Engine/Serialization/FileWriteStream.h" -#include "Engine/Engine/Time.h" +#include "Engine/Content/Content.h" #include "Engine/Content/Assets/Shader.h" #include "Engine/Content/AssetReference.h" #include 
"Engine/Graphics/Graphics.h" +#include "Engine/Graphics/PixelFormat.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/Textures/GPUTexture.h" #include "Engine/Graphics/Textures/TextureData.h" #include "Engine/Graphics/RenderTask.h" -#include "Engine/Engine/Engine.h" +#include "Engine/Scripting/ScriptingObjectReference.h" +#include "Engine/Threading/ThreadPoolTask.h" -/// -/// Custom task called after downloading probe texture data to save it. -/// +// Amount of frames to wait for data from probe update job +#define PROBES_RENDERER_LATENCY_FRAMES 1 + +struct ProbeEntry +{ + enum class Types + { + Invalid = 0, + EnvProbe = 1, + SkyLight = 2, + }; + + Types Type = Types::Invalid; + float Timeout = 0.0f; + ScriptingObjectReference Actor; + + bool UseTextureData() const; + int32 GetResolution() const; + PixelFormat GetFormat() const; +}; + +// Custom task called after downloading probe texture data to save it. class DownloadProbeTask : public ThreadPoolTask { private: GPUTexture* _texture; TextureData _data; - ProbesRenderer::Entry _entry; + ProbeEntry _entry; public: - DownloadProbeTask(GPUTexture* target, const ProbesRenderer::Entry& entry) + DownloadProbeTask(GPUTexture* target, const ProbeEntry& entry) : _texture(target) , _entry(entry) { @@ -48,23 +69,23 @@ public: bool Run() override { - if (_entry.Type == ProbesRenderer::EntryType::EnvProbe) + Actor* actor = _entry.Actor.Get(); + if (_entry.Type == ProbeEntry::Types::EnvProbe) { - if (_entry.Actor) - ((EnvironmentProbe*)_entry.Actor.Get())->SetProbeData(_data); + if (actor) + ((EnvironmentProbe*)actor)->SetProbeData(_data); } - else if (_entry.Type == ProbesRenderer::EntryType::SkyLight) + else if (_entry.Type == ProbeEntry::Types::SkyLight) { - if (_entry.Actor) - ((SkyLight*)_entry.Actor.Get())->SetProbeData(_data); + if (actor) + ((SkyLight*)actor)->SetProbeData(_data); } else { return true; } - ProbesRenderer::OnFinishBake(_entry); - + ProbesRenderer::OnFinishBake(actor); return false; } }; 
@@ -75,14 +96,17 @@ GPU_CB_STRUCT(Data { float SourceMipIndex; }); -namespace ProbesRendererImpl +class ProbesRendererService : public EngineService { - TimeSpan _lastProbeUpdate(0); - Array _probesToBake; +private: + bool _initDone = false; + bool _initFailed = false; - ProbesRenderer::Entry _current; + TimeSpan _lastProbeUpdate = TimeSpan(0); + Array _probesToBake; + + ProbeEntry _current; - bool _isReady = false; AssetReference _shader; GPUPipelineState* _psFilterFace = nullptr; SceneRenderTask* _task = nullptr; @@ -92,91 +116,52 @@ namespace ProbesRendererImpl GPUTexture* _skySHIrradianceMap = nullptr; uint64 _updateFrameNumber = 0; - FORCE_INLINE bool isUpdateSynced() - { - return _updateFrameNumber > 0 && _updateFrameNumber + PROBES_RENDERER_LATENCY_FRAMES <= Engine::FrameCount; - } -} - -using namespace ProbesRendererImpl; - -class ProbesRendererService : public EngineService -{ public: ProbesRendererService() : EngineService(TEXT("Probes Renderer"), 500) { } + bool LazyInit(); void Update() override; void Dispose() override; + + void Bake(const ProbeEntry& e); + void OnRender(RenderTask* task, GPUContext* context); }; ProbesRendererService ProbesRendererServiceInstance; -TimeSpan ProbesRenderer::ProbesUpdatedBreak(0, 0, 0, 0, 500); -TimeSpan ProbesRenderer::ProbesReleaseDataTime(0, 0, 0, 60); -Delegate ProbesRenderer::OnRegisterBake; -Delegate ProbesRenderer::OnFinishBake; +TimeSpan ProbesRenderer::UpdateDelay(0, 0, 0, 0, 100); +TimeSpan ProbesRenderer::ReleaseTimeout(0, 0, 0, 30); +Delegate ProbesRenderer::OnRegisterBake; +Delegate ProbesRenderer::OnFinishBake; void ProbesRenderer::Bake(EnvironmentProbe* probe, float timeout) { if (!probe || probe->IsUsingCustomProbe()) return; - - // Check if already registered for bake - for (int32 i = 0; i < _probesToBake.Count(); i++) - { - auto& p = _probesToBake[i]; - if (p.Type == EntryType::EnvProbe && p.Actor == probe) - { - p.Timeout = timeout; - return; - } - } - - // Register probe - Entry e; - e.Type = 
EntryType::EnvProbe; + ProbeEntry e; + e.Type = ProbeEntry::Types::EnvProbe; e.Actor = probe; e.Timeout = timeout; - _probesToBake.Add(e); - - // Fire event - if (e.UseTextureData()) - OnRegisterBake(e); + ProbesRendererServiceInstance.Bake(e); } void ProbesRenderer::Bake(SkyLight* probe, float timeout) { - ASSERT(probe && dynamic_cast(probe)); - - // Check if already registered for bake - for (int32 i = 0; i < _probesToBake.Count(); i++) - { - auto& p = _probesToBake[i]; - if (p.Type == EntryType::SkyLight && p.Actor == probe) - { - p.Timeout = timeout; - return; - } - } - - // Register probe - Entry e; - e.Type = EntryType::SkyLight; + if (!probe) + return; + ProbeEntry e; + e.Type = ProbeEntry::Types::SkyLight; e.Actor = probe; e.Timeout = timeout; - _probesToBake.Add(e); - - // Fire event - if (e.UseTextureData()) - OnRegisterBake(e); + ProbesRendererServiceInstance.Bake(e); } -bool ProbesRenderer::Entry::UseTextureData() const +bool ProbeEntry::UseTextureData() const { - if (Type == EntryType::EnvProbe && Actor) + if (Type == Types::EnvProbe && Actor) { switch (Actor.As()->UpdateMode) { @@ -187,12 +172,12 @@ bool ProbesRenderer::Entry::UseTextureData() const return true; } -int32 ProbesRenderer::Entry::GetResolution() const +int32 ProbeEntry::GetResolution() const { auto resolution = ProbeCubemapResolution::UseGraphicsSettings; - if (Type == EntryType::EnvProbe && Actor) + if (Type == Types::EnvProbe && Actor) resolution = ((EnvironmentProbe*)Actor.Get())->CubemapResolution; - else if (Type == EntryType::SkyLight) + else if (Type == Types::SkyLight) resolution = ProbeCubemapResolution::_128; if (resolution == ProbeCubemapResolution::UseGraphicsSettings) resolution = GraphicsSettings::Get()->DefaultProbeResolution; @@ -201,116 +186,83 @@ int32 ProbesRenderer::Entry::GetResolution() const return (int32)resolution; } -PixelFormat ProbesRenderer::Entry::GetFormat() const +PixelFormat ProbeEntry::GetFormat() const { return GraphicsSettings::Get()->UseHDRProbes ? 
PixelFormat::R11G11B10_Float : PixelFormat::R8G8B8A8_UNorm; } -int32 ProbesRenderer::GetBakeQueueSize() +bool ProbesRendererService::LazyInit() { - return _probesToBake.Count(); -} - -bool ProbesRenderer::HasReadyResources() -{ - return _isReady && _shader->IsLoaded(); -} - -bool ProbesRenderer::Init() -{ - if (_isReady) + if (_initDone || _initFailed) return false; // Load shader if (_shader == nullptr) { _shader = Content::LoadAsyncInternal(TEXT("Shaders/ProbesFilter")); - if (_shader == nullptr) - return true; + _initFailed = _shader == nullptr; + if (_initFailed) + return false; } if (!_shader->IsLoaded()) - return false; + return true; const auto shader = _shader->GetShader(); CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); // Create pipeline stages _psFilterFace = GPUDevice::Instance->CreatePipelineState(); - GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; { psDesc.PS = shader->GetPS("PS_FilterFace"); - if (_psFilterFace->Init(psDesc)) - return true; + _initFailed |= _psFilterFace->Init(psDesc); } // Init rendering pipeline - _output = GPUDevice::Instance->CreateTexture(TEXT("Output")); + _output = GPUDevice::Instance->CreateTexture(TEXT("ProbesRenderer.Output")); const int32 probeResolution = _current.GetResolution(); const PixelFormat probeFormat = _current.GetFormat(); - if (_output->Init(GPUTextureDescription::New2D(probeResolution, probeResolution, probeFormat))) - return true; + _initFailed |= _output->Init(GPUTextureDescription::New2D(probeResolution, probeResolution, probeFormat)); _task = New(); auto task = _task; + task->Order = -100; // Run before main view rendering (realtime probes will get smaller latency) task->Enabled = false; task->IsCustomRendering = true; task->Output = _output; auto& view = task->View; view.Flags = - ViewFlags::AO | - ViewFlags::GI | - ViewFlags::DirectionalLights | - ViewFlags::PointLights | - 
ViewFlags::SpotLights | - ViewFlags::SkyLights | - ViewFlags::Decals | - ViewFlags::Shadows | - ViewFlags::Sky | - ViewFlags::Fog; + ViewFlags::AO | + ViewFlags::GI | + ViewFlags::DirectionalLights | + ViewFlags::PointLights | + ViewFlags::SpotLights | + ViewFlags::SkyLights | + ViewFlags::Decals | + ViewFlags::Shadows | + ViewFlags::Sky | + ViewFlags::Fog; view.Mode = ViewMode::NoPostFx; view.IsOfflinePass = true; view.IsSingleFrame = true; view.StaticFlagsMask = view.StaticFlagsCompare = StaticFlags::ReflectionProbe; - view.MaxShadowsQuality = Quality::Low; task->IsCameraCut = true; task->Resize(probeResolution, probeResolution); - task->Render.Bind(OnRender); + task->Render.Bind(this); // Init render targets - _probe = GPUDevice::Instance->CreateTexture(TEXT("ProbesUpdate.Probe")); - if (_probe->Init(GPUTextureDescription::NewCube(probeResolution, probeFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews, 0))) - return true; - _tmpFace = GPUDevice::Instance->CreateTexture(TEXT("ProbesUpdate.TmpFace")); - if (_tmpFace->Init(GPUTextureDescription::New2D(probeResolution, probeResolution, 0, probeFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews))) - return true; + _probe = GPUDevice::Instance->CreateTexture(TEXT("ProbesRenderer.Probe")); + _initFailed |= _probe->Init(GPUTextureDescription::NewCube(probeResolution, probeFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews, 0)); + _tmpFace = GPUDevice::Instance->CreateTexture(TEXT("ProbesRenderer.TmpFace")); + _initFailed |= _tmpFace->Init(GPUTextureDescription::New2D(probeResolution, probeResolution, 0, probeFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews)); // Mark as ready - _isReady = true; + _initDone = true; return false; } -void ProbesRenderer::Release() -{ - if (!_isReady) - return; - 
ASSERT(_updateFrameNumber == 0); - - // Release GPU data - if (_output) - _output->ReleaseGPU(); - - // Release data - SAFE_DELETE_GPU_RESOURCE(_psFilterFace); - _shader = nullptr; - SAFE_DELETE_GPU_RESOURCE(_output); - SAFE_DELETE(_task); - SAFE_DELETE_GPU_RESOURCE(_probe); - SAFE_DELETE_GPU_RESOURCE(_tmpFace); - SAFE_DELETE_GPU_RESOURCE(_skySHIrradianceMap); - - _isReady = false; -} - void ProbesRendererService::Update() { + PROFILE_MEM(Graphics); + // Calculate time delta since last update auto timeNow = Time::Update.UnscaledTime; auto timeSinceUpdate = timeNow - _lastProbeUpdate; @@ -321,35 +273,32 @@ void ProbesRendererService::Update() } // Check if render job is done - if (isUpdateSynced()) + if (_updateFrameNumber > 0 && _updateFrameNumber + PROBES_RENDERER_LATENCY_FRAMES <= Engine::FrameCount) { // Create async job to gather probe data from the GPU GPUTexture* texture = nullptr; switch (_current.Type) { - case ProbesRenderer::EntryType::SkyLight: - case ProbesRenderer::EntryType::EnvProbe: + case ProbeEntry::Types::SkyLight: + case ProbeEntry::Types::EnvProbe: texture = _probe; break; } ASSERT(texture && _current.UseTextureData()); auto taskB = New(texture, _current); auto taskA = texture->DownloadDataAsync(taskB->GetData()); - if (taskA == nullptr) - { - LOG(Fatal, "Failed to create async tsk to download env probe texture data fro mthe GPU."); - } + ASSERT(taskA); taskA->ContinueWith(taskB); taskA->Start(); // Clear flag _updateFrameNumber = 0; - _current.Type = ProbesRenderer::EntryType::Invalid; + _current.Type = ProbeEntry::Types::Invalid; } - else if (_current.Type == ProbesRenderer::EntryType::Invalid) + else if (_current.Type == ProbeEntry::Types::Invalid && timeSinceUpdate > ProbesRenderer::UpdateDelay) { int32 firstValidEntryIndex = -1; - auto dt = (float)Time::Update.UnscaledDeltaTime.GetTotalSeconds(); + auto dt = Time::Update.UnscaledDeltaTime.GetTotalSeconds(); for (int32 i = 0; i < _probesToBake.Count(); i++) { auto& e = _probesToBake[i]; @@ 
-362,40 +311,65 @@ void ProbesRendererService::Update() } // Check if need to update probe - if (firstValidEntryIndex >= 0 && timeSinceUpdate > ProbesRenderer::ProbesUpdatedBreak) + if (firstValidEntryIndex >= 0 && timeSinceUpdate > ProbesRenderer::UpdateDelay) { - // Init service - if (ProbesRenderer::Init()) - { - LOG(Fatal, "Cannot setup Probes Renderer!"); - } - if (ProbesRenderer::HasReadyResources() == false) - return; + if (LazyInit()) + return; // Shader is not yet loaded so try the next frame // Mark probe to update _current = _probesToBake[firstValidEntryIndex]; _probesToBake.RemoveAtKeepOrder(firstValidEntryIndex); _task->Enabled = true; _updateFrameNumber = 0; - - // Store time of the last probe update _lastProbeUpdate = timeNow; } // Check if need to release data - else if (_isReady && timeSinceUpdate > ProbesRenderer::ProbesReleaseDataTime) + else if (_initDone && timeSinceUpdate > ProbesRenderer::ReleaseTimeout) { - // Release service - ProbesRenderer::Release(); + // Release resources + Dispose(); } } } void ProbesRendererService::Dispose() { - ProbesRenderer::Release(); + if (!_initDone && !_initFailed) + return; + ASSERT(_updateFrameNumber == 0); + if (_output) + _output->ReleaseGPU(); + SAFE_DELETE_GPU_RESOURCE(_psFilterFace); + SAFE_DELETE_GPU_RESOURCE(_output); + SAFE_DELETE_GPU_RESOURCE(_probe); + SAFE_DELETE_GPU_RESOURCE(_tmpFace); + SAFE_DELETE_GPU_RESOURCE(_skySHIrradianceMap); + SAFE_DELETE(_task); + _shader = nullptr; + _initDone = false; + _initFailed = false; } -bool fixFarPlaneTreeExecute(Actor* actor, const Vector3& position, float& farPlane) +void ProbesRendererService::Bake(const ProbeEntry& e) +{ + // Check if already registered for bake + for (ProbeEntry& p : _probesToBake) + { + if (p.Type == e.Type && p.Actor == e.Actor) + { + p.Timeout = e.Timeout; + return; + } + } + + _probesToBake.Add(e); + + // Fire event + if (e.UseTextureData()) + ProbesRenderer::OnRegisterBake(e.Actor); +} + +static bool FixFarPlane(Actor* actor, const 
Vector3& position, float& farPlane) { if (auto* pointLight = dynamic_cast(actor)) { @@ -408,20 +382,19 @@ bool fixFarPlaneTreeExecute(Actor* actor, const Vector3& position, float& farPla return true; } -void ProbesRenderer::OnRender(RenderTask* task, GPUContext* context) +void ProbesRendererService::OnRender(RenderTask* task, GPUContext* context) { - ASSERT(_current.Type != EntryType::Invalid && _updateFrameNumber == 0); switch (_current.Type) { - case EntryType::EnvProbe: - case EntryType::SkyLight: + case ProbeEntry::Types::EnvProbe: + case ProbeEntry::Types::SkyLight: { if (_current.Actor == nullptr) { // Probe has been unlinked (or deleted) _task->Enabled = false; _updateFrameNumber = 0; - _current.Type = EntryType::Invalid; + _current.Type = ProbeEntry::Types::Invalid; return; } break; @@ -430,7 +403,7 @@ void ProbesRenderer::OnRender(RenderTask* task, GPUContext* context) // Canceled return; } - + ASSERT(_updateFrameNumber == 0); auto shader = _shader->GetShader(); PROFILE_GPU("Render Probe"); @@ -438,7 +411,7 @@ void ProbesRenderer::OnRender(RenderTask* task, GPUContext* context) float customCullingNear = -1; const int32 probeResolution = _current.GetResolution(); const PixelFormat probeFormat = _current.GetFormat(); - if (_current.Type == EntryType::EnvProbe) + if (_current.Type == ProbeEntry::Types::EnvProbe) { auto envProbe = (EnvironmentProbe*)_current.Actor.Get(); Vector3 position = envProbe->GetPosition(); @@ -448,14 +421,14 @@ void ProbesRenderer::OnRender(RenderTask* task, GPUContext* context) // Adjust far plane distance float farPlane = Math::Max(radius, nearPlane + 100.0f); farPlane *= farPlane < 10000 ? 
10 : 4; - Function f(&fixFarPlaneTreeExecute); + Function f(&FixFarPlane); SceneQuery::TreeExecute(f, position, farPlane); // Setup view LargeWorlds::UpdateOrigin(_task->View.Origin, position); _task->View.SetUpCube(nearPlane, farPlane, position - _task->View.Origin); } - else if (_current.Type == EntryType::SkyLight) + else if (_current.Type == ProbeEntry::Types::SkyLight) { auto skyLight = (SkyLight*)_current.Actor.Get(); Vector3 position = skyLight->GetPosition(); @@ -481,6 +454,9 @@ void ProbesRenderer::OnRender(RenderTask* task, GPUContext* context) const bool isActorActive = _current.Actor->GetIsActive(); _current.Actor->SetIsActive(false); + // Lower quality when rendering probes in-game to gain performance + _task->View.MaxShadowsQuality = Engine::IsPlayMode() ? Quality::Low : Quality::Ultra; + // Render scene for all faces for (int32 faceIndex = 0; faceIndex < 6; faceIndex++) { @@ -556,13 +532,13 @@ void ProbesRenderer::OnRender(RenderTask* task, GPUContext* context) // Real-time probes don't use TextureData (for streaming) but copy generated probe directly to GPU memory if (!_current.UseTextureData()) { - if (_current.Type == EntryType::EnvProbe && _current.Actor) + if (_current.Type == ProbeEntry::Types::EnvProbe && _current.Actor) { _current.Actor.As()->SetProbeData(context, _probe); } // Clear flag _updateFrameNumber = 0; - _current.Type = EntryType::Invalid; + _current.Type = ProbeEntry::Types::Invalid; } } diff --git a/Source/Engine/Renderer/ProbesRenderer.h b/Source/Engine/Renderer/ProbesRenderer.h index 5c4e011e4..0e2007a37 100644 --- a/Source/Engine/Renderer/ProbesRenderer.h +++ b/Source/Engine/Renderer/ProbesRenderer.h @@ -2,75 +2,30 @@ #pragma once -#include "Engine/Graphics/PixelFormat.h" -#include "Engine/Scripting/ScriptingObjectReference.h" -#include "Engine/Level/Actor.h" +#include "Engine/Core/Delegate.h" +#include "Engine/Core/Types/TimeSpan.h" -// Amount of frames to wait for data from probe update job -#define 
PROBES_RENDERER_LATENCY_FRAMES 1 - -class EnvironmentProbe; -class SkyLight; -class RenderTask; +class Actor; /// /// Probes rendering service /// class ProbesRenderer { -public: - enum class EntryType - { - Invalid = 0, - EnvProbe = 1, - SkyLight = 2, - }; - - struct Entry - { - EntryType Type = EntryType::Invalid; - ScriptingObjectReference Actor; - float Timeout = 0.0f; - - bool UseTextureData() const; - int32 GetResolution() const; - PixelFormat GetFormat() const; - }; - public: /// - /// Minimum amount of time between two updated of probes + /// Time delay between probe updates. Can be used to improve performance by rendering probes less often. /// - static TimeSpan ProbesUpdatedBreak; + static TimeSpan UpdateDelay; /// - /// Time after last probe update when probes updating content will be released + /// Timeout after the last probe rendered when resources used to render it should be released. /// - static TimeSpan ProbesReleaseDataTime; + static TimeSpan ReleaseTimeout; - int32 GetBakeQueueSize(); + static Delegate OnRegisterBake; - static Delegate OnRegisterBake; - - static Delegate OnFinishBake; - -public: - /// - /// Checks if resources are ready to render probes (shaders or textures may be during loading). - /// - /// True if is ready, otherwise false. - static bool HasReadyResources(); - - /// - /// Init probes content - /// - /// True if cannot init service - static bool Init(); - - /// - /// Release probes content - /// - static void Release(); + static Delegate OnFinishBake; public: /// @@ -78,15 +33,12 @@ public: /// /// Probe to bake /// Timeout in seconds left to bake it. - static void Bake(EnvironmentProbe* probe, float timeout = 0); + static void Bake(class EnvironmentProbe* probe, float timeout = 0); /// /// Register probe to baking service. /// /// Probe to bake /// Timeout in seconds left to bake it. 
- static void Bake(SkyLight* probe, float timeout = 0); - -private: - static void OnRender(RenderTask* task, GPUContext* context); + static void Bake(class SkyLight* probe, float timeout = 0); }; From 33e58c12cbea2acd04e4de45fcb74fed89cd2241 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 3 Jul 2025 11:43:56 +0200 Subject: [PATCH 076/211] Optimize `ProbesRenderer` to use time-slicing for cubemap faces rendering and filtering --- Source/Engine/Renderer/ProbesRenderer.cpp | 101 +++++++++++++--------- Source/Engine/Renderer/ProbesRenderer.h | 5 ++ 2 files changed, 66 insertions(+), 40 deletions(-) diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index ac19cd309..ae94385e2 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -106,6 +106,8 @@ private: Array _probesToBake; ProbeEntry _current; + int32 _workStep; + float _customCullingNear; AssetReference _shader; GPUPipelineState* _psFilterFace = nullptr; @@ -134,6 +136,7 @@ ProbesRendererService ProbesRendererServiceInstance; TimeSpan ProbesRenderer::UpdateDelay(0, 0, 0, 0, 100); TimeSpan ProbesRenderer::ReleaseTimeout(0, 0, 0, 30); +int32 ProbesRenderer::MaxWorkPerFrame = 1; Delegate ProbesRenderer::OnRegisterBake; Delegate ProbesRenderer::OnFinishBake; @@ -293,6 +296,7 @@ void ProbesRendererService::Update() // Clear flag _updateFrameNumber = 0; + _workStep = 0; _current.Type = ProbeEntry::Types::Invalid; } else if (_current.Type == ProbeEntry::Types::Invalid && timeSinceUpdate > ProbesRenderer::UpdateDelay) @@ -321,6 +325,7 @@ void ProbesRendererService::Update() _probesToBake.RemoveAtKeepOrder(firstValidEntryIndex); _task->Enabled = true; _updateFrameNumber = 0; + _workStep = 0; _lastProbeUpdate = timeNow; } // Check if need to release data @@ -408,72 +413,76 @@ void ProbesRendererService::OnRender(RenderTask* task, GPUContext* context) PROFILE_GPU("Render Probe"); // Init - float customCullingNear = -1; 
const int32 probeResolution = _current.GetResolution(); const PixelFormat probeFormat = _current.GetFormat(); - if (_current.Type == ProbeEntry::Types::EnvProbe) + if (_workStep == 0) { - auto envProbe = (EnvironmentProbe*)_current.Actor.Get(); - Vector3 position = envProbe->GetPosition(); - float radius = envProbe->GetScaledRadius(); - float nearPlane = Math::Max(0.1f, envProbe->CaptureNearPlane); + _customCullingNear = -1; + if (_current.Type == ProbeEntry::Types::EnvProbe) + { + auto envProbe = (EnvironmentProbe*)_current.Actor.Get(); + Vector3 position = envProbe->GetPosition(); + float radius = envProbe->GetScaledRadius(); + float nearPlane = Math::Max(0.1f, envProbe->CaptureNearPlane); - // Adjust far plane distance - float farPlane = Math::Max(radius, nearPlane + 100.0f); - farPlane *= farPlane < 10000 ? 10 : 4; - Function f(&FixFarPlane); - SceneQuery::TreeExecute(f, position, farPlane); + // Adjust far plane distance + float farPlane = Math::Max(radius, nearPlane + 100.0f); + farPlane *= farPlane < 10000 ? 
10 : 4; + Function f(&FixFarPlane); + SceneQuery::TreeExecute(f, position, farPlane); - // Setup view - LargeWorlds::UpdateOrigin(_task->View.Origin, position); - _task->View.SetUpCube(nearPlane, farPlane, position - _task->View.Origin); + // Setup view + LargeWorlds::UpdateOrigin(_task->View.Origin, position); + _task->View.SetUpCube(nearPlane, farPlane, position - _task->View.Origin); + } + else if (_current.Type == ProbeEntry::Types::SkyLight) + { + auto skyLight = (SkyLight*)_current.Actor.Get(); + Vector3 position = skyLight->GetPosition(); + float nearPlane = 10.0f; + float farPlane = Math::Max(nearPlane + 1000.0f, skyLight->SkyDistanceThreshold * 2.0f); + _customCullingNear = skyLight->SkyDistanceThreshold; + + // Setup view + LargeWorlds::UpdateOrigin(_task->View.Origin, position); + _task->View.SetUpCube(nearPlane, farPlane, position - _task->View.Origin); + } + + // Resize buffers + bool resizeFailed = _output->Resize(probeResolution, probeResolution, probeFormat); + resizeFailed |= _probe->Resize(probeResolution, probeResolution, probeFormat); + resizeFailed |= _tmpFace->Resize(probeResolution, probeResolution, probeFormat); + resizeFailed |= _task->Resize(probeResolution, probeResolution); + if (resizeFailed) + LOG(Error, "Failed to resize probe"); } - else if (_current.Type == ProbeEntry::Types::SkyLight) - { - auto skyLight = (SkyLight*)_current.Actor.Get(); - Vector3 position = skyLight->GetPosition(); - float nearPlane = 10.0f; - float farPlane = Math::Max(nearPlane + 1000.0f, skyLight->SkyDistanceThreshold * 2.0f); - customCullingNear = skyLight->SkyDistanceThreshold; - - // Setup view - LargeWorlds::UpdateOrigin(_task->View.Origin, position); - _task->View.SetUpCube(nearPlane, farPlane, position - _task->View.Origin); - } - _task->CameraCut(); - - // Resize buffers - bool resizeFailed = _output->Resize(probeResolution, probeResolution, probeFormat); - resizeFailed |= _probe->Resize(probeResolution, probeResolution, probeFormat); - resizeFailed |= 
_tmpFace->Resize(probeResolution, probeResolution, probeFormat); - resizeFailed |= _task->Resize(probeResolution, probeResolution); - if (resizeFailed) - LOG(Error, "Failed to resize probe"); // Disable actor during baking (it cannot influence own results) const bool isActorActive = _current.Actor->GetIsActive(); _current.Actor->SetIsActive(false); // Lower quality when rendering probes in-game to gain performance - _task->View.MaxShadowsQuality = Engine::IsPlayMode() ? Quality::Low : Quality::Ultra; + _task->View.MaxShadowsQuality = Engine::IsPlayMode() || probeResolution <= 128 ? Quality::Low : Quality::Ultra; // Render scene for all faces - for (int32 faceIndex = 0; faceIndex < 6; faceIndex++) + int32 workLeft = ProbesRenderer::MaxWorkPerFrame; + const int32 lastFace = Math::Min(_workStep + workLeft, 6); + for (int32 faceIndex = _workStep; faceIndex < lastFace; faceIndex++) { + _task->CameraCut(); _task->View.SetFace(faceIndex); // Handle custom frustum for the culling (used to skip objects near the camera) - if (customCullingNear > 0) + if (_customCullingNear > 0) { Matrix p; - Matrix::PerspectiveFov(PI_OVER_2, 1.0f, customCullingNear, _task->View.Far, p); + Matrix::PerspectiveFov(PI_OVER_2, 1.0f, _customCullingNear, _task->View.Far, p); _task->View.CullingFrustum.SetMatrix(_task->View.View, p); } // Render frame Renderer::Render(_task); context->ClearState(); - _task->CameraCut(); // Copy frame to cube face { @@ -483,12 +492,17 @@ void ProbesRendererService::OnRender(RenderTask* task, GPUContext* context) context->Draw(_output->View()); context->ResetRenderTarget(); } + + // Move to the next face + _workStep++; + workLeft--; } // Enable actor back _current.Actor->SetIsActive(isActorActive); // Filter all lower mip levels + if (workLeft > 0) { PROFILE_GPU("Filtering"); Data data; @@ -520,11 +534,18 @@ void ProbesRendererService::OnRender(RenderTask* task, GPUContext* context) context->Draw(_tmpFace->View(0, mipIndex)); } } + + // End + workLeft--; + 
_workStep++; } // Cleanup context->ClearState(); + if (_workStep < 7) + return; // Continue rendering next frame + // Mark as rendered _updateFrameNumber = Engine::FrameCount; _task->Enabled = false; diff --git a/Source/Engine/Renderer/ProbesRenderer.h b/Source/Engine/Renderer/ProbesRenderer.h index 0e2007a37..73d8b4132 100644 --- a/Source/Engine/Renderer/ProbesRenderer.h +++ b/Source/Engine/Renderer/ProbesRenderer.h @@ -23,6 +23,11 @@ public: /// static TimeSpan ReleaseTimeout; + /// + /// Maximum amount of cubemap faces or filtering passes that can be performed per-frame (in total). Set it to 7 to perform whole cubemap capture within a single frame, lower values spread the work across multiple frames. + /// + static int32 MaxWorkPerFrame; + static Delegate OnRegisterBake; static Delegate OnFinishBake; From a138c6b062bf1a0fbf3610add18a6f7f581c5ac7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 3 Jul 2025 11:45:12 +0200 Subject: [PATCH 077/211] Optimize environment probes filtering shader --- Content/Shaders/ProbesFilter.flax | 4 +- Source/Engine/Renderer/ProbesRenderer.cpp | 48 +++++++++++++++++------ Source/Shaders/ProbesFilter.shader | 8 ++-- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/Content/Shaders/ProbesFilter.flax b/Content/Shaders/ProbesFilter.flax index 0f853c5b4..679eac27b 100644 --- a/Content/Shaders/ProbesFilter.flax +++ b/Content/Shaders/ProbesFilter.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0249696b525cd59825ab3c0ce38bd612f93cf4be1f88fb49bfcecaac6e9ab34 -size 2022 +oid sha256:bbe90799accc93fabdc900df37bf762132037eeaff17f5731f379b6b3d017d2b +size 2033 diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index ae94385e2..9eb5f3937 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -125,11 +125,21 @@ public: } bool LazyInit(); + bool InitShader(); void Update() override; void Dispose() 
override; - void Bake(const ProbeEntry& e); + +private: void OnRender(RenderTask* task, GPUContext* context); +#if COMPILE_WITH_DEV_ENV + bool _initShader = false; + void OnShaderReloading(Asset* obj) + { + _initShader = true; + SAFE_DELETE_GPU_RESOURCE(_psFilterFace); + } +#endif }; ProbesRendererService ProbesRendererServiceInstance; @@ -206,19 +216,13 @@ bool ProbesRendererService::LazyInit() _initFailed = _shader == nullptr; if (_initFailed) return false; +#if COMPILE_WITH_DEV_ENV + _shader->OnReloading.Bind(this); +#endif } if (!_shader->IsLoaded()) return true; - const auto shader = _shader->GetShader(); - CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - - // Create pipeline stages - _psFilterFace = GPUDevice::Instance->CreatePipelineState(); - auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; - { - psDesc.PS = shader->GetPS("PS_FilterFace"); - _initFailed |= _psFilterFace->Init(psDesc); - } + _initFailed |= InitShader(); // Init rendering pipeline _output = GPUDevice::Instance->CreateTexture(TEXT("ProbesRenderer.Output")); @@ -262,6 +266,16 @@ bool ProbesRendererService::LazyInit() return false; } +bool ProbesRendererService::InitShader() +{ + const auto shader = _shader->GetShader(); + CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); + _psFilterFace = GPUDevice::Instance->CreatePipelineState(); + auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.PS = shader->GetPS("PS_FilterFace"); + return _psFilterFace->Init(psDesc); +} + void ProbesRendererService::Update() { PROFILE_MEM(Graphics); @@ -412,6 +426,18 @@ void ProbesRendererService::OnRender(RenderTask* task, GPUContext* context) auto shader = _shader->GetShader(); PROFILE_GPU("Render Probe"); +#if COMPILE_WITH_DEV_ENV + // handle shader hot-reload + if (_initShader) + { + if (_shader->WaitForLoaded()) + return; + _initShader = false; + if (InitShader()) + return; + } +#endif + // Init const int32 probeResolution = _current.GetResolution(); const 
PixelFormat probeFormat = _current.GetFormat(); diff --git a/Source/Shaders/ProbesFilter.shader b/Source/Shaders/ProbesFilter.shader index 437d484f5..c64a281f2 100644 --- a/Source/Shaders/ProbesFilter.shader +++ b/Source/Shaders/ProbesFilter.shader @@ -50,18 +50,16 @@ float4 PS_FilterFace(Quad_VS2PS input) : SV_Target float2 uv = input.TexCoord * 2 - 1; float3 cubeCoordinates = UvToCubeMapUv(uv); -#define NUM_FILTER_SAMPLES 512 - float3 N = normalize(cubeCoordinates); float roughness = ProbeRoughnessFromMip(SourceMipIndex); + const uint samplesCount = roughness > 0.1 ? 64 : 32; float4 filteredColor = 0; float weight = 0; - LOOP - for (int i = 0; i < NUM_FILTER_SAMPLES; i++) + for (int i = 0; i < samplesCount; i++) { - float2 E = Hammersley(i, NUM_FILTER_SAMPLES, 0); + float2 E = Hammersley(i, samplesCount, 0); float3 H = TangentToWorld(ImportanceSampleGGX(E, roughness).xyz, N); float3 L = 2 * dot(N, H) * H - N; float NoL = saturate(dot(N, L)); From bf345f13ce573cc8a7aaffe30aa8fae222d1e19a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 3 Jul 2025 13:54:22 +0200 Subject: [PATCH 078/211] Fix reflection probes capture seams on cube face edges due to volumetric fog #3252 --- Source/Engine/Graphics/RenderTask.cpp | 9 ++++++++- Source/Engine/Renderer/ProbesRenderer.cpp | 13 ++++++++++--- Source/Engine/Renderer/RenderSetup.h | 1 + Source/Engine/Renderer/Renderer.cpp | 1 + Source/Engine/Renderer/VolumetricFogPass.cpp | 5 ++--- 5 files changed, 22 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Graphics/RenderTask.cpp b/Source/Engine/Graphics/RenderTask.cpp index ecdcd572c..ac969ad2d 100644 --- a/Source/Engine/Graphics/RenderTask.cpp +++ b/Source/Engine/Graphics/RenderTask.cpp @@ -200,12 +200,19 @@ void SceneRenderTask::RemoveGlobalCustomPostFx(PostProcessEffect* fx) void SceneRenderTask::CollectPostFxVolumes(RenderContext& renderContext) { + PROFILE_CPU(); + // Cache WorldPosition used for PostFx volumes blending (RenderView caches it later on) 
renderContext.View.WorldPosition = renderContext.View.Origin + renderContext.View.Position; if (EnumHasAllFlags(ActorsSource, ActorsSources::Scenes)) { - Level::CollectPostFxVolumes(renderContext); + //ScopeLock lock(Level::ScenesLock); + for (Scene* scene : Level::Scenes) + { + if (scene->IsActiveInHierarchy()) + scene->Rendering.CollectPostFxVolumes(renderContext); + } } if (EnumHasAllFlags(ActorsSource, ActorsSources::CustomActors)) { diff --git a/Source/Engine/Renderer/ProbesRenderer.cpp b/Source/Engine/Renderer/ProbesRenderer.cpp index 9eb5f3937..eaf7a53ca 100644 --- a/Source/Engine/Renderer/ProbesRenderer.cpp +++ b/Source/Engine/Renderer/ProbesRenderer.cpp @@ -2,6 +2,7 @@ #include "ProbesRenderer.h" #include "Renderer.h" +#include "RenderList.h" #include "ReflectionsPass.h" #include "Engine/Core/Config/GraphicsSettings.h" #include "Engine/Engine/Time.h" @@ -17,7 +18,6 @@ #include "Engine/Content/Content.h" #include "Engine/Content/Assets/Shader.h" #include "Engine/Content/AssetReference.h" -#include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/PixelFormat.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/Textures/GPUTexture.h" @@ -115,7 +115,6 @@ private: GPUTexture* _output = nullptr; GPUTexture* _probe = nullptr; GPUTexture* _tmpFace = nullptr; - GPUTexture* _skySHIrradianceMap = nullptr; uint64 _updateFrameNumber = 0; public: @@ -132,6 +131,7 @@ public: private: void OnRender(RenderTask* task, GPUContext* context); + void OnSetupRender(RenderContext& renderContext); #if COMPILE_WITH_DEV_ENV bool _initShader = false; void OnShaderReloading(Asset* obj) @@ -234,6 +234,7 @@ bool ProbesRendererService::LazyInit() task->Order = -100; // Run before main view rendering (realtime probes will get smaller latency) task->Enabled = false; task->IsCustomRendering = true; + task->ActorsSource = ActorsSources::ScenesAndCustomActors; task->Output = _output; auto& view = task->View; view.Flags = @@ -254,6 +255,7 @@ bool 
ProbesRendererService::LazyInit() task->IsCameraCut = true; task->Resize(probeResolution, probeResolution); task->Render.Bind(this); + task->SetupRender.Bind(this); // Init render targets _probe = GPUDevice::Instance->CreateTexture(TEXT("ProbesRenderer.Probe")); @@ -362,7 +364,6 @@ void ProbesRendererService::Dispose() SAFE_DELETE_GPU_RESOURCE(_output); SAFE_DELETE_GPU_RESOURCE(_probe); SAFE_DELETE_GPU_RESOURCE(_tmpFace); - SAFE_DELETE_GPU_RESOURCE(_skySHIrradianceMap); SAFE_DELETE(_task); _shader = nullptr; _initDone = false; @@ -589,3 +590,9 @@ void ProbesRendererService::OnRender(RenderTask* task, GPUContext* context) _current.Type = ProbeEntry::Types::Invalid; } } + +void ProbesRendererService::OnSetupRender(RenderContext& renderContext) +{ + // Disable Volumetric Fog in reflection as it causes seams on cubemap face edges + renderContext.List->Setup.UseVolumetricFog = false; +} diff --git a/Source/Engine/Renderer/RenderSetup.h b/Source/Engine/Renderer/RenderSetup.h index 10377e023..3444f0838 100644 --- a/Source/Engine/Renderer/RenderSetup.h +++ b/Source/Engine/Renderer/RenderSetup.h @@ -14,4 +14,5 @@ struct FLAXENGINE_API RenderSetup bool UseTemporalAAJitter = false; bool UseGlobalSDF = false; bool UseGlobalSurfaceAtlas = false; + bool UseVolumetricFog = false; }; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 87d1d94f1..fd02b133f 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -379,6 +379,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont setup.UseGlobalSDF = (graphicsSettings->EnableGlobalSDF && EnumHasAnyFlags(view.Flags, ViewFlags::GlobalSDF)) || renderContext.View.Mode == ViewMode::GlobalSDF || setup.UseGlobalSurfaceAtlas; + setup.UseVolumetricFog = (view.Flags & ViewFlags::Fog) != ViewFlags::None; // Disable TAA jitter in debug modes switch (renderContext.View.Mode) diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp 
b/Source/Engine/Renderer/VolumetricFogPass.cpp index b7e57c2bb..6029b399d 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -99,7 +99,6 @@ float ComputeZSliceFromDepth(float sceneDepth, const VolumetricFogOptions& optio bool VolumetricFogPass::Init(RenderContext& renderContext, GPUContext* context, VolumetricFogOptions& options) { - auto& view = renderContext.View; const auto fog = renderContext.List->Fog; // Check if already prepared for this frame @@ -111,7 +110,7 @@ bool VolumetricFogPass::Init(RenderContext& renderContext, GPUContext* context, } // Check if skip rendering - if (fog == nullptr || (view.Flags & ViewFlags::Fog) == ViewFlags::None || !_isSupported || checkIfSkipPass()) + if (fog == nullptr || !renderContext.List->Setup.UseVolumetricFog || !_isSupported || checkIfSkipPass()) { RenderTargetPool::Release(renderContext.Buffers->VolumetricFog); renderContext.Buffers->VolumetricFog = nullptr; @@ -184,7 +183,7 @@ bool VolumetricFogPass::Init(RenderContext& renderContext, GPUContext* context, _cache.Data.PhaseG = options.ScatteringDistribution; _cache.Data.VolumetricFogMaxDistance = options.Distance; _cache.Data.MissedHistorySamplesCount = Math::Clamp(_cache.MissedHistorySamplesCount, 1, (int32)ARRAY_COUNT(_cache.Data.FrameJitterOffsets)); - Matrix::Transpose(view.PrevViewProjection, _cache.Data.PrevWorldToClip); + Matrix::Transpose(renderContext.View.PrevViewProjection, _cache.Data.PrevWorldToClip); _cache.Data.SkyLight.VolumetricScatteringIntensity = 0; // Fill frame jitter history From 48c6339ebbbcb699a457c7ba5a3b87b9c8df03ba Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 4 Jul 2025 12:21:25 +0200 Subject: [PATCH 079/211] Fix memory leak on material instances when updating layout of Text Render --- Source/Engine/UI/TextRender.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Source/Engine/UI/TextRender.cpp b/Source/Engine/UI/TextRender.cpp index 
951da2316..6b2988b7a 100644 --- a/Source/Engine/UI/TextRender.cpp +++ b/Source/Engine/UI/TextRender.cpp @@ -190,6 +190,10 @@ void TextRender::UpdateLayout() _buffersDirty = true; // Init draw chunks data + Array> materials; + materials.Resize(_drawChunks.Count()); + for (int32 i = 0; i < materials.Count(); i++) + materials[i] = _drawChunks[i].Material; DrawChunk drawChunk; drawChunk.Actor = this; drawChunk.StartIndex = 0; @@ -242,10 +246,12 @@ void TextRender::UpdateLayout() } // Setup material - drawChunk.Material = Content::CreateVirtualAsset(); + if (_drawChunks.Count() < materials.Count()) + drawChunk.Material = materials[_drawChunks.Count()]; + else + drawChunk.Material = Content::CreateVirtualAsset(); drawChunk.Material->SetBaseMaterial(Material.Get()); - for (auto& param : drawChunk.Material->Params) - param.SetIsOverride(false); + drawChunk.Material->ResetParameters(); // Set the font parameter static StringView FontParamName = TEXT("Font"); From a8eb4fc14081d41e43ab8df98300186e21eaa0d4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 7 Jul 2025 23:22:32 +0200 Subject: [PATCH 080/211] Add allocator tag support for `Dictionary` and `HashSet` --- Source/Engine/Core/Collections/Dictionary.h | 9 +++++++++ Source/Engine/Core/Collections/HashSet.h | 9 +++++++++ Source/Engine/Core/Collections/HashSetBase.h | 6 ++++++ Source/Engine/Core/Memory/ArenaAllocation.h | 2 +- 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Core/Collections/Dictionary.h b/Source/Engine/Core/Collections/Dictionary.h index 544a70ca0..bdab15bbe 100644 --- a/Source/Engine/Core/Collections/Dictionary.h +++ b/Source/Engine/Core/Collections/Dictionary.h @@ -163,6 +163,15 @@ public: { } + /// + /// Initializes an empty without reserving any space. + /// + /// The custom allocation tag. + Dictionary(typename Base::AllocationTag tag) + : Base(tag) + { + } + /// /// Initializes by reserving space. 
/// diff --git a/Source/Engine/Core/Collections/HashSet.h b/Source/Engine/Core/Collections/HashSet.h index ab2601525..032a407db 100644 --- a/Source/Engine/Core/Collections/HashSet.h +++ b/Source/Engine/Core/Collections/HashSet.h @@ -140,6 +140,15 @@ public: { } + /// + /// Initializes an empty without reserving any space. + /// + /// The custom allocation tag. + HashSet(typename Base::AllocationTag tag) + : Base(tag) + { + } + /// /// Initializes by reserving space. /// diff --git a/Source/Engine/Core/Collections/HashSetBase.h b/Source/Engine/Core/Collections/HashSetBase.h index 200e26b7b..3b487227a 100644 --- a/Source/Engine/Core/Collections/HashSetBase.h +++ b/Source/Engine/Core/Collections/HashSetBase.h @@ -59,6 +59,7 @@ class HashSetBase public: // Type of allocation data used to store hash set buckets. using AllocationData = typename AllocationType::template Data; + using AllocationTag = typename AllocationType::Tag; protected: int32 _elementsCount = 0; @@ -70,6 +71,11 @@ protected: { } + HashSetBase(AllocationTag tag) + : _allocation(tag) + { + } + void MoveToEmpty(HashSetBase&& other) { _elementsCount = other._elementsCount; diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index af8df2001..7de7e0994 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -138,7 +138,7 @@ public: FORCE_INLINE void Swap(Data& other) { ::Swap(_data, other._data); - ::Swap(_arena, other._arena); + _arena = other._arena; // TODO: find a better way to move allocation with AllocationUtils::MoveToEmpty to preserve/maintain allocation tag ownership } }; }; From 3abbf08f1f64457792bc8cf01f954d1c4c0655c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 8 Jul 2025 22:18:00 +0200 Subject: [PATCH 081/211] Optimize foliage rendering with concurrent arena allocator --- Source/Engine/Core/Memory/Allocation.cpp | 78 ++++++++++++++++++ Source/Engine/Core/Memory/ArenaAllocation.h | 
90 ++++++++++++++++++++- Source/Engine/Foliage/Foliage.cpp | 16 ++-- Source/Engine/Foliage/Foliage.h | 5 +- Source/Engine/Renderer/RenderList.cpp | 20 ++++- Source/Engine/Renderer/RenderList.h | 12 ++- 6 files changed, 205 insertions(+), 16 deletions(-) diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp index c55ab1dab..683c31c43 100644 --- a/Source/Engine/Core/Memory/Allocation.cpp +++ b/Source/Engine/Core/Memory/Allocation.cpp @@ -52,3 +52,81 @@ void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) return mem; } + +void ConcurrentArenaAllocator::Free() +{ + _locker.Lock(); + + // Free all pages + Page* page = (Page*)_first; + while (page) + { +#if COMPILE_WITH_PROFILER + ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, -(int64)page->Size, -1); +#endif + if (_free1) + _free1(page->Memory); + else + _free2(page->Memory, page->Size); + Page* next = page->Next; + if (_free1) + _free1(page); + else + _free2(page, sizeof(Page)); + page = next; + } + + // Unlink + _first = 0; + _totalBytes = 0; + + _locker.Unlock(); +} + +void* ConcurrentArenaAllocator::Allocate(uint64 size, uint64 alignment) +{ +RETRY: + + // Check if the current page has some space left + Page* page = (Page*)Platform::AtomicRead(&_first); + if (page) + { + int64 offset = Platform::AtomicRead(&page->Offset); + int64 offsetAligned = Math::AlignUp(offset, (int64)alignment); + int64 end = offsetAligned + size; + if (end <= page->Size) + { + // Try to allocate within a page + if (Platform::InterlockedCompareExchange(&page->Offset, end, offset) != offset) + { + // Someone else changed allocated so retry (new offset might mismatch alignment) + goto RETRY; + } + Platform::InterlockedAdd(&_totalBytes, (int64)size); + return (byte*)page->Memory + offsetAligned; + } + } + + // Page allocation is thread-synced + _locker.Lock(); + + // Check if page was unchanged by any other thread + if ((Page*)Platform::AtomicRead(&_first) == page) + { + 
uint64 pageSize = Math::Max(_pageSize, size); +#if COMPILE_WITH_PROFILER + ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, (int64)pageSize, 1); +#endif + page = (Page*)(_allocate1 ? _allocate1(sizeof(Page), alignof(Page)) : _allocate2(sizeof(Page))); + page->Memory = _allocate1 ? _allocate1(pageSize, 16) : _allocate2(pageSize); + page->Next = (Page*)_first; + page->Offset = 0; + page->Size = (int64)pageSize; + Platform::AtomicStore(&_first, (intptr)page); + } + + _locker.Unlock(); + + // Use a single code path for allocation + goto RETRY; +} diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index 7de7e0994..eaffcff95 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -3,6 +3,7 @@ #pragma once #include "Allocation.h" +#include "Engine/Platform/CriticalSection.h" /// /// Allocator that uses pages for stack-based allocs without freeing memory during it's lifetime. @@ -66,21 +67,94 @@ public: } }; +/// +/// Allocator that uses pages for stack-based allocs without freeing memory during its lifetime. Thread-safe to allocate memory from multiple threads at once. 
+/// +class ConcurrentArenaAllocator +{ +private: + struct Page + { + void* Memory; + Page* Next; + volatile int64 Offset; + int64 Size; + }; + + int32 _pageSize; + volatile int64 _first = 0; +#if !BUILD_RELEASE + volatile int64 _totalBytes = 0; +#endif + void*(*_allocate1)(uint64 size, uint64 alignment) = nullptr; + void(*_free1)(void* ptr) = nullptr; + void*(*_allocate2)(uint64 size) = nullptr; + void(*_free2)(void* ptr, uint64 size) = nullptr; + CriticalSection _locker; + +public: + ConcurrentArenaAllocator(int32 pageSizeBytes, void* (*customAllocate)(uint64 size, uint64 alignment), void(*customFree)(void* ptr)) + : _pageSize(pageSizeBytes) + , _allocate1(customAllocate) + , _free1(customFree) + { + } + + ConcurrentArenaAllocator(int32 pageSizeBytes, void* (*customAllocate)(uint64 size), void(*customFree)(void* ptr, uint64 size)) + : _pageSize(pageSizeBytes) + , _allocate2(customAllocate) + , _free2(customFree) + { + } + + ConcurrentArenaAllocator(int32 pageSizeBytes = 1024 * 1024) // 1 MB by default + : ConcurrentArenaAllocator(pageSizeBytes, Allocator::Allocate, Allocator::Free) + { + } + + ~ConcurrentArenaAllocator() + { + Free(); + } + + // Gets the total amount of bytes allocated in arena (excluding alignment). + int64 GetTotalBytes() const + { + return Platform::AtomicRead(&_totalBytes); + } + + // Allocates a chunk of uninitialized memory. + void* Allocate(uint64 size, uint64 alignment = 1); + + // Frees all memory allocations within allocator. + void Free(); + + // Creates a new object within the arena allocator. + template + inline T* New(Args&&...args) + { + T* ptr = (T*)Allocate(sizeof(T)); + new(ptr) T(Forward(args)...); + return ptr; + } +}; + /// /// The memory allocation policy that uses a part of shared page allocator. Allocations are performed in stack-manner, and free is no-op. 
/// -class ArenaAllocation +template +class ArenaAllocationBase { public: enum { HasSwap = true }; - typedef ArenaAllocator* Tag; + typedef ArenaType* Tag; template class Data { private: T* _data = nullptr; - ArenaAllocator* _arena = nullptr; + ArenaType* _arena = nullptr; public: FORCE_INLINE Data() @@ -142,3 +216,13 @@ public: } }; }; + +/// +/// The memory allocation policy that uses a part of shared page allocator. Allocations are performed in stack-manner, and free is no-op. +/// +typedef ArenaAllocationBase ArenaAllocation; + +/// +/// The memory allocation policy that uses a part of shared page allocator. Allocations are performed in stack-manner, and free is no-op. +/// +typedef ArenaAllocationBase ConcurrentArenaAllocation; diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp index 4fc576ff8..f51f4ece0 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -103,17 +103,17 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan for (int32 meshIndex = 0; meshIndex < meshes.Count(); meshIndex++) { auto& drawCall = drawCallsLists[lod][meshIndex]; - if (!drawCall.DrawCall.Material) + if (!drawCall.Material) continue; DrawKey key; - key.Mat = drawCall.DrawCall.Material; + key.Mat = drawCall.Material; key.Geo = &meshes.Get()[meshIndex]; key.Lightmap = instance.Lightmap.TextureIndex; auto* e = result.TryGet(key); if (!e) { - e = &result[key]; + e = &result.Add(key, BatchedDrawCall(renderContext.List))->Value; ASSERT_LOW_LAYER(key.Mat); e->DrawCall.Material = key.Mat; e->DrawCall.Surface.Lightmap = EnumHasAnyFlags(_staticFlags, StaticFlags::Lightmap) && _scene ? 
_scene->LightmapsData.GetReadyLightmap(key.Lightmap) : nullptr; @@ -127,7 +127,7 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan const Float3 translation = transform.Translation - renderContext.View.Origin; Matrix::Transformation(transform.Scale, transform.Orientation, translation, world); constexpr float worldDeterminantSign = 1.0f; - instanceData.Store(world, world, instance.Lightmap.UVsArea, drawCall.DrawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, lodDitherFactor); + instanceData.Store(world, world, instance.Lightmap.UVsArea, drawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, lodDitherFactor); } } @@ -430,7 +430,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr { const auto& mesh = meshes.Get()[meshIndex]; auto& drawCall = drawCallsList.Get()[meshIndex]; - drawCall.DrawCall.Material = nullptr; + drawCall.Material = nullptr; // DrawInstance skips draw calls from meshes with unset material // Check entry visibility const auto& entry = type.Entries[mesh.GetMaterialSlotIndex()]; @@ -455,13 +455,13 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr if (drawModes == DrawPass::None) continue; - drawCall.DrawCall.Material = material; - drawCall.DrawCall.Surface.GeometrySize = mesh.GetBox().GetSize(); + drawCall.Material = material; + drawCall.Surface.GeometrySize = mesh.GetBox().GetSize(); } } // Draw instances of the foliage type - BatchedDrawCalls result; + BatchedDrawCalls result(&renderContext.List->Memory); DrawCluster(renderContext, type.Root, type, drawCallsLists, result); // Submit draw calls with valid instances added diff --git a/Source/Engine/Foliage/Foliage.h b/Source/Engine/Foliage/Foliage.h index 1855e9914..6f8b36cf4 100644 --- a/Source/Engine/Foliage/Foliage.h +++ b/Source/Engine/Foliage/Foliage.h @@ -6,6 +6,7 @@ #include "FoliageInstance.h" #include "FoliageCluster.h" #include "FoliageType.h" +#include 
"Engine/Core/Memory/ArenaAllocation.h" #include "Engine/Level/Actor.h" /// @@ -178,8 +179,8 @@ private: } }; - typedef Array> DrawCallsList; - typedef Dictionary BatchedDrawCalls; + typedef Array> DrawCallsList; + typedef Dictionary BatchedDrawCalls; void DrawInstance(RenderContext& renderContext, FoliageInstance& instance, const FoliageType& type, Model* model, int32 lod, float lodDitherFactor, DrawCallsList* drawCallsLists, BatchedDrawCalls& result) const; void DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, const FoliageType& type, DrawCallsList* drawCallsLists, BatchedDrawCalls& result) const; #else diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 2a6540da5..0dedfda38 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -169,6 +169,7 @@ void RenderEnvironmentProbeData::SetShaderData(ShaderEnvProbeData& data) const void* RendererAllocation::Allocate(uintptr size) { + PROFILE_CPU(); void* result = nullptr; MemPoolLocker.Lock(); for (int32 i = 0; i < MemPool.Count(); i++) @@ -188,6 +189,7 @@ void* RendererAllocation::Allocate(uintptr size) void RendererAllocation::Free(void* ptr, uintptr size) { + PROFILE_CPU(); MemPoolLocker.Lock(); MemPool.Add({ ptr, size }); MemPoolLocker.Unlock(); @@ -418,6 +420,18 @@ bool RenderList::HasAnyPostFx(const RenderContext& renderContext, MaterialPostFx return false; } +BatchedDrawCall::BatchedDrawCall(RenderList* list) + : Instances(&list->Memory) +{ +} + +BatchedDrawCall::BatchedDrawCall(BatchedDrawCall&& other) noexcept + : DrawCall(other.DrawCall) + , ObjectsStartIndex(other.ObjectsStartIndex) + , Instances(MoveTemp(other.Instances)) +{ +} + void DrawCallsList::Clear() { Indices.Clear(); @@ -433,6 +447,7 @@ bool DrawCallsList::IsEmpty() const RenderList::RenderList(const SpawnParams& params) : ScriptingObject(params) + , Memory(4 * 1024 * 1024, RendererAllocation::Allocate, RendererAllocation::Free) // 4MB pages, use 
page pooling via RendererAllocation , DirectionalLights(4) , PointLights(32) , SpotLights(32) @@ -443,8 +458,8 @@ RenderList::RenderList(const SpawnParams& params) , AtmosphericFog(nullptr) , Fog(nullptr) , Blendable(32) - , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Bufffer")) - , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Bufffer")) + , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) + , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } })) { } @@ -480,6 +495,7 @@ void RenderList::Clear() _instanceBuffer.Clear(); ObjectBuffer.Clear(); TempObjectBuffer.Clear(); + Memory.Free(); } struct PackedSortKey diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index d5288e6ee..f17e1b045 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Core/Collections/Array.h" +#include "Engine/Core/Memory/ArenaAllocation.h" #include "Engine/Core/Math/Half.h" #include "Engine/Graphics/PostProcessSettings.h" #include "Engine/Graphics/DynamicBuffer.h" @@ -241,7 +242,11 @@ struct BatchedDrawCall { DrawCall DrawCall; uint16 ObjectsStartIndex = 0; // Index of the instances start in the ObjectsBuffer (set internally). - Array Instances; + Array Instances; + + BatchedDrawCall() { CRASH; } // Don't use it + BatchedDrawCall(RenderList* list); + BatchedDrawCall(BatchedDrawCall&& other) noexcept; }; /// @@ -298,6 +303,11 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject static void CleanupCache(); public: + /// + /// Memory storage with all draw-related data that lives during a single frame rendering time. Thread-safe to allocate memory during rendering jobs. 
+ /// + ConcurrentArenaAllocator Memory; + /// /// All scenes for rendering. /// From bdaf31b54f896a457d452309ff30d6870ade6ad3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 9 Jul 2025 00:22:35 +0200 Subject: [PATCH 082/211] Optimize Arena Allocator to store page metadata within itself to save on allocs --- Source/Engine/Core/Memory/Allocation.cpp | 25 ++++++++------------- Source/Engine/Core/Memory/ArenaAllocation.h | 2 -- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp index 683c31c43..239228b67 100644 --- a/Source/Engine/Core/Memory/Allocation.cpp +++ b/Source/Engine/Core/Memory/Allocation.cpp @@ -13,7 +13,6 @@ void ArenaAllocator::Free() #if COMPILE_WITH_PROFILER ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, -(int64)page->Size, -1); #endif - Allocator::Free(page->Memory); Page* next = page->Next; Allocator::Free(page); page = next; @@ -33,21 +32,20 @@ void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) // Create a new page if need to if (!page) { - uint64 pageSize = Math::Max(_pageSize, size); + uint64 pageSize = Math::Max(_pageSize, size + alignment + sizeof(Page)); #if COMPILE_WITH_PROFILER ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, (int64)pageSize, 1); #endif - page = (Page*)Allocator::Allocate(sizeof(Page)); - page->Memory = Allocator::Allocate(pageSize); + page = (Page*)Allocator::Allocate(pageSize); page->Next = _first; - page->Offset = 0; + page->Offset = sizeof(Page); page->Size = (uint32)pageSize; _first = page; } // Allocate within a page page->Offset = Math::AlignUp(page->Offset, (uint32)alignment); - void* mem = (byte*)page->Memory + page->Offset; + void* mem = (byte*)page + page->Offset; page->Offset += (uint32)size; return mem; @@ -64,15 +62,11 @@ void ConcurrentArenaAllocator::Free() #if COMPILE_WITH_PROFILER ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, 
-(int64)page->Size, -1); #endif - if (_free1) - _free1(page->Memory); - else - _free2(page->Memory, page->Size); Page* next = page->Next; if (_free1) _free1(page); else - _free2(page, sizeof(Page)); + _free2(page, page->Size); page = next; } @@ -103,7 +97,7 @@ RETRY: goto RETRY; } Platform::InterlockedAdd(&_totalBytes, (int64)size); - return (byte*)page->Memory + offsetAligned; + return (byte*)page + offsetAligned; } } @@ -113,14 +107,13 @@ RETRY: // Check if page was unchanged by any other thread if ((Page*)Platform::AtomicRead(&_first) == page) { - uint64 pageSize = Math::Max(_pageSize, size); + uint64 pageSize = Math::Max(_pageSize, size + alignment + sizeof(Page)); #if COMPILE_WITH_PROFILER ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, (int64)pageSize, 1); #endif - page = (Page*)(_allocate1 ? _allocate1(sizeof(Page), alignof(Page)) : _allocate2(sizeof(Page))); - page->Memory = _allocate1 ? _allocate1(pageSize, 16) : _allocate2(pageSize); + page = (Page*)(_allocate1 ? 
_allocate1(pageSize, 16) : _allocate2(pageSize)); page->Next = (Page*)_first; - page->Offset = 0; + page->Offset = sizeof(Page); page->Size = (int64)pageSize; Platform::AtomicStore(&_first, (intptr)page); } diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index eaffcff95..2288e4560 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -13,7 +13,6 @@ class ArenaAllocator private: struct Page { - void* Memory; Page* Next; uint32 Offset, Size; }; @@ -75,7 +74,6 @@ class ConcurrentArenaAllocator private: struct Page { - void* Memory; Page* Next; volatile int64 Offset; int64 Size; From c27a9808c4ee91cd8db10f821b68f9c61dff2e73 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 9 Jul 2025 10:02:20 +0200 Subject: [PATCH 083/211] Fix unwanted code --- Source/Engine/Core/Memory/ArenaAllocation.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index 2288e4560..bfb5dbfe6 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -81,9 +81,7 @@ private: int32 _pageSize; volatile int64 _first = 0; -#if !BUILD_RELEASE volatile int64 _totalBytes = 0; -#endif void*(*_allocate1)(uint64 size, uint64 alignment) = nullptr; void(*_free1)(void* ptr) = nullptr; void*(*_allocate2)(uint64 size) = nullptr; From a1e4ed05c42fc986f04233b89497ab90a3885862 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 14 Jul 2025 18:14:09 +0200 Subject: [PATCH 084/211] Don't force load asset on asset clone if it's unused --- Source/Engine/Content/Content.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Content/Content.cpp b/Source/Engine/Content/Content.cpp index 2178579cc..48e7b6c1f 100644 --- a/Source/Engine/Content/Content.cpp +++ b/Source/Engine/Content/Content.cpp @@ -1003,7 +1003,7 @@ bool 
Content::CloneAssetFile(const StringView& dstPath, const StringView& srcPat FileSystem::DeleteFile(tmpPath); // Reload storage - if (auto storage = ContentStorageManager::GetStorage(dstPath)) + if (auto storage = ContentStorageManager::GetStorage(dstPath, false)) { storage->Reload(); } From 8ed2d6da5695b3046e2e19758912b31a1f1d72e4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 14 Jul 2025 20:26:24 +0200 Subject: [PATCH 085/211] Optimize Debug Draw performance of splines to use frustum culling --- .../Viewport/MainEditorGizmoViewport.cs | 9 +++++++- .../Editor/Viewport/PrefabWindowViewport.cs | 7 +++++- .../Editor/Viewport/Previews/AssetPreview.cs | 1 + Source/Engine/Debug/DebugDraw.cpp | 22 ++++++++++++++----- Source/Engine/Debug/DebugDraw.h | 5 +++++ Source/Engine/Level/Actors/Spline.cpp | 4 +++- 6 files changed, 40 insertions(+), 8 deletions(-) diff --git a/Source/Editor/Viewport/MainEditorGizmoViewport.cs b/Source/Editor/Viewport/MainEditorGizmoViewport.cs index c14b6dfd8..5343a1fe5 100644 --- a/Source/Editor/Viewport/MainEditorGizmoViewport.cs +++ b/Source/Editor/Viewport/MainEditorGizmoViewport.cs @@ -340,6 +340,13 @@ namespace FlaxEditor.Viewport { _debugDrawData.Clear(); + if (task is SceneRenderTask sceneRenderTask) + { + // Sync debug view to avoid lag on culling/LODing + var view = sceneRenderTask.View; + DebugDraw.SetView(ref view); + } + // Collect selected objects debug shapes and visuals var selectedParents = TransformGizmo.SelectedParents; if (selectedParents.Count > 0) @@ -374,7 +381,7 @@ namespace FlaxEditor.Viewport // Draw selected objects debug shapes and visuals if (DrawDebugDraw && (renderContext.View.Flags & ViewFlags.DebugDraw) == ViewFlags.DebugDraw) { - _debugDrawData.DrawActors(); + _debugDrawData.DrawActors(true); DebugDraw.Draw(ref renderContext, target.View(), targetDepth.View(), true); } } diff --git a/Source/Editor/Viewport/PrefabWindowViewport.cs b/Source/Editor/Viewport/PrefabWindowViewport.cs index 8b508eedf..3968f0379 
100644 --- a/Source/Editor/Viewport/PrefabWindowViewport.cs +++ b/Source/Editor/Viewport/PrefabWindowViewport.cs @@ -243,7 +243,12 @@ namespace FlaxEditor.Viewport _tempDebugDrawContext = DebugDraw.AllocateContext(); DebugDraw.SetContext(_tempDebugDrawContext); DebugDraw.UpdateContext(_tempDebugDrawContext, 1.0f); - + if (task is SceneRenderTask sceneRenderTask) + { + // Sync debug view to avoid lag on culling/LODing + var view = sceneRenderTask.View; + DebugDraw.SetView(ref view); + } for (int i = 0; i < selectedParents.Count; i++) { if (selectedParents[i].IsActiveInHierarchy) diff --git a/Source/Editor/Viewport/Previews/AssetPreview.cs b/Source/Editor/Viewport/Previews/AssetPreview.cs index 84f60f96a..3583a9ff0 100644 --- a/Source/Editor/Viewport/Previews/AssetPreview.cs +++ b/Source/Editor/Viewport/Previews/AssetPreview.cs @@ -264,6 +264,7 @@ namespace FlaxEditor.Viewport.Previews { DebugDraw.SetContext(_debugDrawContext); DebugDraw.UpdateContext(_debugDrawContext, 1.0f / Mathf.Max(Engine.FramesPerSecond, 1)); + DebugDraw.SetView(ref renderContext.View); CustomDebugDraw?.Invoke(context, ref renderContext); OnDebugDraw(context, ref renderContext); DebugDraw.Draw(ref renderContext, target.View(), targetDepth.View(), true); diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp index 752e8bf24..3e5601532 100644 --- a/Source/Engine/Debug/DebugDraw.cpp +++ b/Source/Engine/Debug/DebugDraw.cpp @@ -357,7 +357,8 @@ struct DebugDrawContext DebugDrawData DebugDrawDefault; DebugDrawData DebugDrawDepthTest; Float3 LastViewPos = Float3::Zero; - Matrix LastViewProj = Matrix::Identity; + Matrix LastViewProjection = Matrix::Identity; + BoundingFrustum LastViewFrustum; inline int32 Count() const { @@ -779,9 +780,23 @@ Vector3 DebugDraw::GetViewPos() return Context->LastViewPos; } +BoundingFrustum DebugDraw::GetViewFrustum() +{ + return Context->LastViewFrustum; +} + +void DebugDraw::SetView(const RenderView& view) +{ + Context->LastViewPos = 
view.Position; + Context->LastViewProjection = view.Projection; + Context->LastViewFrustum = view.Frustum; +} + void DebugDraw::Draw(RenderContext& renderContext, GPUTextureView* target, GPUTextureView* depthBuffer, bool enableDepthTest) { PROFILE_GPU_CPU("Debug Draw"); + const RenderView& view = renderContext.View; + SetView(view); // Ensure to have shader loaded and any lines to render const int32 debugDrawDepthTestCount = Context->DebugDrawDepthTest.Count(); @@ -791,7 +806,6 @@ void DebugDraw::Draw(RenderContext& renderContext, GPUTextureView* target, GPUTe if (renderContext.Buffers == nullptr || !DebugDrawVB) return; auto context = GPUDevice::Instance->GetMainContext(); - const RenderView& view = renderContext.View; if (Context->Origin != view.Origin) { // Teleport existing debug shapes to maintain their location @@ -800,8 +814,6 @@ void DebugDraw::Draw(RenderContext& renderContext, GPUTextureView* target, GPUTe Context->DebugDrawDepthTest.Teleport(delta); Context->Origin = view.Origin; } - Context->LastViewPos = view.Position; - Context->LastViewProj = view.Projection; TaaJitterRemoveContext taaJitterRemove(view); // Fallback to task buffers @@ -1383,7 +1395,7 @@ void DebugDraw::DrawWireSphere(const BoundingSphere& sphere, const Color& color, int32 index; const Float3 centerF = sphere.Center - Context->Origin; const float radiusF = (float)sphere.Radius; - const float screenRadiusSquared = RenderTools::ComputeBoundsScreenRadiusSquared(centerF, radiusF, Context->LastViewPos, Context->LastViewProj); + const float screenRadiusSquared = RenderTools::ComputeBoundsScreenRadiusSquared(centerF, radiusF, Context->LastViewPos, Context->LastViewProjection); if (screenRadiusSquared > DEBUG_DRAW_SPHERE_LOD0_SCREEN_SIZE * DEBUG_DRAW_SPHERE_LOD0_SCREEN_SIZE * 0.25f) index = 0; else if (screenRadiusSquared > DEBUG_DRAW_SPHERE_LOD1_SCREEN_SIZE * DEBUG_DRAW_SPHERE_LOD1_SCREEN_SIZE * 0.25f) diff --git a/Source/Engine/Debug/DebugDraw.h b/Source/Engine/Debug/DebugDraw.h index 
3b51c0e13..3f4ff8aff 100644 --- a/Source/Engine/Debug/DebugDraw.h +++ b/Source/Engine/Debug/DebugDraw.h @@ -76,6 +76,11 @@ API_CLASS(Static) class FLAXENGINE_API DebugDraw // Gets the last view position when rendering the current context. Can be used for custom culling or LODing when drawing more complex shapes. static Vector3 GetViewPos(); + // Gets the last view frustum when rendering the current context. Can be used for custom culling or LODing when drawing more complex shapes. + static BoundingFrustum GetViewFrustum(); + + // Sets the rendering view information beforehand. + API_FUNCTION() static void SetView(API_PARAM(ref) const RenderView& view); /// /// Draws the collected debug shapes to the output. diff --git a/Source/Engine/Level/Actors/Spline.cpp b/Source/Engine/Level/Actors/Spline.cpp index eb3df0771..8b690100c 100644 --- a/Source/Engine/Level/Actors/Spline.cpp +++ b/Source/Engine/Level/Actors/Spline.cpp @@ -3,6 +3,7 @@ #include "Spline.h" #include "Engine/Serialization/Serialization.h" #include "Engine/Animations/CurveSerialization.h" +#include "Engine/Core/Math/BoundingFrustum.h" #include "Engine/Core/Math/Matrix.h" #include "Engine/Scripting/ManagedCLR/MCore.h" @@ -520,7 +521,8 @@ namespace void Spline::OnDebugDraw() { - DrawSpline(this, GetSplineColor().AlphaMultiplied(0.7f), _transform, true); + if (DebugDraw::GetViewFrustum().Intersects(_sphere)) + DrawSpline(this, GetSplineColor().AlphaMultiplied(0.7f), _transform, true); // Base Actor::OnDebugDraw(); From a22b33d3bbc722cb4dca38bbc9882156077da45a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 14 Jul 2025 20:26:41 +0200 Subject: [PATCH 086/211] Fix missing sphere bounds update for splines --- Source/Engine/Level/Actors/Spline.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Level/Actors/Spline.cpp b/Source/Engine/Level/Actors/Spline.cpp index 8b690100c..7be63fd99 100644 --- a/Source/Engine/Level/Actors/Spline.cpp +++ b/Source/Engine/Level/Actors/Spline.cpp @@ -465,6 
+465,7 @@ void Spline::UpdateSpline() Matrix world; GetLocalToWorldMatrix(world); BoundingBox::Transform(_localBounds, world, _box); + BoundingSphere::FromBox(_box, _sphere); SplineUpdated(); } @@ -564,6 +565,7 @@ void Spline::Initialize() Matrix world; GetLocalToWorldMatrix(world); BoundingBox::Transform(_localBounds, world, _box); + BoundingSphere::FromBox(_box, _sphere); } void Spline::Serialize(SerializeStream& stream, const void* otherObj) From 20f1e67700dd884be28ecdd25ff4f750ebd81663 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 14 Jul 2025 21:09:28 +0200 Subject: [PATCH 087/211] Optimize Spline debug rendering far from camera --- Source/Engine/Level/Actors/Spline.cpp | 36 ++++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/Source/Engine/Level/Actors/Spline.cpp b/Source/Engine/Level/Actors/Spline.cpp index 7be63fd99..25bdc4ce9 100644 --- a/Source/Engine/Level/Actors/Spline.cpp +++ b/Source/Engine/Level/Actors/Spline.cpp @@ -6,6 +6,7 @@ #include "Engine/Core/Math/BoundingFrustum.h" #include "Engine/Core/Math/Matrix.h" #include "Engine/Scripting/ManagedCLR/MCore.h" +#include "Engine/Engine/Units.h" Spline::Spline(const SpawnParams& params) : Actor(params) @@ -506,16 +507,33 @@ namespace return; Spline::Keyframe* prev = spline->Curve.GetKeyframes().Get(); Vector3 prevPos = transform.LocalToWorld(prev->Value.Translation); - DEBUG_DRAW_WIRE_SPHERE(BoundingSphere(prevPos, NodeSizeByDistance(prevPos, scaleByDistance)), color, 0.0f, depthTest); - for (int32 i = 1; i < count; i++) + float distance = Vector3::Distance(prevPos, DebugDraw::GetViewPos()); + if (distance < METERS_TO_UNITS(800)) // 800m { - Spline::Keyframe* next = prev + 1; - Vector3 nextPos = transform.LocalToWorld(next->Value.Translation); - DEBUG_DRAW_WIRE_SPHERE(BoundingSphere(nextPos, NodeSizeByDistance(nextPos, scaleByDistance)), color, 0.0f, depthTest); - const float d = (next->Time - prev->Time) / 3.0f; - DEBUG_DRAW_BEZIER(prevPos, prevPos + 
prev->TangentOut.Translation * d, nextPos + next->TangentIn.Translation * d, nextPos, color, 0.0f, depthTest); - prev = next; - prevPos = nextPos; + // Bezier curve + DEBUG_DRAW_WIRE_SPHERE(BoundingSphere(prevPos, NodeSizeByDistance(prevPos, scaleByDistance)), color, 0.0f, depthTest); + for (int32 i = 1; i < count; i++) + { + Spline::Keyframe* next = prev + 1; + Vector3 nextPos = transform.LocalToWorld(next->Value.Translation); + DEBUG_DRAW_WIRE_SPHERE(BoundingSphere(nextPos, NodeSizeByDistance(nextPos, scaleByDistance)), color, 0.0f, depthTest); + const float d = (next->Time - prev->Time) / 3.0f; + DEBUG_DRAW_BEZIER(prevPos, prevPos + prev->TangentOut.Translation * d, nextPos + next->TangentIn.Translation * d, nextPos, color, 0.0f, depthTest); + prev = next; + prevPos = nextPos; + } + } + else + { + // Simplified + for (int32 i = 1; i < count; i++) + { + Spline::Keyframe* next = prev + 1; + Vector3 nextPos = transform.LocalToWorld(next->Value.Translation); + DEBUG_DRAW_LINE(prevPos, nextPos, color, 0.0f, depthTest); + prev = next; + prevPos = nextPos; + } } } } From ab8612a914398aa9e4017b3bbe88a455471ae10e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 14 Jul 2025 22:24:27 +0200 Subject: [PATCH 088/211] Add profiler events to editor gizmo --- Source/Editor/Gizmo/TransformGizmo.cs | 3 +++ Source/Editor/SceneGraph/RootNode.cs | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Source/Editor/Gizmo/TransformGizmo.cs b/Source/Editor/Gizmo/TransformGizmo.cs index 8294a4302..91e37ca25 100644 --- a/Source/Editor/Gizmo/TransformGizmo.cs +++ b/Source/Editor/Gizmo/TransformGizmo.cs @@ -155,6 +155,7 @@ namespace FlaxEditor.Gizmo // Ensure player is not moving objects if (ActiveAxis != Axis.None) return; + Profiler.BeginEvent("Pick"); // Get mouse ray and try to hit any object var ray = Owner.MouseRay; @@ -243,6 +244,8 @@ namespace FlaxEditor.Gizmo { sceneEditing.Deselect(); } + + Profiler.EndEvent(); } /// diff --git 
a/Source/Editor/SceneGraph/RootNode.cs b/Source/Editor/SceneGraph/RootNode.cs index 1a3e47be8..30e469657 100644 --- a/Source/Editor/SceneGraph/RootNode.cs +++ b/Source/Editor/SceneGraph/RootNode.cs @@ -97,13 +97,16 @@ namespace FlaxEditor.SceneGraph /// Hit object or null if there is no intersection at all. public SceneGraphNode RayCast(ref Ray ray, ref Ray view, out Real distance, RayCastData.FlagTypes flags = RayCastData.FlagTypes.None) { + Profiler.BeginEvent("RayCastScene"); var data = new RayCastData { Ray = ray, View = view, Flags = flags }; - return RayCast(ref data, out distance, out _); + var result = RayCast(ref data, out distance, out _); + Profiler.EndEvent(); + return result; } /// @@ -117,13 +120,16 @@ namespace FlaxEditor.SceneGraph /// Hit object or null if there is no intersection at all. public SceneGraphNode RayCast(ref Ray ray, ref Ray view, out Real distance, out Vector3 normal, RayCastData.FlagTypes flags = RayCastData.FlagTypes.None) { + Profiler.BeginEvent("RayCastScene"); var data = new RayCastData { Ray = ray, View = view, Flags = flags }; - return RayCast(ref data, out distance, out normal); + var result = RayCast(ref data, out distance, out normal); + Profiler.EndEvent(); + return result; } internal static Quaternion RaycastNormalRotation(ref Vector3 normal) From c0cce748cc094f8d7a32b744f8d65ab351e2bb86 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 00:12:31 +0200 Subject: [PATCH 089/211] Optimize `Array::RemoveAtKeepOrder` --- Source/Engine/Core/Collections/Array.h | 8 +--- Source/Engine/Core/Memory/Memory.h | 63 ++++++++------------------ 2 files changed, 19 insertions(+), 52 deletions(-) diff --git a/Source/Engine/Core/Collections/Array.h b/Source/Engine/Core/Collections/Array.h index 7cb4a3248..4f660d2a9 100644 --- a/Source/Engine/Core/Collections/Array.h +++ b/Source/Engine/Core/Collections/Array.h @@ -658,13 +658,7 @@ public: --_count; T* data = _allocation.Get(); if (index < _count) - { - T* dst = data + index; 
- T* src = data + (index + 1); - const int32 count = _count - index; - for (int32 i = 0; i < count; ++i) - dst[i] = MoveTemp(src[i]); - } + Memory::MoveAssignItems(data + index, data + (index + 1), _count - index); Memory::DestructItems(data + _count, 1); } diff --git a/Source/Engine/Core/Memory/Memory.h b/Source/Engine/Core/Memory/Memory.h index 329ee372d..772c9fb48 100644 --- a/Source/Engine/Core/Memory/Memory.h +++ b/Source/Engine/Core/Memory/Memory.h @@ -104,12 +104,6 @@ public: { new(dst) T(); } - - /// - /// Constructs the item in the memory. - /// - /// The optimized version is noop. - /// The address of the memory location to construct. template FORCE_INLINE static typename TEnableIf::Value>::Type ConstructItem(T* dst) { @@ -132,13 +126,6 @@ public: ++(T*&)dst; } } - - /// - /// Constructs the range of items in the memory. - /// - /// The optimized version is noop. - /// The address of the first memory location to construct. - /// The number of element to construct. Can be equal 0. template FORCE_INLINE static typename TEnableIf::Value>::Type ConstructItems(T* dst, int32 count) { @@ -163,14 +150,6 @@ public: ++src; } } - - /// - /// Constructs the range of items in the memory from the set of arguments. - /// - /// The optimized version uses low-level memory copy. - /// The address of the first memory location to construct. - /// The address of the first memory location to pass to the constructor. - /// The number of element to construct. Can be equal 0. template FORCE_INLINE static typename TEnableIf::Value>::Type ConstructItems(T* dst, const U* src, int32 count) { @@ -187,12 +166,6 @@ public: { dst->~T(); } - - /// - /// Destructs the item in the memory. - /// - /// The optimized version is noop. - /// The address of the memory location to destruct. template FORCE_INLINE static typename TEnableIf::Value>::Type DestructItem(T* dst) { @@ -213,13 +186,6 @@ public: ++dst; } } - - /// - /// Destructs the range of items in the memory. 
- /// - /// The optimized version is noop. - /// The address of the first memory location to destruct. - /// The number of element to destruct. Can be equal 0. template FORCE_INLINE static typename TEnableIf::Value>::Type DestructItems(T* dst, int32 count) { @@ -242,15 +208,7 @@ public: ++src; } } - - /// - /// Copies the range of items using the assignment operator. - /// - /// The optimized version is low-level memory copy. - /// The address of the first memory location to start assigning to. - /// The address of the first memory location to assign from. - /// The number of element to assign. Can be equal 0. - template + template FORCE_INLINE static typename TEnableIf::Value>::Type CopyItems(T* dst, const T* src, int32 count) { Platform::MemoryCopy(dst, src, count * sizeof(T)); @@ -273,16 +231,31 @@ public: ++src; } } + template + FORCE_INLINE static typename TEnableIf::Value>::Type MoveItems(T* dst, U* src, int32 count) + { + Platform::MemoryCopy(dst, src, count * sizeof(U)); + } /// - /// Moves the range of items in the memory from the set of arguments. + /// Moves the range of items using the assignment operator. /// /// The optimized version uses low-level memory copy. /// The address of the first memory location to move. /// The address of the first memory location to pass to the move constructor. /// The number of element to move. Can be equal 0. 
template - FORCE_INLINE static typename TEnableIf::Value>::Type MoveItems(T* dst, U* src, int32 count) + FORCE_INLINE static typename TEnableIf::Value>::Type MoveAssignItems(T* dst, U* src, int32 count) + { + while (count--) + { + *dst = MoveTemp(*src); + ++(T*&)dst; + ++src; + } + } + template + FORCE_INLINE static typename TEnableIf::Value>::Type MoveAssignItems(T* dst, U* src, int32 count) { Platform::MemoryCopy(dst, src, count * sizeof(U)); } From 9646dd3fc2cf95845a8c6e75a277de9df0ff84f2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 12:33:33 +0200 Subject: [PATCH 090/211] Fix `AutoFocus` on `ContainerControl` to be `false` by default --- Source/Editor/GUI/ContextMenu/ContextMenuBase.cs | 3 ++- Source/Editor/GUI/Dialogs/ColorSelector.cs | 2 ++ Source/Editor/GUI/Docking/DockPanelProxy.cs | 2 -- Source/Editor/GUI/Input/SliderControl.cs | 2 ++ Source/Editor/GUI/Tree/TreeNode.cs | 2 ++ Source/Editor/Surface/Archetypes/Animation.MultiBlend.cs | 2 ++ Source/Editor/Surface/Elements/BoxValue.cs | 1 + Source/Editor/Surface/SurfaceControl.cs | 1 + Source/Engine/UI/GUI/Common/Dropdown.cs | 2 ++ Source/Engine/UI/GUI/Common/Slider.cs | 2 ++ Source/Engine/UI/GUI/Common/TextBoxBase.cs | 2 ++ Source/Engine/UI/GUI/ContainerControl.cs | 4 ++++ Source/Engine/UI/GUI/Special/RadialMenu.cs | 2 ++ 13 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs b/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs index d7534b15b..0261792e7 100644 --- a/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs +++ b/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs @@ -114,9 +114,10 @@ namespace FlaxEditor.GUI.ContextMenu public ContextMenuBase() : base(0, 0, 120, 32) { - _direction = ContextMenuDirection.RightDown; Visible = false; + AutoFocus = true; + _direction = ContextMenuDirection.RightDown; _isSubMenu = true; } diff --git a/Source/Editor/GUI/Dialogs/ColorSelector.cs b/Source/Editor/GUI/Dialogs/ColorSelector.cs index 
126959d85..f556e8cd6 100644 --- a/Source/Editor/GUI/Dialogs/ColorSelector.cs +++ b/Source/Editor/GUI/Dialogs/ColorSelector.cs @@ -76,6 +76,8 @@ namespace FlaxEditor.GUI.Dialogs public ColorSelector(float wheelSize) : base(0, 0, wheelSize, wheelSize) { + AutoFocus = true; + _colorWheelSprite = Editor.Instance.Icons.ColorWheel128; _wheelRect = new Rectangle(0, 0, wheelSize, wheelSize); } diff --git a/Source/Editor/GUI/Docking/DockPanelProxy.cs b/Source/Editor/GUI/Docking/DockPanelProxy.cs index dbb4e082e..5cf64266a 100644 --- a/Source/Editor/GUI/Docking/DockPanelProxy.cs +++ b/Source/Editor/GUI/Docking/DockPanelProxy.cs @@ -65,8 +65,6 @@ namespace FlaxEditor.GUI.Docking internal DockPanelProxy(DockPanel panel) : base(0, 0, 64, 64) { - AutoFocus = false; - _panel = panel; AnchorPreset = AnchorPresets.StretchAll; Offsets = Margin.Zero; diff --git a/Source/Editor/GUI/Input/SliderControl.cs b/Source/Editor/GUI/Input/SliderControl.cs index 8e3efe956..4703da73d 100644 --- a/Source/Editor/GUI/Input/SliderControl.cs +++ b/Source/Editor/GUI/Input/SliderControl.cs @@ -368,6 +368,8 @@ namespace FlaxEditor.GUI.Input public SliderControl(float value, float x = 0, float y = 0, float width = 120, float min = float.MinValue, float max = float.MaxValue) : base(x, y, width, TextBox.DefaultHeight) { + AutoFocus = true; + _min = min; _max = max; _value = Mathf.Clamp(value, min, max); diff --git a/Source/Editor/GUI/Tree/TreeNode.cs b/Source/Editor/GUI/Tree/TreeNode.cs index 40c276bf4..ed1257819 100644 --- a/Source/Editor/GUI/Tree/TreeNode.cs +++ b/Source/Editor/GUI/Tree/TreeNode.cs @@ -319,6 +319,8 @@ namespace FlaxEditor.GUI.Tree public TreeNode(bool canChangeOrder, SpriteHandle iconCollapsed, SpriteHandle iconOpened) : base(0, 0, 64, 16) { + AutoFocus = true; + _canChangeOrder = canChangeOrder; _animationProgress = 1.0f; _cachedHeight = _headerHeight; diff --git a/Source/Editor/Surface/Archetypes/Animation.MultiBlend.cs b/Source/Editor/Surface/Archetypes/Animation.MultiBlend.cs index 
bd92bd266..450960af7 100644 --- a/Source/Editor/Surface/Archetypes/Animation.MultiBlend.cs +++ b/Source/Editor/Surface/Archetypes/Animation.MultiBlend.cs @@ -233,6 +233,8 @@ namespace FlaxEditor.Surface.Archetypes public BlendPointsEditor(Animation.MultiBlend node, bool is2D, float x, float y, float width, float height) : base(x, y, width, height) { + AutoFocus = true; + _node = node; _is2D = is2D; } diff --git a/Source/Editor/Surface/Elements/BoxValue.cs b/Source/Editor/Surface/Elements/BoxValue.cs index 597c11b9e..05d39580c 100644 --- a/Source/Editor/Surface/Elements/BoxValue.cs +++ b/Source/Editor/Surface/Elements/BoxValue.cs @@ -64,6 +64,7 @@ namespace FlaxEditor.Surface.Elements { ParentNode = parentNode; Archetype = archetype; + AutoFocus = true; var back = Style.Current.TextBoxBackground; var grayOutFactor = 0.6f; diff --git a/Source/Editor/Surface/SurfaceControl.cs b/Source/Editor/Surface/SurfaceControl.cs index 46cb94577..ce5b17d86 100644 --- a/Source/Editor/Surface/SurfaceControl.cs +++ b/Source/Editor/Surface/SurfaceControl.cs @@ -59,6 +59,7 @@ namespace FlaxEditor.Surface protected SurfaceControl(VisjectSurfaceContext context, float width, float height) : base(0, 0, width, height) { + AutoFocus = true; ClipChildren = false; Surface = context.Surface; diff --git a/Source/Engine/UI/GUI/Common/Dropdown.cs b/Source/Engine/UI/GUI/Common/Dropdown.cs index c0a81eacf..868db4b89 100644 --- a/Source/Engine/UI/GUI/Common/Dropdown.cs +++ b/Source/Engine/UI/GUI/Common/Dropdown.cs @@ -381,6 +381,8 @@ namespace FlaxEngine.GUI public Dropdown() : base(0, 0, 120, 18.0f) { + AutoFocus = true; + var style = Style.Current; Font = new FontReference(style.FontMedium); TextColor = style.Foreground; diff --git a/Source/Engine/UI/GUI/Common/Slider.cs b/Source/Engine/UI/GUI/Common/Slider.cs index c1e7cd9f6..e02b67ab2 100644 --- a/Source/Engine/UI/GUI/Common/Slider.cs +++ b/Source/Engine/UI/GUI/Common/Slider.cs @@ -298,6 +298,8 @@ public class Slider : ContainerControl public 
Slider(float width, float height) : base(0, 0, width, height) { + AutoFocus = true; + var style = Style.Current; TrackLineColor = style.BackgroundHighlighted; TrackFillLineColor = style.LightBackground; diff --git a/Source/Engine/UI/GUI/Common/TextBoxBase.cs b/Source/Engine/UI/GUI/Common/TextBoxBase.cs index 237a1bb5f..243e4786e 100644 --- a/Source/Engine/UI/GUI/Common/TextBoxBase.cs +++ b/Source/Engine/UI/GUI/Common/TextBoxBase.cs @@ -511,6 +511,8 @@ namespace FlaxEngine.GUI protected TextBoxBase(bool isMultiline, float x, float y, float width = 120) : base(x, y, width, DefaultHeight) { + AutoFocus = true; + _isMultiline = isMultiline; _maxLength = 2147483646; _selectionStart = _selectionEnd = -1; diff --git a/Source/Engine/UI/GUI/ContainerControl.cs b/Source/Engine/UI/GUI/ContainerControl.cs index 017b8ee5c..ada93ff1e 100644 --- a/Source/Engine/UI/GUI/ContainerControl.cs +++ b/Source/Engine/UI/GUI/ContainerControl.cs @@ -38,6 +38,7 @@ namespace FlaxEngine.GUI public ContainerControl() { _isLayoutLocked = true; + AutoFocus = false; } /// @@ -47,6 +48,7 @@ namespace FlaxEngine.GUI : base(x, y, width, height) { _isLayoutLocked = true; + AutoFocus = false; } /// @@ -56,6 +58,7 @@ namespace FlaxEngine.GUI : base(location, size) { _isLayoutLocked = true; + AutoFocus = false; } /// @@ -63,6 +66,7 @@ namespace FlaxEngine.GUI : base(bounds) { _isLayoutLocked = true; + AutoFocus = false; } /// diff --git a/Source/Engine/UI/GUI/Special/RadialMenu.cs b/Source/Engine/UI/GUI/Special/RadialMenu.cs index 3fbaa2fe2..060c4ef90 100644 --- a/Source/Engine/UI/GUI/Special/RadialMenu.cs +++ b/Source/Engine/UI/GUI/Special/RadialMenu.cs @@ -165,6 +165,8 @@ namespace FlaxEngine.GUI public RadialMenu(float x, float y, float width = 100, float height = 100) : base(x, y, width, height) { + AutoFocus = true; + var style = Style.Current; if (style != null) { From c882b547c8709e431ad4f40f731fb6b5fb14b557 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 12:34:02 +0200 Subject: 
[PATCH 091/211] Fix game UI focus loss when hiding focused control to maintain gamepad navigation in Editor --- Source/Editor/Windows/GameWindow.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Source/Editor/Windows/GameWindow.cs b/Source/Editor/Windows/GameWindow.cs index acb2deda3..5b243cfed 100644 --- a/Source/Editor/Windows/GameWindow.cs +++ b/Source/Editor/Windows/GameWindow.cs @@ -1176,6 +1176,12 @@ namespace FlaxEditor.Windows if (!_cursorVisible) Screen.CursorVisible = true; } + + if (Editor.IsPlayMode && IsDocked && IsSelected && RootWindow.FocusedControl == null) + { + // Game UI cleared focus so regain it to maintain UI navigation just like game window does + FlaxEngine.Scripting.InvokeOnUpdate(Focus); + } } /// From 3e0c085bf399a416db4738eb1b3074d18a1527ce Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 12:34:40 +0200 Subject: [PATCH 092/211] Add error log when adding/removign actors during rendering or when `ConcurrentSystemLocker` deadlocks --- Source/Engine/Level/Scene/SceneRendering.cpp | 18 ++++++++++++- .../Threading/ConcurrentSystemLocker.cpp | 26 +++++++++++++++++++ .../Engine/Threading/ConcurrentSystemLocker.h | 1 + 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index e55dbd43f..d7225036c 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -7,9 +7,23 @@ #include "Engine/Graphics/RenderView.h" #include "Engine/Renderer/RenderList.h" #include "Engine/Threading/JobSystem.h" -#include "Engine/Threading/Threading.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" +#if !BUILD_RELEASE +#include "Engine/Graphics/GPUDevice.h" +#include "Engine/Core/Log.h" +#endif + +#if BUILD_RELEASE +#define CHECK_SCENE_EDIT_ACCESS() +#else +#define CHECK_SCENE_EDIT_ACCESS() \ + if (Locker.HasLock(false) && IsInMainThread() && 
GPUDevice::Instance && GPUDevice::Instance->IsRendering()) \ + { \ + LOG(Error, "Adding/removing actors during rendering is not supported ({}, '{}').", a->ToString(), a->GetNamePath()); \ + return; \ + } +#endif ISceneRenderingListener::~ISceneRenderingListener() { @@ -148,6 +162,7 @@ void SceneRendering::AddActor(Actor* a, int32& key) if (key != -1) return; PROFILE_MEM(Graphics); + CHECK_SCENE_EDIT_ACCESS(); const int32 category = a->_drawCategory; ConcurrentSystemLocker::WriteScope lock(Locker, true); auto& list = Actors[category]; @@ -192,6 +207,7 @@ void SceneRendering::UpdateActor(Actor* a, int32& key, ISceneRenderingListener:: void SceneRendering::RemoveActor(Actor* a, int32& key) { + CHECK_SCENE_EDIT_ACCESS(); const int32 category = a->_drawCategory; ConcurrentSystemLocker::WriteScope lock(Locker, true); auto& list = Actors[category]; diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.cpp b/Source/Engine/Threading/ConcurrentSystemLocker.cpp index d936f8307..cafc3dadb 100644 --- a/Source/Engine/Threading/ConcurrentSystemLocker.cpp +++ b/Source/Engine/Threading/ConcurrentSystemLocker.cpp @@ -2,6 +2,9 @@ #include "ConcurrentSystemLocker.h" #include "Engine/Platform/Platform.h" +#if !BUILD_RELEASE +#include "Engine/Core/Log.h" +#endif ConcurrentSystemLocker::ConcurrentSystemLocker() { @@ -12,7 +15,25 @@ void ConcurrentSystemLocker::Begin(bool write, bool exclusively) { volatile int64* thisCounter = &_counters[write]; volatile int64* otherCounter = &_counters[!write]; + +#if !BUILD_RELEASE + int32 retries = 0; + double startTime = Platform::GetTimeSeconds(); +#endif RETRY: +#if !BUILD_RELEASE + retries++; + if (retries > 1000) + { + double endTime = Platform::GetTimeSeconds(); + if (endTime - startTime > 0.5f) + { + LOG(Error, "Deadlock detected in ConcurrentSystemLocker! 
Thread 0x{0:x} waits for {1} ms...", Platform::GetCurrentThreadID(), (int32)((endTime - startTime) * 1000.0)); + retries = 0; + } + } +#endif + // Check if we can enter (cannot read while someone else is writing and vice versa) if (Platform::AtomicRead(otherCounter) != 0) { @@ -47,3 +68,8 @@ void ConcurrentSystemLocker::End(bool write) // Mark that we left this section Platform::InterlockedDecrement(&_counters[write]); } + +bool ConcurrentSystemLocker::HasLock(bool write) const +{ + return Platform::AtomicRead(&_counters[write]) != 0; +} diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.h b/Source/Engine/Threading/ConcurrentSystemLocker.h index 031b7e685..0b46a64f5 100644 --- a/Source/Engine/Threading/ConcurrentSystemLocker.h +++ b/Source/Engine/Threading/ConcurrentSystemLocker.h @@ -19,6 +19,7 @@ public: void Begin(bool write, bool exclusively = false); void End(bool write); + bool HasLock(bool write) const; public: template From 2d2c5411cdbf0adf23c347478e414d194c397985 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 12:49:05 +0200 Subject: [PATCH 093/211] Add variable DDGI probe size in debug view based on cascade --- Content/Editor/DebugMaterials/DDGIDebugProbes.flax | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index ee6bd8d7b..4289244c8 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb32df5fa9255c8d27f968819ab7f59236640661c83ae33588733542ea635b0f -size 40232 +oid sha256:740621fb235edae990ffa259a833b12001eb5027bc6036af0aa34ebca4bcec64 +size 40805 From 7fd278a68917130551ddd02e7d260f810932e4d5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 13:34:43 +0200 Subject: [PATCH 094/211] Fix .NET version to use selection for consoles with fixed setup --- 
Source/Editor/Cooker/CookingData.h | 9 ++------- Source/Editor/Cooker/GameCooker.cpp | 8 ++++++++ .../Cooker/Platform/GDK/GDKPlatformTools.cpp | 5 +++++ .../Editor/Cooker/Platform/GDK/GDKPlatformTools.h | 1 + Source/Editor/Cooker/PlatformTools.h | 14 ++++++++++++++ Source/Editor/Cooker/Steps/CompileScriptsStep.cpp | 2 +- Source/Editor/Cooker/Steps/DeployDataStep.cpp | 13 ++++++++----- .../Cooker/Steps/PrecompileAssembliesStep.cpp | 2 +- 8 files changed, 40 insertions(+), 14 deletions(-) diff --git a/Source/Editor/Cooker/CookingData.h b/Source/Editor/Cooker/CookingData.h index 13db5bb3f..1ecfaf353 100644 --- a/Source/Editor/Cooker/CookingData.h +++ b/Source/Editor/Cooker/CookingData.h @@ -20,13 +20,6 @@ class PlatformTools; #define GAME_BUILD_DOTNET_RUNTIME_MAX_VER 9 #endif -#if OFFICIAL_BUILD -// Use the fixed .NET SDK version in packaged builds for compatibility (FlaxGame is precompiled with it) -#define GAME_BUILD_DOTNET_VER TEXT("-dotnet=" MACRO_TO_STR(GAME_BUILD_DOTNET_RUNTIME_MIN_VER)) -#else -#define GAME_BUILD_DOTNET_VER TEXT("") -#endif - /// /// Game building options. Used as flags. 
/// @@ -374,6 +367,8 @@ public: /// void GetBuildPlatformName(const Char*& platform, const Char*& architecture) const; + String GetDotnetCommandArg() const; + public: /// diff --git a/Source/Editor/Cooker/GameCooker.cpp b/Source/Editor/Cooker/GameCooker.cpp index db8ded610..c204efac5 100644 --- a/Source/Editor/Cooker/GameCooker.cpp +++ b/Source/Editor/Cooker/GameCooker.cpp @@ -312,6 +312,14 @@ void CookingData::GetBuildPlatformName(const Char*& platform, const Char*& archi } } +String CookingData::GetDotnetCommandArg() const +{ + int32 version = Tools->GetDotnetVersion(); + if (version == 0) + return String::Empty; + return String::Format(TEXT("-dotnet={}"), version); +} + void CookingData::StepProgress(const String& info, const float stepProgress) const { const float singleStepProgress = 1.0f / (StepsCount + 1); diff --git a/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp b/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp index eea89a794..f9a8f1b82 100644 --- a/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp +++ b/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.cpp @@ -195,4 +195,9 @@ bool GDKPlatformTools::OnPostProcess(CookingData& data, GDKPlatformSettings* pla return false; } +int32 GDKPlatformTools::GetDotnetVersion() const +{ + return GAME_BUILD_DOTNET_RUNTIME_MIN_VER; +} + #endif diff --git a/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.h b/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.h index 11763fce4..d2c137664 100644 --- a/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.h +++ b/Source/Editor/Cooker/Platform/GDK/GDKPlatformTools.h @@ -26,6 +26,7 @@ public: public: // [PlatformTools] + int32 GetDotnetVersion() const override; DotNetAOTModes UseAOT() const override; bool OnDeployBinaries(CookingData& data) override; }; diff --git a/Source/Editor/Cooker/PlatformTools.h b/Source/Editor/Cooker/PlatformTools.h index 9d6a50aa9..dfe7b6ffb 100644 --- a/Source/Editor/Cooker/PlatformTools.h +++ 
b/Source/Editor/Cooker/PlatformTools.h @@ -70,6 +70,20 @@ public: /// virtual ArchitectureType GetArchitecture() const = 0; + /// + /// Gets the .Net version to use for the cooked game. + /// + virtual int32 GetDotnetVersion() const + { +#if OFFICIAL_BUILD + // Use the fixed .NET SDK version in packaged builds for compatibility (FlaxGame is precompiled with it) + return GAME_BUILD_DOTNET_RUNTIME_MIN_VER; +#else + // Use the highest version found on a system (Flax.Build will decide) + return 0; +#endif + } + /// /// Gets the value indicating whenever platform requires AOT (needs C# assemblies to be precompiled). /// diff --git a/Source/Editor/Cooker/Steps/CompileScriptsStep.cpp b/Source/Editor/Cooker/Steps/CompileScriptsStep.cpp index eaf5b2863..c0a8e452a 100644 --- a/Source/Editor/Cooker/Steps/CompileScriptsStep.cpp +++ b/Source/Editor/Cooker/Steps/CompileScriptsStep.cpp @@ -189,7 +189,7 @@ bool CompileScriptsStep::Perform(CookingData& data) const String logFile = data.CacheDirectory / TEXT("CompileLog.txt"); auto args = String::Format( TEXT("-log -logfile=\"{4}\" -build -mutex -buildtargets={0} -platform={1} -arch={2} -configuration={3} -aotMode={5} {6}"), - target, platform, architecture, configuration, logFile, ToString(data.Tools->UseAOT()), GAME_BUILD_DOTNET_VER); + target, platform, architecture, configuration, logFile, ToString(data.Tools->UseAOT()), data.GetDotnetCommandArg()); #if PLATFORM_WINDOWS if (data.Platform == BuildPlatform::LinuxX64) #elif PLATFORM_LINUX diff --git a/Source/Editor/Cooker/Steps/DeployDataStep.cpp b/Source/Editor/Cooker/Steps/DeployDataStep.cpp index ac271ab7d..b5b24c251 100644 --- a/Source/Editor/Cooker/Steps/DeployDataStep.cpp +++ b/Source/Editor/Cooker/Steps/DeployDataStep.cpp @@ -88,7 +88,7 @@ bool DeployDataStep::Perform(CookingData& data) { // Ask Flax.Build to provide .NET SDK location for the current platform String sdks; - bool failed = ScriptsBuilder::RunBuildTool(String::Format(TEXT("-log -logMessagesOnly 
-logFileWithConsole -logfile=SDKs.txt -printSDKs {}"), GAME_BUILD_DOTNET_VER), data.CacheDirectory); + bool failed = ScriptsBuilder::RunBuildTool(String::Format(TEXT("-log -logMessagesOnly -logFileWithConsole -logfile=SDKs.txt -printSDKs {}"), data.GetDotnetCommandArg()), data.CacheDirectory); failed |= File::ReadAllText(data.CacheDirectory / TEXT("SDKs.txt"), sdks); int32 idx = sdks.Find(TEXT("DotNetSdk, "), StringSearchCase::CaseSensitive); if (idx != -1) @@ -200,7 +200,7 @@ bool DeployDataStep::Perform(CookingData& data) String sdks; const Char *platformName, *archName; data.GetBuildPlatformName(platformName, archName); - String args = String::Format(TEXT("-log -logMessagesOnly -logFileWithConsole -logfile=SDKs.txt -printDotNetRuntime -platform={} -arch={} {}"), platformName, archName, GAME_BUILD_DOTNET_VER); + String args = String::Format(TEXT("-log -logMessagesOnly -logFileWithConsole -logfile=SDKs.txt -printDotNetRuntime -platform={} -arch={} {}"), platformName, archName, data.GetDotnetCommandArg()); bool failed = ScriptsBuilder::RunBuildTool(args, data.CacheDirectory); failed |= File::ReadAllText(data.CacheDirectory / TEXT("SDKs.txt"), sdks); Array parts; @@ -244,10 +244,13 @@ bool DeployDataStep::Perform(CookingData& data) } if (version.IsEmpty()) { + int32 minVer = GAME_BUILD_DOTNET_RUNTIME_MIN_VER, maxVer = GAME_BUILD_DOTNET_RUNTIME_MAX_VER; if (srcDotnetFromEngine) { // Detect version from runtime files inside Engine Platform folder - for (int32 i = GAME_BUILD_DOTNET_RUNTIME_MAX_VER; i >= GAME_BUILD_DOTNET_RUNTIME_MIN_VER; i--) + if (data.Tools->GetDotnetVersion() != 0) + minVer = maxVer = data.Tools->GetDotnetVersion(); + for (int32 i = maxVer; i >= minVer; i--) { // Check runtime files inside Engine Platform folder String testPath1 = srcDotnet / String::Format(TEXT("lib/net{}.0"), i); @@ -262,7 +265,7 @@ bool DeployDataStep::Perform(CookingData& data) } if (version.IsEmpty()) { - data.Error(String::Format(TEXT("Failed to find supported .NET {} version 
for the current host platform."), GAME_BUILD_DOTNET_RUNTIME_MIN_VER)); + data.Error(String::Format(TEXT("Failed to find supported .NET {} version (min {}) for the current host platform."), maxVer, minVer)); return true; } } @@ -364,7 +367,7 @@ bool DeployDataStep::Perform(CookingData& data) const String logFile = data.CacheDirectory / TEXT("StripDotnetLibs.txt"); String args = String::Format( TEXT("-log -logfile=\"{}\" -runDotNetClassLibStripping -mutex -binaries=\"{}\" {}"), - logFile, data.DataOutputPath, GAME_BUILD_DOTNET_VER); + logFile, data.DataOutputPath, data.GetDotnetCommandArg()); for (const String& define : data.CustomDefines) { args += TEXT(" -D"); diff --git a/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp b/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp index 1c8e321aa..1a4f67c01 100644 --- a/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp +++ b/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp @@ -69,7 +69,7 @@ bool PrecompileAssembliesStep::Perform(CookingData& data) const String logFile = data.CacheDirectory / TEXT("AOTLog.txt"); String args = String::Format( TEXT("-log -logfile=\"{}\" -runDotNetAOT -mutex -platform={} -arch={} -configuration={} -aotMode={} -binaries=\"{}\" -intermediate=\"{}\" {}"), - logFile, platform, architecture, configuration, ToString(aotMode), data.DataOutputPath, data.ManagedCodeOutputPath, GAME_BUILD_DOTNET_VER); + logFile, platform, architecture, configuration, ToString(aotMode), data.DataOutputPath, data.ManagedCodeOutputPath, data.GetDotnetCommandArg()); if (!buildSettings.SkipUnusedDotnetLibsPackaging) args += TEXT(" -skipUnusedDotnetLibs=false"); // Run AOT on whole class library (not just used libs) for (const String& define : data.CustomDefines) From 2754d61c05b8d526b94aa300eddb66586611b294 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 19:31:07 +0200 Subject: [PATCH 095/211] Fix building Tracy for Switch --- Source/ThirdParty/tracy/client/TracyProfiler.cpp | 6 
++++-- Source/ThirdParty/tracy/tracy.Build.cs | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Source/ThirdParty/tracy/client/TracyProfiler.cpp b/Source/ThirdParty/tracy/client/TracyProfiler.cpp index c96fc5beb..837b36cc3 100644 --- a/Source/ThirdParty/tracy/client/TracyProfiler.cpp +++ b/Source/ThirdParty/tracy/client/TracyProfiler.cpp @@ -1480,7 +1480,7 @@ Profiler::Profiler() m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); -#ifndef _WIN32 +#ifndef TRACY_NO_PIPE pipe(m_pipe); # if defined __APPLE__ || defined BSD // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default @@ -1642,7 +1642,7 @@ Profiler::~Profiler() tracy_free( m_kcore ); #endif -#ifndef _WIN32 +#ifndef TRACY_NO_PIPE close( m_pipe[0] ); close( m_pipe[1] ); #endif @@ -3139,6 +3139,8 @@ char* Profiler::SafeCopyProlog( const char* data, size_t size ) { success = false; } +#elif !defined(TRACY_NO_PIPE) + memcpy(buf, data, size); #else // Send through the pipe to ensure safe reads for( size_t offset = 0; offset != size; /*in loop*/ ) diff --git a/Source/ThirdParty/tracy/tracy.Build.cs b/Source/ThirdParty/tracy/tracy.Build.cs index 9d54ca688..beb0e9f89 100644 --- a/Source/ThirdParty/tracy/tracy.Build.cs +++ b/Source/ThirdParty/tracy/tracy.Build.cs @@ -47,11 +47,15 @@ public class tracy : ThirdPartyModule switch (options.Platform.Target) { case TargetPlatform.Windows: + case TargetPlatform.XboxOne: + case TargetPlatform.XboxScarlett: options.PrivateDefinitions.Add("TRACY_DBGHELP_LOCK=FlaxDbgHelp"); + options.PrivateDefinitions.Add("TRACY_NO_PIPE"); break; case TargetPlatform.Switch: options.PrivateDefinitions.Add("TRACY_USE_MALLOC"); options.PrivateDefinitions.Add("TRACY_ONLY_IPV4"); + options.PrivateDefinitions.Add("TRACY_NO_PIPE"); break; } } From 6763436effbe204655f3dd1fcff6166f64586122 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 15 Jul 2025 20:10:01 +0200 Subject: [PATCH 096/211] Add logging missing instance layer on Vulkan --- 
.../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index f2d0aad7d..8c77b77d3 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1179,23 +1179,57 @@ GPUDevice* GPUDeviceVulkan::Create() Array properties; properties.Resize(propertyCount); vkEnumerateInstanceExtensionProperties(nullptr, &propertyCount, properties.Get()); + String missingExtension; for (const char* extension : InstanceExtensions) { - bool found = false; for (uint32_t propertyIndex = 0; propertyIndex < propertyCount; propertyIndex++) { if (!StringUtils::Compare(properties[propertyIndex].extensionName, extension)) { - found = true; + if (missingExtension.IsEmpty()) + missingExtension = extension; + else + missingExtension += TEXT(", ") + String(extension); break; } } - if (!found) + } + LOG(Warning, "Extensions found:"); + for (const VkExtensionProperties& property : properties) + LOG(Warning, " > {}", String(property.extensionName)); + auto error = String::Format(TEXT("Vulkan driver doesn't contain specified extensions:\n{0}\nPlease make sure your layers path is set appropriately."), missingExtension); + LOG_STR(Error, error); + Platform::Error(*error); + return nullptr; + } + if (result == VK_ERROR_LAYER_NOT_PRESENT) + { + // Layers error + uint32_t propertyCount; + vkEnumerateInstanceLayerProperties(&propertyCount, nullptr); + Array properties; + properties.Resize(propertyCount); + vkEnumerateInstanceLayerProperties(&propertyCount, properties.Get()); + String missingLayers; + for (const char* layer : InstanceLayers) + { + for (uint32_t propertyIndex = 0; propertyIndex < propertyCount; propertyIndex++) { - LOG(Warning, "Missing required Vulkan extension: {0}", String(extension)); + if 
(!StringUtils::Compare(properties[propertyIndex].layerName, layer)) + { + if (missingLayers.IsEmpty()) + missingLayers = layer; + else + missingLayers += TEXT(", ") + String(layer); + break; + } } } - auto error = String::Format(TEXT("Vulkan driver doesn't contain specified extensions:\n{0}\nPlease make sure your layers path is set appropriately.")); + LOG(Warning, "Layers found:"); + for (const VkLayerProperties& property : properties) + LOG(Warning, " > {}", String(property.layerName)); + auto error = String::Format(TEXT("Vulkan driver doesn't contain specified layers:\n{0}\nPlease make sure your layers path is set appropriately."), missingLayers); + LOG_STR(Error, error); Platform::Error(*error); return nullptr; } From 18035a860497fd769806a72db24a55089da33c66 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 19 Jul 2025 11:14:54 +0200 Subject: [PATCH 097/211] Fix incorrect image buffer usage in Render Output Control --- Source/Engine/UI/GUI/RenderOutputControl.cs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Source/Engine/UI/GUI/RenderOutputControl.cs b/Source/Engine/UI/GUI/RenderOutputControl.cs index b6717ffe6..9025cbc04 100644 --- a/Source/Engine/UI/GUI/RenderOutputControl.cs +++ b/Source/Engine/UI/GUI/RenderOutputControl.cs @@ -192,16 +192,19 @@ namespace FlaxEngine.GUI } // Draw backbuffer texture - var buffer = _backBufferOld ? _backBufferOld : _backBuffer; + var buffer = _backBuffer ? _backBuffer : _backBufferOld; var color = TintColor.RGBMultiplied(Brightness); - if (KeepAspectRatio) + if (KeepAspectRatio && buffer) { float ratioX = bounds.Width / buffer.Width; float ratioY = bounds.Height / buffer.Height; float ratio = ratioX < ratioY ? 
ratioX : ratioY; bounds = new Rectangle((bounds.Width - buffer.Width * ratio) / 2, (bounds.Height - buffer.Height * ratio) / 2, buffer.Width * ratio, buffer.Height * ratio); } - Render2D.DrawTexture(buffer, bounds, color); + if (buffer) + Render2D.DrawTexture(buffer, bounds, color); + else + Render2D.FillRectangle(bounds, Color.Black); // Push clipping mask if (ClipChildren) From cd08eeaf95fb81ffad2827710c7b52e0e9946fb6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 19 Jul 2025 16:08:27 +0200 Subject: [PATCH 098/211] Fix running cooked game o macOS --- Source/Editor/Cooker/GameCooker.cpp | 5 ++--- Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp | 2 ++ .../Editor/Cooker/Platform/Windows/WindowsPlatformTools.cpp | 3 +++ Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Source/Editor/Cooker/GameCooker.cpp b/Source/Editor/Cooker/GameCooker.cpp index c204efac5..da2d74f87 100644 --- a/Source/Editor/Cooker/GameCooker.cpp +++ b/Source/Editor/Cooker/GameCooker.cpp @@ -683,8 +683,7 @@ bool GameCookerImpl::Build() MCore::Thread::Attach(); - // Build Started - if (!EnumHasAnyFlags(data.Options, BuildOptions::NoCook)) + // Build start { CallEvent(GameCooker::EventType::BuildStarted); data.Tools->OnBuildStarted(data); @@ -757,8 +756,8 @@ bool GameCookerImpl::Build() } IsRunning = false; CancelFlag = 0; - if (!EnumHasAnyFlags(data.Options, BuildOptions::NoCook)) { + // Build end for (int32 stepIndex = 0; stepIndex < Steps.Count(); stepIndex++) Steps[stepIndex]->OnBuildEnded(data, failed); data.Tools->OnBuildEnded(data, failed); diff --git a/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp b/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp index 1d447027b..660121584 100644 --- a/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp +++ b/Source/Editor/Cooker/Platform/Mac/MacPlatformTools.cpp @@ -231,6 +231,8 @@ bool MacPlatformTools::OnPostProcess(CookingData& data) LOG(Info, 
"Building app package..."); { const String dmgPath = data.OriginalOutputPath / appName + TEXT(".dmg"); + if (FileSystem::FileExists(dmgPath)) + FileSystem::DeleteFile(dmgPath); CreateProcessSettings procSettings; procSettings.HiddenWindow = true; procSettings.WorkingDirectory = data.OriginalOutputPath; diff --git a/Source/Editor/Cooker/Platform/Windows/WindowsPlatformTools.cpp b/Source/Editor/Cooker/Platform/Windows/WindowsPlatformTools.cpp index 438c47388..0711e4866 100644 --- a/Source/Editor/Cooker/Platform/Windows/WindowsPlatformTools.cpp +++ b/Source/Editor/Cooker/Platform/Windows/WindowsPlatformTools.cpp @@ -528,6 +528,9 @@ bool WindowsPlatformTools::OnDeployBinaries(CookingData& data) void WindowsPlatformTools::OnBuildStarted(CookingData& data) { + if (EnumHasAllFlags(data.Options, BuildOptions::NoCook)) + return; + // Remove old executable Array files; FileSystem::DirectoryGetFiles(files, data.NativeCodeOutputPath, TEXT("*.exe"), DirectorySearchOption::TopDirectoryOnly); diff --git a/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp b/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp index 1a4f67c01..472a3cca6 100644 --- a/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp +++ b/Source/Editor/Cooker/Steps/PrecompileAssembliesStep.cpp @@ -12,7 +12,7 @@ void PrecompileAssembliesStep::OnBuildStarted(CookingData& data) { const DotNetAOTModes aotMode = data.Tools->UseAOT(); - if (aotMode == DotNetAOTModes::None) + if (aotMode == DotNetAOTModes::None || EnumHasAllFlags(data.Options, BuildOptions::NoCook)) return; const auto& buildSettings = *BuildSettings::Get(); From 90551b32bc22a248675e4363097d3ae5f8ef0d1d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 19 Jul 2025 16:40:34 +0200 Subject: [PATCH 099/211] Add version to game settings --- Source/Engine/Core/Config/GameSettings.cpp | 1 + Source/Engine/Core/Config/GameSettings.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Source/Engine/Core/Config/GameSettings.cpp 
b/Source/Engine/Core/Config/GameSettings.cpp index 35770c578..eb6705c9c 100644 --- a/Source/Engine/Core/Config/GameSettings.cpp +++ b/Source/Engine/Core/Config/GameSettings.cpp @@ -221,6 +221,7 @@ void GameSettings::Deserialize(DeserializeStream& stream, ISerializeModifier* mo ProductName = JsonTools::GetString(stream, "ProductName"); CompanyName = JsonTools::GetString(stream, "CompanyName"); CopyrightNotice = JsonTools::GetString(stream, "CopyrightNotice"); + Version = JsonTools::GetString(stream, "Version"); Icon = JsonTools::GetGuid(stream, "Icon"); FirstScene = JsonTools::GetGuid(stream, "FirstScene"); NoSplashScreen = JsonTools::GetBool(stream, "NoSplashScreen", NoSplashScreen); diff --git a/Source/Engine/Core/Config/GameSettings.h b/Source/Engine/Core/Config/GameSettings.h index 54e38555b..db1f8ac53 100644 --- a/Source/Engine/Core/Config/GameSettings.h +++ b/Source/Engine/Core/Config/GameSettings.h @@ -33,6 +33,12 @@ public: API_FIELD(Attributes="EditorOrder(15), EditorDisplay(\"General\")") String CopyrightNotice; + /// + /// The game version number. Usually in format: MAJOR.MINOR.BUILD.REVISION + /// + API_FIELD(Attributes="EditorOrder(20), EditorDisplay(\"General\")") + String Version; + /// /// The default application icon. 
/// From 892e2e0d1e40a2e5119d6a1a750451306ae04ba5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 19 Jul 2025 16:46:10 +0200 Subject: [PATCH 100/211] Fix fullscreen borderless window on macOS --- Source/Engine/Engine/Linux/LinuxGame.cpp | 3 --- Source/Engine/Engine/Mac/MacGame.cpp | 3 --- Source/Engine/Engine/Windows/WindowsGame.cpp | 2 -- Source/Engine/Platform/Mac/MacWindow.cpp | 5 +++++ 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Source/Engine/Engine/Linux/LinuxGame.cpp b/Source/Engine/Engine/Linux/LinuxGame.cpp index 293fc6092..a3055def9 100644 --- a/Source/Engine/Engine/Linux/LinuxGame.cpp +++ b/Source/Engine/Engine/Linux/LinuxGame.cpp @@ -16,8 +16,6 @@ void LinuxGame::InitMainWindowSettings(CreateWindowSettings& settings) { - // TODO: restore window size and fullscreen mode from the cached local settings saved after previous session - const auto platformSettings = LinuxPlatformSettings::Get(); auto windowMode = platformSettings->WindowMode; @@ -50,7 +48,6 @@ void LinuxGame::InitMainWindowSettings(CreateWindowSettings& settings) settings.HasBorder = windowMode == GameWindowMode::Windowed || windowMode == GameWindowMode::Fullscreen; settings.AllowMaximize = true; settings.AllowMinimize = platformSettings->ResizableWindow; - } bool LinuxGame::Init() diff --git a/Source/Engine/Engine/Mac/MacGame.cpp b/Source/Engine/Engine/Mac/MacGame.cpp index 640816906..aac68d070 100644 --- a/Source/Engine/Engine/Mac/MacGame.cpp +++ b/Source/Engine/Engine/Mac/MacGame.cpp @@ -9,8 +9,6 @@ void MacGame::InitMainWindowSettings(CreateWindowSettings& settings) { - // TODO: restore window size and fullscreen mode from the cached local settings saved after previous session - const auto platformSettings = MacPlatformSettings::Get(); auto windowMode = platformSettings->WindowMode; @@ -41,7 +39,6 @@ void MacGame::InitMainWindowSettings(CreateWindowSettings& settings) settings.HasBorder = windowMode == GameWindowMode::Windowed || windowMode == 
GameWindowMode::Fullscreen; settings.AllowMaximize = true; settings.AllowMinimize = platformSettings->ResizableWindow; - } #endif diff --git a/Source/Engine/Engine/Windows/WindowsGame.cpp b/Source/Engine/Engine/Windows/WindowsGame.cpp index c15a543a4..e0135b9ec 100644 --- a/Source/Engine/Engine/Windows/WindowsGame.cpp +++ b/Source/Engine/Engine/Windows/WindowsGame.cpp @@ -9,8 +9,6 @@ void WindowsGame::InitMainWindowSettings(CreateWindowSettings& settings) { - // TODO: restore window size and fullscreen mode from the cached local settings saved after previous session - const auto platformSettings = WindowsPlatformSettings::Get(); auto windowMode = platformSettings->WindowMode; diff --git a/Source/Engine/Platform/Mac/MacWindow.cpp b/Source/Engine/Platform/Mac/MacWindow.cpp index ebccb5709..b1dc0768f 100644 --- a/Source/Engine/Platform/Mac/MacWindow.cpp +++ b/Source/Engine/Platform/Mac/MacWindow.cpp @@ -715,6 +715,11 @@ MacWindow::MacWindow(const CreateWindowSettings& settings) styleMask |= NSWindowStyleMaskTitled; styleMask &= ~NSWindowStyleMaskFullSizeContentView; } + else + { + styleMask |= NSWindowStyleMaskBorderless; + styleMask &= ~NSWindowStyleMaskTitled; + } const float screenScale = MacPlatform::ScreenScale; frame.origin.x /= screenScale; From 8ac238544795e3c9b203f5c023ffb159c9bbb4c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 22 Jul 2025 22:58:57 +0200 Subject: [PATCH 101/211] Update dependencies for Switch --- Source/Editor/Windows/GameCookerWindow.cs | 44 +++++++++---------- .../Flax.Build/Deps/Dependencies/mono.cs | 3 ++ .../Flax.Build/Deps/Dependencies/nethost.cs | 13 ++++-- .../Flax.Build/Deps/Dependencies/vorbis.cs | 7 +-- Source/Tools/Flax.Build/Deps/Dependency.cs | 7 ++- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/Source/Editor/Windows/GameCookerWindow.cs b/Source/Editor/Windows/GameCookerWindow.cs index 54aec19ab..232b51d8d 100644 --- a/Source/Editor/Windows/GameCookerWindow.cs +++ 
b/Source/Editor/Windows/GameCookerWindow.cs @@ -59,17 +59,11 @@ namespace FlaxEditor.Windows GameCookerWin = win; Selector = platformSelector; - PerPlatformOptions[PlatformType.Windows].Init("Output/Windows", "Windows"); - PerPlatformOptions[PlatformType.XboxOne].Init("Output/XboxOne", "XboxOne"); - PerPlatformOptions[PlatformType.UWP].Init("Output/UWP", "UWP"); - PerPlatformOptions[PlatformType.Linux].Init("Output/Linux", "Linux"); - PerPlatformOptions[PlatformType.PS4].Init("Output/PS4", "PS4"); - PerPlatformOptions[PlatformType.XboxScarlett].Init("Output/XboxScarlett", "XboxScarlett"); - PerPlatformOptions[PlatformType.Android].Init("Output/Android", "Android"); - PerPlatformOptions[PlatformType.Switch].Init("Output/Switch", "Switch"); - PerPlatformOptions[PlatformType.PS5].Init("Output/PS5", "PS5"); - PerPlatformOptions[PlatformType.Mac].Init("Output/Mac", "Mac"); - PerPlatformOptions[PlatformType.iOS].Init("Output/iOS", "iOS"); + foreach (var e in PerPlatformOptions) + { + var str = e.Key.ToString(); + e.Value.Init("Output/" + str, str); + } } [HideInEditor] @@ -196,12 +190,14 @@ namespace FlaxEditor.Windows var label = layout.Label(text, TextAlignment.Center); label.Label.AutoHeight = true; } - + /// /// Used to add platform specific tools if available. /// /// The layout to start the tools at. 
- public virtual void OnCustomToolsLayout(LayoutElementsContainer layout) { } + public virtual void OnCustomToolsLayout(LayoutElementsContainer layout) + { + } public virtual void Build() { @@ -256,7 +252,7 @@ namespace FlaxEditor.Windows if (string.IsNullOrEmpty(sdkPath)) sdkPath = Environment.GetEnvironmentVariable("ANDROID_SDK"); emulatorGroup.Label($"SDK path: {sdkPath}"); - + // AVD and starting emulator var avdGroup = emulatorGroup.Group("AVD Emulator"); avdGroup.Label("Note: Create AVDs using Android Studio."); @@ -273,7 +269,7 @@ namespace FlaxEditor.Windows { if (avdListTree.Children.Count > 0) avdListTree.DisposeChildren(); - + var processStartInfo = new System.Diagnostics.ProcessStartInfo { FileName = Path.Combine(sdkPath, "emulator", "emulator.exe"), @@ -299,7 +295,7 @@ namespace FlaxEditor.Windows }; //processSettings.ShellExecute = true; FlaxEngine.Platform.CreateProcess(ref processSettings); - + var output = new string(processSettings.Output);*/ if (output.Length == 0) { @@ -345,9 +341,9 @@ namespace FlaxEditor.Windows processSettings.ShellExecute = true; FlaxEngine.Platform.CreateProcess(ref processSettings); }; - + emulatorGroup.Space(2); - + // Device var installGroup = emulatorGroup.Group("Install"); installGroup.Panel.IsClosed = false; @@ -391,10 +387,10 @@ namespace FlaxEditor.Windows processSettings.SaveOutput = true; processSettings.ShellExecute = false; FlaxEngine.Platform.CreateProcess(ref processSettings); - + var output = new string(processSettings.Output); */ - + if (output.Length > 0 && !output.Equals("List of devices attached", StringComparison.Ordinal)) { noDevicesLabel.Visible = false; @@ -403,7 +399,7 @@ namespace FlaxEditor.Windows { if (line.Trim().Equals("List of devices attached", StringComparison.Ordinal) || string.IsNullOrEmpty(line.Trim())) continue; - + var tab = line.Split("device "); if (tab.Length < 2) continue; @@ -430,7 +426,7 @@ namespace FlaxEditor.Windows { if (deviceListTree.Selection.Count == 0) return; - + // Get 
built APK at output path string output = StringUtils.ConvertRelativePathToAbsolute(Globals.ProjectFolder, StringUtils.NormalizePath(Output)); if (!Directory.Exists(output)) @@ -438,7 +434,7 @@ namespace FlaxEditor.Windows FlaxEditor.Editor.LogWarning("Can not copy APK because output folder does not exist."); return; } - + var apkFiles = Directory.GetFiles(output, "*.apk"); if (apkFiles.Length == 0) { @@ -457,7 +453,7 @@ namespace FlaxEditor.Windows } apkFilesString += $" \"{file}\""; } - + CreateProcessSettings processSettings = new CreateProcessSettings { FileName = Path.Combine(sdkPath, "platform-tools", "adb.exe"), diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs b/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs index ddf1cc15d..57d2f74fe 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs @@ -18,6 +18,9 @@ namespace Flax.Deps.Dependencies /// class mono : Dependency { + /// + public override bool BuildByDefault => false; // Unused in favor of nethost + /// public override TargetPlatform[] Platforms { diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs b/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs index 5734e1407..11ab39f8d 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs @@ -43,9 +43,6 @@ namespace Flax.Deps.Dependencies } } - /// - public override bool BuildByDefault => false; - private string root; private bool cleanArtifacts; @@ -296,9 +293,19 @@ namespace Flax.Deps.Dependencies { root = options.IntermediateFolder; + // On Windows MAX_PATH=260 might cause some build issues with CMake+Ninja, even when LongPathsEnabled=1 + // To solve this, simply use a drive root folder instead of Deps directory + if (BuildPlatform == TargetPlatform.Windows && root.Length > 30) + { + root = Path.Combine(Path.GetPathRoot(root), "nethost"); + Log.Info($"Using custom rooted build directory: {root} 
(due to path size limit)"); + SetupDirectory(root, false); + } + // Ensure to have dependencies installed Utilities.Run("ninja", "--version", null, null, Utilities.RunOptions.ThrowExceptionOnError); Utilities.Run("cmake", "--version", null, null, Utilities.RunOptions.ThrowExceptionOnError); + Utilities.Run("python", "--version", null, null, Utilities.RunOptions.ThrowExceptionOnError); // Get the source if (!Directory.Exists(Path.Combine(root, ".git"))) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs index 195c0d8cb..45adc9188 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs @@ -428,18 +428,19 @@ namespace Flax.Deps.Dependencies var buildDir = Path.Combine(root, "build"); // Get the source + SetupDirectory(oggRoot, false); CloneGitRepo(root, "https://github.com/xiph/vorbis.git"); GitCheckout(root, "master", "98eddc72d36e3421519d54b101c09b57e4d4d10d"); CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "ogg"), oggRoot, true, true); - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "vorbis"), buildDir, true, true); + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/ogg"), oggRoot, true, true); + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/vorbis"), buildDir, true, true); // Build for Switch SetupDirectory(oggBuildDir, true); RunCmake(oggBuildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); Utilities.Run("cmake", "--build . 
--target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); - Utilities.FileCopy(Path.Combine(GetBinariesFolder(options, platform), "ogg", "include", "ogg", "config_types.h"), Path.Combine(oggRoot, "install", "include", "ogg", "config_types.h")); + Utilities.FileCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/ogg", "include", "ogg", "config_types.h"), Path.Combine(oggRoot, "install", "include", "ogg", "config_types.h")); SetupDirectory(buildDir, true); RunCmake(buildDir, platform, TargetArchitecture.ARM64, string.Format(".. -DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); BuildCmake(buildDir); diff --git a/Source/Tools/Flax.Build/Deps/Dependency.cs b/Source/Tools/Flax.Build/Deps/Dependency.cs index 5ee49a64a..c746cb0ae 100644 --- a/Source/Tools/Flax.Build/Deps/Dependency.cs +++ b/Source/Tools/Flax.Build/Deps/Dependency.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using Flax.Build; using Flax.Build.Platforms; using Flax.Build.Projects.VisualStudio; @@ -305,8 +306,12 @@ namespace Flax.Deps cmdLine = "CMakeLists.txt"; break; case TargetPlatform.Switch: - cmdLine = string.Format("-DCMAKE_TOOLCHAIN_FILE=\"{1}\\Source\\Platforms\\Switch\\Binaries\\Data\\Switch.cmake\" -G \"NMake Makefiles\" -DCMAKE_MAKE_PROGRAM=\"{0}..\\..\\VC\\bin\\nmake.exe\"", Environment.GetEnvironmentVariable("VS140COMNTOOLS"), Globals.EngineRoot); + { + var nmakeSubdir = "bin\\Hostx64\\x64\\nmake.exe"; + var toolset = WindowsPlatform.GetToolsets().First(e => File.Exists(Path.Combine(e.Value, nmakeSubdir))); + cmdLine = string.Format("-DCMAKE_TOOLCHAIN_FILE=\"{1}\\Source\\Platforms\\Switch\\Binaries\\Data\\Switch.cmake\" -G \"NMake Makefiles\" -DCMAKE_MAKE_PROGRAM=\"{0}\"", Path.Combine(toolset.Value, nmakeSubdir), Globals.EngineRoot); break; + } case TargetPlatform.Android: { var ndk = AndroidNdk.Instance.RootPath; From 
735d611de114287719520e1c598a1e9515dae528 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 23 Jul 2025 18:15:52 +0200 Subject: [PATCH 102/211] Fix AOT libs rebuild when corlib was modified to avoid version mismatch --- .../Flax.Build/Build/DotNet/DotNetAOT.cs | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs b/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs index 97c3e6969..b6ad58cfe 100644 --- a/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs +++ b/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs @@ -351,10 +351,10 @@ namespace Flax.Build } // Run compilation - bool failed = false; - bool validCache = true; + bool failed = false, validCache = true, coreLibDirty = false; string platformToolsPath; { + // Get platform tools var options = new Toolchain.CSharpOptions { Action = Toolchain.CSharpOptions.ActionTypes.GetPlatformTools, @@ -365,6 +365,21 @@ namespace Flax.Build buildToolchain.CompileCSharp(ref options); platformToolsPath = options.PlatformToolsPath; } + { + // Check if core library has been modified + var options = new Toolchain.CSharpOptions + { + Action = Toolchain.CSharpOptions.ActionTypes.GetOutputFiles, + InputFiles = new List() { coreLibPaths[0] }, + OutputFiles = new List(), + AssembliesPath = aotAssembliesPath, + ClassLibraryPath = dotnetLibPath, + PlatformToolsPath = platformToolsPath, + EnableDebugSymbols = false, + EnableToolDebug = dotnetAotDebug, + }; + buildToolchain.CompileCSharp(ref options); + coreLibDirty = File.GetLastWriteTime(options.InputFiles[0]) > File.GetLastWriteTime(options.OutputFiles[0]); if (!Directory.Exists(platformToolsPath)) throw new Exception("Missing platform tools " + platformToolsPath); Log.Info("Platform tools found in: " + platformToolsPath); @@ -393,7 +408,8 @@ namespace Flax.Build buildToolchain.CompileCSharp(ref options); // Skip if output is already generated and is newer than a source assembly - if 
(!File.Exists(options.OutputFiles[0]) || File.GetLastWriteTime(assemblyPath) > File.GetLastWriteTime(options.OutputFiles[0])) + // Force run AOT if corelib has been modified (all libs depend on its GUID) + if (!File.Exists(options.OutputFiles[0]) || File.GetLastWriteTime(assemblyPath) > File.GetLastWriteTime(options.OutputFiles[0]) || coreLibDirty) { if (dotnetAotDebug) { From 6f172f8f2cbc81d37ed21a70386b5347ab7b1bac Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 23 Jul 2025 18:16:32 +0200 Subject: [PATCH 103/211] Fix crash when prefab root object failed to deserialize --- Source/Engine/Level/SceneObjectsFactory.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Level/SceneObjectsFactory.cpp b/Source/Engine/Level/SceneObjectsFactory.cpp index b0f968808..919c0eed2 100644 --- a/Source/Engine/Level/SceneObjectsFactory.cpp +++ b/Source/Engine/Level/SceneObjectsFactory.cpp @@ -721,8 +721,15 @@ void SceneObjectsFactory::SynchronizePrefabInstances(Context& context, PrefabSyn if (instance.FixRootParent && JsonTools::GetGuidIfValid(prefabStartParentId, prefabStartData, "ParentID")) { auto* root = data.SceneObjects[instance.RootIndex]; - const auto rootParent = Scripting::FindObject(prefabStartParentId); - root->SetParent(rootParent, false); + if (root) + { + const auto rootParent = Scripting::FindObject(prefabStartParentId); + root->SetParent(rootParent, false); + } + else + { + LOG(Warning, "Missing root actor at index {} for prefab instance at actor {} ({})", instance.RootIndex, instance.RootId, instance.Prefab->ToString()); + } } } From c3cf8fba98fc7584bcc1fde7e117fa4dbd96b1da Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 23 Jul 2025 18:17:10 +0200 Subject: [PATCH 104/211] Optimize managed code --- Source/Engine/Engine/DebugLogHandler.cs | 2 +- Source/Engine/Scripting/Runtime/DotNet.cpp | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Engine/DebugLogHandler.cs 
b/Source/Engine/Engine/DebugLogHandler.cs index 2ca7be0b1..634060d5b 100644 --- a/Source/Engine/Engine/DebugLogHandler.cs +++ b/Source/Engine/Engine/DebugLogHandler.cs @@ -40,7 +40,7 @@ namespace FlaxEngine { if (message == null) return; -#if BUILD_RELEASE +#if BUILD_RELEASE || !FLAX_EDITOR string stackTrace = null; #else string stackTrace = Environment.StackTrace; diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 5b67670e5..13195f886 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -525,16 +525,12 @@ void MCore::GCHandle::Free(const MGCHandle& handle) void MCore::GC::Collect() { - PROFILE_CPU(); - static void* GCCollectPtr = GetStaticMethodPointer(TEXT("GCCollect")); - CallStaticMethod(GCCollectPtr, MaxGeneration(), (int)MGCCollectionMode::Default, true, false); + Collect(MaxGeneration(), MGCCollectionMode::Default, true, false); } void MCore::GC::Collect(int32 generation) { - PROFILE_CPU(); - static void* GCCollectPtr = GetStaticMethodPointer(TEXT("GCCollect")); - CallStaticMethod(GCCollectPtr, generation, (int)MGCCollectionMode::Default, true, false); + Collect(generation, MGCCollectionMode::Default, true, false); } void MCore::GC::Collect(int32 generation, MGCCollectionMode collectionMode, bool blocking, bool compacting) From 2550b9f88ede1f52f545d3b17262acc93cb074cd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 23 Jul 2025 22:15:38 +0200 Subject: [PATCH 105/211] Fix missing `TypeConverter` support in AOT build --- Source/Platforms/DotNet/AOT/Newtonsoft.Json.dll | 4 ++-- Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Platforms/DotNet/AOT/Newtonsoft.Json.dll b/Source/Platforms/DotNet/AOT/Newtonsoft.Json.dll index 83ecb3bf5..83d91e664 100644 --- a/Source/Platforms/DotNet/AOT/Newtonsoft.Json.dll +++ b/Source/Platforms/DotNet/AOT/Newtonsoft.Json.dll @@ 
-1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8aadc5977413af81d4843f0ac6a417ec74d77e7224a06115ada40860ddd79608 -size 541696 +oid sha256:c89720565cc1ab39345b7287cde6797596d3fbdcd7c5d816a67699897c3408a2 +size 542720 diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs b/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs index 36e0fd44e..495de4734 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs @@ -69,7 +69,7 @@ namespace Flax.Deps.Dependencies Utilities.ReplaceInFile(Path.Combine(root, "Src", "Newtonsoft.Json", "Newtonsoft.Json.csproj"), "HAVE_DYNAMIC;", ";"); Utilities.ReplaceInFile(Path.Combine(root, "Src", "Newtonsoft.Json", "Newtonsoft.Json.csproj"), "HAVE_EXPRESSIONS;", ";"); Utilities.ReplaceInFile(Path.Combine(root, "Src", "Newtonsoft.Json", "Newtonsoft.Json.csproj"), "HAVE_REGEX;", ";"); - Utilities.ReplaceInFile(Path.Combine(root, "Src", "Newtonsoft.Json", "Newtonsoft.Json.csproj"), "HAVE_TYPE_DESCRIPTOR;", ";"); + //Utilities.ReplaceInFile(Path.Combine(root, "Src", "Newtonsoft.Json", "Newtonsoft.Json.csproj"), "HAVE_TYPE_DESCRIPTOR;", ";"); Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, buildPlatform); { var platform = "AOT"; From 751d179cdbcd872789ecddc65573fe75e33247e1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 24 Jul 2025 11:34:48 +0200 Subject: [PATCH 106/211] Fix missing brace --- Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs b/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs index b6ad58cfe..e421b6999 100644 --- a/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs +++ b/Source/Tools/Flax.Build/Build/DotNet/DotNetAOT.cs @@ -380,6 +380,7 @@ namespace Flax.Build }; buildToolchain.CompileCSharp(ref options); coreLibDirty = File.GetLastWriteTime(options.InputFiles[0]) > 
File.GetLastWriteTime(options.OutputFiles[0]); + } if (!Directory.Exists(platformToolsPath)) throw new Exception("Missing platform tools " + platformToolsPath); Log.Info("Platform tools found in: " + platformToolsPath); From af0ea65d78213819c6bc575db10b4f15cf49658a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 24 Jul 2025 12:20:09 +0200 Subject: [PATCH 107/211] Add option to disable pipeline cache on specific Vulkan platforms --- Source/Engine/GraphicsDevice/Vulkan/Config.h | 4 ++++ .../Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/Source/Engine/GraphicsDevice/Vulkan/Config.h b/Source/Engine/GraphicsDevice/Vulkan/Config.h index 32e69216f..1f30c301a 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/Config.h +++ b/Source/Engine/GraphicsDevice/Vulkan/Config.h @@ -33,6 +33,10 @@ #define VULKAN_USE_DEBUG_LAYER GPU_ENABLE_DIAGNOSTICS #define VULKAN_USE_DEBUG_DATA (GPU_ENABLE_DIAGNOSTICS && COMPILE_WITH_DEV_ENV) +#ifndef VULKAN_USE_PIPELINE_CACHE +#define VULKAN_USE_PIPELINE_CACHE 1 +#endif + #ifndef VULKAN_USE_VALIDATION_CACHE #ifdef VK_EXT_validation_cache #define VULKAN_USE_VALIDATION_CACHE VK_EXT_validation_cache diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 8c77b77d3..96b139bac 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1498,6 +1498,8 @@ PixelFormat GPUDeviceVulkan::GetClosestSupportedPixelFormat(PixelFormat format, return format; } +#if VULKAN_USE_PIPELINE_CACHE + void GetPipelineCachePath(String& path) { #if USE_EDITOR @@ -1507,8 +1509,11 @@ void GetPipelineCachePath(String& path) #endif } +#endif + bool GPUDeviceVulkan::SavePipelineCache() { +#if VULKAN_USE_PIPELINE_CACHE if (PipelineCache == VK_NULL_HANDLE || !vkGetPipelineCacheData) return false; @@ -1529,6 +1534,9 @@ bool GPUDeviceVulkan::SavePipelineCache() 
String path; GetPipelineCachePath(path); return File::WriteAllBytes(path, data); +#else + return false; +#endif } #if VULKAN_USE_VALIDATION_CACHE @@ -1975,6 +1983,7 @@ bool GPUDeviceVulkan::Init() UniformBufferUploader = New(this); DescriptorPoolsManager = New(this); MainContext = New(this, GraphicsQueue); +#if VULKAN_USE_PIPELINE_CACHE if (vkCreatePipelineCache) { Array data; @@ -1992,6 +2001,7 @@ bool GPUDeviceVulkan::Init() const VkResult result = vkCreatePipelineCache(Device, &pipelineCacheCreateInfo, nullptr, &PipelineCache); LOG_VULKAN_RESULT(result); } +#endif #if VULKAN_USE_VALIDATION_CACHE if (OptionalDeviceExtensions.HasEXTValidationCache && vkCreateValidationCacheEXT && vkDestroyValidationCacheEXT) { @@ -2088,6 +2098,7 @@ void GPUDeviceVulkan::Dispose() DeferredDeletionQueue.ReleaseResources(true); vmaDestroyAllocator(Allocator); Allocator = VK_NULL_HANDLE; +#if VULKAN_USE_PIPELINE_CACHE if (PipelineCache != VK_NULL_HANDLE) { if (SavePipelineCache()) @@ -2095,6 +2106,7 @@ void GPUDeviceVulkan::Dispose() vkDestroyPipelineCache(Device, PipelineCache, nullptr); PipelineCache = VK_NULL_HANDLE; } +#endif #if VULKAN_USE_VALIDATION_CACHE if (ValidationCache != VK_NULL_HANDLE) { From 69585618ed9dac22f9210d6d7b531d06df007446 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 24 Jul 2025 12:27:28 +0200 Subject: [PATCH 108/211] Optimize scripting objects spawning on AOT platforms --- Source/Engine/Scripting/Runtime/DotNet.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 13195f886..64fec7b04 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -708,6 +708,13 @@ void MCore::ScriptingObject::SetInternalValues(MClass* klass, MObject* object, v #if PLATFORM_DESKTOP && !USE_MONO_AOT static void* ScriptingObjectSetInternalValuesPtr = GetStaticMethodPointer(TEXT("ScriptingObjectSetInternalValues")); 
CallStaticMethod(ScriptingObjectSetInternalValuesPtr, object, unmanagedPtr, id); +#elif !USE_EDITOR + static MField* monoUnmanagedPtrField = ::ScriptingObject::GetStaticClass()->GetField("__unmanagedPtr"); + static MField* monoIdField = ::ScriptingObject::GetStaticClass()->GetField("__internalId"); + if (monoUnmanagedPtrField) + monoUnmanagedPtrField->SetValue(object, &unmanagedPtr); + if (id != nullptr && monoIdField) + monoIdField->SetValue(object, (void*)id); #else const MField* monoUnmanagedPtrField = klass->GetField("__unmanagedPtr"); if (monoUnmanagedPtrField) From acc17776384837b243f429ced53e1da47aff2693 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 24 Jul 2025 12:38:52 +0200 Subject: [PATCH 109/211] Add shared utility for command line arguments merging --- Source/Engine/Main/Default/main.cpp | 17 ++----------- Source/Engine/Main/MainUtil.h | 37 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 Source/Engine/Main/MainUtil.h diff --git a/Source/Engine/Main/Default/main.cpp b/Source/Engine/Main/Default/main.cpp index 1791f6ce1..ba9db262a 100644 --- a/Source/Engine/Main/Default/main.cpp +++ b/Source/Engine/Main/Default/main.cpp @@ -3,24 +3,11 @@ #if PLATFORM_LINUX || PLATFORM_MAC || PLATFORM_IOS #include "Engine/Engine/Engine.h" -#include "Engine/Core/Types/StringBuilder.h" +#include "Engine/Main/MainUtil.h" int main(int argc, char* argv[]) { - // Join the arguments - StringBuilder args; - for (int i = 1; i < argc; i++) - { - String arg; - arg.SetUTF8(argv[i], StringUtils::Length(argv[i])); - args.Append(arg); - - if (i + 1 != argc) - args.Append(TEXT(' ')); - } - args.Append(TEXT('\0')); - - return Engine::Main(*args); + return Engine::Main(GetCommandLine(argc, argv)); } #endif diff --git a/Source/Engine/Main/MainUtil.h b/Source/Engine/Main/MainUtil.h new file mode 100644 index 000000000..668c69c3e --- /dev/null +++ b/Source/Engine/Main/MainUtil.h @@ -0,0 +1,37 @@ +// Copyright (c) Wojciech Figat. 
All rights reserved. + +#pragma once + +#include "Engine/Platform/StringUtils.h" + +const Char* GetCommandLine(int argc, char* argv[]) +{ + int32 length = 0; + for (int i = 1; argc > 1 && i < argc; i++) + { + length += StringUtils::Length((const char*)argv[i]); + if (i + 1 != argc) + length++; + } + const Char* cmdLine; + if (length != 0) + { + Char* str = (Char*)malloc(length * sizeof(Char)); + cmdLine = str; + for (int i = 1; i < argc; i++) + { + length = StringUtils::Length((const char*)argv[i]); + int32 strLen = 0; + StringUtils::ConvertANSI2UTF16(argv[i], str, length, strLen); + str += strLen; + if (i + 1 != argc) + *str++ = TEXT(' '); + } + *str = TEXT('\0'); + } + else + { + cmdLine = TEXT(""); + } + return cmdLine; +} From 2af266727fa15259dfa5468f9351afda8210f343 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 26 Jul 2025 00:12:10 +0200 Subject: [PATCH 110/211] Add utility names to gamepad buttons doc and add usefull profiler event for mono --- Source/Engine/Input/Enums.h | 12 ++++++------ Source/Engine/Scripting/Runtime/DotNet.cpp | 5 +++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Input/Enums.h b/Source/Engine/Input/Enums.h index 5310e5c14..eb2d03e75 100644 --- a/Source/Engine/Input/Enums.h +++ b/Source/Engine/Input/Enums.h @@ -150,32 +150,32 @@ API_ENUM() enum class GamepadButton Back = 6, /// - /// Left thumbstick button. + /// Left thumbstick button (L). /// LeftThumb = 7, /// - /// Right thumbstick button. + /// Right thumbstick button (R). /// RightThumb = 8, /// - /// Left shoulder button. + /// Left shoulder button (LB). /// LeftShoulder = 9, /// - /// Right shoulder button. + /// Right shoulder button (RB). /// RightShoulder = 10, /// - /// Left trigger button. + /// Left trigger button (LT). /// LeftTrigger = 11, /// - /// Right trigger button. + /// Right trigger button (RT). 
/// RightTrigger = 12, diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 64fec7b04..3f3e33135 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -2009,6 +2009,9 @@ void OnPrintErrorCallback(const char* string, mono_bool isStdout) static MonoAssembly* OnMonoAssemblyLoad(const char* aname) { + PROFILE_CPU(); + ZoneText(aname, StringUtils::Length(aname)); + // Find assembly file const String name(aname); #if DOTNET_HOST_MONO_DEBUG @@ -2080,6 +2083,8 @@ static void OnMonoFreeAOT(MonoAssembly* assembly, int size, void* user_data, voi static void* OnMonoDlFallbackLoad(const char* name, int flags, char** err, void* user_data) { + PROFILE_CPU(); + ZoneText(name, StringUtils::Length(name)); const String fileName = StringUtils::GetFileName(String(name)); #if DOTNET_HOST_MONO_DEBUG LOG(Info, "Loading dynamic library {0}", fileName); From 8c51ea511a578b43e163f742ab4dded2a143c92a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 27 Jul 2025 11:07:58 +0200 Subject: [PATCH 111/211] Fix incorrect documentation comments on HashSet/Dictionary item removal --- Source/Engine/Core/Collections/Dictionary.h | 4 ++-- Source/Engine/Core/Collections/HashSet.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Core/Collections/Dictionary.h b/Source/Engine/Core/Collections/Dictionary.h index bdab15bbe..e2f5f0ed6 100644 --- a/Source/Engine/Core/Collections/Dictionary.h +++ b/Source/Engine/Core/Collections/Dictionary.h @@ -558,7 +558,7 @@ public: /// Removes element with a specified key. /// /// The element key to remove. - /// True if cannot remove item from the collection because cannot find it, otherwise false. + /// True if item was removed from collection, otherwise false. template bool Remove(const KeyComparableType& key) { @@ -578,7 +578,7 @@ public: /// Removes element at specified iterator. /// /// The element iterator to remove. 
- /// True if cannot remove item from the collection because cannot find it, otherwise false. + /// True if item was removed from collection, otherwise false. bool Remove(const Iterator& i) { ASSERT(i._collection == this); diff --git a/Source/Engine/Core/Collections/HashSet.h b/Source/Engine/Core/Collections/HashSet.h index 032a407db..b0fa8d7b5 100644 --- a/Source/Engine/Core/Collections/HashSet.h +++ b/Source/Engine/Core/Collections/HashSet.h @@ -451,7 +451,7 @@ public: /// Removes the specified element from the collection. /// /// The element to remove. - /// True if cannot remove item from the collection because cannot find it, otherwise false. + /// True if item was removed from collection, otherwise false. template bool Remove(const ItemType& item) { @@ -471,7 +471,7 @@ public: /// Removes an element at specified iterator position. /// /// The element iterator to remove. - /// True if cannot remove item from the collection because cannot find it, otherwise false. + /// True if item was removed from collection, otherwise false. 
bool Remove(const Iterator& i) { ASSERT(i._collection == this); From 354eaac56c7cf0716330d85cc835ca2663880559 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 27 Jul 2025 23:20:06 +0200 Subject: [PATCH 112/211] Add optional testing utility for malloc/free operations --- Source/Engine/Platform/Base/PlatformBase.cpp | 20 +++++++ Source/Engine/Utilities/MallocTester.h | 60 ++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 Source/Engine/Utilities/MallocTester.h diff --git a/Source/Engine/Platform/Base/PlatformBase.cpp b/Source/Engine/Platform/Base/PlatformBase.cpp index 0bde861c7..e472f7e1a 100644 --- a/Source/Engine/Platform/Base/PlatformBase.cpp +++ b/Source/Engine/Platform/Base/PlatformBase.cpp @@ -206,6 +206,16 @@ void PlatformBase::Exit() #if COMPILE_WITH_PROFILER +#define TEST_MALLOC 0 +#if TEST_MALLOC +#include "Engine/Utilities/MallocTester.h" +MallocTester& GetMallocTester() +{ + static MallocTester MallocTest; + return MallocTest; +} +#endif + #define TRACY_ENABLE_MEMORY (TRACY_ENABLE) void PlatformBase::OnMemoryAlloc(void* ptr, uint64 size) @@ -213,6 +223,11 @@ void PlatformBase::OnMemoryAlloc(void* ptr, uint64 size) if (!ptr) return; +#if TEST_MALLOC + if (GetMallocTester().OnMalloc(ptr, size)) + LOG(Fatal, "Invalid mallloc detected for pointer 0x{0:x} ({1} bytes)!\n{2}", (uintptr)ptr, size, Platform::GetStackTrace(3)); +#endif + #if TRACY_ENABLE_MEMORY // Track memory allocation in Tracy //tracy::Profiler::MemAlloc(ptr, (size_t)size, false); @@ -248,6 +263,11 @@ void PlatformBase::OnMemoryFree(void* ptr) // Track memory allocation in Tracy tracy::Profiler::MemFree(ptr, false); #endif + +#if TEST_MALLOC + if (GetMallocTester().OnFree(ptr)) + LOG(Fatal, "Invalid free detected for pointer 0x{0:x}!\n{1}", (uintptr)ptr, Platform::GetStackTrace(3)); +#endif } #endif diff --git a/Source/Engine/Utilities/MallocTester.h b/Source/Engine/Utilities/MallocTester.h new file mode 100644 index 000000000..e871a3d51 --- /dev/null +++ 
b/Source/Engine/Utilities/MallocTester.h @@ -0,0 +1,60 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Core/Log.h" +#include "Engine/Core/Types/BaseTypes.h" +#include "Engine/Core/Collections/HashSet.h" +#include "Engine/Platform/Platform.h" +#include "Engine/Platform/CriticalSection.h" + +/// +/// The utility for finding double-free or invalid malloc calls. +/// +class MallocTester +{ +private: + bool _entry = true; + CriticalSection _locker; + HashSet _allocs; + +public: + bool OnMalloc(void* ptr, uint64 size) + { + if (ptr == nullptr) + return false; + bool failed = false; + _locker.Lock(); + if (_entry) + { + _entry = false; + if (_allocs.Contains(ptr)) + { + failed = true; + } + _allocs.Add(ptr); + _entry = true; + } + _locker.Unlock(); + return failed; + } + + bool OnFree(void* ptr) + { + if (ptr == nullptr) + return false; + bool failed = false; + _locker.Lock(); + if (_entry) + { + _entry = false; + if (!_allocs.Remove(ptr)) + { + failed = true; + } + _entry = true; + } + _locker.Unlock(); + return failed; + } +}; From a7ffd9e57f9b59652d485eba9845f60f45afb08e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 28 Jul 2025 15:15:09 +0200 Subject: [PATCH 113/211] Add more profiler events --- Source/Engine/Audio/AudioSource.cpp | 3 +++ Source/Engine/Content/Cache/AssetsCache.cpp | 1 + Source/Engine/Scripting/Scripting.cpp | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index 2a061ad48..1a7ef4768 100644 --- a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -120,6 +120,7 @@ void AudioSource::Play() auto state = _state; if (state == States::Playing) return; + PROFILE_CPU(); if (Clip == nullptr || Clip->WaitForLoaded()) { LOG(Warning, "Cannot play audio source without a clip ({0})", GetNamePath()); @@ -188,6 +189,7 @@ void AudioSource::Stop() { if (_state == States::Stopped) return; + 
PROFILE_CPU(); _state = States::Stopped; _isActuallyPlayingSth = false; @@ -386,6 +388,7 @@ bool AudioSource::IntersectsItself(const Ray& ray, Real& distance, Vector3& norm void AudioSource::Update() { PROFILE_CPU(); + PROFILE_MEM(Audio); // Update the velocity const Vector3 pos = GetPosition(); diff --git a/Source/Engine/Content/Cache/AssetsCache.cpp b/Source/Engine/Content/Cache/AssetsCache.cpp index eef5a777d..bd1474afa 100644 --- a/Source/Engine/Content/Cache/AssetsCache.cpp +++ b/Source/Engine/Content/Cache/AssetsCache.cpp @@ -19,6 +19,7 @@ void AssetsCache::Init() { + PROFILE_CPU(); Entry e; int32 count; Stopwatch stopwatch; diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index a17de075b..addd3bf5f 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -206,12 +206,12 @@ bool ScriptingService::Init() // Load assemblies if (Scripting::Load()) { - LOG(Fatal, "Scripting Engine initialization failed."); + LOG(Fatal, "Scripting initialization failed."); return true; } stopwatch.Stop(); - LOG(Info, "Scripting Engine initializated! (time: {0}ms)", stopwatch.GetMilliseconds()); + LOG(Info, "Scripting initializated! 
(time: {0}ms)", stopwatch.GetMilliseconds()); return false; } From a00ffe6ec3a5d13ce2ac5590895176c029c18250 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 28 Jul 2025 18:33:05 +0200 Subject: [PATCH 114/211] Add `Platform::Yield` --- Source/Engine/Platform/Android/AndroidPlatform.cpp | 5 +++++ Source/Engine/Platform/Android/AndroidPlatform.h | 1 + Source/Engine/Platform/Apple/ApplePlatform.cpp | 5 +++++ Source/Engine/Platform/Apple/ApplePlatform.h | 1 + Source/Engine/Platform/Base/PlatformBase.h | 7 ++++++- Source/Engine/Platform/Linux/LinuxPlatform.cpp | 5 +++++ Source/Engine/Platform/Linux/LinuxPlatform.h | 1 + Source/Engine/Platform/Win32/IncludeWindowsHeaders.h | 1 + Source/Engine/Platform/Win32/Win32Platform.cpp | 5 +++++ Source/Engine/Platform/Win32/Win32Platform.h | 1 + Source/Engine/Renderer/RenderListBuffer.h | 2 +- Source/Engine/Threading/ConcurrentSystemLocker.cpp | 4 ++-- 12 files changed, 34 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Platform/Android/AndroidPlatform.cpp b/Source/Engine/Platform/Android/AndroidPlatform.cpp index 2f8e442ba..d78b716bf 100644 --- a/Source/Engine/Platform/Android/AndroidPlatform.cpp +++ b/Source/Engine/Platform/Android/AndroidPlatform.cpp @@ -748,6 +748,11 @@ void AndroidPlatform::Sleep(int32 milliseconds) usleep(milliseconds * 1000); } +void AndroidPlatform::Yield() +{ + pthread_yield(); +} + double AndroidPlatform::GetTimeSeconds() { struct timespec ts; diff --git a/Source/Engine/Platform/Android/AndroidPlatform.h b/Source/Engine/Platform/Android/AndroidPlatform.h index 4bae040cc..6153fa951 100644 --- a/Source/Engine/Platform/Android/AndroidPlatform.h +++ b/Source/Engine/Platform/Android/AndroidPlatform.h @@ -91,6 +91,7 @@ public: static void SetThreadPriority(ThreadPriority priority); static void SetThreadAffinityMask(uint64 affinityMask); static void Sleep(int32 milliseconds); + static void Yield(); static double GetTimeSeconds(); static uint64 GetTimeCycles(); FORCE_INLINE static uint64 
GetClockFrequency() diff --git a/Source/Engine/Platform/Apple/ApplePlatform.cpp b/Source/Engine/Platform/Apple/ApplePlatform.cpp index c6c689195..8f8a0ba61 100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.cpp +++ b/Source/Engine/Platform/Apple/ApplePlatform.cpp @@ -293,6 +293,11 @@ void ApplePlatform::Sleep(int32 milliseconds) usleep(milliseconds * 1000); } +void ApplePlatform::Yield() +{ + pthread_yield(); +} + double ApplePlatform::GetTimeSeconds() { return SecondsPerCycle * mach_absolute_time(); diff --git a/Source/Engine/Platform/Apple/ApplePlatform.h b/Source/Engine/Platform/Apple/ApplePlatform.h index 003cec91a..04af9757b 100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.h +++ b/Source/Engine/Platform/Apple/ApplePlatform.h @@ -75,6 +75,7 @@ public: static void SetThreadPriority(ThreadPriority priority); static void SetThreadAffinityMask(uint64 affinityMask); static void Sleep(int32 milliseconds); + static void Yield(); static double GetTimeSeconds(); static uint64 GetTimeCycles(); static uint64 GetClockFrequency(); diff --git a/Source/Engine/Platform/Base/PlatformBase.h b/Source/Engine/Platform/Base/PlatformBase.h index 40245a3fc..44a0c8415 100644 --- a/Source/Engine/Platform/Base/PlatformBase.h +++ b/Source/Engine/Platform/Base/PlatformBase.h @@ -436,11 +436,16 @@ public: static void SetThreadAffinityMask(uint64 affinityMask) = delete; /// - /// Suspends the execution of the current thread until the time-out interval elapses + /// Suspends the execution of the current thread until the time-out interval elapses. /// /// The time interval for which execution is to be suspended, in milliseconds. static void Sleep(int32 milliseconds) = delete; + /// + /// Yields the execution of the current thread to another thread that is ready to run on the current processor. + /// + static void Yield() = delete; + public: /// /// Gets the current time in seconds. 
diff --git a/Source/Engine/Platform/Linux/LinuxPlatform.cpp b/Source/Engine/Platform/Linux/LinuxPlatform.cpp index 4b55f8bd5..ce05bf361 100644 --- a/Source/Engine/Platform/Linux/LinuxPlatform.cpp +++ b/Source/Engine/Platform/Linux/LinuxPlatform.cpp @@ -1836,6 +1836,11 @@ void LinuxPlatform::Sleep(int32 milliseconds) usleep(milliseconds * 1000); } +void LinuxPlatform::Yield() +{ + pthread_yield(); +} + double LinuxPlatform::GetTimeSeconds() { struct timespec ts; diff --git a/Source/Engine/Platform/Linux/LinuxPlatform.h b/Source/Engine/Platform/Linux/LinuxPlatform.h index 98d9d2976..071566f41 100644 --- a/Source/Engine/Platform/Linux/LinuxPlatform.h +++ b/Source/Engine/Platform/Linux/LinuxPlatform.h @@ -106,6 +106,7 @@ public: static void SetThreadPriority(ThreadPriority priority); static void SetThreadAffinityMask(uint64 affinityMask); static void Sleep(int32 milliseconds); + static void Yield(); static double GetTimeSeconds(); static uint64 GetTimeCycles(); FORCE_INLINE static uint64 GetClockFrequency() diff --git a/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h b/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h index 4d4290866..1e4e8ff0d 100644 --- a/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h +++ b/Source/Engine/Platform/Win32/IncludeWindowsHeaders.h @@ -68,6 +68,7 @@ #undef CreateMutex #undef DrawState #undef LoadLibrary +#undef Yield #undef GetEnvironmentVariable #undef SetEnvironmentVariable diff --git a/Source/Engine/Platform/Win32/Win32Platform.cpp b/Source/Engine/Platform/Win32/Win32Platform.cpp index d32cb1249..5eab78205 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.cpp +++ b/Source/Engine/Platform/Win32/Win32Platform.cpp @@ -400,6 +400,11 @@ void Win32Platform::Sleep(int32 milliseconds) WaitForSingleObject(timer, INFINITE); } +void Win32Platform::Yield() +{ + SwitchToThread(); +} + double Win32Platform::GetTimeSeconds() { LARGE_INTEGER counter; diff --git a/Source/Engine/Platform/Win32/Win32Platform.h 
b/Source/Engine/Platform/Win32/Win32Platform.h index 36d982bf7..ca404ae80 100644 --- a/Source/Engine/Platform/Win32/Win32Platform.h +++ b/Source/Engine/Platform/Win32/Win32Platform.h @@ -100,6 +100,7 @@ public: static void SetThreadPriority(ThreadPriority priority); static void SetThreadAffinityMask(uint64 affinityMask); static void Sleep(int32 milliseconds); + static void Yield(); static double GetTimeSeconds(); static uint64 GetTimeCycles(); static uint64 GetClockFrequency(); diff --git a/Source/Engine/Renderer/RenderListBuffer.h b/Source/Engine/Renderer/RenderListBuffer.h index bf1ea2cb0..90a05d7ac 100644 --- a/Source/Engine/Renderer/RenderListBuffer.h +++ b/Source/Engine/Renderer/RenderListBuffer.h @@ -324,7 +324,7 @@ private: // Wait for all threads to stop adding items before resizing can happen RETRY: while (Platform::AtomicRead(&_threadsAdding)) - Platform::Sleep(0); + Platform::Yield(); // Thread-safe resizing _locker.Lock(); diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.cpp b/Source/Engine/Threading/ConcurrentSystemLocker.cpp index cafc3dadb..4b64a12ac 100644 --- a/Source/Engine/Threading/ConcurrentSystemLocker.cpp +++ b/Source/Engine/Threading/ConcurrentSystemLocker.cpp @@ -39,7 +39,7 @@ RETRY: { // Someone else is doing opposite operation so wait for it's end // TODO: use ConditionVariable+CriticalSection to prevent active-waiting - Platform::Sleep(0); + Platform::Yield(); goto RETRY; } @@ -47,7 +47,7 @@ RETRY: if (exclusively && Platform::AtomicRead(thisCounter) != 0) { // Someone else is doing opposite operation so wait for it's end - Platform::Sleep(0); + Platform::Yield(); goto RETRY; } From 0bfd38e065747646f35c323bdcb0cac8f6a0a784 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 28 Jul 2025 18:34:29 +0200 Subject: [PATCH 115/211] Fix compilation on platforms without cloth sim --- Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index 99b54e8fc..bacd976a1 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -26,6 +26,7 @@ #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Serialization/WriteStream.h" +#include "Engine/Threading/JobSystem.h" #include #include #include @@ -45,7 +46,6 @@ #endif #if WITH_CLOTH #include "Engine/Physics/Actors/Cloth.h" -#include "Engine/Threading/JobSystem.h" #include "Engine/Threading/Threading.h" #include #include From ad6764e6d79c3ad30786dac7aa266a991d50634c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 28 Jul 2025 18:35:37 +0200 Subject: [PATCH 116/211] Fix `Asset.WaitForLoaded` to yield or sleep thread for proper multithreading on some platforms --- Source/Engine/Content/Content.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Source/Engine/Content/Content.cpp b/Source/Engine/Content/Content.cpp index 48e7b6c1f..6742d3490 100644 --- a/Source/Engine/Content/Content.cpp +++ b/Source/Engine/Content/Content.cpp @@ -1107,11 +1107,21 @@ void Content::WaitForTask(ContentLoadTask* loadingTask, double timeoutInMillisec const double timeoutInSeconds = timeoutInMilliseconds * 0.001; const double startTime = Platform::GetTimeSeconds(); + int32 loopCounter = 0; Task* task = loadingTask; Array> localQueue; #define CHECK_CONDITIONS() (!Engine::ShouldExit() && (timeoutInSeconds <= 0.0 || Platform::GetTimeSeconds() - startTime < timeoutInSeconds)) do { + // Give opportunity for other threads to use the current core + if (loopCounter == 0) + ; // First run is fast + else if (loopCounter < 10) + Platform::Yield(); + else + Platform::Sleep(1); + loopCounter++; + // Try to execute content tasks while (task->IsQueued() && CHECK_CONDITIONS()) { From 17c0892ff1bd51c5c20b541206ad45a97a5c3de8 Mon Sep 17 00:00:00 2001 From: Wojtek 
Figat Date: Mon, 28 Jul 2025 23:08:26 +0200 Subject: [PATCH 117/211] Add debug name for PSO catching on D3D12/Vulkan during profiling incl. Development builds --- Source/Engine/Graphics/GPUDevice.cpp | 33 +++++++++++++- Source/Engine/Graphics/GPUPipelineState.h | 5 ++- Source/Engine/Graphics/Shaders/GPUShader.cpp | 19 ++++++++ .../Graphics/Shaders/GPUShaderProgram.h | 8 ++++ .../DirectX/DX12/GPUPipelineStateDX12.cpp | 45 +++++-------------- .../DirectX/DX12/GPUShaderDX12.cpp | 7 +++ .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 8 ++-- .../Vulkan/GPUPipelineStateVulkan.cpp | 17 ++++--- 8 files changed, 95 insertions(+), 47 deletions(-) diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index 18b9cdffc..d7f64de45 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -75,10 +75,39 @@ GPUPipelineState::GPUPipelineState() { } +#if !BUILD_RELEASE + +void GPUPipelineState::GetDebugName(DebugName& name) const +{ +#define GET_NAME(e) \ + if (DebugDesc.e) \ + { \ + GPUShaderProgram::DebugName n; \ + DebugDesc.e->GetDebugName(n); \ + name.Add(n.Get(), n.Count() - 1); \ + name.Add('+'); \ + } + GET_NAME(VS); +#if GPU_ALLOW_TESSELLATION_SHADERS + GET_NAME(HS); + GET_NAME(DS); +#endif +#if GPU_ALLOW_GEOMETRY_SHADERS + GET_NAME(GS); +#endif + GET_NAME(PS); +#undef GET_NAME + if (name.Count() != 0 && name[name.Count() - 1] == '+') + name.RemoveLast(); + name.Add('\0'); +} + +#endif + bool GPUPipelineState::Init(const Description& desc) { - // Cache description in debug builds -#if BUILD_DEBUG + // Cache description in development builds +#if !BUILD_RELEASE DebugDesc = desc; #endif diff --git a/Source/Engine/Graphics/GPUPipelineState.h b/Source/Engine/Graphics/GPUPipelineState.h index b00100953..4ddc73dde 100644 --- a/Source/Engine/Graphics/GPUPipelineState.h +++ b/Source/Engine/Graphics/GPUPipelineState.h @@ -172,11 +172,14 @@ protected: GPUPipelineState(); public: -#if BUILD_DEBUG +#if 
!BUILD_RELEASE /// /// The description of the pipeline state cached on creation in debug builds. Can be used to help with rendering crashes or issues and validation. /// Description DebugDesc; + + typedef Array> DebugName; + void GetDebugName(DebugName& name) const; #endif #if USE_EDITOR int32 Complexity; diff --git a/Source/Engine/Graphics/Shaders/GPUShader.cpp b/Source/Engine/Graphics/Shaders/GPUShader.cpp index 694b13f40..8b0996088 100644 --- a/Source/Engine/Graphics/Shaders/GPUShader.cpp +++ b/Source/Engine/Graphics/Shaders/GPUShader.cpp @@ -25,6 +25,25 @@ void GPUShaderProgram::Init(const GPUShaderProgramInitializer& initializer) #endif } +#if !BUILD_RELEASE + +void GPUShaderProgram::GetDebugName(DebugName& name) const +{ + StringView ownerName = StringUtils::GetFileNameWithoutExtension(_owner->GetName()); + name.AddUninitialized(ownerName.Length() + _name.Length() + 2); + char* dst = name.Get(); + for (int32 i = 0; i < ownerName.Length(); i++) + dst[i] = (char)ownerName.Get()[i]; + dst += ownerName.Length(); + *dst = ':'; + dst++; + for (int32 i = 0; i < _name.Length(); i++) + dst[i] = _name.Get()[i]; + dst[_name.Length()] = 0; +} + +#endif + GPUShader::GPUShader() : GPUResource(SpawnParams(Guid::New(), TypeInitializer)) { diff --git a/Source/Engine/Graphics/Shaders/GPUShaderProgram.h b/Source/Engine/Graphics/Shaders/GPUShaderProgram.h index 176c67709..43a62065b 100644 --- a/Source/Engine/Graphics/Shaders/GPUShaderProgram.h +++ b/Source/Engine/Graphics/Shaders/GPUShaderProgram.h @@ -4,6 +4,9 @@ #include "Engine/Core/Types/BaseTypes.h" #include "Engine/Core/Types/String.h" +#if !BUILD_RELEASE +#include "Engine/Core/Collections/Array.h" +#endif #include "Config.h" class GPUShader; @@ -93,6 +96,11 @@ public: return _flags; } +#if !BUILD_RELEASE + typedef Array> DebugName; + void GetDebugName(DebugName& name) const; +#endif + public: /// /// Gets shader program stage type. 
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp index cfd8d487a..7695260cd 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp @@ -73,7 +73,12 @@ ID3D12PipelineState* GPUPipelineStateDX12::GetState(GPUTextureViewDX12* depth, i #endif return state; } - PROFILE_CPU_NAMED("Create Pipeline State"); + PROFILE_CPU(); +#if !BUILD_RELEASE + DebugName name; + GetDebugName(name); + ZoneText(name.Get(), name.Count() - 1); +#endif // Update description to match the pipeline _desc.NumRenderTargets = key.RTsCount; @@ -103,41 +108,13 @@ ID3D12PipelineState* GPUPipelineStateDX12::GetState(GPUTextureViewDX12* depth, i const HRESULT result = _device->GetDevice()->CreateGraphicsPipelineState(&_desc, IID_PPV_ARGS(&state)); LOG_DIRECTX_RESULT(result); if (FAILED(result)) + { +#if !BUILD_RELEASE + LOG(Error, "CreateGraphicsPipelineState failed for {}", String(name.Get(), name.Count() - 1)); +#endif return nullptr; -#if GPU_ENABLE_RESOURCE_NAMING && BUILD_DEBUG - Array> name; - if (DebugDesc.VS) - { - name.Add(*DebugDesc.VS->GetName(), DebugDesc.VS->GetName().Length()); - name.Add('+'); } -#if GPU_ALLOW_TESSELLATION_SHADERS - if (DebugDesc.HS) - { - name.Add(*DebugDesc.HS->GetName(), DebugDesc.HS->GetName().Length()); - name.Add('+'); - } - if (DebugDesc.DS) - { - name.Add(*DebugDesc.DS->GetName(), DebugDesc.DS->GetName().Length()); - name.Add('+'); - } -#endif -#if GPU_ALLOW_GEOMETRY_SHADERS - if (DebugDesc.GS) - { - name.Add(*DebugDesc.GS->GetName(), DebugDesc.GS->GetName().Length()); - name.Add('+'); - } -#endif - if (DebugDesc.PS) - { - name.Add(*DebugDesc.PS->GetName(), DebugDesc.PS->GetName().Length()); - name.Add('+'); - } - if (name.Count() != 0 && name[name.Count() - 1] == '+') - name.RemoveLast(); - name.Add('\0'); +#if GPU_ENABLE_RESOURCE_NAMING && !BUILD_RELEASE 
SetDebugObjectName(state, name.Get(), name.Count() - 1); #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderDX12.cpp index 48e06aa78..951c4d1c2 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUShaderDX12.cpp @@ -4,6 +4,7 @@ #include "GPUShaderDX12.h" #include "Engine/Serialization/MemoryReadStream.h" +#include "Engine/Profiler/ProfilerCPU.h" #include "GPUShaderProgramDX12.h" #include "Types.h" #include "../RenderToolsDX.h" @@ -70,6 +71,12 @@ ID3D12PipelineState* GPUShaderProgramCSDX12::GetOrCreateState() { if (_state) return _state; + PROFILE_CPU(); +#if !BUILD_RELEASE + DebugName name; + GetDebugName(name); + ZoneText(name.Get(), name.Count() - 1); +#endif // Create description D3D12_COMPUTE_PIPELINE_STATE_DESC psDesc; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 96b139bac..083748aaa 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -238,14 +238,14 @@ static VKAPI_ATTR VkBool32 VKAPI_PTR DebugUtilsCallback(VkDebugUtilsMessageSever LOG(Info, "[Vulkan] {0} {1}:{2} {3}", type, severity, callbackData->messageIdNumber, message); } -#if BUILD_DEBUG +#if !BUILD_RELEASE if (auto* context = (GPUContextVulkan*)GPUDevice::Instance->GetMainContext()) { if (auto* state = (GPUPipelineStateVulkan*)context->GetState()) { - const StringAnsi vsName = state->DebugDesc.VS ? state->DebugDesc.VS->GetName() : StringAnsi::Empty; - const StringAnsi psName = state->DebugDesc.PS ? 
state->DebugDesc.PS->GetName() : StringAnsi::Empty; - LOG(Warning, "[Vulkan] Error during rendering with VS={}, PS={}", String(vsName), String(psName)); + GPUPipelineState::DebugName name; + state->GetDebugName(name); + LOG(Warning, "[Vulkan] Error during rendering with {}", String(name.Get(), name.Count() - 1)); } } #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp index ce60f1d39..b8357e29f 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp @@ -90,6 +90,8 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState() { if (_pipelineState) return _pipelineState; + PROFILE_CPU(); + ZoneText(*_name, _name.Length()); // Create pipeline layout DescriptorSetLayoutInfoVulkan descriptorSetLayoutInfo; @@ -110,7 +112,7 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState() // Create pipeline object VkPipeline pipeline; - const VkResult result = vkCreateComputePipelines(_device->Device, _device->PipelineCache, 1, &desc, nullptr, &pipeline); + VkResult result = vkCreateComputePipelines(_device->Device, _device->PipelineCache, 1, &desc, nullptr, &pipeline); LOG_VULKAN_RESULT(result); if (result != VK_SUCCESS) return nullptr; @@ -220,7 +222,12 @@ VkPipeline GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer #endif return pipeline; } - PROFILE_CPU_NAMED("Create Pipeline"); + PROFILE_CPU(); +#if !BUILD_RELEASE + DebugName name; + GetDebugName(name); + ZoneText(name.Get(), name.Count() - 1); +#endif // Bind vertex input VkPipelineVertexInputStateCreateInfo vertexInputCreateInfo; @@ -310,10 +317,8 @@ VkPipeline GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer LOG_VULKAN_RESULT(result); if (result != VK_SUCCESS) { -#if BUILD_DEBUG - const StringAnsi vsName = DebugDesc.VS ? 
DebugDesc.VS->GetName() : StringAnsi::Empty; - const StringAnsi psName = DebugDesc.PS ? DebugDesc.PS->GetName() : StringAnsi::Empty; - LOG(Error, "vkCreateGraphicsPipelines failed for VS={0}, PS={1}", String(vsName), String(psName)); +#if !BUILD_RELEASE + LOG(Error, "vkCreateGraphicsPipelines failed for {}", String(name.Get(), name.Count() - 1)); #endif return VK_NULL_HANDLE; } From 99323c1d2f32aaf12ce2de3f1eaebf1d40e332c3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 29 Jul 2025 10:41:41 +0200 Subject: [PATCH 118/211] Add customizable per-platform affinity for content and pool threads --- Source/Engine/Content/Config.h | 7 ++++++- Source/Engine/Content/Content.cpp | 7 +++++-- Source/Engine/Threading/ThreadPool.cpp | 20 +++++++++++++------- Source/Engine/Threading/ThreadPool.h | 3 +++ 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Content/Config.h b/Source/Engine/Content/Config.h index 77d77ec35..120017f53 100644 --- a/Source/Engine/Content/Config.h +++ b/Source/Engine/Content/Config.h @@ -4,8 +4,13 @@ #include "Engine/Core/Config.h" -// Amount of content loading threads per single physical CPU core +// Amount of content loading threads per single logical CPU core +#ifndef LOADING_THREAD_PER_LOGICAL_CORE #define LOADING_THREAD_PER_LOGICAL_CORE 0.5f +#endif + +// Enables pinning loading threads to the logical CPU cores with affinity mask +//#define LOADING_THREAD_AFFINITY_MASK(thread) (1 << (thread + 1)) // Enables additional assets metadata verification #define ASSETS_LOADING_EXTRA_VERIFICATION (BUILD_DEBUG || USE_EDITOR) diff --git a/Source/Engine/Content/Content.cpp b/Source/Engine/Content/Content.cpp index 6742d3490..1a397a2e7 100644 --- a/Source/Engine/Content/Content.cpp +++ b/Source/Engine/Content/Content.cpp @@ -129,17 +129,17 @@ bool ContentService::Init() LOG(Info, "Creating {0} content loading threads...", count); MainLoadThread = New(); ThisLoadThread = MainLoadThread; - LoadThreads.EnsureCapacity(count); + 
LoadThreads.Resize(count); for (int32 i = 0; i < count; i++) { auto thread = New(); + LoadThreads[i] = thread; if (thread->Start(String::Format(TEXT("Load Thread {0}"), i))) { LOG(Fatal, "Cannot spawn content thread {0}/{1}", i, count); Delete(thread); return true; } - LoadThreads.Add(thread); } return false; @@ -339,6 +339,9 @@ int32 LoadingThread::Run() return -1; } #endif +#ifdef LOADING_THREAD_AFFINITY_MASK + Platform::SetThreadAffinityMask(LOADING_THREAD_AFFINITY_MASK(LoadThreads.Find(this))); +#endif ContentLoadTask* task; ThisLoadThread = this; diff --git a/Source/Engine/Threading/ThreadPool.cpp b/Source/Engine/Threading/ThreadPool.cpp index e84aa2cdd..4943469a5 100644 --- a/Source/Engine/Threading/ThreadPool.cpp +++ b/Source/Engine/Threading/ThreadPool.cpp @@ -28,6 +28,9 @@ namespace ThreadPoolImpl ConcurrentTaskQueue Jobs; // Hello Steve! ConditionVariable JobsSignal; CriticalSection JobsMutex; +#ifdef THREAD_POOL_AFFINITY_MASK + volatile int64 ThreadIndex = 0; +#endif } String ThreadPoolTask::ToString() const @@ -63,11 +66,12 @@ bool ThreadPoolService::Init() PROFILE_MEM(EngineThreading); // Spawn threads - const int32 numThreads = Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 2, PLATFORM_THREADS_LIMIT / 2); - LOG(Info, "Spawning {0} Thread Pool workers", numThreads); - for (int32 i = ThreadPoolImpl::Threads.Count(); i < numThreads; i++) + const CPUInfo cpuInfo = Platform::GetCPUInfo(); + const int32 count = Math::Clamp(cpuInfo.ProcessorCoreCount - 1, 2, PLATFORM_THREADS_LIMIT / 2); + LOG(Info, "Spawning {0} Thread Pool workers", count); + ThreadPoolImpl::Threads.Resize(count); + for (int32 i = 0; i < count; i++) { - // Create tread auto runnable = New(true); runnable->OnWork.Bind(ThreadPool::ThreadProc); auto thread = Thread::Create(runnable, String::Format(TEXT("Thread Pool {0}"), i)); @@ -76,9 +80,7 @@ bool ThreadPoolService::Init() LOG(Error, "Failed to spawn {0} thread in the Thread Pool", i + 1); return true; } - - // Add to the list - 
ThreadPoolImpl::Threads.Add(thread); + ThreadPoolImpl::Threads[i] = thread; } return false; @@ -110,6 +112,10 @@ void ThreadPoolService::Dispose() int32 ThreadPool::ThreadProc() { +#ifdef THREAD_POOL_AFFINITY_MASK + const int64 index = Platform::InterlockedIncrement(&ThreadPoolImpl::ThreadIndex) - 1; + Platform::SetThreadAffinityMask(THREAD_POOL_AFFINITY_MASK((int32)index)); +#endif ThreadPoolTask* task; // Work until end diff --git a/Source/Engine/Threading/ThreadPool.h b/Source/Engine/Threading/ThreadPool.h index 8e46772e0..b9bb37a7a 100644 --- a/Source/Engine/Threading/ThreadPool.h +++ b/Source/Engine/Threading/ThreadPool.h @@ -4,6 +4,9 @@ #include "Engine/Core/Types/BaseTypes.h" +// Enables pinning thread pool to the logical CPU cores with affinity mask +//#define THREAD_POOL_AFFINITY_MASK(thread) (1 << (thread + 1)) + /// /// Main engine thread pool for threaded tasks system. /// From b8218e9ab412c8d488e4b33cd6de25918ef9a3d3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 29 Jul 2025 17:34:42 +0200 Subject: [PATCH 119/211] Fix shader asset path in debug name in cooked build --- Source/Engine/Content/Assets/Shader.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Content/Assets/Shader.cpp b/Source/Engine/Content/Assets/Shader.cpp index 618cc3f67..9228ef635 100644 --- a/Source/Engine/Content/Assets/Shader.cpp +++ b/Source/Engine/Content/Assets/Shader.cpp @@ -4,6 +4,10 @@ #include "Engine/Core/Log.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/Shaders/GPUShader.h" +#if GPU_ENABLE_RESOURCE_NAMING && !USE_EDITOR +#include "Engine/Content/Content.h" +#include "Engine/Content/Cache/AssetsCache.h" +#endif #include "Engine/Content/Upgraders/ShaderAssetUpgrader.h" #include "Engine/Content/Factories/BinaryAssetFactory.h" #include "Engine/Serialization/MemoryReadStream.h" @@ -14,7 +18,11 @@ Shader::Shader(const SpawnParams& params, const AssetInfo* info) : ShaderAssetTypeBase(params, info) { 
ASSERT(GPUDevice::Instance); - _shader = GPUDevice::Instance->CreateShader(info->Path); + StringView name = info->Path; +#if GPU_ENABLE_RESOURCE_NAMING && !USE_EDITOR + name = Content::GetRegistry()->GetEditorAssetPath(info->ID); +#endif + _shader = GPUDevice::Instance->CreateShader(name); ASSERT(_shader); GPU = _shader; } From 6d05bf16b15c5021a3b1177561b90f846c58849d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 29 Jul 2025 21:41:59 +0200 Subject: [PATCH 120/211] Optimize `Asset::GetPath` in cooked build --- Source/Engine/Content/Asset.h | 2 +- Source/Engine/Content/Assets/Model.cpp | 2 +- Source/Engine/Content/BinaryAsset.cpp | 4 +- Source/Engine/Content/BinaryAsset.h | 2 +- Source/Engine/Content/Cache/AssetsCache.cpp | 73 ++++++++++++------- Source/Engine/Content/Cache/AssetsCache.h | 22 +++--- Source/Engine/Content/Content.cpp | 4 +- Source/Engine/Content/Content.h | 2 +- Source/Engine/Content/JsonAsset.cpp | 2 +- Source/Engine/Content/JsonAsset.h | 2 +- .../Content/Loading/Tasks/LoadAssetDataTask.h | 2 +- Source/Engine/Level/Level.cpp | 16 ++-- 12 files changed, 79 insertions(+), 54 deletions(-) diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index c838eddf8..32f2a5e31 100644 --- a/Source/Engine/Content/Asset.h +++ b/Source/Engine/Content/Asset.h @@ -127,7 +127,7 @@ public: /// /// Gets the path to the asset storage file. In Editor, it reflects the actual file, in cooked Game, it fakes the Editor path to be informative for developers. /// - API_PROPERTY() virtual const String& GetPath() const = 0; + API_PROPERTY() virtual StringView GetPath() const = 0; /// /// Gets the asset type name. 
diff --git a/Source/Engine/Content/Assets/Model.cpp b/Source/Engine/Content/Assets/Model.cpp index df95e58b7..47335ae1f 100644 --- a/Source/Engine/Content/Assets/Model.cpp +++ b/Source/Engine/Content/Assets/Model.cpp @@ -619,7 +619,7 @@ Asset::LoadResult Model::load() { String name; #if !BUILD_RELEASE - name = GetPath() + TEXT(".SDF"); + name = String(GetPath()) + TEXT(".SDF"); #endif SDF.Texture = GPUDevice::Instance->CreateTexture(name); } diff --git a/Source/Engine/Content/BinaryAsset.cpp b/Source/Engine/Content/BinaryAsset.cpp index 9e7e51113..cb7b951f6 100644 --- a/Source/Engine/Content/BinaryAsset.cpp +++ b/Source/Engine/Content/BinaryAsset.cpp @@ -464,10 +464,10 @@ void BinaryAsset::OnDeleteObject() #endif -const String& BinaryAsset::GetPath() const +StringView BinaryAsset::GetPath() const { #if USE_EDITOR - return Storage ? Storage->GetPath() : String::Empty; + return Storage ? Storage->GetPath() : StringView::Empty; #else // In build all assets are packed into packages so use ID for original path lookup return Content::GetRegistry()->GetEditorAssetPath(_id); diff --git a/Source/Engine/Content/BinaryAsset.h b/Source/Engine/Content/BinaryAsset.h index cd9f078ca..39c34588e 100644 --- a/Source/Engine/Content/BinaryAsset.h +++ b/Source/Engine/Content/BinaryAsset.h @@ -292,7 +292,7 @@ public: #if USE_EDITOR void OnDeleteObject() override; #endif - const String& GetPath() const final override; + StringView GetPath() const final override; uint64 GetMemoryUsage() const override; protected: diff --git a/Source/Engine/Content/Cache/AssetsCache.cpp b/Source/Engine/Content/Cache/AssetsCache.cpp index bd1474afa..2a2d252b6 100644 --- a/Source/Engine/Content/Cache/AssetsCache.cpp +++ b/Source/Engine/Content/Cache/AssetsCache.cpp @@ -10,12 +10,24 @@ #include "Engine/Serialization/FileWriteStream.h" #include "Engine/Serialization/FileReadStream.h" #include "Engine/Content/Content.h" -#include "Engine/Content/Storage/ContentStorageManager.h" -#include 
"Engine/Content/Storage/JsonStorageProxy.h" #include "Engine/Profiler/ProfilerCPU.h" -#include "Engine/Threading/Threading.h" #include "Engine/Engine/Globals.h" #include "FlaxEngine.Gen.h" +#if ASSETS_CACHE_EDITABLE +#include "Engine/Content/Storage/ContentStorageManager.h" +#include "Engine/Content/Storage/JsonStorageProxy.h" +#include "Engine/Threading/Threading.h" +#define ASSETS_CACHE_LOCK() ScopeLock lock(_locker) +#else +#define ASSETS_CACHE_LOCK() +#endif + +int32 AssetsCache::Size() const +{ + ASSETS_CACHE_LOCK(); + const int32 result = _registry.Count(); + return result; +} void AssetsCache::Init() { @@ -72,7 +84,7 @@ void AssetsCache::Init() return; } - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); _isDirty = false; // Load elements count @@ -127,6 +139,16 @@ void AssetsCache::Init() _pathsMapping.Add(mappedPath, id); } +#if !USE_EDITOR && !BUILD_RELEASE + // Build inverse path mapping in development builds for faster GetEditorAssetPath (eg. used by PROFILE_CPU_ASSET) + _pathsMappingInv.Clear(); + _pathsMappingInv.EnsureCapacity(count); + for (auto& mapping : _pathsMapping) + { + _pathsMappingInv.Add(mapping.Value, StringView(mapping.Key)); + } +#endif + // Check errors const bool hasError = stream->HasError(); deleteStream.Delete(); @@ -154,7 +176,7 @@ bool AssetsCache::Save() if (!_isDirty && FileSystem::FileExists(_path)) return false; - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); if (Save(_path, _registry, _pathsMapping)) return true; @@ -223,12 +245,16 @@ bool AssetsCache::Save(const StringView& path, const Registry& entries, const Pa return false; } -const String& AssetsCache::GetEditorAssetPath(const Guid& id) const +StringView AssetsCache::GetEditorAssetPath(const Guid& id) const { - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); #if USE_EDITOR auto e = _registry.TryGet(id); return e ? 
e->Info.Path : String::Empty; +#elif !BUILD_RELEASE + StringView result; + _pathsMappingInv.TryGet(id, result); + return result; #else for (auto& e : _pathsMapping) { @@ -242,10 +268,8 @@ const String& AssetsCache::GetEditorAssetPath(const Guid& id) const bool AssetsCache::FindAsset(const StringView& path, AssetInfo& info) { PROFILE_CPU(); - bool result = false; - - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); // Check if asset has direct mapping to id (used for some cooked assets) Guid id; @@ -294,7 +318,7 @@ bool AssetsCache::FindAsset(const Guid& id, AssetInfo& info) { PROFILE_CPU(); bool result = false; - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); auto e = _registry.TryGet(id); if (e != nullptr) { @@ -316,14 +340,14 @@ bool AssetsCache::FindAsset(const Guid& id, AssetInfo& info) void AssetsCache::GetAll(Array& result) const { PROFILE_CPU(); - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); _registry.GetKeys(result); } void AssetsCache::GetAllByTypeName(const StringView& typeName, Array& result) const { PROFILE_CPU(); - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); for (auto i = _registry.Begin(); i.IsNotEnd(); ++i) { if (i->Value.Info.TypeName == typeName) @@ -331,6 +355,8 @@ void AssetsCache::GetAllByTypeName(const StringView& typeName, Array& resu } } +#if ASSETS_CACHE_EDITABLE + void AssetsCache::RegisterAssets(FlaxStorage* storage) { PROFILE_CPU(); @@ -342,7 +368,7 @@ void AssetsCache::RegisterAssets(FlaxStorage* storage) storage->GetEntries(entries); ASSERT(entries.HasItems()); - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); auto storagePath = storage->GetPath(); // Remove all old entries from that location @@ -440,7 +466,7 @@ void AssetsCache::RegisterAssets(const FlaxStorageReference& storage) void AssetsCache::RegisterAsset(const Guid& id, const String& typeName, const StringView& path) { PROFILE_CPU(); - ScopeLock lock(_locker); + ASSETS_CACHE_LOCK(); // Check if asset has been already added to the registry bool isMissing = true; @@ -492,8 
+518,7 @@ void AssetsCache::RegisterAsset(const Guid& id, const String& typeName, const St bool AssetsCache::DeleteAsset(const StringView& path, AssetInfo* info) { bool result = false; - _locker.Lock(); - + ASSETS_CACHE_LOCK(); for (auto i = _registry.Begin(); i.IsNotEnd(); ++i) { if (i->Value.Info.Path == path) @@ -506,16 +531,13 @@ bool AssetsCache::DeleteAsset(const StringView& path, AssetInfo* info) break; } } - - _locker.Unlock(); return result; } bool AssetsCache::DeleteAsset(const Guid& id, AssetInfo* info) { bool result = false; - _locker.Lock(); - + ASSETS_CACHE_LOCK(); const auto e = _registry.TryGet(id); if (e != nullptr) { @@ -525,16 +547,13 @@ bool AssetsCache::DeleteAsset(const Guid& id, AssetInfo* info) _isDirty = true; result = true; } - - _locker.Unlock(); return result; } bool AssetsCache::RenameAsset(const StringView& oldPath, const StringView& newPath) { bool result = false; - _locker.Lock(); - + ASSETS_CACHE_LOCK(); for (auto i = _registry.Begin(); i.IsNotEnd(); ++i) { if (i->Value.Info.Path == oldPath) @@ -545,11 +564,11 @@ bool AssetsCache::RenameAsset(const StringView& oldPath, const StringView& newPa break; } } - - _locker.Unlock(); return result; } +#endif + bool AssetsCache::IsEntryValid(Entry& e) { #if ENABLE_ASSETS_DISCOVERY diff --git a/Source/Engine/Content/Cache/AssetsCache.h b/Source/Engine/Content/Cache/AssetsCache.h index 217d56ebd..42f05a2aa 100644 --- a/Source/Engine/Content/Cache/AssetsCache.h +++ b/Source/Engine/Content/Cache/AssetsCache.h @@ -16,6 +16,9 @@ struct AssetHeader; struct FlaxStorageReference; class FlaxStorage; +// In cooked game all assets are there and all access to registry is read-only so can be multithreaded +#define ASSETS_CACHE_EDITABLE (USE_EDITOR) + /// /// Assets cache flags. 
/// @@ -75,22 +78,21 @@ public: private: bool _isDirty = false; +#if ASSETS_CACHE_EDITABLE CriticalSection _locker; +#endif Registry _registry; PathsMapping _pathsMapping; +#if !USE_EDITOR && !BUILD_RELEASE + Dictionary _pathsMappingInv; +#endif String _path; public: /// /// Gets amount of registered assets. /// - int32 Size() const - { - _locker.Lock(); - const int32 result = _registry.Count(); - _locker.Unlock(); - return result; - } + int32 Size() const; public: /// @@ -116,11 +118,11 @@ public: public: /// - /// Finds the asset path by id. In editor it returns the actual asset path, at runtime it returns the mapped asset path. + /// Finds the asset path by id. In editor, it returns the actual asset path, at runtime it returns the mapped asset path. /// /// The asset id. /// The asset path, or empty if failed to find. - const String& GetEditorAssetPath(const Guid& id) const; + StringView GetEditorAssetPath(const Guid& id) const; /// /// Finds the asset info by path. @@ -173,6 +175,7 @@ public: /// The result array. void GetAllByTypeName(const StringView& typeName, Array& result) const; +#if ASSETS_CACHE_EDITABLE /// /// Register assets in the cache /// @@ -223,6 +226,7 @@ public: /// New path /// True if has been deleted, otherwise false bool RenameAsset(const StringView& oldPath, const StringView& newPath); +#endif /// /// Determines whether cached asset entry is valid. 
diff --git a/Source/Engine/Content/Content.cpp b/Source/Engine/Content/Content.cpp index 1a397a2e7..b82a329a8 100644 --- a/Source/Engine/Content/Content.cpp +++ b/Source/Engine/Content/Content.cpp @@ -521,7 +521,7 @@ bool Content::GetAssetInfo(const StringView& path, AssetInfo& info) #endif } -String Content::GetEditorAssetPath(const Guid& id) +StringView Content::GetEditorAssetPath(const Guid& id) { return Cache.GetEditorAssetPath(id); } @@ -749,6 +749,7 @@ void Content::DeleteAsset(const StringView& path) return; } +#if USE_EDITOR ScopeLock locker(AssetsLocker); // Remove from registry @@ -765,6 +766,7 @@ void Content::DeleteAsset(const StringView& path) // Delete file deleteFileSafety(path, info.ID); +#endif } void Content::deleteFileSafety(const StringView& path, const Guid& id) diff --git a/Source/Engine/Content/Content.h b/Source/Engine/Content/Content.h index 15ace944a..f6dcf59f0 100644 --- a/Source/Engine/Content/Content.h +++ b/Source/Engine/Content/Content.h @@ -75,7 +75,7 @@ public: /// /// The asset id. /// The asset path, or empty if failed to find. - API_FUNCTION() static String GetEditorAssetPath(const Guid& id); + API_FUNCTION() static StringView GetEditorAssetPath(const Guid& id); /// /// Finds all the asset IDs. Uses asset registry. 
diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index 1aa434c41..4b5ee7b63 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -91,7 +91,7 @@ void JsonAssetBase::OnGetData(rapidjson_flax::StringBuffer& buffer) const Data->Accept(writerObj.GetWriter()); } -const String& JsonAssetBase::GetPath() const +StringView JsonAssetBase::GetPath() const { #if USE_EDITOR return _path; diff --git a/Source/Engine/Content/JsonAsset.h b/Source/Engine/Content/JsonAsset.h index 11c13ce80..e27c8dafb 100644 --- a/Source/Engine/Content/JsonAsset.h +++ b/Source/Engine/Content/JsonAsset.h @@ -88,7 +88,7 @@ protected: public: // [Asset] - const String& GetPath() const override; + StringView GetPath() const override; uint64 GetMemoryUsage() const override; #if USE_EDITOR void GetReferences(Array& assets, Array& files) const override; diff --git a/Source/Engine/Content/Loading/Tasks/LoadAssetDataTask.h b/Source/Engine/Content/Loading/Tasks/LoadAssetDataTask.h index 5c9ab5604..45f8f53f7 100644 --- a/Source/Engine/Content/Loading/Tasks/LoadAssetDataTask.h +++ b/Source/Engine/Content/Loading/Tasks/LoadAssetDataTask.h @@ -36,7 +36,7 @@ public: // [ContentLoadTask] String ToString() const override { - return String::Format(TEXT("Load Asset Data Task ({}, {}, {})"), (int32)GetState(), _chunks, _asset ? _asset->GetPath() : String::Empty); + return String::Format(TEXT("Load Asset Data Task ({}, {}, {})"), (int32)GetState(), _chunks, _asset ? 
_asset->GetPath() : StringView::Empty); } bool HasReference(Object* obj) const override { diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 49bef81c3..ddb871846 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -134,7 +134,7 @@ public: struct Args { rapidjson_flax::Value& Data; - const String* AssetPath; + StringView AssetPath; int32 EngineBuild; float TimeBudget; }; @@ -222,7 +222,7 @@ namespace LevelImpl SceneResult loadScene(SceneLoader& loader, JsonAsset* sceneAsset, float* timeBudget = nullptr); SceneResult loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** outScene = nullptr, float* timeBudget = nullptr); SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Document& document, Scene** outScene = nullptr, float* timeBudget = nullptr); - SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene = nullptr, const String* assetPath = nullptr, float* timeBudget = nullptr); + SceneResult loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene = nullptr, StringView assetPath = StringView(), float* timeBudget = nullptr); bool unloadScene(Scene* scene); bool unloadScenes(); bool saveScene(Scene* scene); @@ -959,7 +959,7 @@ SceneResult LevelImpl::loadScene(SceneLoader& loader, JsonAsset* sceneAsset, flo return SceneResult::Failed; } - return loadScene(loader, *sceneAsset->Data, sceneAsset->DataEngineBuild, nullptr, &sceneAsset->GetPath(), timeBudget); + return loadScene(loader, *sceneAsset->Data, sceneAsset->DataEngineBuild, nullptr, sceneAsset->GetPath(), timeBudget); } SceneResult LevelImpl::loadScene(SceneLoader& loader, const BytesContainer& sceneData, Scene** outScene, float* timeBudget) @@ -999,7 +999,7 @@ SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Document& return loadScene(loader, data->value, saveEngineBuild, outScene, nullptr, timeBudget); } -SceneResult 
LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, const String* assetPath, float* timeBudget) +SceneResult LevelImpl::loadScene(SceneLoader& loader, rapidjson_flax::Value& data, int32 engineBuild, Scene** outScene, StringView assetPath, float* timeBudget) { PROFILE_CPU_NAMED("Level.LoadScene"); PROFILE_MEM(Level); @@ -1401,12 +1401,12 @@ SceneResult SceneLoader::OnEnd(Args& args) LOG(Error, "Failed to resave asset '{}'", prefab->GetPath()); } } - if (ContentDeprecated::Clear() && args.AssetPath) + if (ContentDeprecated::Clear() && args.AssetPath != StringView()) { - LOG(Info, "Resaving asset '{}' that uses deprecated data format", *args.AssetPath); - if (saveScene(Scene, *args.AssetPath)) + LOG(Info, "Resaving asset '{}' that uses deprecated data format", args.AssetPath); + if (saveScene(Scene, args.AssetPath)) { - LOG(Error, "Failed to resave asset '{}'", *args.AssetPath); + LOG(Error, "Failed to resave asset '{}'", args.AssetPath); } } #endif From 8f63a99a2c21f0aee50e5f3113faf7fe2c4fe84e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 29 Jul 2025 21:42:37 +0200 Subject: [PATCH 121/211] Fix game splash screen to wait fr texture to be streamed in, not just allocated --- Source/Engine/Engine/Base/GameBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Engine/Base/GameBase.cpp b/Source/Engine/Engine/Base/GameBase.cpp index b209fd6f0..c49738e27 100644 --- a/Source/Engine/Engine/Base/GameBase.cpp +++ b/Source/Engine/Engine/Base/GameBase.cpp @@ -211,7 +211,7 @@ void GameBaseImpl::OnPostRender(GPUContext* context, RenderContext& renderContex } // Wait for texture loaded before showing splash screen - if (!SplashScreen->IsLoaded() || SplashScreen.Get()->GetTexture()->MipLevels() != SplashScreen.Get()->StreamingTexture()->TotalMipLevels()) + if (!SplashScreen->IsLoaded() || SplashScreen.Get()->GetResidentMipLevels() != SplashScreen.Get()->GetMipLevels()) { return; } From 
c68b75a298d3f9cc243018b8ebcbf551bb411f5a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 29 Jul 2025 21:43:09 +0200 Subject: [PATCH 122/211] Fix `GPUBufferDX11::Map` to wait on data to avoid missing data when reading staging buffers --- Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp index 4d607b7b3..1ea5f1b75 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp @@ -33,8 +33,8 @@ void* GPUBufferDX11::Map(GPUResourceMapMode mode) { case GPUResourceMapMode::Read: mapType = D3D11_MAP_READ; - if (_desc.Usage == GPUResourceUsage::StagingReadback && isMainThread) - mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; + //if (_desc.Usage == GPUResourceUsage::StagingReadback && isMainThread) + // mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; break; case GPUResourceMapMode::Write: mapType = D3D11_MAP_WRITE_DISCARD; From 8fcbef863e044498bc19a16e93beb9770df638d8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 08:42:26 +0200 Subject: [PATCH 123/211] Add `GPUResourceMapMode.NoWait` flag to control buffer data reading c68b75a298d3f9cc243018b8ebcbf551bb411f5a --- Source/Engine/Graphics/Enums.h | 7 +++++++ .../Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp | 6 +++--- .../Renderer/GI/DynamicDiffuseGlobalIllumination.cpp | 2 +- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Graphics/Enums.h b/Source/Engine/Graphics/Enums.h index 6343b827b..f6af6c16b 100644 --- a/Source/Engine/Graphics/Enums.h +++ b/Source/Engine/Graphics/Enums.h @@ -340,8 +340,15 @@ API_ENUM(Attributes="Flags") enum class GPUResourceMapMode /// The resource is mapped for reading and writing. 
/// ReadWrite = Read | Write, + + /// + /// Flag that indicates mapping should fail with no data if the resource is still used by the GPU. Otherwise, CPU will wait for the GPU execution. + /// + NoWait = 0x04, }; +DECLARE_ENUM_OPERATORS(GPUResourceMapMode); + /// /// Primitives types. /// diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp index 1ea5f1b75..f7575d0a5 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUBufferDX11.cpp @@ -29,12 +29,12 @@ void* GPUBufferDX11::Map(GPUResourceMapMode mode) map.pData = nullptr; D3D11_MAP mapType; UINT mapFlags = 0; - switch (mode) + switch (mode & GPUResourceMapMode::ReadWrite) { case GPUResourceMapMode::Read: mapType = D3D11_MAP_READ; - //if (_desc.Usage == GPUResourceUsage::StagingReadback && isMainThread) - // mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; + if (EnumHasAnyFlags(mode, GPUResourceMapMode::NoWait)) + mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; break; case GPUResourceMapMode::Write: mapType = D3D11_MAP_WRITE_DISCARD; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 3dc730aa0..782f15260 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -649,7 +649,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update stats { StatsData stats; - if (void* mapped = ddgiData.StatsRead->Map(GPUResourceMapMode::Read)) + if (void* mapped = ddgiData.StatsRead->Map(GPUResourceMapMode::Read | GPUResourceMapMode::NoWait)) { Platform::MemoryCopy(&stats, mapped, sizeof(stats)); ddgiData.StatsRead->Unmap(); diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 1fbf94945..dad9a23e6 100644 --- 
a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -976,7 +976,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { // Get the last counter value (accept staging readback delay or not available data yet) notReady = true; - auto data = (uint32*)_culledObjectsSizeBuffer->Map(GPUResourceMapMode::Read); + auto data = (uint32*)_culledObjectsSizeBuffer->Map(GPUResourceMapMode::Read | GPUResourceMapMode::NoWait); if (data) { uint32 counter = data[surfaceAtlasData.CulledObjectsCounterIndex]; From 5e4d564338f45bc705074d8bd5764df34b6f1c30 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 19:08:45 +0200 Subject: [PATCH 124/211] Add **GPU profiling support to Tracy integration** --- Source/Engine/Engine/Engine.cpp | 7 +- Source/Engine/Graphics/GPUContext.cpp | 4 + Source/Engine/Graphics/GPUContext.h | 5 + Source/Engine/Graphics/GPUDevice.cpp | 1 + Source/Engine/Graphics/Graphics.Build.cs | 6 + .../DirectX/DX11/GPUContextDX11.cpp | 26 + .../DirectX/DX11/GPUContextDX11.h | 6 + .../DirectX/DX12/GPUContextDX12.cpp | 27 + .../DirectX/DX12/GPUContextDX12.h | 8 + .../GraphicsDevice/Vulkan/CmdBufferVulkan.cpp | 59 +- .../GraphicsDevice/Vulkan/CmdBufferVulkan.h | 5 +- .../Vulkan/GPUContextVulkan.cpp | 53 +- .../GraphicsDevice/Vulkan/GPUContextVulkan.h | 3 + .../Vulkan/GPUDeviceVulkan.Layers.cpp | 4 + .../Vulkan/GPUSwapChainVulkan.cpp | 11 +- .../Engine/Platform/Base/StringUtilsBase.cpp | 12 + Source/Engine/Platform/StringUtils.h | 3 + .../ThirdParty/tracy/client/TracyProfiler.cpp | 1 + Source/ThirdParty/tracy/common/TracyColor.hpp | 690 ++++++++++++++++ .../ThirdParty/tracy/common/TracySystem.cpp | 17 + Source/ThirdParty/tracy/tracy.Build.cs | 22 + Source/ThirdParty/tracy/tracy/Tracy.hpp | 5 +- Source/ThirdParty/tracy/tracy/TracyD3D11.hpp | 456 ++++++++++ Source/ThirdParty/tracy/tracy/TracyD3D12.hpp | 529 ++++++++++++ Source/ThirdParty/tracy/tracy/TracyVulkan.hpp | 779 
++++++++++++++++++ .../Build/NativeCpp/BuildOptions.cs | 23 +- 26 files changed, 2716 insertions(+), 46 deletions(-) create mode 100644 Source/ThirdParty/tracy/common/TracyColor.hpp create mode 100644 Source/ThirdParty/tracy/tracy/TracyD3D11.hpp create mode 100644 Source/ThirdParty/tracy/tracy/TracyD3D12.hpp create mode 100644 Source/ThirdParty/tracy/tracy/TracyVulkan.hpp diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index 4ea18a85c..5e8224b4e 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -247,7 +247,6 @@ int32 Engine::Main(const Char* cmdLine) { OnDraw(); Time::OnEndDraw(); - FrameMark; } } @@ -397,6 +396,11 @@ void Engine::OnLateUpdate() void Engine::OnDraw() { +#if COMPILE_WITH_PROFILER + // Auto-enable GPU events when Tracy got connected + if (!ProfilerGPU::EventsEnabled && TracyIsConnected) + ProfilerGPU::EventsEnabled = true; +#endif PROFILE_CPU_NAMED("Draw"); // Begin frame rendering @@ -411,6 +415,7 @@ void Engine::OnDraw() device->Draw(); // End frame rendering + FrameMark; #if COMPILE_WITH_PROFILER ProfilerGPU::EndFrame(); #endif diff --git a/Source/Engine/Graphics/GPUContext.cpp b/Source/Engine/Graphics/GPUContext.cpp index 9c8a60b4b..fdeca122b 100644 --- a/Source/Engine/Graphics/GPUContext.cpp +++ b/Source/Engine/Graphics/GPUContext.cpp @@ -69,6 +69,10 @@ void GPUContext::FrameEnd() FlushState(); } +void GPUContext::OnPresent() +{ +} + void GPUContext::BindSR(int32 slot, GPUTexture* t) { ASSERT_LOW_LAYER(t == nullptr || t->ResidentMipLevels() == 0 || t->IsShaderResource()); diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index aa6003f87..a042e3f83 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -148,6 +148,11 @@ public: /// virtual void FrameEnd(); + /// + /// Called after performing final swapchain presentation and submitting all GPU commands. 
+ /// + virtual void OnPresent(); + public: #if GPU_ALLOW_PROFILE_EVENTS /// diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index d7f64de45..eda66d95e 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -646,6 +646,7 @@ void GPUDevice::DrawEnd() const double presentEnd = Platform::GetTimeSeconds(); ProfilerGPU::OnPresentTime((float)((presentEnd - presentStart) * 1000.0)); #endif + GetMainContext()->OnPresent(); _wasVSyncUsed = anyVSync; _isRendering = false; diff --git a/Source/Engine/Graphics/Graphics.Build.cs b/Source/Engine/Graphics/Graphics.Build.cs index 212f975f0..e1c77d844 100644 --- a/Source/Engine/Graphics/Graphics.Build.cs +++ b/Source/Engine/Graphics/Graphics.Build.cs @@ -20,6 +20,12 @@ public abstract class GraphicsDeviceBaseModule : EngineModule // Enables GPU diagnostic tools (debug layer etc.) options.PublicDefinitions.Add("GPU_ENABLE_DIAGNOSTICS"); } + + if (Profiler.Use(options) && tracy.GPU && true) + { + // Enables GPU profiling with Tracy + options.PrivateDefinitions.Add("GPU_ENABLE_TRACY"); + } } /// diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 22780e3ec..b68f5e595 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -65,10 +65,17 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte _maxUASlots = GPU_MAX_UA_BINDED; if (_device->GetRendererType() != RendererType::DirectX11) _maxUASlots = 1; + +#if GPU_ENABLE_TRACY + _tracyContext = tracy::CreateD3D11Context(device->GetDevice(), context); +#endif } GPUContextDX11::~GPUContextDX11() { +#if GPU_ENABLE_TRACY + tracy::DestroyD3D11Context(_tracyContext); +#endif #if GPU_ALLOW_PROFILE_EVENTS SAFE_RELEASE(_userDefinedAnnotations); #endif @@ -139,16 +146,35 @@ void GPUContextDX11::FrameBegin() 
_context->CSSetSamplers(0, ARRAY_COUNT(samplers), samplers); } +void GPUContextDX11::OnPresent() +{ + GPUContext::OnPresent(); + +#if GPU_ENABLE_TRACY + tracy::CollectD3D11Context(_tracyContext); +#endif +} + #if GPU_ALLOW_PROFILE_EVENTS void GPUContextDX11::EventBegin(const Char* name) { if (_userDefinedAnnotations) _userDefinedAnnotations->BeginEvent(name); + +#if GPU_ENABLE_TRACY + char buffer[60]; + int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer)); + tracy::BeginD3D11ZoneScope(_tracyZone, _tracyContext, buffer, bufferSize); +#endif } void GPUContextDX11::EventEnd() { +#if GPU_ENABLE_TRACY + tracy::EndD3D11ZoneScope(_tracyZone); +#endif + if (_userDefinedAnnotations) _userDefinedAnnotations->EndEvent(); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index 4941af978..48de69b3f 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -6,6 +6,7 @@ #include "GPUDeviceDX11.h" #include "GPUPipelineStateDX11.h" #include "../IncludeDirectXHeaders.h" +#include #if GRAPHICS_API_DIRECTX11 @@ -23,6 +24,10 @@ private: ID3D11DeviceContext* _context; #if GPU_ALLOW_PROFILE_EVENTS ID3DUserDefinedAnnotation* _userDefinedAnnotations; +#endif +#if COMPILE_WITH_PROFILER + void* _tracyContext; + byte _tracyZone[TracyD3D11ZoneSize]; #endif int32 _maxUASlots; @@ -110,6 +115,7 @@ public: // [GPUContext] void FrameBegin() override; + void OnPresent() override; #if GPU_ALLOW_PROFILE_EVENTS void EventBegin(const Char* name) override; void EventEnd() override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 9cb285ac1..6d06231ee 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -99,10 +99,16 @@ 
GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty #if GPU_ENABLE_RESOURCE_NAMING _commandList->SetName(TEXT("GPUContextDX12::CommandList")); #endif +#if GPU_ENABLE_TRACY + _tracyContext = tracy::CreateD3D12Context(device->GetDevice(), _device->GetCommandQueue()->GetCommandQueue()); +#endif } GPUContextDX12::~GPUContextDX12() { +#if GPU_ENABLE_TRACY + tracy::DestroyD3D12Context(_tracyContext); +#endif DX_SAFE_RELEASE_CHECK(_commandList, 0); } @@ -706,6 +712,15 @@ void GPUContextDX12::FrameEnd() FrameFenceValues[0] = Execute(false); } +void GPUContextDX12::OnPresent() +{ + GPUContext::OnPresent(); + +#if GPU_ENABLE_TRACY + tracy::CollectD3D12Context(_tracyContext); +#endif +} + #if GPU_ALLOW_PROFILE_EVENTS void GPUContextDX12::EventBegin(const Char* name) @@ -713,10 +728,22 @@ void GPUContextDX12::EventBegin(const Char* name) #if USE_PIX PIXBeginEvent(_commandList, 0, name); #endif + +#if GPU_ENABLE_TRACY + char buffer[60]; + int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer)); + auto& zone = _tracyZones.AddOne(); + tracy::BeginD3D12ZoneScope(zone.Data, _tracyContext, _commandList, buffer, bufferSize); +#endif } void GPUContextDX12::EventEnd() { +#if GPU_ENABLE_TRACY + tracy::EndD3D12ZoneScope(_tracyZones.Last().Data); + _tracyZones.RemoveLast(); +#endif + #if USE_PIX PIXEndEvent(_commandList); #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h index 3fa0f7930..917b68165 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h @@ -6,6 +6,7 @@ #include "IShaderResourceDX12.h" #include "DescriptorHeapDX12.h" #include "../IncludeDirectXHeaders.h" +#include #if GRAPHICS_API_DIRECTX12 @@ -71,6 +72,12 @@ private: GPUConstantBufferDX12* _cbHandles[GPU_MAX_CB_BINDED]; GPUSamplerDX12* _samplers[GPU_MAX_SAMPLER_BINDED - GPU_STATIC_SAMPLERS_COUNT]; +#if 
COMPILE_WITH_PROFILER + void* _tracyContext; + struct TracyZone { byte Data[TracyD3D12ZoneSize]; }; + Array> _tracyZones; +#endif + public: GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE type); @@ -154,6 +161,7 @@ public: // [GPUContext] void FrameBegin() override; void FrameEnd() override; + void OnPresent() override; #if GPU_ALLOW_PROFILE_EVENTS void EventBegin(const Char* name) override; void EventEnd() override; diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp index 029d8ee1d..36eacccf9 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp @@ -49,10 +49,19 @@ void CmdBufferVulkan::End() PROFILE_CPU(); ASSERT(IsOutsideRenderPass()); -#if GPU_ALLOW_PROFILE_EVENTS && VK_EXT_debug_utils +#if GPU_ALLOW_PROFILE_EVENTS // End remaining events while (_eventsBegin--) - vkCmdEndDebugUtilsLabelEXT(GetHandle()); + { +#if VK_EXT_debug_utils + if (vkCmdEndDebugUtilsLabelEXT) + vkCmdEndDebugUtilsLabelEXT(GetHandle()); +#endif +#if GPU_ENABLE_TRACY + tracy::EndVkZoneScope(_tracyZones.Last().Data); + _tracyZones.RemoveLast(); +#endif + } #endif VALIDATE_VULKAN_RESULT(vkEndCommandBuffer(GetHandle())); @@ -85,39 +94,43 @@ void CmdBufferVulkan::EndRenderPass() #if GPU_ALLOW_PROFILE_EVENTS -void CmdBufferVulkan::BeginEvent(const Char* name) +void CmdBufferVulkan::BeginEvent(const Char* name, void* tracyContext) { -#if VK_EXT_debug_utils - if (!vkCmdBeginDebugUtilsLabelEXT) - return; - _eventsBegin++; - // Convert to ANSI - char buffer[101]; - int32 i = 0; - while (i < 100 && name[i]) - { - buffer[i] = (char)name[i]; - i++; - } - buffer[i] = 0; + char buffer[60]; + int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer)); - VkDebugUtilsLabelEXT label; - RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT); - label.pLabelName = buffer; - vkCmdBeginDebugUtilsLabelEXT(GetHandle(), 
&label); +#if GPU_ENABLE_TRACY + auto& zone = _tracyZones.AddOne(); + tracy::BeginVkZoneScope(zone.Data, tracyContext, GetHandle(), buffer, bufferSize); +#endif + +#if VK_EXT_debug_utils + if (vkCmdBeginDebugUtilsLabelEXT) + { + VkDebugUtilsLabelEXT label; + RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT); + label.pLabelName = buffer; + vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label); + } #endif } void CmdBufferVulkan::EndEvent() { -#if VK_EXT_debug_utils - if (_eventsBegin == 0 || !vkCmdEndDebugUtilsLabelEXT) + if (_eventsBegin == 0) return; _eventsBegin--; - vkCmdEndDebugUtilsLabelEXT(GetHandle()); +#if VK_EXT_debug_utils + if (vkCmdEndDebugUtilsLabelEXT) + vkCmdEndDebugUtilsLabelEXT(GetHandle()); +#endif + +#if GPU_ENABLE_TRACY + tracy::EndVkZoneScope(_tracyZones.Last().Data); + _tracyZones.RemoveLast(); #endif } diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h index 15cd616ff..7cb3ee104 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h @@ -5,6 +5,7 @@ #include "GPUDeviceVulkan.h" #include "Engine/Core/Types/BaseTypes.h" #include "Engine/Core/Collections/Array.h" +#include #if GRAPHICS_API_VULKAN @@ -42,6 +43,8 @@ private: FenceVulkan* _fence; #if GPU_ALLOW_PROFILE_EVENTS int32 _eventsBegin = 0; + struct TracyZone { byte Data[TracyVulkanZoneSize]; }; + Array> _tracyZones; #endif // The latest value when command buffer was submitted. 
@@ -129,7 +132,7 @@ public: } #if GPU_ALLOW_PROFILE_EVENTS - void BeginEvent(const Char* name); + void BeginEvent(const Char* name, void* tracyContext); void EndEvent(); #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index d0ecf9358..99b3712ae 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -4,6 +4,7 @@ #include "GPUContextVulkan.h" #include "CmdBufferVulkan.h" +#include "GPUAdapterVulkan.h" #include "RenderToolsVulkan.h" #include "Engine/Core/Math/Color.h" #include "Engine/Core/Math/Rectangle.h" @@ -15,6 +16,7 @@ #include "Engine/Profiler/RenderStats.h" #include "GPUShaderProgramVulkan.h" #include "GPUTextureVulkan.h" +#include "QueueVulkan.h" #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Debug/Exceptions/NotImplementedException.h" @@ -107,10 +109,37 @@ GPUContextVulkan::GPUContextVulkan(GPUDeviceVulkan* device, QueueVulkan* queue) _handlesSizes[(int32)SpirvShaderResourceBindingType::SRV] = GPU_MAX_SR_BINDED; _handlesSizes[(int32)SpirvShaderResourceBindingType::UAV] = GPU_MAX_UA_BINDED; #endif + +#if GPU_ENABLE_TRACY +#if VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset + // Use calibrated timestamps extension + if (vkResetQueryPoolEXT && vkGetCalibratedTimestampsEXT) + { + _tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, vkResetQueryPoolEXT, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT); + } + else +#endif + { + // Use immediate command buffer for Tracy initialization + VkCommandBufferAllocateInfo cmdInfo; + RenderToolsVulkan::ZeroStruct(cmdInfo, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO); + cmdInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmdInfo.commandPool = _cmdBufferManager->GetHandle(); + cmdInfo.commandBufferCount = 1; + VkCommandBuffer tracyCmdBuffer; + 
vkAllocateCommandBuffers(_device->Device, &cmdInfo, &tracyCmdBuffer); + _tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, _queue->GetHandle(), tracyCmdBuffer, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT); + vkQueueWaitIdle(_queue->GetHandle()); + vkFreeCommandBuffers(_device->Device, _cmdBufferManager->GetHandle(), 1, &tracyCmdBuffer); + } +#endif } GPUContextVulkan::~GPUContextVulkan() { +#if GPU_ENABLE_TRACY + tracy::DestroyVkContext(_tracyContext); +#endif for (int32 i = 0; i < _descriptorPools.Count(); i++) { _descriptorPools[i].ClearDelete(); @@ -679,15 +708,9 @@ void GPUContextVulkan::OnDrawCall() // Bind descriptors sets to the graphics pipeline if (pipelineState->HasDescriptorsPerStageMask) { - vkCmdBindDescriptorSets( - cmdBuffer->GetHandle(), - VK_PIPELINE_BIND_POINT_GRAPHICS, - pipelineState->GetLayout()->Handle, - 0, - pipelineState->DescriptorSetHandles.Count(), - pipelineState->DescriptorSetHandles.Get(), - pipelineState->DynamicOffsets.Count(), - pipelineState->DynamicOffsets.Get()); + auto& descriptorSets = pipelineState->DescriptorSetHandles; + auto& dynamicOffsets = pipelineState->DynamicOffsets; + vkCmdBindDescriptorSets(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineState->GetLayout()->Handle, 0, descriptorSets.Count(), descriptorSets.Get(), dynamicOffsets.Count(), dynamicOffsets.Get()); } _rtDirtyFlag = false; @@ -748,6 +771,11 @@ void GPUContextVulkan::FrameEnd() // Execute any queued layout transitions that weren't already handled by the render pass FlushBarriers(); +#if GPU_ENABLE_TRACY + if (cmdBuffer) + tracy::CollectVkContext(_tracyContext, cmdBuffer->GetHandle()); +#endif + // Base GPUContext::FrameEnd(); } @@ -757,7 +785,12 @@ void GPUContextVulkan::FrameEnd() void GPUContextVulkan::EventBegin(const Char* name) { const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); - cmdBuffer->BeginEvent(name); +#if COMPILE_WITH_PROFILER + void* tracyContext = 
_tracyContext; +#else + void* tracyContext = nullptr; +#endif + cmdBuffer->BeginEvent(name, tracyContext); } void GPUContextVulkan::EventEnd() diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h index b5be52461..73aa5a52f 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h @@ -94,6 +94,9 @@ private: #if ENABLE_ASSERTION uint32 _handlesSizes[(int32)SpirvShaderResourceBindingType::MAX]; #endif +#if COMPILE_WITH_PROFILER + void* _tracyContext; +#endif typedef Array DescriptorPoolArray; Dictionary _descriptorPools; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp index 14cca0a6d..ad99d8850 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp @@ -62,6 +62,10 @@ static const char* GDeviceExtensions[] = #endif #if VK_KHR_sampler_mirror_clamp_to_edge VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, +#endif +#if GPU_ENABLE_TRACY && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset + VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, #endif nullptr }; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp index 21971c5ca..70d422cd9 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp @@ -424,6 +424,7 @@ GPUSwapChainVulkan::Status GPUSwapChainVulkan::Present(QueueVulkan* presentQueue { if (_currentImageIndex == -1) return Status::Ok; + PROFILE_CPU_NAMED("vkQueuePresentKHR"); VkPresentInfoKHR presentInfo; RenderToolsVulkan::ZeroStruct(presentInfo, VK_STRUCTURE_TYPE_PRESENT_INFO_KHR); @@ -506,7 +507,7 @@ int32 
GPUSwapChainVulkan::TryPresent(Function int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore) { - PROFILE_CPU(); + PROFILE_CPU_NAMED("vkAcquireNextImageKHR"); ASSERT(_swapChain && _backBuffers.HasItems()); uint32 imageIndex = _currentImageIndex; @@ -514,13 +515,7 @@ int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore) _semaphoreIndex = (_semaphoreIndex + 1) % _backBuffers.Count(); const auto semaphore = _backBuffers[_semaphoreIndex].ImageAcquiredSemaphore; - const VkResult result = vkAcquireNextImageKHR( - _device->Device, - _swapChain, - UINT64_MAX, - semaphore->GetHandle(), - VK_NULL_HANDLE, - &imageIndex); + const VkResult result = vkAcquireNextImageKHR(_device->Device, _swapChain, UINT64_MAX, semaphore->GetHandle(), VK_NULL_HANDLE, &imageIndex); if (result == VK_ERROR_OUT_OF_DATE_KHR) { _semaphoreIndex = prevSemaphoreIndex; diff --git a/Source/Engine/Platform/Base/StringUtilsBase.cpp b/Source/Engine/Platform/Base/StringUtilsBase.cpp index 6b9c6f861..adda1b5a7 100644 --- a/Source/Engine/Platform/Base/StringUtilsBase.cpp +++ b/Source/Engine/Platform/Base/StringUtilsBase.cpp @@ -16,6 +16,18 @@ constexpr char DirectorySeparatorChar = '\\'; constexpr char AltDirectorySeparatorChar = '/'; constexpr char VolumeSeparatorChar = ':'; +int32 StringUtils::Copy(char* dst, const Char* src, int32 count) +{ + int32 i = 0; + while (i < count && src[i]) + { + dst[i] = (char)src[i]; + i++; + } + dst[i] = 0; + return i; +} + const Char* StringUtils::FindIgnoreCase(const Char* str, const Char* toFind) { if (toFind == nullptr || str == nullptr) diff --git a/Source/Engine/Platform/StringUtils.h b/Source/Engine/Platform/StringUtils.h index 5d712070b..3e1dc5660 100644 --- a/Source/Engine/Platform/StringUtils.h +++ b/Source/Engine/Platform/StringUtils.h @@ -125,6 +125,9 @@ public: // Copies the string (count is maximum amount of characters to copy). 
static Char* Copy(Char* dst, const Char* src, int32 count); + // Copies the string (count is maximum amount of characters to copy). Returns amount of copied elements (excluding null terminator character). + static int32 Copy(char* dst, const Char* src, int32 count); + // Finds specific sub-string in the input string. Returns the first found position in the input string or nulll if failed. static const Char* Find(const Char* str, const Char* toFind); diff --git a/Source/ThirdParty/tracy/client/TracyProfiler.cpp b/Source/ThirdParty/tracy/client/TracyProfiler.cpp index 837b36cc3..fe24f9309 100644 --- a/Source/ThirdParty/tracy/client/TracyProfiler.cpp +++ b/Source/ThirdParty/tracy/client/TracyProfiler.cpp @@ -1400,6 +1400,7 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } # endif #endif +TRACY_API bool IsConnected() { return GetProfiler().IsConnected(); } TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } diff --git a/Source/ThirdParty/tracy/common/TracyColor.hpp b/Source/ThirdParty/tracy/common/TracyColor.hpp new file mode 100644 index 000000000..4825c0fba --- /dev/null +++ b/Source/ThirdParty/tracy/common/TracyColor.hpp @@ -0,0 +1,690 @@ +#ifndef __TRACYCOLOR_HPP__ +#define __TRACYCOLOR_HPP__ + +namespace tracy +{ +struct Color +{ +enum ColorType +{ + Snow = 0xfffafa, + GhostWhite = 0xf8f8ff, + WhiteSmoke = 0xf5f5f5, + Gainsboro = 0xdcdcdc, + FloralWhite = 0xfffaf0, + OldLace = 0xfdf5e6, + Linen = 0xfaf0e6, + AntiqueWhite = 0xfaebd7, + PapayaWhip = 0xffefd5, + BlanchedAlmond = 0xffebcd, + Bisque = 0xffe4c4, + PeachPuff = 0xffdab9, + NavajoWhite = 0xffdead, + Moccasin = 0xffe4b5, + Cornsilk = 0xfff8dc, + Ivory = 0xfffff0, + LemonChiffon = 0xfffacd, + Seashell = 0xfff5ee, + Honeydew = 0xf0fff0, + MintCream = 0xf5fffa, + Azure = 0xf0ffff, + AliceBlue = 0xf0f8ff, + Lavender = 0xe6e6fa, + LavenderBlush = 0xfff0f5, + MistyRose = 0xffe4e1, + White = 0xffffff, + 
Black = 0x000000, + DarkSlateGray = 0x2f4f4f, + DarkSlateGrey = 0x2f4f4f, + DimGray = 0x696969, + DimGrey = 0x696969, + SlateGray = 0x708090, + SlateGrey = 0x708090, + LightSlateGray = 0x778899, + LightSlateGrey = 0x778899, + Gray = 0xbebebe, + Grey = 0xbebebe, + X11Gray = 0xbebebe, + X11Grey = 0xbebebe, + WebGray = 0x808080, + WebGrey = 0x808080, + LightGrey = 0xd3d3d3, + LightGray = 0xd3d3d3, + MidnightBlue = 0x191970, + Navy = 0x000080, + NavyBlue = 0x000080, + CornflowerBlue = 0x6495ed, + DarkSlateBlue = 0x483d8b, + SlateBlue = 0x6a5acd, + MediumSlateBlue = 0x7b68ee, + LightSlateBlue = 0x8470ff, + MediumBlue = 0x0000cd, + RoyalBlue = 0x4169e1, + Blue = 0x0000ff, + DodgerBlue = 0x1e90ff, + DeepSkyBlue = 0x00bfff, + SkyBlue = 0x87ceeb, + LightSkyBlue = 0x87cefa, + SteelBlue = 0x4682b4, + LightSteelBlue = 0xb0c4de, + LightBlue = 0xadd8e6, + PowderBlue = 0xb0e0e6, + PaleTurquoise = 0xafeeee, + DarkTurquoise = 0x00ced1, + MediumTurquoise = 0x48d1cc, + Turquoise = 0x40e0d0, + Cyan = 0x00ffff, + Aqua = 0x00ffff, + LightCyan = 0xe0ffff, + CadetBlue = 0x5f9ea0, + MediumAquamarine = 0x66cdaa, + Aquamarine = 0x7fffd4, + DarkGreen = 0x006400, + DarkOliveGreen = 0x556b2f, + DarkSeaGreen = 0x8fbc8f, + SeaGreen = 0x2e8b57, + MediumSeaGreen = 0x3cb371, + LightSeaGreen = 0x20b2aa, + PaleGreen = 0x98fb98, + SpringGreen = 0x00ff7f, + LawnGreen = 0x7cfc00, + Green = 0x00ff00, + Lime = 0x00ff00, + X11Green = 0x00ff00, + WebGreen = 0x008000, + Chartreuse = 0x7fff00, + MediumSpringGreen = 0x00fa9a, + GreenYellow = 0xadff2f, + LimeGreen = 0x32cd32, + YellowGreen = 0x9acd32, + ForestGreen = 0x228b22, + OliveDrab = 0x6b8e23, + DarkKhaki = 0xbdb76b, + Khaki = 0xf0e68c, + PaleGoldenrod = 0xeee8aa, + LightGoldenrodYellow = 0xfafad2, + LightYellow = 0xffffe0, + Yellow = 0xffff00, + Gold = 0xffd700, + LightGoldenrod = 0xeedd82, + Goldenrod = 0xdaa520, + DarkGoldenrod = 0xb8860b, + RosyBrown = 0xbc8f8f, + IndianRed = 0xcd5c5c, + SaddleBrown = 0x8b4513, + Sienna = 0xa0522d, + Peru = 0xcd853f, 
+ Burlywood = 0xdeb887, + Beige = 0xf5f5dc, + Wheat = 0xf5deb3, + SandyBrown = 0xf4a460, + Tan = 0xd2b48c, + Chocolate = 0xd2691e, + Firebrick = 0xb22222, + Brown = 0xa52a2a, + DarkSalmon = 0xe9967a, + Salmon = 0xfa8072, + LightSalmon = 0xffa07a, + Orange = 0xffa500, + DarkOrange = 0xff8c00, + Coral = 0xff7f50, + LightCoral = 0xf08080, + Tomato = 0xff6347, + OrangeRed = 0xff4500, + Red = 0xff0000, + HotPink = 0xff69b4, + DeepPink = 0xff1493, + Pink = 0xffc0cb, + LightPink = 0xffb6c1, + PaleVioletRed = 0xdb7093, + Maroon = 0xb03060, + X11Maroon = 0xb03060, + WebMaroon = 0x800000, + MediumVioletRed = 0xc71585, + VioletRed = 0xd02090, + Magenta = 0xff00ff, + Fuchsia = 0xff00ff, + Violet = 0xee82ee, + Plum = 0xdda0dd, + Orchid = 0xda70d6, + MediumOrchid = 0xba55d3, + DarkOrchid = 0x9932cc, + DarkViolet = 0x9400d3, + BlueViolet = 0x8a2be2, + Purple = 0xa020f0, + X11Purple = 0xa020f0, + WebPurple = 0x800080, + MediumPurple = 0x9370db, + Thistle = 0xd8bfd8, + Snow1 = 0xfffafa, + Snow2 = 0xeee9e9, + Snow3 = 0xcdc9c9, + Snow4 = 0x8b8989, + Seashell1 = 0xfff5ee, + Seashell2 = 0xeee5de, + Seashell3 = 0xcdc5bf, + Seashell4 = 0x8b8682, + AntiqueWhite1 = 0xffefdb, + AntiqueWhite2 = 0xeedfcc, + AntiqueWhite3 = 0xcdc0b0, + AntiqueWhite4 = 0x8b8378, + Bisque1 = 0xffe4c4, + Bisque2 = 0xeed5b7, + Bisque3 = 0xcdb79e, + Bisque4 = 0x8b7d6b, + PeachPuff1 = 0xffdab9, + PeachPuff2 = 0xeecbad, + PeachPuff3 = 0xcdaf95, + PeachPuff4 = 0x8b7765, + NavajoWhite1 = 0xffdead, + NavajoWhite2 = 0xeecfa1, + NavajoWhite3 = 0xcdb38b, + NavajoWhite4 = 0x8b795e, + LemonChiffon1 = 0xfffacd, + LemonChiffon2 = 0xeee9bf, + LemonChiffon3 = 0xcdc9a5, + LemonChiffon4 = 0x8b8970, + Cornsilk1 = 0xfff8dc, + Cornsilk2 = 0xeee8cd, + Cornsilk3 = 0xcdc8b1, + Cornsilk4 = 0x8b8878, + Ivory1 = 0xfffff0, + Ivory2 = 0xeeeee0, + Ivory3 = 0xcdcdc1, + Ivory4 = 0x8b8b83, + Honeydew1 = 0xf0fff0, + Honeydew2 = 0xe0eee0, + Honeydew3 = 0xc1cdc1, + Honeydew4 = 0x838b83, + LavenderBlush1 = 0xfff0f5, + LavenderBlush2 = 0xeee0e5, + 
LavenderBlush3 = 0xcdc1c5, + LavenderBlush4 = 0x8b8386, + MistyRose1 = 0xffe4e1, + MistyRose2 = 0xeed5d2, + MistyRose3 = 0xcdb7b5, + MistyRose4 = 0x8b7d7b, + Azure1 = 0xf0ffff, + Azure2 = 0xe0eeee, + Azure3 = 0xc1cdcd, + Azure4 = 0x838b8b, + SlateBlue1 = 0x836fff, + SlateBlue2 = 0x7a67ee, + SlateBlue3 = 0x6959cd, + SlateBlue4 = 0x473c8b, + RoyalBlue1 = 0x4876ff, + RoyalBlue2 = 0x436eee, + RoyalBlue3 = 0x3a5fcd, + RoyalBlue4 = 0x27408b, + Blue1 = 0x0000ff, + Blue2 = 0x0000ee, + Blue3 = 0x0000cd, + Blue4 = 0x00008b, + DodgerBlue1 = 0x1e90ff, + DodgerBlue2 = 0x1c86ee, + DodgerBlue3 = 0x1874cd, + DodgerBlue4 = 0x104e8b, + SteelBlue1 = 0x63b8ff, + SteelBlue2 = 0x5cacee, + SteelBlue3 = 0x4f94cd, + SteelBlue4 = 0x36648b, + DeepSkyBlue1 = 0x00bfff, + DeepSkyBlue2 = 0x00b2ee, + DeepSkyBlue3 = 0x009acd, + DeepSkyBlue4 = 0x00688b, + SkyBlue1 = 0x87ceff, + SkyBlue2 = 0x7ec0ee, + SkyBlue3 = 0x6ca6cd, + SkyBlue4 = 0x4a708b, + LightSkyBlue1 = 0xb0e2ff, + LightSkyBlue2 = 0xa4d3ee, + LightSkyBlue3 = 0x8db6cd, + LightSkyBlue4 = 0x607b8b, + SlateGray1 = 0xc6e2ff, + SlateGray2 = 0xb9d3ee, + SlateGray3 = 0x9fb6cd, + SlateGray4 = 0x6c7b8b, + LightSteelBlue1 = 0xcae1ff, + LightSteelBlue2 = 0xbcd2ee, + LightSteelBlue3 = 0xa2b5cd, + LightSteelBlue4 = 0x6e7b8b, + LightBlue1 = 0xbfefff, + LightBlue2 = 0xb2dfee, + LightBlue3 = 0x9ac0cd, + LightBlue4 = 0x68838b, + LightCyan1 = 0xe0ffff, + LightCyan2 = 0xd1eeee, + LightCyan3 = 0xb4cdcd, + LightCyan4 = 0x7a8b8b, + PaleTurquoise1 = 0xbbffff, + PaleTurquoise2 = 0xaeeeee, + PaleTurquoise3 = 0x96cdcd, + PaleTurquoise4 = 0x668b8b, + CadetBlue1 = 0x98f5ff, + CadetBlue2 = 0x8ee5ee, + CadetBlue3 = 0x7ac5cd, + CadetBlue4 = 0x53868b, + Turquoise1 = 0x00f5ff, + Turquoise2 = 0x00e5ee, + Turquoise3 = 0x00c5cd, + Turquoise4 = 0x00868b, + Cyan1 = 0x00ffff, + Cyan2 = 0x00eeee, + Cyan3 = 0x00cdcd, + Cyan4 = 0x008b8b, + DarkSlateGray1 = 0x97ffff, + DarkSlateGray2 = 0x8deeee, + DarkSlateGray3 = 0x79cdcd, + DarkSlateGray4 = 0x528b8b, + Aquamarine1 = 0x7fffd4, + 
Aquamarine2 = 0x76eec6, + Aquamarine3 = 0x66cdaa, + Aquamarine4 = 0x458b74, + DarkSeaGreen1 = 0xc1ffc1, + DarkSeaGreen2 = 0xb4eeb4, + DarkSeaGreen3 = 0x9bcd9b, + DarkSeaGreen4 = 0x698b69, + SeaGreen1 = 0x54ff9f, + SeaGreen2 = 0x4eee94, + SeaGreen3 = 0x43cd80, + SeaGreen4 = 0x2e8b57, + PaleGreen1 = 0x9aff9a, + PaleGreen2 = 0x90ee90, + PaleGreen3 = 0x7ccd7c, + PaleGreen4 = 0x548b54, + SpringGreen1 = 0x00ff7f, + SpringGreen2 = 0x00ee76, + SpringGreen3 = 0x00cd66, + SpringGreen4 = 0x008b45, + Green1 = 0x00ff00, + Green2 = 0x00ee00, + Green3 = 0x00cd00, + Green4 = 0x008b00, + Chartreuse1 = 0x7fff00, + Chartreuse2 = 0x76ee00, + Chartreuse3 = 0x66cd00, + Chartreuse4 = 0x458b00, + OliveDrab1 = 0xc0ff3e, + OliveDrab2 = 0xb3ee3a, + OliveDrab3 = 0x9acd32, + OliveDrab4 = 0x698b22, + DarkOliveGreen1 = 0xcaff70, + DarkOliveGreen2 = 0xbcee68, + DarkOliveGreen3 = 0xa2cd5a, + DarkOliveGreen4 = 0x6e8b3d, + Khaki1 = 0xfff68f, + Khaki2 = 0xeee685, + Khaki3 = 0xcdc673, + Khaki4 = 0x8b864e, + LightGoldenrod1 = 0xffec8b, + LightGoldenrod2 = 0xeedc82, + LightGoldenrod3 = 0xcdbe70, + LightGoldenrod4 = 0x8b814c, + LightYellow1 = 0xffffe0, + LightYellow2 = 0xeeeed1, + LightYellow3 = 0xcdcdb4, + LightYellow4 = 0x8b8b7a, + Yellow1 = 0xffff00, + Yellow2 = 0xeeee00, + Yellow3 = 0xcdcd00, + Yellow4 = 0x8b8b00, + Gold1 = 0xffd700, + Gold2 = 0xeec900, + Gold3 = 0xcdad00, + Gold4 = 0x8b7500, + Goldenrod1 = 0xffc125, + Goldenrod2 = 0xeeb422, + Goldenrod3 = 0xcd9b1d, + Goldenrod4 = 0x8b6914, + DarkGoldenrod1 = 0xffb90f, + DarkGoldenrod2 = 0xeead0e, + DarkGoldenrod3 = 0xcd950c, + DarkGoldenrod4 = 0x8b6508, + RosyBrown1 = 0xffc1c1, + RosyBrown2 = 0xeeb4b4, + RosyBrown3 = 0xcd9b9b, + RosyBrown4 = 0x8b6969, + IndianRed1 = 0xff6a6a, + IndianRed2 = 0xee6363, + IndianRed3 = 0xcd5555, + IndianRed4 = 0x8b3a3a, + Sienna1 = 0xff8247, + Sienna2 = 0xee7942, + Sienna3 = 0xcd6839, + Sienna4 = 0x8b4726, + Burlywood1 = 0xffd39b, + Burlywood2 = 0xeec591, + Burlywood3 = 0xcdaa7d, + Burlywood4 = 0x8b7355, + Wheat1 = 
0xffe7ba, + Wheat2 = 0xeed8ae, + Wheat3 = 0xcdba96, + Wheat4 = 0x8b7e66, + Tan1 = 0xffa54f, + Tan2 = 0xee9a49, + Tan3 = 0xcd853f, + Tan4 = 0x8b5a2b, + Chocolate1 = 0xff7f24, + Chocolate2 = 0xee7621, + Chocolate3 = 0xcd661d, + Chocolate4 = 0x8b4513, + Firebrick1 = 0xff3030, + Firebrick2 = 0xee2c2c, + Firebrick3 = 0xcd2626, + Firebrick4 = 0x8b1a1a, + Brown1 = 0xff4040, + Brown2 = 0xee3b3b, + Brown3 = 0xcd3333, + Brown4 = 0x8b2323, + Salmon1 = 0xff8c69, + Salmon2 = 0xee8262, + Salmon3 = 0xcd7054, + Salmon4 = 0x8b4c39, + LightSalmon1 = 0xffa07a, + LightSalmon2 = 0xee9572, + LightSalmon3 = 0xcd8162, + LightSalmon4 = 0x8b5742, + Orange1 = 0xffa500, + Orange2 = 0xee9a00, + Orange3 = 0xcd8500, + Orange4 = 0x8b5a00, + DarkOrange1 = 0xff7f00, + DarkOrange2 = 0xee7600, + DarkOrange3 = 0xcd6600, + DarkOrange4 = 0x8b4500, + Coral1 = 0xff7256, + Coral2 = 0xee6a50, + Coral3 = 0xcd5b45, + Coral4 = 0x8b3e2f, + Tomato1 = 0xff6347, + Tomato2 = 0xee5c42, + Tomato3 = 0xcd4f39, + Tomato4 = 0x8b3626, + OrangeRed1 = 0xff4500, + OrangeRed2 = 0xee4000, + OrangeRed3 = 0xcd3700, + OrangeRed4 = 0x8b2500, + Red1 = 0xff0000, + Red2 = 0xee0000, + Red3 = 0xcd0000, + Red4 = 0x8b0000, + DeepPink1 = 0xff1493, + DeepPink2 = 0xee1289, + DeepPink3 = 0xcd1076, + DeepPink4 = 0x8b0a50, + HotPink1 = 0xff6eb4, + HotPink2 = 0xee6aa7, + HotPink3 = 0xcd6090, + HotPink4 = 0x8b3a62, + Pink1 = 0xffb5c5, + Pink2 = 0xeea9b8, + Pink3 = 0xcd919e, + Pink4 = 0x8b636c, + LightPink1 = 0xffaeb9, + LightPink2 = 0xeea2ad, + LightPink3 = 0xcd8c95, + LightPink4 = 0x8b5f65, + PaleVioletRed1 = 0xff82ab, + PaleVioletRed2 = 0xee799f, + PaleVioletRed3 = 0xcd6889, + PaleVioletRed4 = 0x8b475d, + Maroon1 = 0xff34b3, + Maroon2 = 0xee30a7, + Maroon3 = 0xcd2990, + Maroon4 = 0x8b1c62, + VioletRed1 = 0xff3e96, + VioletRed2 = 0xee3a8c, + VioletRed3 = 0xcd3278, + VioletRed4 = 0x8b2252, + Magenta1 = 0xff00ff, + Magenta2 = 0xee00ee, + Magenta3 = 0xcd00cd, + Magenta4 = 0x8b008b, + Orchid1 = 0xff83fa, + Orchid2 = 0xee7ae9, + Orchid3 = 0xcd69c9, 
+ Orchid4 = 0x8b4789, + Plum1 = 0xffbbff, + Plum2 = 0xeeaeee, + Plum3 = 0xcd96cd, + Plum4 = 0x8b668b, + MediumOrchid1 = 0xe066ff, + MediumOrchid2 = 0xd15fee, + MediumOrchid3 = 0xb452cd, + MediumOrchid4 = 0x7a378b, + DarkOrchid1 = 0xbf3eff, + DarkOrchid2 = 0xb23aee, + DarkOrchid3 = 0x9a32cd, + DarkOrchid4 = 0x68228b, + Purple1 = 0x9b30ff, + Purple2 = 0x912cee, + Purple3 = 0x7d26cd, + Purple4 = 0x551a8b, + MediumPurple1 = 0xab82ff, + MediumPurple2 = 0x9f79ee, + MediumPurple3 = 0x8968cd, + MediumPurple4 = 0x5d478b, + Thistle1 = 0xffe1ff, + Thistle2 = 0xeed2ee, + Thistle3 = 0xcdb5cd, + Thistle4 = 0x8b7b8b, + Gray0 = 0x000000, + Grey0 = 0x000000, + Gray1 = 0x030303, + Grey1 = 0x030303, + Gray2 = 0x050505, + Grey2 = 0x050505, + Gray3 = 0x080808, + Grey3 = 0x080808, + Gray4 = 0x0a0a0a, + Grey4 = 0x0a0a0a, + Gray5 = 0x0d0d0d, + Grey5 = 0x0d0d0d, + Gray6 = 0x0f0f0f, + Grey6 = 0x0f0f0f, + Gray7 = 0x121212, + Grey7 = 0x121212, + Gray8 = 0x141414, + Grey8 = 0x141414, + Gray9 = 0x171717, + Grey9 = 0x171717, + Gray10 = 0x1a1a1a, + Grey10 = 0x1a1a1a, + Gray11 = 0x1c1c1c, + Grey11 = 0x1c1c1c, + Gray12 = 0x1f1f1f, + Grey12 = 0x1f1f1f, + Gray13 = 0x212121, + Grey13 = 0x212121, + Gray14 = 0x242424, + Grey14 = 0x242424, + Gray15 = 0x262626, + Grey15 = 0x262626, + Gray16 = 0x292929, + Grey16 = 0x292929, + Gray17 = 0x2b2b2b, + Grey17 = 0x2b2b2b, + Gray18 = 0x2e2e2e, + Grey18 = 0x2e2e2e, + Gray19 = 0x303030, + Grey19 = 0x303030, + Gray20 = 0x333333, + Grey20 = 0x333333, + Gray21 = 0x363636, + Grey21 = 0x363636, + Gray22 = 0x383838, + Grey22 = 0x383838, + Gray23 = 0x3b3b3b, + Grey23 = 0x3b3b3b, + Gray24 = 0x3d3d3d, + Grey24 = 0x3d3d3d, + Gray25 = 0x404040, + Grey25 = 0x404040, + Gray26 = 0x424242, + Grey26 = 0x424242, + Gray27 = 0x454545, + Grey27 = 0x454545, + Gray28 = 0x474747, + Grey28 = 0x474747, + Gray29 = 0x4a4a4a, + Grey29 = 0x4a4a4a, + Gray30 = 0x4d4d4d, + Grey30 = 0x4d4d4d, + Gray31 = 0x4f4f4f, + Grey31 = 0x4f4f4f, + Gray32 = 0x525252, + Grey32 = 0x525252, + Gray33 = 0x545454, + 
Grey33 = 0x545454, + Gray34 = 0x575757, + Grey34 = 0x575757, + Gray35 = 0x595959, + Grey35 = 0x595959, + Gray36 = 0x5c5c5c, + Grey36 = 0x5c5c5c, + Gray37 = 0x5e5e5e, + Grey37 = 0x5e5e5e, + Gray38 = 0x616161, + Grey38 = 0x616161, + Gray39 = 0x636363, + Grey39 = 0x636363, + Gray40 = 0x666666, + Grey40 = 0x666666, + Gray41 = 0x696969, + Grey41 = 0x696969, + Gray42 = 0x6b6b6b, + Grey42 = 0x6b6b6b, + Gray43 = 0x6e6e6e, + Grey43 = 0x6e6e6e, + Gray44 = 0x707070, + Grey44 = 0x707070, + Gray45 = 0x737373, + Grey45 = 0x737373, + Gray46 = 0x757575, + Grey46 = 0x757575, + Gray47 = 0x787878, + Grey47 = 0x787878, + Gray48 = 0x7a7a7a, + Grey48 = 0x7a7a7a, + Gray49 = 0x7d7d7d, + Grey49 = 0x7d7d7d, + Gray50 = 0x7f7f7f, + Grey50 = 0x7f7f7f, + Gray51 = 0x828282, + Grey51 = 0x828282, + Gray52 = 0x858585, + Grey52 = 0x858585, + Gray53 = 0x878787, + Grey53 = 0x878787, + Gray54 = 0x8a8a8a, + Grey54 = 0x8a8a8a, + Gray55 = 0x8c8c8c, + Grey55 = 0x8c8c8c, + Gray56 = 0x8f8f8f, + Grey56 = 0x8f8f8f, + Gray57 = 0x919191, + Grey57 = 0x919191, + Gray58 = 0x949494, + Grey58 = 0x949494, + Gray59 = 0x969696, + Grey59 = 0x969696, + Gray60 = 0x999999, + Grey60 = 0x999999, + Gray61 = 0x9c9c9c, + Grey61 = 0x9c9c9c, + Gray62 = 0x9e9e9e, + Grey62 = 0x9e9e9e, + Gray63 = 0xa1a1a1, + Grey63 = 0xa1a1a1, + Gray64 = 0xa3a3a3, + Grey64 = 0xa3a3a3, + Gray65 = 0xa6a6a6, + Grey65 = 0xa6a6a6, + Gray66 = 0xa8a8a8, + Grey66 = 0xa8a8a8, + Gray67 = 0xababab, + Grey67 = 0xababab, + Gray68 = 0xadadad, + Grey68 = 0xadadad, + Gray69 = 0xb0b0b0, + Grey69 = 0xb0b0b0, + Gray70 = 0xb3b3b3, + Grey70 = 0xb3b3b3, + Gray71 = 0xb5b5b5, + Grey71 = 0xb5b5b5, + Gray72 = 0xb8b8b8, + Grey72 = 0xb8b8b8, + Gray73 = 0xbababa, + Grey73 = 0xbababa, + Gray74 = 0xbdbdbd, + Grey74 = 0xbdbdbd, + Gray75 = 0xbfbfbf, + Grey75 = 0xbfbfbf, + Gray76 = 0xc2c2c2, + Grey76 = 0xc2c2c2, + Gray77 = 0xc4c4c4, + Grey77 = 0xc4c4c4, + Gray78 = 0xc7c7c7, + Grey78 = 0xc7c7c7, + Gray79 = 0xc9c9c9, + Grey79 = 0xc9c9c9, + Gray80 = 0xcccccc, + Grey80 = 0xcccccc, + 
Gray81 = 0xcfcfcf, + Grey81 = 0xcfcfcf, + Gray82 = 0xd1d1d1, + Grey82 = 0xd1d1d1, + Gray83 = 0xd4d4d4, + Grey83 = 0xd4d4d4, + Gray84 = 0xd6d6d6, + Grey84 = 0xd6d6d6, + Gray85 = 0xd9d9d9, + Grey85 = 0xd9d9d9, + Gray86 = 0xdbdbdb, + Grey86 = 0xdbdbdb, + Gray87 = 0xdedede, + Grey87 = 0xdedede, + Gray88 = 0xe0e0e0, + Grey88 = 0xe0e0e0, + Gray89 = 0xe3e3e3, + Grey89 = 0xe3e3e3, + Gray90 = 0xe5e5e5, + Grey90 = 0xe5e5e5, + Gray91 = 0xe8e8e8, + Grey91 = 0xe8e8e8, + Gray92 = 0xebebeb, + Grey92 = 0xebebeb, + Gray93 = 0xededed, + Grey93 = 0xededed, + Gray94 = 0xf0f0f0, + Grey94 = 0xf0f0f0, + Gray95 = 0xf2f2f2, + Grey95 = 0xf2f2f2, + Gray96 = 0xf5f5f5, + Grey96 = 0xf5f5f5, + Gray97 = 0xf7f7f7, + Grey97 = 0xf7f7f7, + Gray98 = 0xfafafa, + Grey98 = 0xfafafa, + Gray99 = 0xfcfcfc, + Grey99 = 0xfcfcfc, + Gray100 = 0xffffff, + Grey100 = 0xffffff, + DarkGrey = 0xa9a9a9, + DarkGray = 0xa9a9a9, + DarkBlue = 0x00008b, + DarkCyan = 0x008b8b, + DarkMagenta = 0x8b008b, + DarkRed = 0x8b0000, + LightGreen = 0x90ee90, + Crimson = 0xdc143c, + Indigo = 0x4b0082, + Olive = 0x808000, + RebeccaPurple = 0x663399, + Silver = 0xc0c0c0, + Teal = 0x008080, +}; +}; +} + +#endif diff --git a/Source/ThirdParty/tracy/common/TracySystem.cpp b/Source/ThirdParty/tracy/common/TracySystem.cpp index eb831fe20..78cce7bdf 100644 --- a/Source/ThirdParty/tracy/common/TracySystem.cpp +++ b/Source/ThirdParty/tracy/common/TracySystem.cpp @@ -351,3 +351,20 @@ TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadNa #ifdef __cplusplus } #endif + +// Inset graphics integration (within Tracy module) +#define TRACY_GPU_IMPL 1 +#if TRACY_GPU_D3D11 +#include +static_assert(sizeof(tracy::D3D11ZoneScope) <= TracyD3D11ZoneSize, "Invalid zone size"); +#endif +#if TRACY_GPU_D3D12 +#include +static_assert(sizeof(tracy::D3D12ZoneScope) <= TracyD3D12ZoneSize, "Invalid zone size"); +#endif +#if TRACY_GPU_VULKAN +#define GRAPHICS_API_VULKAN 1 +#include "Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h" 
+#include +static_assert(sizeof(tracy::VkCtxScope) <= TracyVulkanZoneSize, "Invalid zone size"); +#endif diff --git a/Source/ThirdParty/tracy/tracy.Build.cs b/Source/ThirdParty/tracy/tracy.Build.cs index beb0e9f89..6a26d21ff 100644 --- a/Source/ThirdParty/tracy/tracy.Build.cs +++ b/Source/ThirdParty/tracy/tracy.Build.cs @@ -15,6 +15,11 @@ public class tracy : ThirdPartyModule /// public static bool OnDemand = true; + /// + /// Enables GPU profiling. + /// + public static bool GPU = true; + /// public override void Init() { @@ -56,8 +61,25 @@ public class tracy : ThirdPartyModule options.PrivateDefinitions.Add("TRACY_USE_MALLOC"); options.PrivateDefinitions.Add("TRACY_ONLY_IPV4"); options.PrivateDefinitions.Add("TRACY_NO_PIPE"); + options.PrivateDefinitions.Add("TRACY_NO_CODE_TRANSFER"); break; } + + if (GPU) + { + // Ask Graphics module which graphics backends are active + var graphics = new Graphics(); + graphics.FilePath = FilePath; + graphics.FolderPath = FolderPath; + var graphicsOptions = (BuildOptions)options.Clone(); + graphics.Setup(graphicsOptions); + if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceDX11")) + options.PrivateDefinitions.Add("TRACY_GPU_D3D11"); + if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceDX12")) + options.PrivateDefinitions.Add("TRACY_GPU_D3D12"); + if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceVulkan")) + options.PrivateDefinitions.Add("TRACY_GPU_VULKAN"); + } } /// diff --git a/Source/ThirdParty/tracy/tracy/Tracy.hpp b/Source/ThirdParty/tracy/tracy/Tracy.hpp index fbc0746eb..5e26dec10 100644 --- a/Source/ThirdParty/tracy/tracy/Tracy.hpp +++ b/Source/ThirdParty/tracy/tracy/Tracy.hpp @@ -119,6 +119,8 @@ namespace tracy { +TRACY_API bool IsConnected(); + class TRACY_API Profiler { public: @@ -143,7 +145,6 @@ public: static void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ); static void MemFreeCallstackNamed( const void* ptr, int depth, 
bool secure, const char* name ); static void SendCallstack( int depth ); - static void ParameterRegister( ParameterCallback cb ); static void ParameterRegister( ParameterCallback cb, void* data ); static void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val ); }; @@ -255,7 +256,7 @@ public: #define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) #define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) #define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ) -#define TracyIsConnected tracy::GetProfiler().IsConnected() +#define TracyIsConnected tracy::IsConnected() #define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); #ifdef TRACY_FIBERS diff --git a/Source/ThirdParty/tracy/tracy/TracyD3D11.hpp b/Source/ThirdParty/tracy/tracy/TracyD3D11.hpp new file mode 100644 index 000000000..06fcd6225 --- /dev/null +++ b/Source/ThirdParty/tracy/tracy/TracyD3D11.hpp @@ -0,0 +1,456 @@ +#ifndef __TRACYD3D11_HPP__ +#define __TRACYD3D11_HPP__ + +#define TracyD3D11ZoneSize 16 + +#ifndef TRACY_ENABLE + +#define TracyD3D11Context(device,queue) nullptr +#define TracyD3D11Destroy(ctx) +#define TracyD3D11ContextName(ctx, name, size) + +#define TracyD3D11NewFrame(ctx) + +#define TracyD3D11Zone(ctx, name) +#define TracyD3D11ZoneC(ctx, name, color) +#define TracyD3D11NamedZone(ctx, varname, name, active) +#define TracyD3D11NamedZoneC(ctx, varname, name, color, active) +#define TracyD3D11ZoneTransient(ctx, varname, name, active) + +#define TracyD3D11ZoneS(ctx, name, depth) +#define TracyD3D11ZoneCS(ctx, name, color, depth) +#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active) +#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active) +#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) + +#define TracyD3D11Collect(ctx) + +namespace tracy +{ +class D3D11ZoneScope {}; +} + 
+using TracyD3D11Ctx = void*; + +#elif TRACY_GPU_IMPL + +#include +#include +#include + +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyYield.hpp" +#include "../common/TracyColor.hpp" + +#include + +#define TRACY_CALLSTACK 0 +#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) +#define TracyD3D11Panic(msg, ...) do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); + +namespace tracy +{ + +class D3D11Ctx +{ + friend class D3D11ZoneScope; + + static constexpr uint32_t MaxQueries = 64 * 1024; + + enum CollectMode { POLL, BLOCK }; + +public: + tracy_force_inline D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx ) + { + // TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument + m_device = device; + device->AddRef(); + m_immediateDevCtx = devicectx; + devicectx->AddRef(); + + { + D3D11_QUERY_DESC desc = { }; + desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery))) + { + TracyD3D11Panic("unable to create disjoint timestamp query.", return); + } + } + + for (ID3D11Query*& query : m_queries) + { + D3D11_QUERY_DESC desc = { }; + desc.Query = D3D11_QUERY_TIMESTAMP; + if (FAILED(m_device->CreateQuery(&desc, &query))) + { + TracyD3D11Panic("unable to create timestamp query.", return); + } + } + + // Calibrate CPU and GPU timestamps + int64_t tcpu = 0; + int64_t tgpu = 0; + for (int attempts = 0; attempts < 50; attempts++) + { + m_immediateDevCtx->Begin(m_disjointQuery); + m_immediateDevCtx->End(m_queries[0]); + m_immediateDevCtx->End(m_disjointQuery); + + int64_t tcpu0 = Profiler::GetTime(); + WaitForQuery(m_disjointQuery); + // NOTE: one would expect that by waiting for the enclosing disjoint query to finish, + // all timestamp queries within would also be readily available, but that does not + // 
seem to be the case here... See https://github.com/wolfpld/tracy/issues/947 + WaitForQuery(m_queries[0]); + int64_t tcpu1 = Profiler::GetTime(); + + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; + if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK) + { + TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato); + continue; + } + + if (disjoint.Disjoint) + continue; + + UINT64 timestamp = 0; + if (m_immediateDevCtx->GetData(m_queries[0], ×tamp, sizeof(timestamp), 0) != S_OK) + continue; // this should never happen (we waited for the query to finish above) + + tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2; + tgpu = timestamp * (1000000000 / disjoint.Frequency); + break; + } + + // ready to roll + m_contextId = GetGpuCtxCounter().fetch_add(1); + m_immediateDevCtx->Begin(m_disjointQuery); + m_previousCheckpoint = m_nextCheckpoint = 0; + + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( &item->gpuNewContext.gpuTime, tgpu ); + MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()? 
+ MemWrite( &item->gpuNewContext.period, 1.0f ); + MemWrite( &item->gpuNewContext.context, m_contextId); + MemWrite( &item->gpuNewContext.flags, uint8_t(0) ); + MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 ); + +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + + Profiler::QueueSerialFinish(); + } + + tracy_force_inline ~D3D11Ctx() + { + // collect all pending timestamps before destroying everything + do + { + Collect(BLOCK); + } while (m_previousCheckpoint != m_queryCounter); + + for (ID3D11Query* query : m_queries) + { + query->Release(); + } + m_immediateDevCtx->End(m_disjointQuery); + m_disjointQuery->Release(); + m_immediateDevCtx->Release(); + m_device->Release(); + } + + tracy_force_inline void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_contextId ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + + void Collect(CollectMode mode = POLL) + { +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) + { + m_previousCheckpoint = m_nextCheckpoint = m_queryCounter; + return; + } +#endif + + if (m_previousCheckpoint == m_nextCheckpoint) + { + uintptr_t nextCheckpoint = m_queryCounter; + if (nextCheckpoint == m_nextCheckpoint) + { + return; + } + m_nextCheckpoint = nextCheckpoint; + m_immediateDevCtx->End(m_disjointQuery); + } + + if (mode == CollectMode::BLOCK) + { + WaitForQuery(m_disjointQuery); + } + + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; + if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK) + { + return; + } + + if (disjoint.Disjoint == TRUE) + { + 
m_previousCheckpoint = m_nextCheckpoint; + TracyD3D11Panic("disjoint timestamps detected; dropping."); + return; + } + + auto begin = m_previousCheckpoint; + auto end = m_nextCheckpoint; + for (auto i = begin; i != end; ++i) + { + uint32_t k = RingIndex(i); + UINT64 timestamp = 0; + if (m_immediateDevCtx->GetData(m_queries[k], ×tamp, sizeof(timestamp), 0) != S_OK) + { + TracyD3D11Panic("timestamp expected to be ready, but it was not!"); + break; + } + timestamp *= (1000000000ull / disjoint.Frequency); + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(timestamp)); + MemWrite(&item->gpuTime.queryId, static_cast(k)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + + // disjoint timestamp queries should only be invoked once per frame or less + // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query + m_immediateDevCtx->Begin(m_disjointQuery); + m_previousCheckpoint = m_nextCheckpoint; + } + +private: + tracy_force_inline uint32_t RingIndex(uintptr_t index) + { + index %= MaxQueries; + return static_cast(index); + } + + tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) + { + // wrap-around safe: all unsigned + uintptr_t count = end - begin; + return static_cast(count); + } + + tracy_force_inline uint32_t NextQueryId() + { + auto id = m_queryCounter++; + if (RingCount(m_previousCheckpoint, id) >= MaxQueries) + { + TracyD3D11Panic("too many pending timestamp queries."); + // #TODO: return some sentinel value; ideally a "hidden" query index + } + return RingIndex(id); + } + + tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id) + { + return m_queries[id]; + } + + tracy_force_inline void WaitForQuery(ID3D11Query* query) + { + m_immediateDevCtx->Flush(); + while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK) + YieldThread(); // busy-wait :-( attempt to reduce power usage with 
_mm_pause() & friends... + } + + tracy_force_inline uint8_t GetContextId() const + { + return m_contextId; + } + + ID3D11Device* m_device = nullptr; + ID3D11DeviceContext* m_immediateDevCtx = nullptr; + + ID3D11Query* m_queries[MaxQueries]; + ID3D11Query* m_disjointQuery = nullptr; + + uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere? + + uintptr_t m_queryCounter = 0; + + uintptr_t m_previousCheckpoint = 0; + uintptr_t m_nextCheckpoint = 0; +}; + +class D3D11ZoneScope +{ +public: + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active ) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); + } + + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active ) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + if( depth > 0 && has_callstack() ) + { + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); + } + } + + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, 
const char* name, size_t nameSz, int32_t depth, bool active) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + if ( depth > 0 && has_callstack() ) + { + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + } + + tracy_force_inline ~D3D11ZoneScope() + { + if( !m_active ) return; + + const auto queryId = m_ctx->NextQueryId(); + m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); + + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + } + +private: + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active ) +#ifdef TRACY_ON_DEMAND + : m_active( active && GetProfiler().IsConnected() ) +#else + : m_active( active ) +#endif + { + if( !m_active ) return; + m_ctx = ctx; + } + + void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation) + { + const auto queryId = m_ctx->NextQueryId(); + m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); + + MemWrite( &item->hdr.type, queueItemType); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() ); + 
Profiler::QueueSerialFinish(); + } + + const bool m_active; + + D3D11Ctx* m_ctx; +}; + +void* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx ) +{ + auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) ); + new(ctx) D3D11Ctx( device, devicectx ); + ctx->Name("D3D11", 5); + return ctx; +} + +void CollectD3D11Context(void* ctx) +{ + ((D3D11Ctx*)ctx)->Collect(); +} + +void DestroyD3D11Context(void* ctx ) +{ + ((D3D11Ctx*)ctx)->~D3D11Ctx(); + tracy_free( ctx ); +} + +void BeginD3D11ZoneScope(void* zone, void* ctx, const char* name, size_t nameLen) +{ + new(zone) tracy::D3D11ZoneScope{ (tracy::D3D11Ctx*)ctx, 0, 0, 0, 0, 0, name, nameLen, true }; +} + +void EndD3D11ZoneScope(void* zone) +{ + ((tracy::D3D11ZoneScope*)zone)->~D3D11ZoneScope(); +} +} + +#undef TracyD3D11Panic + +#else + +// Forward declarations to be used in engine +namespace tracy +{ + extern void* CreateD3D11Context(ID3D11Device* device, ID3D11DeviceContext* devicectx); + extern void DestroyD3D11Context(void* ctx); + extern void CollectD3D11Context(void* ctx); + extern void BeginD3D11ZoneScope(void* zone, void* ctx, const char* name, size_t nameLen); + extern void EndD3D11ZoneScope(void* zone); +} + +#endif + +#endif diff --git a/Source/ThirdParty/tracy/tracy/TracyD3D12.hpp b/Source/ThirdParty/tracy/tracy/TracyD3D12.hpp new file mode 100644 index 000000000..75aaedefc --- /dev/null +++ b/Source/ThirdParty/tracy/tracy/TracyD3D12.hpp @@ -0,0 +1,529 @@ +#ifndef __TRACYD3D12_HPP__ +#define __TRACYD3D12_HPP__ + +#define TracyD3D12ZoneSize 32 + +#ifndef TRACY_ENABLE + +#define TracyD3D12Context(device, queue) nullptr +#define TracyD3D12Destroy(ctx) +#define TracyD3D12ContextName(ctx, name, size) + +#define TracyD3D12NewFrame(ctx) + +#define TracyD3D12Zone(ctx, cmdList, name) +#define TracyD3D12ZoneC(ctx, cmdList, name, color) +#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) +#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) +#define 
TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) + +#define TracyD3D12ZoneS(ctx, cmdList, name, depth) +#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) +#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) +#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) +#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) + +#define TracyD3D12Collect(ctx) + +namespace tracy +{ + class D3D12ZoneScope {}; +} + +using TracyD3D12Ctx = void*; + +#elif TRACY_GPU_IMPL + +#include "../client/TracyProfiler.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyColor.hpp" + +#include +#include +#include +#include +#include + +#define TRACY_CALLSTACK 0 +#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) +#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); + +namespace tracy +{ + + struct D3D12QueryPayload + { + uint32_t m_queryIdStart = 0; + uint32_t m_queryCount = 0; + }; + + // Command queue context. + class D3D12QueueCtx + { + friend class D3D12ZoneScope; + + ID3D12Device* m_device = nullptr; + ID3D12CommandQueue* m_queue = nullptr; + uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere? + ID3D12QueryHeap* m_queryHeap = nullptr; + ID3D12Resource* m_readbackBuffer = nullptr; + + // In-progress payload. 
+ uint32_t m_queryLimit = 0; + std::atomic m_queryCounter = 0; + uint32_t m_previousQueryCounter = 0; + + uint32_t m_activePayload = 0; + ID3D12Fence* m_payloadFence = nullptr; + std::queue m_payloadQueue; + + UINT64 m_prevCalibrationTicksCPU = 0; + + void RecalibrateClocks() + { + UINT64 cpuTimestamp; + UINT64 gpuTimestamp; + if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + TracyD3D12Panic("failed to obtain queue clock calibration counters.", return); + } + + int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU; + if (cpuDeltaTicks > 0) + { + static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc(); + int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick; + // Save the device cpu timestamp, not the Tracy profiler timestamp: + m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuCalibration); + MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); + MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); + MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS); + MemWrite(&item->gpuCalibration.context, GetId()); + SubmitQueueItem(item); + } + } + + tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + public: + D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) + : m_device(device) + , m_queue(queue) + { + // Verify we support timestamp queries on this queue. 
+ + if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{}; + + HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)); + if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE)) + { + TracyD3D12Panic("Platform does not support profiling of copy queues.", return); + } + } + + static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs + m_queryLimit = MaxQueries; + + D3D12_QUERY_HEAP_DESC heapDesc{}; + heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + heapDesc.Count = m_queryLimit; + heapDesc.NodeMask = 0; // #TODO: Support multiple adapters. + + while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap)))) + { + m_queryLimit /= 2; + heapDesc.Count = m_queryLimit; + } + + // Create a readback buffer, which will be used as a destination for the query data. + + D3D12_RESOURCE_DESC readbackBufferDesc{}; + readbackBufferDesc.Alignment = 0; + readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t); + readbackBufferDesc.Height = 1; + readbackBufferDesc.DepthOrArraySize = 1; + readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN; + readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major. 
+ readbackBufferDesc.MipLevels = 1; + readbackBufferDesc.SampleDesc.Count = 1; + readbackBufferDesc.SampleDesc.Quality = 0; + readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + D3D12_HEAP_PROPERTIES readbackHeapProps{}; + readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK; + readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + readbackHeapProps.CreationNodeMask = 0; + readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters. + + if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer)))) + { + TracyD3D12Panic("Failed to create query readback buffer.", return); + } + + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence)))) + { + TracyD3D12Panic("Failed to create payload fence.", return); + } + + float period = [queue]() + { + uint64_t timestampFrequency; + if (FAILED(queue->GetTimestampFrequency(×tampFrequency))) + { + return 0.0f; + } + return static_cast( 1E+09 / static_cast(timestampFrequency) ); + }(); + + if (period == 0.0f) + { + TracyD3D12Panic("Failed to get timestamp frequency.", return); + } + + uint64_t cpuTimestamp; + uint64_t gpuTimestamp; + if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + TracyD3D12Panic("Failed to get queue clock calibration.", return); + } + + // Save the device cpu timestamp, not the profiler's timestamp. 
+ m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + + // all checked: ready to roll + m_contextId = GetGpuCtxCounter().fetch_add(1); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp); + MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp); + MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); + MemWrite(&item->gpuNewContext.context, GetId()); + MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); + MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); + SubmitQueueItem(item); + } + + ~D3D12QueueCtx() + { + // collect all pending timestamps + while (m_payloadFence->GetCompletedValue() != m_activePayload) + /* busy-wait ... */; + Collect(); + m_payloadFence->Release(); + m_readbackBuffer->Release(); + m_queryHeap->Release(); + } + + + void NewFrame() + { + uint32_t queryCounter = m_queryCounter.exchange(0); + m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter }); + m_previousQueryCounter += queryCounter; + + if (m_previousQueryCounter >= m_queryLimit) + { + m_previousQueryCounter -= m_queryLimit; + } + + m_queue->Signal(m_payloadFence, ++m_activePayload); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, GetId()); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); + SubmitQueueItem(item); + } + + void Collect() + { +#ifdef TRACY_ON_DEMAND + if (!GetProfiler().IsConnected()) + { + m_queryCounter = 0; + + return; + } +#endif + + // Find out what payloads are available. 
+ const auto newestReadyPayload = m_payloadFence->GetCompletedValue(); + const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload); + + if (!payloadCount) + { + return; // No payloads are available yet, exit out. + } + + D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) }; + + // Map the readback buffer so we can fetch the query data from the GPU. + void* readbackBufferMapping = nullptr; + + if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping))) + { + TracyD3D12Panic("Failed to map readback buffer.", return); + } + + auto* timestampData = static_cast(readbackBufferMapping); + + for (uint32_t i = 0; i < payloadCount; ++i) + { + const auto& payload = m_payloadQueue.front(); + + for (uint32_t j = 0; j < payload.m_queryCount; ++j) + { + const auto counter = (payload.m_queryIdStart + j) % m_queryLimit; + const auto timestamp = timestampData[counter]; + const auto queryId = counter; + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, timestamp); + MemWrite(&item->gpuTime.queryId, static_cast(queryId)); + MemWrite(&item->gpuTime.context, GetId()); + + Profiler::QueueSerialFinish(); + } + + m_payloadQueue.pop(); + } + + m_readbackBuffer->Unmap(0, nullptr); + + // Recalibrate to account for drift. + RecalibrateClocks(); + } + + private: + tracy_force_inline uint32_t NextQueryId() + { + uint32_t queryCounter = m_queryCounter.fetch_add(2); + if (queryCounter >= m_queryLimit) + { + TracyD3D12Panic("Submitted too many GPU queries! 
Consider increasing MaxQueries."); + // #TODO: consider returning an invalid id or sentinel value here + } + + const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit; + + return id; + } + + tracy_force_inline uint8_t GetId() const + { + return m_contextId; + } + }; + + class D3D12ZoneScope + { + const bool m_active; + D3D12QueueCtx* m_ctx = nullptr; + ID3D12GraphicsCommandList* m_cmdList = nullptr; + uint32_t m_queryId = 0; // Used for tracking in nested zones. + + tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation) + { + MemWrite(&item->hdr.type, type); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, srcLocation); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); + MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active) +#ifdef TRACY_ON_DEMAND + : m_active(active&& GetProfiler().IsConnected()) +#else + : m_active(active) +#endif + { + if (!m_active) return; + + m_ctx = ctx; + m_cmdList = cmdList; + + m_queryId = m_ctx->NextQueryId(); + m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); + } + + public: + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcLocation)); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + auto* item = 
Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcLocation)); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + + tracy_force_inline ~D3D12ZoneScope() + { + if (!m_active) return; + + const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot. 
+ m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial); + MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneEnd.queryId, static_cast(queryId)); + MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + + m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t)); + } + }; + + void* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue) + { + auto* ctx = static_cast(tracy_malloc(sizeof(D3D12QueueCtx))); + new (ctx) D3D12QueueCtx{ device, queue }; + ctx->Name("D3D12", 5); + return ctx; + } + + void CollectD3D12Context(void* ctx) + { + ((D3D12QueueCtx*)ctx)->Collect(); + ((D3D12QueueCtx*)ctx)->NewFrame(); + } + + void DestroyD3D12Context(void* ctx) + { + ((D3D12QueueCtx*)ctx)->~D3D12QueueCtx(); + tracy_free(ctx); + } + + void BeginD3D12ZoneScope(void* zone, void* ctx, ID3D12GraphicsCommandList* cmdList, const char* name, size_t nameLen) + { + new(zone) tracy::D3D12ZoneScope{ (tracy::D3D12QueueCtx*)ctx, 0, 0, 0, 0, 0, name, nameLen, cmdList, true }; + } + + void EndD3D12ZoneScope(void* zone) + { + ((tracy::D3D12ZoneScope*)zone)->~D3D12ZoneScope(); + } +} + +#undef TracyD3D12Panic + +using TracyD3D12Ctx = tracy::D3D12QueueCtx*; + +#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue); +#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx); +#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size); + +#define TracyD3D12NewFrame(ctx) ctx->NewFrame(); + +#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone +#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine) +#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData 
TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; + +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; +# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active) +#else +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; +# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active }; +#endif + +#ifdef TRACY_HAS_CALLSTACK +# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, 
true) +# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true) +# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; +# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; +# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active }; +#else +# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name) +# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color) +# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active) +# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) +# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) +#endif + +#define TracyD3D12Collect(ctx) ctx->Collect(); + +#else + +// Forward declarations to be used in engine +namespace tracy +{ + extern void* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue); + extern void DestroyD3D12Context(void* ctx); + extern void CollectD3D12Context(void* ctx); + extern void BeginD3D12ZoneScope(void* zone, void* ctx, ID3D12GraphicsCommandList* cmdList, const char* name, size_t nameLen); + extern void EndD3D12ZoneScope(void* zone); +} + +#endif + +#endif diff --git a/Source/ThirdParty/tracy/tracy/TracyVulkan.hpp 
b/Source/ThirdParty/tracy/tracy/TracyVulkan.hpp new file mode 100644 index 000000000..334d2cd24 --- /dev/null +++ b/Source/ThirdParty/tracy/tracy/TracyVulkan.hpp @@ -0,0 +1,779 @@ +#ifndef __TRACYVULKAN_HPP__ +#define __TRACYVULKAN_HPP__ + +#define TracyVulkanZoneSize 24 + +#if !defined TRACY_ENABLE + +#define TracyVkContext(x,y,z,w) nullptr +#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr +#if defined VK_EXT_host_query_reset +#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr +#endif +#define TracyVkDestroy(x) +#define TracyVkContextName(c,x,y) +#define TracyVkNamedZone(c,x,y,z,w) +#define TracyVkNamedZoneC(c,x,y,z,w,a) +#define TracyVkZone(c,x,y) +#define TracyVkZoneC(c,x,y,z) +#define TracyVkZoneTransient(c,x,y,z,w) +#define TracyVkCollect(c,x) + +#define TracyVkNamedZoneS(c,x,y,z,w,a) +#define TracyVkNamedZoneCS(c,x,y,z,w,v,a) +#define TracyVkZoneS(c,x,y,z) +#define TracyVkZoneCS(c,x,y,z,w) +#define TracyVkZoneTransientS(c,x,y,z,w,a) + +namespace tracy +{ +class VkCtxScope {}; +} + +using TracyVkCtx = void*; + +#elif TRACY_GPU_VULKAN + +#if !defined VK_NULL_HANDLE +# error "You must include Vulkan headers before including TracyVulkan.hpp" +#endif + +#include +#include +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" + +#include + +namespace tracy +{ + +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define LoadVkDeviceCoreSymbols(Operation) \ + Operation(vkBeginCommandBuffer) \ + Operation(vkCmdResetQueryPool) \ + Operation(vkCmdWriteTimestamp) \ + Operation(vkCreateQueryPool) \ + Operation(vkDestroyQueryPool) \ + Operation(vkEndCommandBuffer) \ + Operation(vkGetQueryPoolResults) \ + Operation(vkQueueSubmit) \ + Operation(vkQueueWaitIdle) \ + Operation(vkResetQueryPool) + +#define LoadVkDeviceExtensionSymbols(Operation) \ + Operation(vkGetCalibratedTimestampsEXT) + +#define LoadVkInstanceExtensionSymbols(Operation) \ + Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) + +#define LoadVkInstanceCoreSymbols(Operation) \ + 
Operation(vkGetPhysicalDeviceProperties) + +struct VkSymbolTable +{ +#define MAKE_PFN(name) PFN_##name name; + LoadVkDeviceCoreSymbols(MAKE_PFN) + LoadVkDeviceExtensionSymbols(MAKE_PFN) + LoadVkInstanceExtensionSymbols(MAKE_PFN) + LoadVkInstanceCoreSymbols(MAKE_PFN) +#undef MAKE_PFN +}; + +#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature +#else +#define VK_FUNCTION_WRAPPER(callSignature) callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature +#endif + +class VkCtx +{ + friend class VkCtxScope; + + enum { QueryCount = 64 * 1024 }; + +public: +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated ) +#else + VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT) +#endif + : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) + , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) + , m_head( 0 ) + , m_tail( 0 ) + , m_oldCnt( 0 ) + , m_queryCount( QueryCount ) +#if !defined TRACY_VK_USE_SYMBOL_TABLE + , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) +#endif + { + assert( m_context != 255 ); + +#if defined TRACY_VK_USE_SYMBOL_TABLE + PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); + if ( calibrated ) + { + m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; + } + +#endif + + if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT ) + { + FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( 
vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); + } + + CreateQueryPool(); + + VkCommandBufferBeginInfo beginInfo = {}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmdbuf; + + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + + int64_t tcpu, tgpu; + if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT ) + { + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + + tcpu = Profiler::GetTime(); + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) ); + + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + } + else + { + FindCalibratedTimestampDeviation(); + Calibrate( device, m_prevCalibration, tgpu ); + tcpu = Profiler::GetTime(); + } + + WriteInitialItem( physdev, tcpu, tgpu ); + + m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); + } + +#if defined 
VK_EXT_host_query_reset + /** + * This alternative constructor does not use command buffers and instead uses functionality from + * VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires + * the physical device to have another time domain apart from DEVICE to be calibrateable. + */ +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) +#else + VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT ) +#endif + : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) + , m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) ) + , m_head( 0 ) + , m_tail( 0 ) + , m_oldCnt( 0 ) + , m_queryCount( QueryCount ) +#if !defined TRACY_VK_USE_SYMBOL_TABLE + , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) +#endif + { + assert( m_context != 255); + +#if defined TRACY_VK_USE_SYMBOL_TABLE + PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); + m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; +#endif + + assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr ); + assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr ); + assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr ); + + FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); + + // We require a host time domain to be available to properly calibrate. 
+ FindCalibratedTimestampDeviation(); + int64_t tgpu; + Calibrate( device, m_prevCalibration, tgpu ); + int64_t tcpu = Profiler::GetTime(); + + CreateQueryPool(); + VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) ); + + WriteInitialItem( physdev, tcpu, tgpu ); + + // We need the buffer to be twice as large for availability values + size_t resSize = sizeof( int64_t ) * m_queryCount * 2; + m_res = (int64_t*)tracy_malloc( resSize ); + } +#endif + + ~VkCtx() + { + tracy_free( m_res ); + VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) ); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_context ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + + void Collect( VkCommandBuffer cmdbuf ) + { + const uint64_t head = m_head.load(std::memory_order_relaxed); + if( m_tail == head ) return; + +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) + { + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); + m_tail = head; + m_oldCnt = 0; + int64_t tgpu; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); + return; + } +#endif + assert( head > m_tail ); + + const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount ); + + unsigned int cnt; + if( m_oldCnt != 0 ) + { + cnt = m_oldCnt; + m_oldCnt = 0; + } + else + { + cnt = (unsigned int)( head - m_tail ); + assert( cnt <= m_queryCount ); + if( wrappedTail + cnt > m_queryCount ) + { + cnt = m_queryCount - wrappedTail; + } + } + + + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, 
sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) ); + + for( unsigned int idx=0; idxhdr.type, QueueType::GpuTime ); + MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] ); + MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) ); + MemWrite( &item->gpuTime.context, m_context ); + Profiler::QueueSerialFinish(); + } + + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) + { + int64_t tgpu, tcpu; + Calibrate( m_device, tcpu, tgpu ); + const auto refCpu = Profiler::GetTime(); + const auto delta = tcpu - m_prevCalibration; + if( delta > 0 ) + { + m_prevCalibration = tcpu; + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuCalibration ); + MemWrite( &item->gpuCalibration.gpuTime, tgpu ); + MemWrite( &item->gpuCalibration.cpuTime, refCpu ); + MemWrite( &item->gpuCalibration.cpuDelta, delta ); + MemWrite( &item->gpuCalibration.context, m_context ); + Profiler::QueueSerialFinish(); + } + } + + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ); + + m_tail += cnt; + } + + tracy_force_inline unsigned int NextQueryId() + { + const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed); + return id % m_queryCount; + } + + tracy_force_inline uint8_t GetId() const + { + return m_context; + } + + tracy_force_inline VkQueryPool GetQueryPool() const + { + return m_query; + } + +private: + tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) + { + assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); + VkCalibratedTimestampInfoEXT spec[2] = { + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation; + do + { + m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation ); + } + while( deviation > m_deviation ); + +#if defined _WIN32 + tGpu = 
ts[0]; + tCpu = ts[1] * m_qpcToNs; +#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW + tGpu = ts[0]; + tCpu = ts[1]; +#else + assert( false ); +#endif + } + + tracy_force_inline void CreateQueryPool() + { + VkQueryPoolCreateInfo poolInfo = {}; + poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + poolInfo.queryCount = m_queryCount; + poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + while ( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) ) + { + m_queryCount /= 2; + poolInfo.queryCount = m_queryCount; + } + } + + tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) + { + uint32_t num; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr ); + if(num > 4) num = 4; + VkTimeDomainEXT data[4]; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data ); + VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1; +#if defined _WIN32 + supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; +#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW + supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; +#endif + for( uint32_t i=0; i deviation[i] ) { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + +#if defined _WIN32 + m_qpcToNs = int64_t( 1000000000. 
/ GetFrequencyQpc() ); +#endif + } + + tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu ) + { + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; + + VkPhysicalDeviceProperties prop; + VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &prop ) ); + const float period = prop.limits.timestampPeriod; + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( &item->gpuNewContext.gpuTime, tgpu ); + memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); + MemWrite( &item->gpuNewContext.period, period ); + MemWrite( &item->gpuNewContext.context, m_context ); + MemWrite( &item->gpuNewContext.flags, flags ); + MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); + +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + +#if defined TRACY_VK_USE_SYMBOL_TABLE + void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) + { +#define VK_GET_DEVICE_SYMBOL( name ) \ + (PFN_##name)deviceProcAddr( m_device, #name ); +#define VK_LOAD_DEVICE_SYMBOL( name ) \ + m_symbols.name = VK_GET_DEVICE_SYMBOL( name ); +#define VK_GET_INSTANCE_SYMBOL( name ) \ + (PFN_##name)instanceProcAddr( instance, #name ); +#define VK_LOAD_INSTANCE_SYMBOL( name ) \ + m_symbols.name = VK_GET_INSTANCE_SYMBOL( name ); + + LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL ) + LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL ) +#undef VK_GET_DEVICE_SYMBOL +#undef VK_LOAD_DEVICE_SYMBOL +#undef VK_GET_INSTANCE_SYMBOL +#undef VK_LOAD_INSTANCE_SYMBOL + } +#endif + + VkDevice m_device; + VkQueryPool m_query; + VkTimeDomainEXT m_timeDomain; +#if 
defined TRACY_VK_USE_SYMBOL_TABLE + VkSymbolTable m_symbols; +#endif + uint64_t m_deviation; +#ifdef _WIN32 + int64_t m_qpcToNs; +#endif + int64_t m_prevCalibration; + uint8_t m_context; + + std::atomic m_head; + uint64_t m_tail; + unsigned int m_oldCnt; + unsigned int m_queryCount; + + int64_t* m_res; + + PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT; +}; + +class VkCtxScope +{ +public: + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + QueueItem *item; + if( depth > 0 && has_callstack() ) + { + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, 
QueueType::GpuZoneBeginCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + } + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( 
!m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + QueueItem *item; + if( depth > 0 && has_callstack() ) + { + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + } + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline ~VkCtxScope() + { + if( !m_active ) return; + + const auto queryId = m_ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + +private: + const bool m_active; + + VkCommandBuffer m_cmdbuf; + VkCtx* m_ctx; +}; + +#if defined TRACY_VK_USE_SYMBOL_TABLE +void* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false ) 
+#else +void* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) +#endif +{ + auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); +#if defined TRACY_VK_USE_SYMBOL_TABLE + new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated ); +#else + new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); +#endif + return ctx; +} + +#if defined VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE +void* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr ) +#else +void* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) +#endif +{ + auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); +#if defined TRACY_VK_USE_SYMBOL_TABLE + new(ctx) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr ); +#else + new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct ); +#endif + ctx->Name("Vulkan", 6); + return ctx; +} +#endif + +void DestroyVkContext( void* ctx ) +{ + ((VkCtx*)ctx)->~VkCtx(); + tracy_free( ctx ); +} + +void CollectVkContext( void* ctx, VkCommandBuffer cmdbuf ) +{ + ((VkCtx*)ctx)->Collect(cmdbuf); +} + +void BeginVkZoneScope(void* zone, void* ctx, VkCommandBuffer cmdbuf, const char* name, size_t nameLen) +{ + new(zone) tracy::VkCtxScope{ (tracy::VkCtx*)ctx, 0, 0, 0, 0, 0, name, nameLen, cmdbuf, true }; +} + +void EndVkZoneScope(void* zone) +{ + ((tracy::VkCtxScope*)zone)->~VkCtxScope(); +} + +} + +using TracyVkCtx = tracy::VkCtx*; + +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( 
instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ); +#else +#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); +#endif +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true ); +#else +#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); +#endif +#if defined VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr ); +#else +#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct ); +#endif +#endif +#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); +#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size ); +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkZone( ctx, 
cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true ) +# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true ) +# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active ) +#else +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); +# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true ) +# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true ) +# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active ); +#endif +#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf ); + +#ifdef TRACY_HAS_CALLSTACK +# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); +# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, 
depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); +# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true ) +# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true ) +# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, depth, active ); +#else +# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) +# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) +# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name ) +# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color ) +# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) +#endif + +#else + +// Forward declarations to be used in engine +namespace tracy +{ +#if defined TRACY_VK_USE_SYMBOL_TABLE + extern void* CreateVkContext(VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false); +#else + extern void* CreateVkContext(VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct); +#endif +#if defined 
VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE + extern void* CreateVkContext(VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr); +#else + extern void* CreateVkContext(VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct); +#endif +#endif + extern void DestroyVkContext(void* ctx); + extern void CollectVkContext(void* ctx, VkCommandBuffer cmdbuf); + extern void BeginVkZoneScope(void* zone, void* ctx, VkCommandBuffer cmdbuf, const char* name, size_t nameLen); + extern void EndVkZoneScope(void* zone); +} + +#endif + +#endif diff --git a/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs b/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs index 0aa37b84e..95695e203 100644 --- a/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs +++ b/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs @@ -73,7 +73,7 @@ namespace Flax.Build.NativeCpp /// /// The native C++ module build settings container. /// - public sealed class BuildOptions + public sealed class BuildOptions : ICloneable { /// /// The target that builds this module. 
@@ -442,5 +442,26 @@ namespace Flax.Build.NativeCpp SourcePaths.Clear(); } } + + /// + public object Clone() + { + var clone = new BuildOptions + { + Target = Target, + Platform = Platform, + Toolchain = Toolchain, + Architecture = Architecture, + Configuration = Configuration, + CompileEnv = (CompileEnvironment)CompileEnv.Clone(), + LinkEnv = (LinkEnvironment)LinkEnv.Clone(), + IntermediateFolder = IntermediateFolder, + OutputFolder = OutputFolder, + WorkingDirectory = WorkingDirectory, + HotReloadPostfix = HotReloadPostfix, + Flags = Flags, + }; + return clone; + } } } From 492a5f979d1f6f9292b7c41f127637a90a78ddcc Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 19:08:58 +0200 Subject: [PATCH 125/211] Fix yield on Android --- Source/Engine/Platform/Android/AndroidPlatform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Platform/Android/AndroidPlatform.cpp b/Source/Engine/Platform/Android/AndroidPlatform.cpp index d78b716bf..18fb85bb9 100644 --- a/Source/Engine/Platform/Android/AndroidPlatform.cpp +++ b/Source/Engine/Platform/Android/AndroidPlatform.cpp @@ -750,7 +750,7 @@ void AndroidPlatform::Sleep(int32 milliseconds) void AndroidPlatform::Yield() { - pthread_yield(); + sched_yield(); } double AndroidPlatform::GetTimeSeconds() From 522d8d89e6c5521c7c1e32c236a7821c476d09e9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 19:09:21 +0200 Subject: [PATCH 126/211] Fix Global SDF to properly lock static chunks access --- Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index e0d227b6b..c8c997180 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -237,6 +237,7 @@ public: OnSDFTextureDeleted(texture); // Clear static chunks cache + 
ConcurrentSystemLocker::WriteScope lock(Locker, true); for (auto& cascade : Cascades) cascade.StaticChunks.Clear(); } @@ -581,6 +582,7 @@ void GlobalSignDistanceFieldCustomBuffer::DrawCascadeJob(int32 cascadeIndex) if (!cascade.Dirty) return; PROFILE_CPU(); + ConcurrentSystemLocker::ReadScope lock(Locker); CurrentCascade.Set(&cascade); DrawCascadeActors(cascade); UpdateCascadeChunks(cascade); @@ -720,7 +722,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } sdfData.LastFrameUsed = currentFrame; PROFILE_GPU_CPU("Global SDF"); - ConcurrentSystemLocker::WriteScope lock(sdfData.Locker); // Setup options int32 resolution, cascadesCount, resolutionMip; @@ -796,6 +797,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex Current = &sdfData; sdfData.StartDrawing(renderContext, false, reset); // (ignored if not started earlier this frame) sdfData.WaitForDrawing(); + ConcurrentSystemLocker::WriteScope lock(sdfData.Locker); // Rasterize world geometry into Global SDF bool anyDraw = false; From 3cd5890db1ce0f1f0c60cb820b1fd5f0059044ff Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 19:11:15 +0200 Subject: [PATCH 127/211] Update `volk` to version `1.4.304` --- Source/ThirdParty/volk/LICENSE.txt | 2 +- Source/ThirdParty/volk/volk.cpp | 2018 +++++++++++++++++++++++++++- Source/ThirdParty/volk/volk.h | 1305 +++++++++++++++++- 3 files changed, 3212 insertions(+), 113 deletions(-) diff --git a/Source/ThirdParty/volk/LICENSE.txt b/Source/ThirdParty/volk/LICENSE.txt index da3caa7cd..5a717f267 100644 --- a/Source/ThirdParty/volk/LICENSE.txt +++ b/Source/ThirdParty/volk/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2018-2019 Arseny Kapoulkine +Copyright (c) 2018-2024 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Source/ThirdParty/volk/volk.cpp 
b/Source/ThirdParty/volk/volk.cpp index ffbba4a0f..ba984c0c1 100644 --- a/Source/ThirdParty/volk/volk.cpp +++ b/Source/ThirdParty/volk/volk.cpp @@ -1,16 +1,51 @@ /* This file is part of volk library; see volk.h for version/license details */ +/* clang-format off */ #include "volk.h" #ifdef _WIN32 -# include + typedef const char* LPCSTR; + typedef struct HINSTANCE__* HINSTANCE; + typedef HINSTANCE HMODULE; + #if defined(_MINWINDEF_) + /* minwindef.h defines FARPROC, and attempting to redefine it may conflict with -Wstrict-prototypes */ + #elif defined(_WIN64) + typedef __int64 (__stdcall* FARPROC)(void); + #else + typedef int (__stdcall* FARPROC)(void); + #endif #else # include #endif +#ifdef __APPLE__ +# include +#endif + #ifdef __cplusplus extern "C" { #endif +#ifdef _WIN32 +__declspec(dllimport) HMODULE __stdcall LoadLibraryA(LPCSTR); +__declspec(dllimport) FARPROC __stdcall GetProcAddress(HMODULE, LPCSTR); +__declspec(dllimport) int __stdcall FreeLibrary(HMODULE); +#endif + +#if defined(__GNUC__) +# define VOLK_DISABLE_GCC_PEDANTIC_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +# define VOLK_RESTORE_GCC_PEDANTIC_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else +# define VOLK_DISABLE_GCC_PEDANTIC_WARNINGS +# define VOLK_RESTORE_GCC_PEDANTIC_WARNINGS +#endif + +static void* loadedModule = NULL; +static VkInstance loadedInstance = VK_NULL_HANDLE; +static VkDevice loadedDevice = VK_NULL_HANDLE; + static void volkGenLoadLoader(void* context, PFN_vkVoidFunction (*load)(void*, const char*)); static void volkGenLoadInstance(void* context, PFN_vkVoidFunction (*load)(void*, const char*)); static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, const char*)); @@ -26,6 +61,13 @@ static PFN_vkVoidFunction vkGetDeviceProcAddrStub(void* context, const char* nam return vkGetDeviceProcAddr((VkDevice)context, name); } +static PFN_vkVoidFunction nullProcAddrStub(void* context, const char* name) +{ + 
(void)context; + (void)name; + return NULL; +} + VkResult volkInitialize(void) { #if defined(_WIN32) @@ -33,27 +75,40 @@ VkResult volkInitialize(void) if (!module) return VK_ERROR_INITIALIZATION_FAILED; - vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)GetProcAddress(module, "vkGetInstanceProcAddr"); + // note: function pointer is cast through void function pointer to silence cast-function-type warning on gcc8 + vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)(void(*)(void))GetProcAddress(module, "vkGetInstanceProcAddr"); #elif defined(__APPLE__) void* module = dlopen("libvulkan.dylib", RTLD_NOW | RTLD_LOCAL); if (!module) - module = dlopen("libvulkan.dylib.1", RTLD_NOW | RTLD_LOCAL); + module = dlopen("libvulkan.1.dylib", RTLD_NOW | RTLD_LOCAL); if (!module) module = dlopen("libMoltenVK.dylib", RTLD_NOW | RTLD_LOCAL); + // Add support for using Vulkan and MoltenVK in a Framework. App store rules for iOS + // strictly enforce no .dylib's. If they aren't found it just falls through + if (!module) + module = dlopen("vulkan.framework/vulkan", RTLD_NOW | RTLD_LOCAL); + if (!module) + module = dlopen("MoltenVK.framework/MoltenVK", RTLD_NOW | RTLD_LOCAL); + // modern versions of macOS don't search /usr/local/lib automatically contrary to what man dlopen says + // Vulkan SDK uses this as the system-wide installation location, so we're going to fallback to this if all else fails + if (!module && getenv("DYLD_FALLBACK_LIBRARY_PATH") == NULL) + module = dlopen("/usr/local/lib/libvulkan.dylib", RTLD_NOW | RTLD_LOCAL); if (!module) return VK_ERROR_INITIALIZATION_FAILED; vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)dlsym(module, "vkGetInstanceProcAddr"); #else - void* module = dlopen("libvulkan.so", RTLD_NOW | RTLD_LOCAL); + void* module = dlopen("libvulkan.so.1", RTLD_NOW | RTLD_LOCAL); if (!module) - module = dlopen("libvulkan.so.1", RTLD_NOW | RTLD_LOCAL); + module = dlopen("libvulkan.so", RTLD_NOW | RTLD_LOCAL); if (!module) return 
VK_ERROR_INITIALIZATION_FAILED; - + VOLK_DISABLE_GCC_PEDANTIC_WARNINGS vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)dlsym(module, "vkGetInstanceProcAddr"); + VOLK_RESTORE_GCC_PEDANTIC_WARNINGS #endif + loadedModule = module; volkGenLoadLoader(NULL, vkGetInstanceProcAddrStub); return VK_SUCCESS; @@ -63,9 +118,31 @@ void volkInitializeCustom(PFN_vkGetInstanceProcAddr handler) { vkGetInstanceProcAddr = handler; + loadedModule = NULL; volkGenLoadLoader(NULL, vkGetInstanceProcAddrStub); } +void volkFinalize(void) +{ + if (loadedModule) + { +#if defined(_WIN32) + FreeLibrary((HMODULE)loadedModule); +#else + dlclose(loadedModule); +#endif + } + + vkGetInstanceProcAddr = NULL; + volkGenLoadLoader(NULL, nullProcAddrStub); + volkGenLoadInstance(NULL, nullProcAddrStub); + volkGenLoadDevice(NULL, nullProcAddrStub); + + loadedModule = NULL; + loadedInstance = VK_NULL_HANDLE; + loadedDevice = VK_NULL_HANDLE; +} + uint32_t volkGetInstanceVersion(void) { #if defined(VK_VERSION_1_1) @@ -82,15 +159,33 @@ uint32_t volkGetInstanceVersion(void) void volkLoadInstance(VkInstance instance) { + loadedInstance = instance; volkGenLoadInstance(instance, vkGetInstanceProcAddrStub); volkGenLoadDevice(instance, vkGetInstanceProcAddrStub); } +void volkLoadInstanceOnly(VkInstance instance) +{ + loadedInstance = instance; + volkGenLoadInstance(instance, vkGetInstanceProcAddrStub); +} + +VkInstance volkGetLoadedInstance(void) +{ + return loadedInstance; +} + void volkLoadDevice(VkDevice device) { + loadedDevice = device; volkGenLoadDevice(device, vkGetDeviceProcAddrStub); } +VkDevice volkGetLoadedDevice(void) +{ + return loadedDevice; +} + void volkLoadDeviceTable(struct VolkDeviceTable* table, VkDevice device) { volkGenLoadDeviceTable(table, device, vkGetDeviceProcAddrStub); @@ -141,6 +236,13 @@ static void volkGenLoadInstance(void* context, PFN_vkVoidFunction (*load)(void*, vkGetPhysicalDeviceQueueFamilyProperties2 = (PFN_vkGetPhysicalDeviceQueueFamilyProperties2)load(context, 
"vkGetPhysicalDeviceQueueFamilyProperties2"); vkGetPhysicalDeviceSparseImageFormatProperties2 = (PFN_vkGetPhysicalDeviceSparseImageFormatProperties2)load(context, "vkGetPhysicalDeviceSparseImageFormatProperties2"); #endif /* defined(VK_VERSION_1_1) */ +#if defined(VK_VERSION_1_3) + vkGetPhysicalDeviceToolProperties = (PFN_vkGetPhysicalDeviceToolProperties)load(context, "vkGetPhysicalDeviceToolProperties"); +#endif /* defined(VK_VERSION_1_3) */ +#if defined(VK_EXT_acquire_drm_display) + vkAcquireDrmDisplayEXT = (PFN_vkAcquireDrmDisplayEXT)load(context, "vkAcquireDrmDisplayEXT"); + vkGetDrmDisplayEXT = (PFN_vkGetDrmDisplayEXT)load(context, "vkGetDrmDisplayEXT"); +#endif /* defined(VK_EXT_acquire_drm_display) */ #if defined(VK_EXT_acquire_xlib_display) vkAcquireXlibDisplayEXT = (PFN_vkAcquireXlibDisplayEXT)load(context, "vkAcquireXlibDisplayEXT"); vkGetRandROutputDisplayEXT = (PFN_vkGetRandROutputDisplayEXT)load(context, "vkGetRandROutputDisplayEXT"); @@ -154,25 +256,58 @@ static void volkGenLoadInstance(void* context, PFN_vkVoidFunction (*load)(void*, vkDestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT)load(context, "vkDestroyDebugReportCallbackEXT"); #endif /* defined(VK_EXT_debug_report) */ #if defined(VK_EXT_debug_utils) + vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT)load(context, "vkCmdBeginDebugUtilsLabelEXT"); + vkCmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT)load(context, "vkCmdEndDebugUtilsLabelEXT"); + vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT)load(context, "vkCmdInsertDebugUtilsLabelEXT"); vkCreateDebugUtilsMessengerEXT = (PFN_vkCreateDebugUtilsMessengerEXT)load(context, "vkCreateDebugUtilsMessengerEXT"); vkDestroyDebugUtilsMessengerEXT = (PFN_vkDestroyDebugUtilsMessengerEXT)load(context, "vkDestroyDebugUtilsMessengerEXT"); + vkQueueBeginDebugUtilsLabelEXT = (PFN_vkQueueBeginDebugUtilsLabelEXT)load(context, "vkQueueBeginDebugUtilsLabelEXT"); + vkQueueEndDebugUtilsLabelEXT = 
(PFN_vkQueueEndDebugUtilsLabelEXT)load(context, "vkQueueEndDebugUtilsLabelEXT"); + vkQueueInsertDebugUtilsLabelEXT = (PFN_vkQueueInsertDebugUtilsLabelEXT)load(context, "vkQueueInsertDebugUtilsLabelEXT"); + vkSetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT)load(context, "vkSetDebugUtilsObjectNameEXT"); + vkSetDebugUtilsObjectTagEXT = (PFN_vkSetDebugUtilsObjectTagEXT)load(context, "vkSetDebugUtilsObjectTagEXT"); vkSubmitDebugUtilsMessageEXT = (PFN_vkSubmitDebugUtilsMessageEXT)load(context, "vkSubmitDebugUtilsMessageEXT"); #endif /* defined(VK_EXT_debug_utils) */ #if defined(VK_EXT_direct_mode_display) vkReleaseDisplayEXT = (PFN_vkReleaseDisplayEXT)load(context, "vkReleaseDisplayEXT"); #endif /* defined(VK_EXT_direct_mode_display) */ +#if defined(VK_EXT_directfb_surface) + vkCreateDirectFBSurfaceEXT = (PFN_vkCreateDirectFBSurfaceEXT)load(context, "vkCreateDirectFBSurfaceEXT"); + vkGetPhysicalDeviceDirectFBPresentationSupportEXT = (PFN_vkGetPhysicalDeviceDirectFBPresentationSupportEXT)load(context, "vkGetPhysicalDeviceDirectFBPresentationSupportEXT"); +#endif /* defined(VK_EXT_directfb_surface) */ #if defined(VK_EXT_display_surface_counter) vkGetPhysicalDeviceSurfaceCapabilities2EXT = (PFN_vkGetPhysicalDeviceSurfaceCapabilities2EXT)load(context, "vkGetPhysicalDeviceSurfaceCapabilities2EXT"); #endif /* defined(VK_EXT_display_surface_counter) */ +#if defined(VK_EXT_full_screen_exclusive) + vkGetPhysicalDeviceSurfacePresentModes2EXT = (PFN_vkGetPhysicalDeviceSurfacePresentModes2EXT)load(context, "vkGetPhysicalDeviceSurfacePresentModes2EXT"); +#endif /* defined(VK_EXT_full_screen_exclusive) */ +#if defined(VK_EXT_headless_surface) + vkCreateHeadlessSurfaceEXT = (PFN_vkCreateHeadlessSurfaceEXT)load(context, "vkCreateHeadlessSurfaceEXT"); +#endif /* defined(VK_EXT_headless_surface) */ +#if defined(VK_EXT_metal_surface) + vkCreateMetalSurfaceEXT = (PFN_vkCreateMetalSurfaceEXT)load(context, "vkCreateMetalSurfaceEXT"); +#endif /* defined(VK_EXT_metal_surface) */ 
#if defined(VK_EXT_sample_locations) vkGetPhysicalDeviceMultisamplePropertiesEXT = (PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT)load(context, "vkGetPhysicalDeviceMultisamplePropertiesEXT"); #endif /* defined(VK_EXT_sample_locations) */ +#if defined(VK_EXT_tooling_info) + vkGetPhysicalDeviceToolPropertiesEXT = (PFN_vkGetPhysicalDeviceToolPropertiesEXT)load(context, "vkGetPhysicalDeviceToolPropertiesEXT"); +#endif /* defined(VK_EXT_tooling_info) */ #if defined(VK_FUCHSIA_imagepipe_surface) vkCreateImagePipeSurfaceFUCHSIA = (PFN_vkCreateImagePipeSurfaceFUCHSIA)load(context, "vkCreateImagePipeSurfaceFUCHSIA"); #endif /* defined(VK_FUCHSIA_imagepipe_surface) */ +#if defined(VK_GGP_stream_descriptor_surface) + vkCreateStreamDescriptorSurfaceGGP = (PFN_vkCreateStreamDescriptorSurfaceGGP)load(context, "vkCreateStreamDescriptorSurfaceGGP"); +#endif /* defined(VK_GGP_stream_descriptor_surface) */ #if defined(VK_KHR_android_surface) vkCreateAndroidSurfaceKHR = (PFN_vkCreateAndroidSurfaceKHR)load(context, "vkCreateAndroidSurfaceKHR"); #endif /* defined(VK_KHR_android_surface) */ +#if defined(VK_KHR_calibrated_timestamps) + vkGetPhysicalDeviceCalibrateableTimeDomainsKHR = (PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsKHR)load(context, "vkGetPhysicalDeviceCalibrateableTimeDomainsKHR"); +#endif /* defined(VK_KHR_calibrated_timestamps) */ +#if defined(VK_KHR_cooperative_matrix) + vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR = (PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR)load(context, "vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR"); +#endif /* defined(VK_KHR_cooperative_matrix) */ #if defined(VK_KHR_device_group_creation) vkEnumeratePhysicalDeviceGroupsKHR = (PFN_vkEnumeratePhysicalDeviceGroupsKHR)load(context, "vkEnumeratePhysicalDeviceGroupsKHR"); #endif /* defined(VK_KHR_device_group_creation) */ @@ -194,6 +329,9 @@ static void volkGenLoadInstance(void* context, PFN_vkVoidFunction (*load)(void*, #if defined(VK_KHR_external_semaphore_capabilities) 
vkGetPhysicalDeviceExternalSemaphorePropertiesKHR = (PFN_vkGetPhysicalDeviceExternalSemaphorePropertiesKHR)load(context, "vkGetPhysicalDeviceExternalSemaphorePropertiesKHR"); #endif /* defined(VK_KHR_external_semaphore_capabilities) */ +#if defined(VK_KHR_fragment_shading_rate) + vkGetPhysicalDeviceFragmentShadingRatesKHR = (PFN_vkGetPhysicalDeviceFragmentShadingRatesKHR)load(context, "vkGetPhysicalDeviceFragmentShadingRatesKHR"); +#endif /* defined(VK_KHR_fragment_shading_rate) */ #if defined(VK_KHR_get_display_properties2) vkGetDisplayModeProperties2KHR = (PFN_vkGetDisplayModeProperties2KHR)load(context, "vkGetDisplayModeProperties2KHR"); vkGetDisplayPlaneCapabilities2KHR = (PFN_vkGetDisplayPlaneCapabilities2KHR)load(context, "vkGetDisplayPlaneCapabilities2KHR"); @@ -213,6 +351,10 @@ static void volkGenLoadInstance(void* context, PFN_vkVoidFunction (*load)(void*, vkGetPhysicalDeviceSurfaceCapabilities2KHR = (PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR)load(context, "vkGetPhysicalDeviceSurfaceCapabilities2KHR"); vkGetPhysicalDeviceSurfaceFormats2KHR = (PFN_vkGetPhysicalDeviceSurfaceFormats2KHR)load(context, "vkGetPhysicalDeviceSurfaceFormats2KHR"); #endif /* defined(VK_KHR_get_surface_capabilities2) */ +#if defined(VK_KHR_performance_query) + vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR = (PFN_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR)load(context, "vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR"); + vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR = (PFN_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR)load(context, "vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR"); +#endif /* defined(VK_KHR_performance_query) */ #if defined(VK_KHR_surface) vkDestroySurfaceKHR = (PFN_vkDestroySurfaceKHR)load(context, "vkDestroySurfaceKHR"); vkGetPhysicalDeviceSurfaceCapabilitiesKHR = (PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)load(context, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); @@ 
-220,6 +362,13 @@ static void volkGenLoadInstance(void* context, PFN_vkVoidFunction (*load)(void*, vkGetPhysicalDeviceSurfacePresentModesKHR = (PFN_vkGetPhysicalDeviceSurfacePresentModesKHR)load(context, "vkGetPhysicalDeviceSurfacePresentModesKHR"); vkGetPhysicalDeviceSurfaceSupportKHR = (PFN_vkGetPhysicalDeviceSurfaceSupportKHR)load(context, "vkGetPhysicalDeviceSurfaceSupportKHR"); #endif /* defined(VK_KHR_surface) */ +#if defined(VK_KHR_video_encode_queue) + vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR = (PFN_vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR)load(context, "vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR"); +#endif /* defined(VK_KHR_video_encode_queue) */ +#if defined(VK_KHR_video_queue) + vkGetPhysicalDeviceVideoCapabilitiesKHR = (PFN_vkGetPhysicalDeviceVideoCapabilitiesKHR)load(context, "vkGetPhysicalDeviceVideoCapabilitiesKHR"); + vkGetPhysicalDeviceVideoFormatPropertiesKHR = (PFN_vkGetPhysicalDeviceVideoFormatPropertiesKHR)load(context, "vkGetPhysicalDeviceVideoFormatPropertiesKHR"); +#endif /* defined(VK_KHR_video_queue) */ #if defined(VK_KHR_wayland_surface) vkCreateWaylandSurfaceKHR = (PFN_vkCreateWaylandSurfaceKHR)load(context, "vkCreateWaylandSurfaceKHR"); vkGetPhysicalDeviceWaylandPresentationSupportKHR = (PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR)load(context, "vkGetPhysicalDeviceWaylandPresentationSupportKHR"); @@ -245,12 +394,29 @@ static void volkGenLoadInstance(void* context, PFN_vkVoidFunction (*load)(void*, #if defined(VK_NN_vi_surface) vkCreateViSurfaceNN = (PFN_vkCreateViSurfaceNN)load(context, "vkCreateViSurfaceNN"); #endif /* defined(VK_NN_vi_surface) */ -#if defined(VK_NVX_device_generated_commands) - vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX = (PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX)load(context, "vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX"); -#endif /* defined(VK_NVX_device_generated_commands) */ +#if defined(VK_NV_acquire_winrt_display) + 
vkAcquireWinrtDisplayNV = (PFN_vkAcquireWinrtDisplayNV)load(context, "vkAcquireWinrtDisplayNV"); + vkGetWinrtDisplayNV = (PFN_vkGetWinrtDisplayNV)load(context, "vkGetWinrtDisplayNV"); +#endif /* defined(VK_NV_acquire_winrt_display) */ +#if defined(VK_NV_cooperative_matrix) + vkGetPhysicalDeviceCooperativeMatrixPropertiesNV = (PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)load(context, "vkGetPhysicalDeviceCooperativeMatrixPropertiesNV"); +#endif /* defined(VK_NV_cooperative_matrix) */ +#if defined(VK_NV_cooperative_matrix2) + vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV = (PFN_vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV)load(context, "vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV"); +#endif /* defined(VK_NV_cooperative_matrix2) */ +#if defined(VK_NV_coverage_reduction_mode) + vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV = (PFN_vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV)load(context, "vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV"); +#endif /* defined(VK_NV_coverage_reduction_mode) */ #if defined(VK_NV_external_memory_capabilities) vkGetPhysicalDeviceExternalImageFormatPropertiesNV = (PFN_vkGetPhysicalDeviceExternalImageFormatPropertiesNV)load(context, "vkGetPhysicalDeviceExternalImageFormatPropertiesNV"); #endif /* defined(VK_NV_external_memory_capabilities) */ +#if defined(VK_NV_optical_flow) + vkGetPhysicalDeviceOpticalFlowImageFormatsNV = (PFN_vkGetPhysicalDeviceOpticalFlowImageFormatsNV)load(context, "vkGetPhysicalDeviceOpticalFlowImageFormatsNV"); +#endif /* defined(VK_NV_optical_flow) */ +#if defined(VK_QNX_screen_surface) + vkCreateScreenSurfaceQNX = (PFN_vkCreateScreenSurfaceQNX)load(context, "vkCreateScreenSurfaceQNX"); + vkGetPhysicalDeviceScreenPresentationSupportQNX = (PFN_vkGetPhysicalDeviceScreenPresentationSupportQNX)load(context, "vkGetPhysicalDeviceScreenPresentationSupportQNX"); +#endif /* 
defined(VK_QNX_screen_surface) */ #if (defined(VK_KHR_device_group) && defined(VK_KHR_surface)) || (defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)) vkGetPhysicalDevicePresentRectanglesKHR = (PFN_vkGetPhysicalDevicePresentRectanglesKHR)load(context, "vkGetPhysicalDevicePresentRectanglesKHR"); #endif /* (defined(VK_KHR_device_group) && defined(VK_KHR_surface)) || (defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)) */ @@ -400,9 +566,101 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkTrimCommandPool = (PFN_vkTrimCommandPool)load(context, "vkTrimCommandPool"); vkUpdateDescriptorSetWithTemplate = (PFN_vkUpdateDescriptorSetWithTemplate)load(context, "vkUpdateDescriptorSetWithTemplate"); #endif /* defined(VK_VERSION_1_1) */ +#if defined(VK_VERSION_1_2) + vkCmdBeginRenderPass2 = (PFN_vkCmdBeginRenderPass2)load(context, "vkCmdBeginRenderPass2"); + vkCmdDrawIndexedIndirectCount = (PFN_vkCmdDrawIndexedIndirectCount)load(context, "vkCmdDrawIndexedIndirectCount"); + vkCmdDrawIndirectCount = (PFN_vkCmdDrawIndirectCount)load(context, "vkCmdDrawIndirectCount"); + vkCmdEndRenderPass2 = (PFN_vkCmdEndRenderPass2)load(context, "vkCmdEndRenderPass2"); + vkCmdNextSubpass2 = (PFN_vkCmdNextSubpass2)load(context, "vkCmdNextSubpass2"); + vkCreateRenderPass2 = (PFN_vkCreateRenderPass2)load(context, "vkCreateRenderPass2"); + vkGetBufferDeviceAddress = (PFN_vkGetBufferDeviceAddress)load(context, "vkGetBufferDeviceAddress"); + vkGetBufferOpaqueCaptureAddress = (PFN_vkGetBufferOpaqueCaptureAddress)load(context, "vkGetBufferOpaqueCaptureAddress"); + vkGetDeviceMemoryOpaqueCaptureAddress = (PFN_vkGetDeviceMemoryOpaqueCaptureAddress)load(context, "vkGetDeviceMemoryOpaqueCaptureAddress"); + vkGetSemaphoreCounterValue = (PFN_vkGetSemaphoreCounterValue)load(context, "vkGetSemaphoreCounterValue"); + vkResetQueryPool = (PFN_vkResetQueryPool)load(context, "vkResetQueryPool"); + vkSignalSemaphore = (PFN_vkSignalSemaphore)load(context, "vkSignalSemaphore"); + 
vkWaitSemaphores = (PFN_vkWaitSemaphores)load(context, "vkWaitSemaphores"); +#endif /* defined(VK_VERSION_1_2) */ +#if defined(VK_VERSION_1_3) + vkCmdBeginRendering = (PFN_vkCmdBeginRendering)load(context, "vkCmdBeginRendering"); + vkCmdBindVertexBuffers2 = (PFN_vkCmdBindVertexBuffers2)load(context, "vkCmdBindVertexBuffers2"); + vkCmdBlitImage2 = (PFN_vkCmdBlitImage2)load(context, "vkCmdBlitImage2"); + vkCmdCopyBuffer2 = (PFN_vkCmdCopyBuffer2)load(context, "vkCmdCopyBuffer2"); + vkCmdCopyBufferToImage2 = (PFN_vkCmdCopyBufferToImage2)load(context, "vkCmdCopyBufferToImage2"); + vkCmdCopyImage2 = (PFN_vkCmdCopyImage2)load(context, "vkCmdCopyImage2"); + vkCmdCopyImageToBuffer2 = (PFN_vkCmdCopyImageToBuffer2)load(context, "vkCmdCopyImageToBuffer2"); + vkCmdEndRendering = (PFN_vkCmdEndRendering)load(context, "vkCmdEndRendering"); + vkCmdPipelineBarrier2 = (PFN_vkCmdPipelineBarrier2)load(context, "vkCmdPipelineBarrier2"); + vkCmdResetEvent2 = (PFN_vkCmdResetEvent2)load(context, "vkCmdResetEvent2"); + vkCmdResolveImage2 = (PFN_vkCmdResolveImage2)load(context, "vkCmdResolveImage2"); + vkCmdSetCullMode = (PFN_vkCmdSetCullMode)load(context, "vkCmdSetCullMode"); + vkCmdSetDepthBiasEnable = (PFN_vkCmdSetDepthBiasEnable)load(context, "vkCmdSetDepthBiasEnable"); + vkCmdSetDepthBoundsTestEnable = (PFN_vkCmdSetDepthBoundsTestEnable)load(context, "vkCmdSetDepthBoundsTestEnable"); + vkCmdSetDepthCompareOp = (PFN_vkCmdSetDepthCompareOp)load(context, "vkCmdSetDepthCompareOp"); + vkCmdSetDepthTestEnable = (PFN_vkCmdSetDepthTestEnable)load(context, "vkCmdSetDepthTestEnable"); + vkCmdSetDepthWriteEnable = (PFN_vkCmdSetDepthWriteEnable)load(context, "vkCmdSetDepthWriteEnable"); + vkCmdSetEvent2 = (PFN_vkCmdSetEvent2)load(context, "vkCmdSetEvent2"); + vkCmdSetFrontFace = (PFN_vkCmdSetFrontFace)load(context, "vkCmdSetFrontFace"); + vkCmdSetPrimitiveRestartEnable = (PFN_vkCmdSetPrimitiveRestartEnable)load(context, "vkCmdSetPrimitiveRestartEnable"); + vkCmdSetPrimitiveTopology = 
(PFN_vkCmdSetPrimitiveTopology)load(context, "vkCmdSetPrimitiveTopology"); + vkCmdSetRasterizerDiscardEnable = (PFN_vkCmdSetRasterizerDiscardEnable)load(context, "vkCmdSetRasterizerDiscardEnable"); + vkCmdSetScissorWithCount = (PFN_vkCmdSetScissorWithCount)load(context, "vkCmdSetScissorWithCount"); + vkCmdSetStencilOp = (PFN_vkCmdSetStencilOp)load(context, "vkCmdSetStencilOp"); + vkCmdSetStencilTestEnable = (PFN_vkCmdSetStencilTestEnable)load(context, "vkCmdSetStencilTestEnable"); + vkCmdSetViewportWithCount = (PFN_vkCmdSetViewportWithCount)load(context, "vkCmdSetViewportWithCount"); + vkCmdWaitEvents2 = (PFN_vkCmdWaitEvents2)load(context, "vkCmdWaitEvents2"); + vkCmdWriteTimestamp2 = (PFN_vkCmdWriteTimestamp2)load(context, "vkCmdWriteTimestamp2"); + vkCreatePrivateDataSlot = (PFN_vkCreatePrivateDataSlot)load(context, "vkCreatePrivateDataSlot"); + vkDestroyPrivateDataSlot = (PFN_vkDestroyPrivateDataSlot)load(context, "vkDestroyPrivateDataSlot"); + vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)load(context, "vkGetDeviceBufferMemoryRequirements"); + vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)load(context, "vkGetDeviceImageMemoryRequirements"); + vkGetDeviceImageSparseMemoryRequirements = (PFN_vkGetDeviceImageSparseMemoryRequirements)load(context, "vkGetDeviceImageSparseMemoryRequirements"); + vkGetPrivateData = (PFN_vkGetPrivateData)load(context, "vkGetPrivateData"); + vkQueueSubmit2 = (PFN_vkQueueSubmit2)load(context, "vkQueueSubmit2"); + vkSetPrivateData = (PFN_vkSetPrivateData)load(context, "vkSetPrivateData"); +#endif /* defined(VK_VERSION_1_3) */ +#if defined(VK_VERSION_1_4) + vkCmdBindDescriptorSets2 = (PFN_vkCmdBindDescriptorSets2)load(context, "vkCmdBindDescriptorSets2"); + vkCmdBindIndexBuffer2 = (PFN_vkCmdBindIndexBuffer2)load(context, "vkCmdBindIndexBuffer2"); + vkCmdPushConstants2 = (PFN_vkCmdPushConstants2)load(context, "vkCmdPushConstants2"); + vkCmdPushDescriptorSet = 
(PFN_vkCmdPushDescriptorSet)load(context, "vkCmdPushDescriptorSet"); + vkCmdPushDescriptorSet2 = (PFN_vkCmdPushDescriptorSet2)load(context, "vkCmdPushDescriptorSet2"); + vkCmdPushDescriptorSetWithTemplate = (PFN_vkCmdPushDescriptorSetWithTemplate)load(context, "vkCmdPushDescriptorSetWithTemplate"); + vkCmdPushDescriptorSetWithTemplate2 = (PFN_vkCmdPushDescriptorSetWithTemplate2)load(context, "vkCmdPushDescriptorSetWithTemplate2"); + vkCmdSetLineStipple = (PFN_vkCmdSetLineStipple)load(context, "vkCmdSetLineStipple"); + vkCmdSetRenderingAttachmentLocations = (PFN_vkCmdSetRenderingAttachmentLocations)load(context, "vkCmdSetRenderingAttachmentLocations"); + vkCmdSetRenderingInputAttachmentIndices = (PFN_vkCmdSetRenderingInputAttachmentIndices)load(context, "vkCmdSetRenderingInputAttachmentIndices"); + vkCopyImageToImage = (PFN_vkCopyImageToImage)load(context, "vkCopyImageToImage"); + vkCopyImageToMemory = (PFN_vkCopyImageToMemory)load(context, "vkCopyImageToMemory"); + vkCopyMemoryToImage = (PFN_vkCopyMemoryToImage)load(context, "vkCopyMemoryToImage"); + vkGetDeviceImageSubresourceLayout = (PFN_vkGetDeviceImageSubresourceLayout)load(context, "vkGetDeviceImageSubresourceLayout"); + vkGetImageSubresourceLayout2 = (PFN_vkGetImageSubresourceLayout2)load(context, "vkGetImageSubresourceLayout2"); + vkGetRenderingAreaGranularity = (PFN_vkGetRenderingAreaGranularity)load(context, "vkGetRenderingAreaGranularity"); + vkMapMemory2 = (PFN_vkMapMemory2)load(context, "vkMapMemory2"); + vkTransitionImageLayout = (PFN_vkTransitionImageLayout)load(context, "vkTransitionImageLayout"); + vkUnmapMemory2 = (PFN_vkUnmapMemory2)load(context, "vkUnmapMemory2"); +#endif /* defined(VK_VERSION_1_4) */ +#if defined(VK_AMDX_shader_enqueue) + vkCmdDispatchGraphAMDX = (PFN_vkCmdDispatchGraphAMDX)load(context, "vkCmdDispatchGraphAMDX"); + vkCmdDispatchGraphIndirectAMDX = (PFN_vkCmdDispatchGraphIndirectAMDX)load(context, "vkCmdDispatchGraphIndirectAMDX"); + vkCmdDispatchGraphIndirectCountAMDX = 
(PFN_vkCmdDispatchGraphIndirectCountAMDX)load(context, "vkCmdDispatchGraphIndirectCountAMDX"); + vkCmdInitializeGraphScratchMemoryAMDX = (PFN_vkCmdInitializeGraphScratchMemoryAMDX)load(context, "vkCmdInitializeGraphScratchMemoryAMDX"); + vkCreateExecutionGraphPipelinesAMDX = (PFN_vkCreateExecutionGraphPipelinesAMDX)load(context, "vkCreateExecutionGraphPipelinesAMDX"); + vkGetExecutionGraphPipelineNodeIndexAMDX = (PFN_vkGetExecutionGraphPipelineNodeIndexAMDX)load(context, "vkGetExecutionGraphPipelineNodeIndexAMDX"); + vkGetExecutionGraphPipelineScratchSizeAMDX = (PFN_vkGetExecutionGraphPipelineScratchSizeAMDX)load(context, "vkGetExecutionGraphPipelineScratchSizeAMDX"); +#endif /* defined(VK_AMDX_shader_enqueue) */ +#if defined(VK_AMD_anti_lag) + vkAntiLagUpdateAMD = (PFN_vkAntiLagUpdateAMD)load(context, "vkAntiLagUpdateAMD"); +#endif /* defined(VK_AMD_anti_lag) */ #if defined(VK_AMD_buffer_marker) vkCmdWriteBufferMarkerAMD = (PFN_vkCmdWriteBufferMarkerAMD)load(context, "vkCmdWriteBufferMarkerAMD"); #endif /* defined(VK_AMD_buffer_marker) */ +#if defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) + vkCmdWriteBufferMarker2AMD = (PFN_vkCmdWriteBufferMarker2AMD)load(context, "vkCmdWriteBufferMarker2AMD"); +#endif /* defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_AMD_display_native_hdr) + vkSetLocalDimmingAMD = (PFN_vkSetLocalDimmingAMD)load(context, "vkSetLocalDimmingAMD"); +#endif /* defined(VK_AMD_display_native_hdr) */ #if defined(VK_AMD_draw_indirect_count) vkCmdDrawIndexedIndirectCountAMD = (PFN_vkCmdDrawIndexedIndirectCountAMD)load(context, "vkCmdDrawIndexedIndirectCountAMD"); vkCmdDrawIndirectCountAMD = (PFN_vkCmdDrawIndirectCountAMD)load(context, "vkCmdDrawIndirectCountAMD"); @@ -414,12 +672,18 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkGetAndroidHardwareBufferPropertiesANDROID = 
(PFN_vkGetAndroidHardwareBufferPropertiesANDROID)load(context, "vkGetAndroidHardwareBufferPropertiesANDROID"); vkGetMemoryAndroidHardwareBufferANDROID = (PFN_vkGetMemoryAndroidHardwareBufferANDROID)load(context, "vkGetMemoryAndroidHardwareBufferANDROID"); #endif /* defined(VK_ANDROID_external_memory_android_hardware_buffer) */ +#if defined(VK_EXT_attachment_feedback_loop_dynamic_state) + vkCmdSetAttachmentFeedbackLoopEnableEXT = (PFN_vkCmdSetAttachmentFeedbackLoopEnableEXT)load(context, "vkCmdSetAttachmentFeedbackLoopEnableEXT"); +#endif /* defined(VK_EXT_attachment_feedback_loop_dynamic_state) */ #if defined(VK_EXT_buffer_device_address) vkGetBufferDeviceAddressEXT = (PFN_vkGetBufferDeviceAddressEXT)load(context, "vkGetBufferDeviceAddressEXT"); #endif /* defined(VK_EXT_buffer_device_address) */ #if defined(VK_EXT_calibrated_timestamps) vkGetCalibratedTimestampsEXT = (PFN_vkGetCalibratedTimestampsEXT)load(context, "vkGetCalibratedTimestampsEXT"); #endif /* defined(VK_EXT_calibrated_timestamps) */ +#if defined(VK_EXT_color_write_enable) + vkCmdSetColorWriteEnableEXT = (PFN_vkCmdSetColorWriteEnableEXT)load(context, "vkCmdSetColorWriteEnableEXT"); +#endif /* defined(VK_EXT_color_write_enable) */ #if defined(VK_EXT_conditional_rendering) vkCmdBeginConditionalRenderingEXT = (PFN_vkCmdBeginConditionalRenderingEXT)load(context, "vkCmdBeginConditionalRenderingEXT"); vkCmdEndConditionalRenderingEXT = (PFN_vkCmdEndConditionalRenderingEXT)load(context, "vkCmdEndConditionalRenderingEXT"); @@ -431,19 +695,45 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkDebugMarkerSetObjectNameEXT = (PFN_vkDebugMarkerSetObjectNameEXT)load(context, "vkDebugMarkerSetObjectNameEXT"); vkDebugMarkerSetObjectTagEXT = (PFN_vkDebugMarkerSetObjectTagEXT)load(context, "vkDebugMarkerSetObjectTagEXT"); #endif /* defined(VK_EXT_debug_marker) */ -#if defined(VK_EXT_debug_utils) - vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT)load(context, 
"vkCmdBeginDebugUtilsLabelEXT"); - vkCmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT)load(context, "vkCmdEndDebugUtilsLabelEXT"); - vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT)load(context, "vkCmdInsertDebugUtilsLabelEXT"); - vkQueueBeginDebugUtilsLabelEXT = (PFN_vkQueueBeginDebugUtilsLabelEXT)load(context, "vkQueueBeginDebugUtilsLabelEXT"); - vkQueueEndDebugUtilsLabelEXT = (PFN_vkQueueEndDebugUtilsLabelEXT)load(context, "vkQueueEndDebugUtilsLabelEXT"); - vkQueueInsertDebugUtilsLabelEXT = (PFN_vkQueueInsertDebugUtilsLabelEXT)load(context, "vkQueueInsertDebugUtilsLabelEXT"); - vkSetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT)load(context, "vkSetDebugUtilsObjectNameEXT"); - vkSetDebugUtilsObjectTagEXT = (PFN_vkSetDebugUtilsObjectTagEXT)load(context, "vkSetDebugUtilsObjectTagEXT"); -#endif /* defined(VK_EXT_debug_utils) */ +#if defined(VK_EXT_depth_bias_control) + vkCmdSetDepthBias2EXT = (PFN_vkCmdSetDepthBias2EXT)load(context, "vkCmdSetDepthBias2EXT"); +#endif /* defined(VK_EXT_depth_bias_control) */ +#if defined(VK_EXT_descriptor_buffer) + vkCmdBindDescriptorBufferEmbeddedSamplersEXT = (PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT)load(context, "vkCmdBindDescriptorBufferEmbeddedSamplersEXT"); + vkCmdBindDescriptorBuffersEXT = (PFN_vkCmdBindDescriptorBuffersEXT)load(context, "vkCmdBindDescriptorBuffersEXT"); + vkCmdSetDescriptorBufferOffsetsEXT = (PFN_vkCmdSetDescriptorBufferOffsetsEXT)load(context, "vkCmdSetDescriptorBufferOffsetsEXT"); + vkGetBufferOpaqueCaptureDescriptorDataEXT = (PFN_vkGetBufferOpaqueCaptureDescriptorDataEXT)load(context, "vkGetBufferOpaqueCaptureDescriptorDataEXT"); + vkGetDescriptorEXT = (PFN_vkGetDescriptorEXT)load(context, "vkGetDescriptorEXT"); + vkGetDescriptorSetLayoutBindingOffsetEXT = (PFN_vkGetDescriptorSetLayoutBindingOffsetEXT)load(context, "vkGetDescriptorSetLayoutBindingOffsetEXT"); + vkGetDescriptorSetLayoutSizeEXT = (PFN_vkGetDescriptorSetLayoutSizeEXT)load(context, 
"vkGetDescriptorSetLayoutSizeEXT"); + vkGetImageOpaqueCaptureDescriptorDataEXT = (PFN_vkGetImageOpaqueCaptureDescriptorDataEXT)load(context, "vkGetImageOpaqueCaptureDescriptorDataEXT"); + vkGetImageViewOpaqueCaptureDescriptorDataEXT = (PFN_vkGetImageViewOpaqueCaptureDescriptorDataEXT)load(context, "vkGetImageViewOpaqueCaptureDescriptorDataEXT"); + vkGetSamplerOpaqueCaptureDescriptorDataEXT = (PFN_vkGetSamplerOpaqueCaptureDescriptorDataEXT)load(context, "vkGetSamplerOpaqueCaptureDescriptorDataEXT"); +#endif /* defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) + vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT = (PFN_vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT)load(context, "vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT"); +#endif /* defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) */ +#if defined(VK_EXT_device_fault) + vkGetDeviceFaultInfoEXT = (PFN_vkGetDeviceFaultInfoEXT)load(context, "vkGetDeviceFaultInfoEXT"); +#endif /* defined(VK_EXT_device_fault) */ +#if defined(VK_EXT_device_generated_commands) + vkCmdExecuteGeneratedCommandsEXT = (PFN_vkCmdExecuteGeneratedCommandsEXT)load(context, "vkCmdExecuteGeneratedCommandsEXT"); + vkCmdPreprocessGeneratedCommandsEXT = (PFN_vkCmdPreprocessGeneratedCommandsEXT)load(context, "vkCmdPreprocessGeneratedCommandsEXT"); + vkCreateIndirectCommandsLayoutEXT = (PFN_vkCreateIndirectCommandsLayoutEXT)load(context, "vkCreateIndirectCommandsLayoutEXT"); + vkCreateIndirectExecutionSetEXT = (PFN_vkCreateIndirectExecutionSetEXT)load(context, "vkCreateIndirectExecutionSetEXT"); + vkDestroyIndirectCommandsLayoutEXT = (PFN_vkDestroyIndirectCommandsLayoutEXT)load(context, "vkDestroyIndirectCommandsLayoutEXT"); + vkDestroyIndirectExecutionSetEXT = (PFN_vkDestroyIndirectExecutionSetEXT)load(context, "vkDestroyIndirectExecutionSetEXT"); + 
vkGetGeneratedCommandsMemoryRequirementsEXT = (PFN_vkGetGeneratedCommandsMemoryRequirementsEXT)load(context, "vkGetGeneratedCommandsMemoryRequirementsEXT"); + vkUpdateIndirectExecutionSetPipelineEXT = (PFN_vkUpdateIndirectExecutionSetPipelineEXT)load(context, "vkUpdateIndirectExecutionSetPipelineEXT"); + vkUpdateIndirectExecutionSetShaderEXT = (PFN_vkUpdateIndirectExecutionSetShaderEXT)load(context, "vkUpdateIndirectExecutionSetShaderEXT"); +#endif /* defined(VK_EXT_device_generated_commands) */ #if defined(VK_EXT_discard_rectangles) vkCmdSetDiscardRectangleEXT = (PFN_vkCmdSetDiscardRectangleEXT)load(context, "vkCmdSetDiscardRectangleEXT"); #endif /* defined(VK_EXT_discard_rectangles) */ +#if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 + vkCmdSetDiscardRectangleEnableEXT = (PFN_vkCmdSetDiscardRectangleEnableEXT)load(context, "vkCmdSetDiscardRectangleEnableEXT"); + vkCmdSetDiscardRectangleModeEXT = (PFN_vkCmdSetDiscardRectangleModeEXT)load(context, "vkCmdSetDiscardRectangleModeEXT"); +#endif /* defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 */ #if defined(VK_EXT_display_control) vkDisplayPowerControlEXT = (PFN_vkDisplayPowerControlEXT)load(context, "vkDisplayPowerControlEXT"); vkGetSwapchainCounterEXT = (PFN_vkGetSwapchainCounterEXT)load(context, "vkGetSwapchainCounterEXT"); @@ -453,15 +743,89 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c #if defined(VK_EXT_external_memory_host) vkGetMemoryHostPointerPropertiesEXT = (PFN_vkGetMemoryHostPointerPropertiesEXT)load(context, "vkGetMemoryHostPointerPropertiesEXT"); #endif /* defined(VK_EXT_external_memory_host) */ +#if defined(VK_EXT_full_screen_exclusive) + vkAcquireFullScreenExclusiveModeEXT = (PFN_vkAcquireFullScreenExclusiveModeEXT)load(context, "vkAcquireFullScreenExclusiveModeEXT"); + vkReleaseFullScreenExclusiveModeEXT = (PFN_vkReleaseFullScreenExclusiveModeEXT)load(context, 
"vkReleaseFullScreenExclusiveModeEXT"); +#endif /* defined(VK_EXT_full_screen_exclusive) */ +#if defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) + vkGetDeviceGroupSurfacePresentModes2EXT = (PFN_vkGetDeviceGroupSurfacePresentModes2EXT)load(context, "vkGetDeviceGroupSurfacePresentModes2EXT"); +#endif /* defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) */ #if defined(VK_EXT_hdr_metadata) vkSetHdrMetadataEXT = (PFN_vkSetHdrMetadataEXT)load(context, "vkSetHdrMetadataEXT"); #endif /* defined(VK_EXT_hdr_metadata) */ +#if defined(VK_EXT_host_image_copy) + vkCopyImageToImageEXT = (PFN_vkCopyImageToImageEXT)load(context, "vkCopyImageToImageEXT"); + vkCopyImageToMemoryEXT = (PFN_vkCopyImageToMemoryEXT)load(context, "vkCopyImageToMemoryEXT"); + vkCopyMemoryToImageEXT = (PFN_vkCopyMemoryToImageEXT)load(context, "vkCopyMemoryToImageEXT"); + vkTransitionImageLayoutEXT = (PFN_vkTransitionImageLayoutEXT)load(context, "vkTransitionImageLayoutEXT"); +#endif /* defined(VK_EXT_host_image_copy) */ +#if defined(VK_EXT_host_query_reset) + vkResetQueryPoolEXT = (PFN_vkResetQueryPoolEXT)load(context, "vkResetQueryPoolEXT"); +#endif /* defined(VK_EXT_host_query_reset) */ #if defined(VK_EXT_image_drm_format_modifier) vkGetImageDrmFormatModifierPropertiesEXT = (PFN_vkGetImageDrmFormatModifierPropertiesEXT)load(context, "vkGetImageDrmFormatModifierPropertiesEXT"); #endif /* defined(VK_EXT_image_drm_format_modifier) */ +#if defined(VK_EXT_line_rasterization) + vkCmdSetLineStippleEXT = (PFN_vkCmdSetLineStippleEXT)load(context, "vkCmdSetLineStippleEXT"); +#endif /* defined(VK_EXT_line_rasterization) */ +#if defined(VK_EXT_mesh_shader) + vkCmdDrawMeshTasksEXT = (PFN_vkCmdDrawMeshTasksEXT)load(context, "vkCmdDrawMeshTasksEXT"); + vkCmdDrawMeshTasksIndirectEXT = (PFN_vkCmdDrawMeshTasksIndirectEXT)load(context, "vkCmdDrawMeshTasksIndirectEXT"); +#endif /* defined(VK_EXT_mesh_shader) */ +#if 
defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) + vkCmdDrawMeshTasksIndirectCountEXT = (PFN_vkCmdDrawMeshTasksIndirectCountEXT)load(context, "vkCmdDrawMeshTasksIndirectCountEXT"); +#endif /* defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_EXT_metal_objects) + vkExportMetalObjectsEXT = (PFN_vkExportMetalObjectsEXT)load(context, "vkExportMetalObjectsEXT"); +#endif /* defined(VK_EXT_metal_objects) */ +#if defined(VK_EXT_multi_draw) + vkCmdDrawMultiEXT = (PFN_vkCmdDrawMultiEXT)load(context, "vkCmdDrawMultiEXT"); + vkCmdDrawMultiIndexedEXT = (PFN_vkCmdDrawMultiIndexedEXT)load(context, "vkCmdDrawMultiIndexedEXT"); +#endif /* defined(VK_EXT_multi_draw) */ +#if defined(VK_EXT_opacity_micromap) + vkBuildMicromapsEXT = (PFN_vkBuildMicromapsEXT)load(context, "vkBuildMicromapsEXT"); + vkCmdBuildMicromapsEXT = (PFN_vkCmdBuildMicromapsEXT)load(context, "vkCmdBuildMicromapsEXT"); + vkCmdCopyMemoryToMicromapEXT = (PFN_vkCmdCopyMemoryToMicromapEXT)load(context, "vkCmdCopyMemoryToMicromapEXT"); + vkCmdCopyMicromapEXT = (PFN_vkCmdCopyMicromapEXT)load(context, "vkCmdCopyMicromapEXT"); + vkCmdCopyMicromapToMemoryEXT = (PFN_vkCmdCopyMicromapToMemoryEXT)load(context, "vkCmdCopyMicromapToMemoryEXT"); + vkCmdWriteMicromapsPropertiesEXT = (PFN_vkCmdWriteMicromapsPropertiesEXT)load(context, "vkCmdWriteMicromapsPropertiesEXT"); + vkCopyMemoryToMicromapEXT = (PFN_vkCopyMemoryToMicromapEXT)load(context, "vkCopyMemoryToMicromapEXT"); + vkCopyMicromapEXT = (PFN_vkCopyMicromapEXT)load(context, "vkCopyMicromapEXT"); + vkCopyMicromapToMemoryEXT = (PFN_vkCopyMicromapToMemoryEXT)load(context, "vkCopyMicromapToMemoryEXT"); + vkCreateMicromapEXT = (PFN_vkCreateMicromapEXT)load(context, "vkCreateMicromapEXT"); + vkDestroyMicromapEXT = (PFN_vkDestroyMicromapEXT)load(context, "vkDestroyMicromapEXT"); + vkGetDeviceMicromapCompatibilityEXT = (PFN_vkGetDeviceMicromapCompatibilityEXT)load(context, 
"vkGetDeviceMicromapCompatibilityEXT"); + vkGetMicromapBuildSizesEXT = (PFN_vkGetMicromapBuildSizesEXT)load(context, "vkGetMicromapBuildSizesEXT"); + vkWriteMicromapsPropertiesEXT = (PFN_vkWriteMicromapsPropertiesEXT)load(context, "vkWriteMicromapsPropertiesEXT"); +#endif /* defined(VK_EXT_opacity_micromap) */ +#if defined(VK_EXT_pageable_device_local_memory) + vkSetDeviceMemoryPriorityEXT = (PFN_vkSetDeviceMemoryPriorityEXT)load(context, "vkSetDeviceMemoryPriorityEXT"); +#endif /* defined(VK_EXT_pageable_device_local_memory) */ +#if defined(VK_EXT_pipeline_properties) + vkGetPipelinePropertiesEXT = (PFN_vkGetPipelinePropertiesEXT)load(context, "vkGetPipelinePropertiesEXT"); +#endif /* defined(VK_EXT_pipeline_properties) */ +#if defined(VK_EXT_private_data) + vkCreatePrivateDataSlotEXT = (PFN_vkCreatePrivateDataSlotEXT)load(context, "vkCreatePrivateDataSlotEXT"); + vkDestroyPrivateDataSlotEXT = (PFN_vkDestroyPrivateDataSlotEXT)load(context, "vkDestroyPrivateDataSlotEXT"); + vkGetPrivateDataEXT = (PFN_vkGetPrivateDataEXT)load(context, "vkGetPrivateDataEXT"); + vkSetPrivateDataEXT = (PFN_vkSetPrivateDataEXT)load(context, "vkSetPrivateDataEXT"); +#endif /* defined(VK_EXT_private_data) */ #if defined(VK_EXT_sample_locations) vkCmdSetSampleLocationsEXT = (PFN_vkCmdSetSampleLocationsEXT)load(context, "vkCmdSetSampleLocationsEXT"); #endif /* defined(VK_EXT_sample_locations) */ +#if defined(VK_EXT_shader_module_identifier) + vkGetShaderModuleCreateInfoIdentifierEXT = (PFN_vkGetShaderModuleCreateInfoIdentifierEXT)load(context, "vkGetShaderModuleCreateInfoIdentifierEXT"); + vkGetShaderModuleIdentifierEXT = (PFN_vkGetShaderModuleIdentifierEXT)load(context, "vkGetShaderModuleIdentifierEXT"); +#endif /* defined(VK_EXT_shader_module_identifier) */ +#if defined(VK_EXT_shader_object) + vkCmdBindShadersEXT = (PFN_vkCmdBindShadersEXT)load(context, "vkCmdBindShadersEXT"); + vkCreateShadersEXT = (PFN_vkCreateShadersEXT)load(context, "vkCreateShadersEXT"); + vkDestroyShaderEXT = 
(PFN_vkDestroyShaderEXT)load(context, "vkDestroyShaderEXT"); + vkGetShaderBinaryDataEXT = (PFN_vkGetShaderBinaryDataEXT)load(context, "vkGetShaderBinaryDataEXT"); +#endif /* defined(VK_EXT_shader_object) */ +#if defined(VK_EXT_swapchain_maintenance1) + vkReleaseSwapchainImagesEXT = (PFN_vkReleaseSwapchainImagesEXT)load(context, "vkReleaseSwapchainImagesEXT"); +#endif /* defined(VK_EXT_swapchain_maintenance1) */ #if defined(VK_EXT_transform_feedback) vkCmdBeginQueryIndexedEXT = (PFN_vkCmdBeginQueryIndexedEXT)load(context, "vkCmdBeginQueryIndexedEXT"); vkCmdBeginTransformFeedbackEXT = (PFN_vkCmdBeginTransformFeedbackEXT)load(context, "vkCmdBeginTransformFeedbackEXT"); @@ -476,20 +840,100 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkGetValidationCacheDataEXT = (PFN_vkGetValidationCacheDataEXT)load(context, "vkGetValidationCacheDataEXT"); vkMergeValidationCachesEXT = (PFN_vkMergeValidationCachesEXT)load(context, "vkMergeValidationCachesEXT"); #endif /* defined(VK_EXT_validation_cache) */ +#if defined(VK_FUCHSIA_buffer_collection) + vkCreateBufferCollectionFUCHSIA = (PFN_vkCreateBufferCollectionFUCHSIA)load(context, "vkCreateBufferCollectionFUCHSIA"); + vkDestroyBufferCollectionFUCHSIA = (PFN_vkDestroyBufferCollectionFUCHSIA)load(context, "vkDestroyBufferCollectionFUCHSIA"); + vkGetBufferCollectionPropertiesFUCHSIA = (PFN_vkGetBufferCollectionPropertiesFUCHSIA)load(context, "vkGetBufferCollectionPropertiesFUCHSIA"); + vkSetBufferCollectionBufferConstraintsFUCHSIA = (PFN_vkSetBufferCollectionBufferConstraintsFUCHSIA)load(context, "vkSetBufferCollectionBufferConstraintsFUCHSIA"); + vkSetBufferCollectionImageConstraintsFUCHSIA = (PFN_vkSetBufferCollectionImageConstraintsFUCHSIA)load(context, "vkSetBufferCollectionImageConstraintsFUCHSIA"); +#endif /* defined(VK_FUCHSIA_buffer_collection) */ +#if defined(VK_FUCHSIA_external_memory) + vkGetMemoryZirconHandleFUCHSIA = (PFN_vkGetMemoryZirconHandleFUCHSIA)load(context, 
"vkGetMemoryZirconHandleFUCHSIA"); + vkGetMemoryZirconHandlePropertiesFUCHSIA = (PFN_vkGetMemoryZirconHandlePropertiesFUCHSIA)load(context, "vkGetMemoryZirconHandlePropertiesFUCHSIA"); +#endif /* defined(VK_FUCHSIA_external_memory) */ +#if defined(VK_FUCHSIA_external_semaphore) + vkGetSemaphoreZirconHandleFUCHSIA = (PFN_vkGetSemaphoreZirconHandleFUCHSIA)load(context, "vkGetSemaphoreZirconHandleFUCHSIA"); + vkImportSemaphoreZirconHandleFUCHSIA = (PFN_vkImportSemaphoreZirconHandleFUCHSIA)load(context, "vkImportSemaphoreZirconHandleFUCHSIA"); +#endif /* defined(VK_FUCHSIA_external_semaphore) */ #if defined(VK_GOOGLE_display_timing) vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE)load(context, "vkGetPastPresentationTimingGOOGLE"); vkGetRefreshCycleDurationGOOGLE = (PFN_vkGetRefreshCycleDurationGOOGLE)load(context, "vkGetRefreshCycleDurationGOOGLE"); #endif /* defined(VK_GOOGLE_display_timing) */ +#if defined(VK_HUAWEI_cluster_culling_shader) + vkCmdDrawClusterHUAWEI = (PFN_vkCmdDrawClusterHUAWEI)load(context, "vkCmdDrawClusterHUAWEI"); + vkCmdDrawClusterIndirectHUAWEI = (PFN_vkCmdDrawClusterIndirectHUAWEI)load(context, "vkCmdDrawClusterIndirectHUAWEI"); +#endif /* defined(VK_HUAWEI_cluster_culling_shader) */ +#if defined(VK_HUAWEI_invocation_mask) + vkCmdBindInvocationMaskHUAWEI = (PFN_vkCmdBindInvocationMaskHUAWEI)load(context, "vkCmdBindInvocationMaskHUAWEI"); +#endif /* defined(VK_HUAWEI_invocation_mask) */ +#if defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 + vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI = (PFN_vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI)load(context, "vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI"); +#endif /* defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 */ +#if defined(VK_HUAWEI_subpass_shading) + vkCmdSubpassShadingHUAWEI = (PFN_vkCmdSubpassShadingHUAWEI)load(context, "vkCmdSubpassShadingHUAWEI"); +#endif /* 
defined(VK_HUAWEI_subpass_shading) */ +#if defined(VK_INTEL_performance_query) + vkAcquirePerformanceConfigurationINTEL = (PFN_vkAcquirePerformanceConfigurationINTEL)load(context, "vkAcquirePerformanceConfigurationINTEL"); + vkCmdSetPerformanceMarkerINTEL = (PFN_vkCmdSetPerformanceMarkerINTEL)load(context, "vkCmdSetPerformanceMarkerINTEL"); + vkCmdSetPerformanceOverrideINTEL = (PFN_vkCmdSetPerformanceOverrideINTEL)load(context, "vkCmdSetPerformanceOverrideINTEL"); + vkCmdSetPerformanceStreamMarkerINTEL = (PFN_vkCmdSetPerformanceStreamMarkerINTEL)load(context, "vkCmdSetPerformanceStreamMarkerINTEL"); + vkGetPerformanceParameterINTEL = (PFN_vkGetPerformanceParameterINTEL)load(context, "vkGetPerformanceParameterINTEL"); + vkInitializePerformanceApiINTEL = (PFN_vkInitializePerformanceApiINTEL)load(context, "vkInitializePerformanceApiINTEL"); + vkQueueSetPerformanceConfigurationINTEL = (PFN_vkQueueSetPerformanceConfigurationINTEL)load(context, "vkQueueSetPerformanceConfigurationINTEL"); + vkReleasePerformanceConfigurationINTEL = (PFN_vkReleasePerformanceConfigurationINTEL)load(context, "vkReleasePerformanceConfigurationINTEL"); + vkUninitializePerformanceApiINTEL = (PFN_vkUninitializePerformanceApiINTEL)load(context, "vkUninitializePerformanceApiINTEL"); +#endif /* defined(VK_INTEL_performance_query) */ +#if defined(VK_KHR_acceleration_structure) + vkBuildAccelerationStructuresKHR = (PFN_vkBuildAccelerationStructuresKHR)load(context, "vkBuildAccelerationStructuresKHR"); + vkCmdBuildAccelerationStructuresIndirectKHR = (PFN_vkCmdBuildAccelerationStructuresIndirectKHR)load(context, "vkCmdBuildAccelerationStructuresIndirectKHR"); + vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)load(context, "vkCmdBuildAccelerationStructuresKHR"); + vkCmdCopyAccelerationStructureKHR = (PFN_vkCmdCopyAccelerationStructureKHR)load(context, "vkCmdCopyAccelerationStructureKHR"); + vkCmdCopyAccelerationStructureToMemoryKHR = 
(PFN_vkCmdCopyAccelerationStructureToMemoryKHR)load(context, "vkCmdCopyAccelerationStructureToMemoryKHR"); + vkCmdCopyMemoryToAccelerationStructureKHR = (PFN_vkCmdCopyMemoryToAccelerationStructureKHR)load(context, "vkCmdCopyMemoryToAccelerationStructureKHR"); + vkCmdWriteAccelerationStructuresPropertiesKHR = (PFN_vkCmdWriteAccelerationStructuresPropertiesKHR)load(context, "vkCmdWriteAccelerationStructuresPropertiesKHR"); + vkCopyAccelerationStructureKHR = (PFN_vkCopyAccelerationStructureKHR)load(context, "vkCopyAccelerationStructureKHR"); + vkCopyAccelerationStructureToMemoryKHR = (PFN_vkCopyAccelerationStructureToMemoryKHR)load(context, "vkCopyAccelerationStructureToMemoryKHR"); + vkCopyMemoryToAccelerationStructureKHR = (PFN_vkCopyMemoryToAccelerationStructureKHR)load(context, "vkCopyMemoryToAccelerationStructureKHR"); + vkCreateAccelerationStructureKHR = (PFN_vkCreateAccelerationStructureKHR)load(context, "vkCreateAccelerationStructureKHR"); + vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)load(context, "vkDestroyAccelerationStructureKHR"); + vkGetAccelerationStructureBuildSizesKHR = (PFN_vkGetAccelerationStructureBuildSizesKHR)load(context, "vkGetAccelerationStructureBuildSizesKHR"); + vkGetAccelerationStructureDeviceAddressKHR = (PFN_vkGetAccelerationStructureDeviceAddressKHR)load(context, "vkGetAccelerationStructureDeviceAddressKHR"); + vkGetDeviceAccelerationStructureCompatibilityKHR = (PFN_vkGetDeviceAccelerationStructureCompatibilityKHR)load(context, "vkGetDeviceAccelerationStructureCompatibilityKHR"); + vkWriteAccelerationStructuresPropertiesKHR = (PFN_vkWriteAccelerationStructuresPropertiesKHR)load(context, "vkWriteAccelerationStructuresPropertiesKHR"); +#endif /* defined(VK_KHR_acceleration_structure) */ #if defined(VK_KHR_bind_memory2) vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2KHR)load(context, "vkBindBufferMemory2KHR"); vkBindImageMemory2KHR = (PFN_vkBindImageMemory2KHR)load(context, "vkBindImageMemory2KHR"); #endif 
/* defined(VK_KHR_bind_memory2) */ +#if defined(VK_KHR_buffer_device_address) + vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)load(context, "vkGetBufferDeviceAddressKHR"); + vkGetBufferOpaqueCaptureAddressKHR = (PFN_vkGetBufferOpaqueCaptureAddressKHR)load(context, "vkGetBufferOpaqueCaptureAddressKHR"); + vkGetDeviceMemoryOpaqueCaptureAddressKHR = (PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)load(context, "vkGetDeviceMemoryOpaqueCaptureAddressKHR"); +#endif /* defined(VK_KHR_buffer_device_address) */ +#if defined(VK_KHR_calibrated_timestamps) + vkGetCalibratedTimestampsKHR = (PFN_vkGetCalibratedTimestampsKHR)load(context, "vkGetCalibratedTimestampsKHR"); +#endif /* defined(VK_KHR_calibrated_timestamps) */ +#if defined(VK_KHR_copy_commands2) + vkCmdBlitImage2KHR = (PFN_vkCmdBlitImage2KHR)load(context, "vkCmdBlitImage2KHR"); + vkCmdCopyBuffer2KHR = (PFN_vkCmdCopyBuffer2KHR)load(context, "vkCmdCopyBuffer2KHR"); + vkCmdCopyBufferToImage2KHR = (PFN_vkCmdCopyBufferToImage2KHR)load(context, "vkCmdCopyBufferToImage2KHR"); + vkCmdCopyImage2KHR = (PFN_vkCmdCopyImage2KHR)load(context, "vkCmdCopyImage2KHR"); + vkCmdCopyImageToBuffer2KHR = (PFN_vkCmdCopyImageToBuffer2KHR)load(context, "vkCmdCopyImageToBuffer2KHR"); + vkCmdResolveImage2KHR = (PFN_vkCmdResolveImage2KHR)load(context, "vkCmdResolveImage2KHR"); +#endif /* defined(VK_KHR_copy_commands2) */ #if defined(VK_KHR_create_renderpass2) vkCmdBeginRenderPass2KHR = (PFN_vkCmdBeginRenderPass2KHR)load(context, "vkCmdBeginRenderPass2KHR"); vkCmdEndRenderPass2KHR = (PFN_vkCmdEndRenderPass2KHR)load(context, "vkCmdEndRenderPass2KHR"); vkCmdNextSubpass2KHR = (PFN_vkCmdNextSubpass2KHR)load(context, "vkCmdNextSubpass2KHR"); vkCreateRenderPass2KHR = (PFN_vkCreateRenderPass2KHR)load(context, "vkCreateRenderPass2KHR"); #endif /* defined(VK_KHR_create_renderpass2) */ +#if defined(VK_KHR_deferred_host_operations) + vkCreateDeferredOperationKHR = (PFN_vkCreateDeferredOperationKHR)load(context, "vkCreateDeferredOperationKHR"); 
+ vkDeferredOperationJoinKHR = (PFN_vkDeferredOperationJoinKHR)load(context, "vkDeferredOperationJoinKHR"); + vkDestroyDeferredOperationKHR = (PFN_vkDestroyDeferredOperationKHR)load(context, "vkDestroyDeferredOperationKHR"); + vkGetDeferredOperationMaxConcurrencyKHR = (PFN_vkGetDeferredOperationMaxConcurrencyKHR)load(context, "vkGetDeferredOperationMaxConcurrencyKHR"); + vkGetDeferredOperationResultKHR = (PFN_vkGetDeferredOperationResultKHR)load(context, "vkGetDeferredOperationResultKHR"); +#endif /* defined(VK_KHR_deferred_host_operations) */ #if defined(VK_KHR_descriptor_update_template) vkCreateDescriptorUpdateTemplateKHR = (PFN_vkCreateDescriptorUpdateTemplateKHR)load(context, "vkCreateDescriptorUpdateTemplateKHR"); vkDestroyDescriptorUpdateTemplateKHR = (PFN_vkDestroyDescriptorUpdateTemplateKHR)load(context, "vkDestroyDescriptorUpdateTemplateKHR"); @@ -507,6 +951,14 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkCmdDrawIndexedIndirectCountKHR = (PFN_vkCmdDrawIndexedIndirectCountKHR)load(context, "vkCmdDrawIndexedIndirectCountKHR"); vkCmdDrawIndirectCountKHR = (PFN_vkCmdDrawIndirectCountKHR)load(context, "vkCmdDrawIndirectCountKHR"); #endif /* defined(VK_KHR_draw_indirect_count) */ +#if defined(VK_KHR_dynamic_rendering) + vkCmdBeginRenderingKHR = (PFN_vkCmdBeginRenderingKHR)load(context, "vkCmdBeginRenderingKHR"); + vkCmdEndRenderingKHR = (PFN_vkCmdEndRenderingKHR)load(context, "vkCmdEndRenderingKHR"); +#endif /* defined(VK_KHR_dynamic_rendering) */ +#if defined(VK_KHR_dynamic_rendering_local_read) + vkCmdSetRenderingAttachmentLocationsKHR = (PFN_vkCmdSetRenderingAttachmentLocationsKHR)load(context, "vkCmdSetRenderingAttachmentLocationsKHR"); + vkCmdSetRenderingInputAttachmentIndicesKHR = (PFN_vkCmdSetRenderingInputAttachmentIndicesKHR)load(context, "vkCmdSetRenderingInputAttachmentIndicesKHR"); +#endif /* defined(VK_KHR_dynamic_rendering_local_read) */ #if defined(VK_KHR_external_fence_fd) vkGetFenceFdKHR = 
(PFN_vkGetFenceFdKHR)load(context, "vkGetFenceFdKHR"); vkImportFenceFdKHR = (PFN_vkImportFenceFdKHR)load(context, "vkImportFenceFdKHR"); @@ -531,20 +983,84 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkGetSemaphoreWin32HandleKHR = (PFN_vkGetSemaphoreWin32HandleKHR)load(context, "vkGetSemaphoreWin32HandleKHR"); vkImportSemaphoreWin32HandleKHR = (PFN_vkImportSemaphoreWin32HandleKHR)load(context, "vkImportSemaphoreWin32HandleKHR"); #endif /* defined(VK_KHR_external_semaphore_win32) */ +#if defined(VK_KHR_fragment_shading_rate) + vkCmdSetFragmentShadingRateKHR = (PFN_vkCmdSetFragmentShadingRateKHR)load(context, "vkCmdSetFragmentShadingRateKHR"); +#endif /* defined(VK_KHR_fragment_shading_rate) */ #if defined(VK_KHR_get_memory_requirements2) vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2KHR)load(context, "vkGetBufferMemoryRequirements2KHR"); vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2KHR)load(context, "vkGetImageMemoryRequirements2KHR"); vkGetImageSparseMemoryRequirements2KHR = (PFN_vkGetImageSparseMemoryRequirements2KHR)load(context, "vkGetImageSparseMemoryRequirements2KHR"); #endif /* defined(VK_KHR_get_memory_requirements2) */ +#if defined(VK_KHR_line_rasterization) + vkCmdSetLineStippleKHR = (PFN_vkCmdSetLineStippleKHR)load(context, "vkCmdSetLineStippleKHR"); +#endif /* defined(VK_KHR_line_rasterization) */ #if defined(VK_KHR_maintenance1) vkTrimCommandPoolKHR = (PFN_vkTrimCommandPoolKHR)load(context, "vkTrimCommandPoolKHR"); #endif /* defined(VK_KHR_maintenance1) */ #if defined(VK_KHR_maintenance3) vkGetDescriptorSetLayoutSupportKHR = (PFN_vkGetDescriptorSetLayoutSupportKHR)load(context, "vkGetDescriptorSetLayoutSupportKHR"); #endif /* defined(VK_KHR_maintenance3) */ +#if defined(VK_KHR_maintenance4) + vkGetDeviceBufferMemoryRequirementsKHR = (PFN_vkGetDeviceBufferMemoryRequirementsKHR)load(context, "vkGetDeviceBufferMemoryRequirementsKHR"); + 
vkGetDeviceImageMemoryRequirementsKHR = (PFN_vkGetDeviceImageMemoryRequirementsKHR)load(context, "vkGetDeviceImageMemoryRequirementsKHR"); + vkGetDeviceImageSparseMemoryRequirementsKHR = (PFN_vkGetDeviceImageSparseMemoryRequirementsKHR)load(context, "vkGetDeviceImageSparseMemoryRequirementsKHR"); +#endif /* defined(VK_KHR_maintenance4) */ +#if defined(VK_KHR_maintenance5) + vkCmdBindIndexBuffer2KHR = (PFN_vkCmdBindIndexBuffer2KHR)load(context, "vkCmdBindIndexBuffer2KHR"); + vkGetDeviceImageSubresourceLayoutKHR = (PFN_vkGetDeviceImageSubresourceLayoutKHR)load(context, "vkGetDeviceImageSubresourceLayoutKHR"); + vkGetImageSubresourceLayout2KHR = (PFN_vkGetImageSubresourceLayout2KHR)load(context, "vkGetImageSubresourceLayout2KHR"); + vkGetRenderingAreaGranularityKHR = (PFN_vkGetRenderingAreaGranularityKHR)load(context, "vkGetRenderingAreaGranularityKHR"); +#endif /* defined(VK_KHR_maintenance5) */ +#if defined(VK_KHR_maintenance6) + vkCmdBindDescriptorSets2KHR = (PFN_vkCmdBindDescriptorSets2KHR)load(context, "vkCmdBindDescriptorSets2KHR"); + vkCmdPushConstants2KHR = (PFN_vkCmdPushConstants2KHR)load(context, "vkCmdPushConstants2KHR"); +#endif /* defined(VK_KHR_maintenance6) */ +#if defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) + vkCmdPushDescriptorSet2KHR = (PFN_vkCmdPushDescriptorSet2KHR)load(context, "vkCmdPushDescriptorSet2KHR"); + vkCmdPushDescriptorSetWithTemplate2KHR = (PFN_vkCmdPushDescriptorSetWithTemplate2KHR)load(context, "vkCmdPushDescriptorSetWithTemplate2KHR"); +#endif /* defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) + vkCmdBindDescriptorBufferEmbeddedSamplers2EXT = (PFN_vkCmdBindDescriptorBufferEmbeddedSamplers2EXT)load(context, "vkCmdBindDescriptorBufferEmbeddedSamplers2EXT"); + vkCmdSetDescriptorBufferOffsets2EXT = (PFN_vkCmdSetDescriptorBufferOffsets2EXT)load(context, "vkCmdSetDescriptorBufferOffsets2EXT"); +#endif /* 
defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_KHR_map_memory2) + vkMapMemory2KHR = (PFN_vkMapMemory2KHR)load(context, "vkMapMemory2KHR"); + vkUnmapMemory2KHR = (PFN_vkUnmapMemory2KHR)load(context, "vkUnmapMemory2KHR"); +#endif /* defined(VK_KHR_map_memory2) */ +#if defined(VK_KHR_performance_query) + vkAcquireProfilingLockKHR = (PFN_vkAcquireProfilingLockKHR)load(context, "vkAcquireProfilingLockKHR"); + vkReleaseProfilingLockKHR = (PFN_vkReleaseProfilingLockKHR)load(context, "vkReleaseProfilingLockKHR"); +#endif /* defined(VK_KHR_performance_query) */ +#if defined(VK_KHR_pipeline_binary) + vkCreatePipelineBinariesKHR = (PFN_vkCreatePipelineBinariesKHR)load(context, "vkCreatePipelineBinariesKHR"); + vkDestroyPipelineBinaryKHR = (PFN_vkDestroyPipelineBinaryKHR)load(context, "vkDestroyPipelineBinaryKHR"); + vkGetPipelineBinaryDataKHR = (PFN_vkGetPipelineBinaryDataKHR)load(context, "vkGetPipelineBinaryDataKHR"); + vkGetPipelineKeyKHR = (PFN_vkGetPipelineKeyKHR)load(context, "vkGetPipelineKeyKHR"); + vkReleaseCapturedPipelineDataKHR = (PFN_vkReleaseCapturedPipelineDataKHR)load(context, "vkReleaseCapturedPipelineDataKHR"); +#endif /* defined(VK_KHR_pipeline_binary) */ +#if defined(VK_KHR_pipeline_executable_properties) + vkGetPipelineExecutableInternalRepresentationsKHR = (PFN_vkGetPipelineExecutableInternalRepresentationsKHR)load(context, "vkGetPipelineExecutableInternalRepresentationsKHR"); + vkGetPipelineExecutablePropertiesKHR = (PFN_vkGetPipelineExecutablePropertiesKHR)load(context, "vkGetPipelineExecutablePropertiesKHR"); + vkGetPipelineExecutableStatisticsKHR = (PFN_vkGetPipelineExecutableStatisticsKHR)load(context, "vkGetPipelineExecutableStatisticsKHR"); +#endif /* defined(VK_KHR_pipeline_executable_properties) */ +#if defined(VK_KHR_present_wait) + vkWaitForPresentKHR = (PFN_vkWaitForPresentKHR)load(context, "vkWaitForPresentKHR"); +#endif /* defined(VK_KHR_present_wait) */ #if defined(VK_KHR_push_descriptor) 
vkCmdPushDescriptorSetKHR = (PFN_vkCmdPushDescriptorSetKHR)load(context, "vkCmdPushDescriptorSetKHR"); #endif /* defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) + vkCmdTraceRaysIndirect2KHR = (PFN_vkCmdTraceRaysIndirect2KHR)load(context, "vkCmdTraceRaysIndirect2KHR"); +#endif /* defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) */ +#if defined(VK_KHR_ray_tracing_pipeline) + vkCmdSetRayTracingPipelineStackSizeKHR = (PFN_vkCmdSetRayTracingPipelineStackSizeKHR)load(context, "vkCmdSetRayTracingPipelineStackSizeKHR"); + vkCmdTraceRaysIndirectKHR = (PFN_vkCmdTraceRaysIndirectKHR)load(context, "vkCmdTraceRaysIndirectKHR"); + vkCmdTraceRaysKHR = (PFN_vkCmdTraceRaysKHR)load(context, "vkCmdTraceRaysKHR"); + vkCreateRayTracingPipelinesKHR = (PFN_vkCreateRayTracingPipelinesKHR)load(context, "vkCreateRayTracingPipelinesKHR"); + vkGetRayTracingCaptureReplayShaderGroupHandlesKHR = (PFN_vkGetRayTracingCaptureReplayShaderGroupHandlesKHR)load(context, "vkGetRayTracingCaptureReplayShaderGroupHandlesKHR"); + vkGetRayTracingShaderGroupHandlesKHR = (PFN_vkGetRayTracingShaderGroupHandlesKHR)load(context, "vkGetRayTracingShaderGroupHandlesKHR"); + vkGetRayTracingShaderGroupStackSizeKHR = (PFN_vkGetRayTracingShaderGroupStackSizeKHR)load(context, "vkGetRayTracingShaderGroupStackSizeKHR"); +#endif /* defined(VK_KHR_ray_tracing_pipeline) */ #if defined(VK_KHR_sampler_ycbcr_conversion) vkCreateSamplerYcbcrConversionKHR = (PFN_vkCreateSamplerYcbcrConversionKHR)load(context, "vkCreateSamplerYcbcrConversionKHR"); vkDestroySamplerYcbcrConversionKHR = (PFN_vkDestroySamplerYcbcrConversionKHR)load(context, "vkDestroySamplerYcbcrConversionKHR"); @@ -559,31 +1075,122 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkGetSwapchainImagesKHR = (PFN_vkGetSwapchainImagesKHR)load(context, "vkGetSwapchainImagesKHR"); vkQueuePresentKHR = (PFN_vkQueuePresentKHR)load(context, 
"vkQueuePresentKHR"); #endif /* defined(VK_KHR_swapchain) */ -#if defined(VK_NVX_device_generated_commands) - vkCmdProcessCommandsNVX = (PFN_vkCmdProcessCommandsNVX)load(context, "vkCmdProcessCommandsNVX"); - vkCmdReserveSpaceForCommandsNVX = (PFN_vkCmdReserveSpaceForCommandsNVX)load(context, "vkCmdReserveSpaceForCommandsNVX"); - vkCreateIndirectCommandsLayoutNVX = (PFN_vkCreateIndirectCommandsLayoutNVX)load(context, "vkCreateIndirectCommandsLayoutNVX"); - vkCreateObjectTableNVX = (PFN_vkCreateObjectTableNVX)load(context, "vkCreateObjectTableNVX"); - vkDestroyIndirectCommandsLayoutNVX = (PFN_vkDestroyIndirectCommandsLayoutNVX)load(context, "vkDestroyIndirectCommandsLayoutNVX"); - vkDestroyObjectTableNVX = (PFN_vkDestroyObjectTableNVX)load(context, "vkDestroyObjectTableNVX"); - vkRegisterObjectsNVX = (PFN_vkRegisterObjectsNVX)load(context, "vkRegisterObjectsNVX"); - vkUnregisterObjectsNVX = (PFN_vkUnregisterObjectsNVX)load(context, "vkUnregisterObjectsNVX"); -#endif /* defined(VK_NVX_device_generated_commands) */ +#if defined(VK_KHR_synchronization2) + vkCmdPipelineBarrier2KHR = (PFN_vkCmdPipelineBarrier2KHR)load(context, "vkCmdPipelineBarrier2KHR"); + vkCmdResetEvent2KHR = (PFN_vkCmdResetEvent2KHR)load(context, "vkCmdResetEvent2KHR"); + vkCmdSetEvent2KHR = (PFN_vkCmdSetEvent2KHR)load(context, "vkCmdSetEvent2KHR"); + vkCmdWaitEvents2KHR = (PFN_vkCmdWaitEvents2KHR)load(context, "vkCmdWaitEvents2KHR"); + vkCmdWriteTimestamp2KHR = (PFN_vkCmdWriteTimestamp2KHR)load(context, "vkCmdWriteTimestamp2KHR"); + vkQueueSubmit2KHR = (PFN_vkQueueSubmit2KHR)load(context, "vkQueueSubmit2KHR"); +#endif /* defined(VK_KHR_synchronization2) */ +#if defined(VK_KHR_timeline_semaphore) + vkGetSemaphoreCounterValueKHR = (PFN_vkGetSemaphoreCounterValueKHR)load(context, "vkGetSemaphoreCounterValueKHR"); + vkSignalSemaphoreKHR = (PFN_vkSignalSemaphoreKHR)load(context, "vkSignalSemaphoreKHR"); + vkWaitSemaphoresKHR = (PFN_vkWaitSemaphoresKHR)load(context, "vkWaitSemaphoresKHR"); +#endif /* 
defined(VK_KHR_timeline_semaphore) */ +#if defined(VK_KHR_video_decode_queue) + vkCmdDecodeVideoKHR = (PFN_vkCmdDecodeVideoKHR)load(context, "vkCmdDecodeVideoKHR"); +#endif /* defined(VK_KHR_video_decode_queue) */ +#if defined(VK_KHR_video_encode_queue) + vkCmdEncodeVideoKHR = (PFN_vkCmdEncodeVideoKHR)load(context, "vkCmdEncodeVideoKHR"); + vkGetEncodedVideoSessionParametersKHR = (PFN_vkGetEncodedVideoSessionParametersKHR)load(context, "vkGetEncodedVideoSessionParametersKHR"); +#endif /* defined(VK_KHR_video_encode_queue) */ +#if defined(VK_KHR_video_queue) + vkBindVideoSessionMemoryKHR = (PFN_vkBindVideoSessionMemoryKHR)load(context, "vkBindVideoSessionMemoryKHR"); + vkCmdBeginVideoCodingKHR = (PFN_vkCmdBeginVideoCodingKHR)load(context, "vkCmdBeginVideoCodingKHR"); + vkCmdControlVideoCodingKHR = (PFN_vkCmdControlVideoCodingKHR)load(context, "vkCmdControlVideoCodingKHR"); + vkCmdEndVideoCodingKHR = (PFN_vkCmdEndVideoCodingKHR)load(context, "vkCmdEndVideoCodingKHR"); + vkCreateVideoSessionKHR = (PFN_vkCreateVideoSessionKHR)load(context, "vkCreateVideoSessionKHR"); + vkCreateVideoSessionParametersKHR = (PFN_vkCreateVideoSessionParametersKHR)load(context, "vkCreateVideoSessionParametersKHR"); + vkDestroyVideoSessionKHR = (PFN_vkDestroyVideoSessionKHR)load(context, "vkDestroyVideoSessionKHR"); + vkDestroyVideoSessionParametersKHR = (PFN_vkDestroyVideoSessionParametersKHR)load(context, "vkDestroyVideoSessionParametersKHR"); + vkGetVideoSessionMemoryRequirementsKHR = (PFN_vkGetVideoSessionMemoryRequirementsKHR)load(context, "vkGetVideoSessionMemoryRequirementsKHR"); + vkUpdateVideoSessionParametersKHR = (PFN_vkUpdateVideoSessionParametersKHR)load(context, "vkUpdateVideoSessionParametersKHR"); +#endif /* defined(VK_KHR_video_queue) */ +#if defined(VK_NVX_binary_import) + vkCmdCuLaunchKernelNVX = (PFN_vkCmdCuLaunchKernelNVX)load(context, "vkCmdCuLaunchKernelNVX"); + vkCreateCuFunctionNVX = (PFN_vkCreateCuFunctionNVX)load(context, "vkCreateCuFunctionNVX"); + 
vkCreateCuModuleNVX = (PFN_vkCreateCuModuleNVX)load(context, "vkCreateCuModuleNVX"); + vkDestroyCuFunctionNVX = (PFN_vkDestroyCuFunctionNVX)load(context, "vkDestroyCuFunctionNVX"); + vkDestroyCuModuleNVX = (PFN_vkDestroyCuModuleNVX)load(context, "vkDestroyCuModuleNVX"); +#endif /* defined(VK_NVX_binary_import) */ +#if defined(VK_NVX_image_view_handle) + vkGetImageViewHandleNVX = (PFN_vkGetImageViewHandleNVX)load(context, "vkGetImageViewHandleNVX"); +#endif /* defined(VK_NVX_image_view_handle) */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 + vkGetImageViewHandle64NVX = (PFN_vkGetImageViewHandle64NVX)load(context, "vkGetImageViewHandle64NVX"); +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 + vkGetImageViewAddressNVX = (PFN_vkGetImageViewAddressNVX)load(context, "vkGetImageViewAddressNVX"); +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 */ #if defined(VK_NV_clip_space_w_scaling) vkCmdSetViewportWScalingNV = (PFN_vkCmdSetViewportWScalingNV)load(context, "vkCmdSetViewportWScalingNV"); #endif /* defined(VK_NV_clip_space_w_scaling) */ +#if defined(VK_NV_copy_memory_indirect) + vkCmdCopyMemoryIndirectNV = (PFN_vkCmdCopyMemoryIndirectNV)load(context, "vkCmdCopyMemoryIndirectNV"); + vkCmdCopyMemoryToImageIndirectNV = (PFN_vkCmdCopyMemoryToImageIndirectNV)load(context, "vkCmdCopyMemoryToImageIndirectNV"); +#endif /* defined(VK_NV_copy_memory_indirect) */ +#if defined(VK_NV_cuda_kernel_launch) + vkCmdCudaLaunchKernelNV = (PFN_vkCmdCudaLaunchKernelNV)load(context, "vkCmdCudaLaunchKernelNV"); + vkCreateCudaFunctionNV = (PFN_vkCreateCudaFunctionNV)load(context, "vkCreateCudaFunctionNV"); + vkCreateCudaModuleNV = (PFN_vkCreateCudaModuleNV)load(context, "vkCreateCudaModuleNV"); + vkDestroyCudaFunctionNV = (PFN_vkDestroyCudaFunctionNV)load(context, 
"vkDestroyCudaFunctionNV"); + vkDestroyCudaModuleNV = (PFN_vkDestroyCudaModuleNV)load(context, "vkDestroyCudaModuleNV"); + vkGetCudaModuleCacheNV = (PFN_vkGetCudaModuleCacheNV)load(context, "vkGetCudaModuleCacheNV"); +#endif /* defined(VK_NV_cuda_kernel_launch) */ #if defined(VK_NV_device_diagnostic_checkpoints) vkCmdSetCheckpointNV = (PFN_vkCmdSetCheckpointNV)load(context, "vkCmdSetCheckpointNV"); vkGetQueueCheckpointDataNV = (PFN_vkGetQueueCheckpointDataNV)load(context, "vkGetQueueCheckpointDataNV"); #endif /* defined(VK_NV_device_diagnostic_checkpoints) */ +#if defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) + vkGetQueueCheckpointData2NV = (PFN_vkGetQueueCheckpointData2NV)load(context, "vkGetQueueCheckpointData2NV"); +#endif /* defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_NV_device_generated_commands) + vkCmdBindPipelineShaderGroupNV = (PFN_vkCmdBindPipelineShaderGroupNV)load(context, "vkCmdBindPipelineShaderGroupNV"); + vkCmdExecuteGeneratedCommandsNV = (PFN_vkCmdExecuteGeneratedCommandsNV)load(context, "vkCmdExecuteGeneratedCommandsNV"); + vkCmdPreprocessGeneratedCommandsNV = (PFN_vkCmdPreprocessGeneratedCommandsNV)load(context, "vkCmdPreprocessGeneratedCommandsNV"); + vkCreateIndirectCommandsLayoutNV = (PFN_vkCreateIndirectCommandsLayoutNV)load(context, "vkCreateIndirectCommandsLayoutNV"); + vkDestroyIndirectCommandsLayoutNV = (PFN_vkDestroyIndirectCommandsLayoutNV)load(context, "vkDestroyIndirectCommandsLayoutNV"); + vkGetGeneratedCommandsMemoryRequirementsNV = (PFN_vkGetGeneratedCommandsMemoryRequirementsNV)load(context, "vkGetGeneratedCommandsMemoryRequirementsNV"); +#endif /* defined(VK_NV_device_generated_commands) */ +#if defined(VK_NV_device_generated_commands_compute) + vkCmdUpdatePipelineIndirectBufferNV = (PFN_vkCmdUpdatePipelineIndirectBufferNV)load(context, "vkCmdUpdatePipelineIndirectBufferNV"); + 
vkGetPipelineIndirectDeviceAddressNV = (PFN_vkGetPipelineIndirectDeviceAddressNV)load(context, "vkGetPipelineIndirectDeviceAddressNV"); + vkGetPipelineIndirectMemoryRequirementsNV = (PFN_vkGetPipelineIndirectMemoryRequirementsNV)load(context, "vkGetPipelineIndirectMemoryRequirementsNV"); +#endif /* defined(VK_NV_device_generated_commands_compute) */ +#if defined(VK_NV_external_memory_rdma) + vkGetMemoryRemoteAddressNV = (PFN_vkGetMemoryRemoteAddressNV)load(context, "vkGetMemoryRemoteAddressNV"); +#endif /* defined(VK_NV_external_memory_rdma) */ #if defined(VK_NV_external_memory_win32) vkGetMemoryWin32HandleNV = (PFN_vkGetMemoryWin32HandleNV)load(context, "vkGetMemoryWin32HandleNV"); #endif /* defined(VK_NV_external_memory_win32) */ +#if defined(VK_NV_fragment_shading_rate_enums) + vkCmdSetFragmentShadingRateEnumNV = (PFN_vkCmdSetFragmentShadingRateEnumNV)load(context, "vkCmdSetFragmentShadingRateEnumNV"); +#endif /* defined(VK_NV_fragment_shading_rate_enums) */ +#if defined(VK_NV_low_latency2) + vkGetLatencyTimingsNV = (PFN_vkGetLatencyTimingsNV)load(context, "vkGetLatencyTimingsNV"); + vkLatencySleepNV = (PFN_vkLatencySleepNV)load(context, "vkLatencySleepNV"); + vkQueueNotifyOutOfBandNV = (PFN_vkQueueNotifyOutOfBandNV)load(context, "vkQueueNotifyOutOfBandNV"); + vkSetLatencyMarkerNV = (PFN_vkSetLatencyMarkerNV)load(context, "vkSetLatencyMarkerNV"); + vkSetLatencySleepModeNV = (PFN_vkSetLatencySleepModeNV)load(context, "vkSetLatencySleepModeNV"); +#endif /* defined(VK_NV_low_latency2) */ +#if defined(VK_NV_memory_decompression) + vkCmdDecompressMemoryIndirectCountNV = (PFN_vkCmdDecompressMemoryIndirectCountNV)load(context, "vkCmdDecompressMemoryIndirectCountNV"); + vkCmdDecompressMemoryNV = (PFN_vkCmdDecompressMemoryNV)load(context, "vkCmdDecompressMemoryNV"); +#endif /* defined(VK_NV_memory_decompression) */ #if defined(VK_NV_mesh_shader) - vkCmdDrawMeshTasksIndirectCountNV = (PFN_vkCmdDrawMeshTasksIndirectCountNV)load(context, 
"vkCmdDrawMeshTasksIndirectCountNV"); vkCmdDrawMeshTasksIndirectNV = (PFN_vkCmdDrawMeshTasksIndirectNV)load(context, "vkCmdDrawMeshTasksIndirectNV"); vkCmdDrawMeshTasksNV = (PFN_vkCmdDrawMeshTasksNV)load(context, "vkCmdDrawMeshTasksNV"); #endif /* defined(VK_NV_mesh_shader) */ +#if defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) + vkCmdDrawMeshTasksIndirectCountNV = (PFN_vkCmdDrawMeshTasksIndirectCountNV)load(context, "vkCmdDrawMeshTasksIndirectCountNV"); +#endif /* defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_NV_optical_flow) + vkBindOpticalFlowSessionImageNV = (PFN_vkBindOpticalFlowSessionImageNV)load(context, "vkBindOpticalFlowSessionImageNV"); + vkCmdOpticalFlowExecuteNV = (PFN_vkCmdOpticalFlowExecuteNV)load(context, "vkCmdOpticalFlowExecuteNV"); + vkCreateOpticalFlowSessionNV = (PFN_vkCreateOpticalFlowSessionNV)load(context, "vkCreateOpticalFlowSessionNV"); + vkDestroyOpticalFlowSessionNV = (PFN_vkDestroyOpticalFlowSessionNV)load(context, "vkDestroyOpticalFlowSessionNV"); +#endif /* defined(VK_NV_optical_flow) */ #if defined(VK_NV_ray_tracing) vkBindAccelerationStructureMemoryNV = (PFN_vkBindAccelerationStructureMemoryNV)load(context, "vkBindAccelerationStructureMemoryNV"); vkCmdBuildAccelerationStructureNV = (PFN_vkCmdBuildAccelerationStructureNV)load(context, "vkCmdBuildAccelerationStructureNV"); @@ -598,6 +1205,9 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkGetAccelerationStructureMemoryRequirementsNV = (PFN_vkGetAccelerationStructureMemoryRequirementsNV)load(context, "vkGetAccelerationStructureMemoryRequirementsNV"); vkGetRayTracingShaderGroupHandlesNV = (PFN_vkGetRayTracingShaderGroupHandlesNV)load(context, "vkGetRayTracingShaderGroupHandlesNV"); #endif /* defined(VK_NV_ray_tracing) */ +#if defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 + vkCmdSetExclusiveScissorEnableNV 
= (PFN_vkCmdSetExclusiveScissorEnableNV)load(context, "vkCmdSetExclusiveScissorEnableNV"); +#endif /* defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 */ #if defined(VK_NV_scissor_exclusive) vkCmdSetExclusiveScissorNV = (PFN_vkCmdSetExclusiveScissorNV)load(context, "vkCmdSetExclusiveScissorNV"); #endif /* defined(VK_NV_scissor_exclusive) */ @@ -606,9 +1216,115 @@ static void volkGenLoadDevice(void* context, PFN_vkVoidFunction (*load)(void*, c vkCmdSetCoarseSampleOrderNV = (PFN_vkCmdSetCoarseSampleOrderNV)load(context, "vkCmdSetCoarseSampleOrderNV"); vkCmdSetViewportShadingRatePaletteNV = (PFN_vkCmdSetViewportShadingRatePaletteNV)load(context, "vkCmdSetViewportShadingRatePaletteNV"); #endif /* defined(VK_NV_shading_rate_image) */ -#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) +#if defined(VK_QCOM_tile_properties) + vkGetDynamicRenderingTilePropertiesQCOM = (PFN_vkGetDynamicRenderingTilePropertiesQCOM)load(context, "vkGetDynamicRenderingTilePropertiesQCOM"); + vkGetFramebufferTilePropertiesQCOM = (PFN_vkGetFramebufferTilePropertiesQCOM)load(context, "vkGetFramebufferTilePropertiesQCOM"); +#endif /* defined(VK_QCOM_tile_properties) */ +#if defined(VK_QNX_external_memory_screen_buffer) + vkGetScreenBufferPropertiesQNX = (PFN_vkGetScreenBufferPropertiesQNX)load(context, "vkGetScreenBufferPropertiesQNX"); +#endif /* defined(VK_QNX_external_memory_screen_buffer) */ +#if defined(VK_VALVE_descriptor_set_host_mapping) + vkGetDescriptorSetHostMappingVALVE = (PFN_vkGetDescriptorSetHostMappingVALVE)load(context, "vkGetDescriptorSetHostMappingVALVE"); + vkGetDescriptorSetLayoutHostMappingInfoVALVE = (PFN_vkGetDescriptorSetLayoutHostMappingInfoVALVE)load(context, "vkGetDescriptorSetLayoutHostMappingInfoVALVE"); +#endif /* defined(VK_VALVE_descriptor_set_host_mapping) */ +#if (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && 
defined(VK_EXT_depth_clamp_control)) + vkCmdSetDepthClampRangeEXT = (PFN_vkCmdSetDepthClampRangeEXT)load(context, "vkCmdSetDepthClampRangeEXT"); +#endif /* (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) */ +#if (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) + vkCmdBindVertexBuffers2EXT = (PFN_vkCmdBindVertexBuffers2EXT)load(context, "vkCmdBindVertexBuffers2EXT"); + vkCmdSetCullModeEXT = (PFN_vkCmdSetCullModeEXT)load(context, "vkCmdSetCullModeEXT"); + vkCmdSetDepthBoundsTestEnableEXT = (PFN_vkCmdSetDepthBoundsTestEnableEXT)load(context, "vkCmdSetDepthBoundsTestEnableEXT"); + vkCmdSetDepthCompareOpEXT = (PFN_vkCmdSetDepthCompareOpEXT)load(context, "vkCmdSetDepthCompareOpEXT"); + vkCmdSetDepthTestEnableEXT = (PFN_vkCmdSetDepthTestEnableEXT)load(context, "vkCmdSetDepthTestEnableEXT"); + vkCmdSetDepthWriteEnableEXT = (PFN_vkCmdSetDepthWriteEnableEXT)load(context, "vkCmdSetDepthWriteEnableEXT"); + vkCmdSetFrontFaceEXT = (PFN_vkCmdSetFrontFaceEXT)load(context, "vkCmdSetFrontFaceEXT"); + vkCmdSetPrimitiveTopologyEXT = (PFN_vkCmdSetPrimitiveTopologyEXT)load(context, "vkCmdSetPrimitiveTopologyEXT"); + vkCmdSetScissorWithCountEXT = (PFN_vkCmdSetScissorWithCountEXT)load(context, "vkCmdSetScissorWithCountEXT"); + vkCmdSetStencilOpEXT = (PFN_vkCmdSetStencilOpEXT)load(context, "vkCmdSetStencilOpEXT"); + vkCmdSetStencilTestEnableEXT = (PFN_vkCmdSetStencilTestEnableEXT)load(context, "vkCmdSetStencilTestEnableEXT"); + vkCmdSetViewportWithCountEXT = (PFN_vkCmdSetViewportWithCountEXT)load(context, "vkCmdSetViewportWithCountEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) + vkCmdSetDepthBiasEnableEXT = (PFN_vkCmdSetDepthBiasEnableEXT)load(context, "vkCmdSetDepthBiasEnableEXT"); + vkCmdSetLogicOpEXT = (PFN_vkCmdSetLogicOpEXT)load(context, "vkCmdSetLogicOpEXT"); + 
vkCmdSetPatchControlPointsEXT = (PFN_vkCmdSetPatchControlPointsEXT)load(context, "vkCmdSetPatchControlPointsEXT"); + vkCmdSetPrimitiveRestartEnableEXT = (PFN_vkCmdSetPrimitiveRestartEnableEXT)load(context, "vkCmdSetPrimitiveRestartEnableEXT"); + vkCmdSetRasterizerDiscardEnableEXT = (PFN_vkCmdSetRasterizerDiscardEnableEXT)load(context, "vkCmdSetRasterizerDiscardEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) + vkCmdSetAlphaToCoverageEnableEXT = (PFN_vkCmdSetAlphaToCoverageEnableEXT)load(context, "vkCmdSetAlphaToCoverageEnableEXT"); + vkCmdSetAlphaToOneEnableEXT = (PFN_vkCmdSetAlphaToOneEnableEXT)load(context, "vkCmdSetAlphaToOneEnableEXT"); + vkCmdSetColorBlendEnableEXT = (PFN_vkCmdSetColorBlendEnableEXT)load(context, "vkCmdSetColorBlendEnableEXT"); + vkCmdSetColorBlendEquationEXT = (PFN_vkCmdSetColorBlendEquationEXT)load(context, "vkCmdSetColorBlendEquationEXT"); + vkCmdSetColorWriteMaskEXT = (PFN_vkCmdSetColorWriteMaskEXT)load(context, "vkCmdSetColorWriteMaskEXT"); + vkCmdSetDepthClampEnableEXT = (PFN_vkCmdSetDepthClampEnableEXT)load(context, "vkCmdSetDepthClampEnableEXT"); + vkCmdSetLogicOpEnableEXT = (PFN_vkCmdSetLogicOpEnableEXT)load(context, "vkCmdSetLogicOpEnableEXT"); + vkCmdSetPolygonModeEXT = (PFN_vkCmdSetPolygonModeEXT)load(context, "vkCmdSetPolygonModeEXT"); + vkCmdSetRasterizationSamplesEXT = (PFN_vkCmdSetRasterizationSamplesEXT)load(context, "vkCmdSetRasterizationSamplesEXT"); + vkCmdSetSampleMaskEXT = (PFN_vkCmdSetSampleMaskEXT)load(context, "vkCmdSetSampleMaskEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) + vkCmdSetTessellationDomainOriginEXT = (PFN_vkCmdSetTessellationDomainOriginEXT)load(context, 
"vkCmdSetTessellationDomainOriginEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) + vkCmdSetRasterizationStreamEXT = (PFN_vkCmdSetRasterizationStreamEXT)load(context, "vkCmdSetRasterizationStreamEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) + vkCmdSetConservativeRasterizationModeEXT = (PFN_vkCmdSetConservativeRasterizationModeEXT)load(context, "vkCmdSetConservativeRasterizationModeEXT"); + vkCmdSetExtraPrimitiveOverestimationSizeEXT = (PFN_vkCmdSetExtraPrimitiveOverestimationSizeEXT)load(context, "vkCmdSetExtraPrimitiveOverestimationSizeEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) + vkCmdSetDepthClipEnableEXT = (PFN_vkCmdSetDepthClipEnableEXT)load(context, "vkCmdSetDepthClipEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) + vkCmdSetSampleLocationsEnableEXT = (PFN_vkCmdSetSampleLocationsEnableEXT)load(context, 
"vkCmdSetSampleLocationsEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) + vkCmdSetColorBlendAdvancedEXT = (PFN_vkCmdSetColorBlendAdvancedEXT)load(context, "vkCmdSetColorBlendAdvancedEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) + vkCmdSetProvokingVertexModeEXT = (PFN_vkCmdSetProvokingVertexModeEXT)load(context, "vkCmdSetProvokingVertexModeEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) + vkCmdSetLineRasterizationModeEXT = (PFN_vkCmdSetLineRasterizationModeEXT)load(context, "vkCmdSetLineRasterizationModeEXT"); + vkCmdSetLineStippleEnableEXT = (PFN_vkCmdSetLineStippleEnableEXT)load(context, "vkCmdSetLineStippleEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) + vkCmdSetDepthClipNegativeOneToOneEXT = (PFN_vkCmdSetDepthClipNegativeOneToOneEXT)load(context, "vkCmdSetDepthClipNegativeOneToOneEXT"); +#endif /* 
(defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) + vkCmdSetViewportWScalingEnableNV = (PFN_vkCmdSetViewportWScalingEnableNV)load(context, "vkCmdSetViewportWScalingEnableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) + vkCmdSetViewportSwizzleNV = (PFN_vkCmdSetViewportSwizzleNV)load(context, "vkCmdSetViewportSwizzleNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) + vkCmdSetCoverageToColorEnableNV = (PFN_vkCmdSetCoverageToColorEnableNV)load(context, "vkCmdSetCoverageToColorEnableNV"); + vkCmdSetCoverageToColorLocationNV = (PFN_vkCmdSetCoverageToColorLocationNV)load(context, "vkCmdSetCoverageToColorLocationNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) + vkCmdSetCoverageModulationModeNV = (PFN_vkCmdSetCoverageModulationModeNV)load(context, "vkCmdSetCoverageModulationModeNV"); + vkCmdSetCoverageModulationTableEnableNV = 
(PFN_vkCmdSetCoverageModulationTableEnableNV)load(context, "vkCmdSetCoverageModulationTableEnableNV"); + vkCmdSetCoverageModulationTableNV = (PFN_vkCmdSetCoverageModulationTableNV)load(context, "vkCmdSetCoverageModulationTableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) + vkCmdSetShadingRateImageEnableNV = (PFN_vkCmdSetShadingRateImageEnableNV)load(context, "vkCmdSetShadingRateImageEnableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) + vkCmdSetRepresentativeFragmentTestEnableNV = (PFN_vkCmdSetRepresentativeFragmentTestEnableNV)load(context, "vkCmdSetRepresentativeFragmentTestEnableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) + vkCmdSetCoverageReductionModeNV = (PFN_vkCmdSetCoverageReductionModeNV)load(context, "vkCmdSetCoverageReductionModeNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) */ +#if (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) + vkGetImageSubresourceLayout2EXT = 
(PFN_vkGetImageSubresourceLayout2EXT)load(context, "vkGetImageSubresourceLayout2EXT"); +#endif /* (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) */ +#if (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) + vkCmdSetVertexInputEXT = (PFN_vkCmdSetVertexInputEXT)load(context, "vkCmdSetVertexInputEXT"); +#endif /* (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) */ +#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) vkCmdPushDescriptorSetWithTemplateKHR = (PFN_vkCmdPushDescriptorSetWithTemplateKHR)load(context, "vkCmdPushDescriptorSetWithTemplateKHR"); -#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) */ +#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) */ #if (defined(VK_KHR_device_group) && defined(VK_KHR_surface)) || (defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)) vkGetDeviceGroupPresentCapabilitiesKHR = (PFN_vkGetDeviceGroupPresentCapabilitiesKHR)load(context, "vkGetDeviceGroupPresentCapabilitiesKHR"); vkGetDeviceGroupSurfacePresentModesKHR = (PFN_vkGetDeviceGroupSurfacePresentModesKHR)load(context, "vkGetDeviceGroupSurfacePresentModesKHR"); @@ -762,9 +1478,101 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkTrimCommandPool = (PFN_vkTrimCommandPool)load(context, "vkTrimCommandPool"); table->vkUpdateDescriptorSetWithTemplate = (PFN_vkUpdateDescriptorSetWithTemplate)load(context, "vkUpdateDescriptorSetWithTemplate"); #endif /* defined(VK_VERSION_1_1) */ +#if defined(VK_VERSION_1_2) + table->vkCmdBeginRenderPass2 = 
(PFN_vkCmdBeginRenderPass2)load(context, "vkCmdBeginRenderPass2"); + table->vkCmdDrawIndexedIndirectCount = (PFN_vkCmdDrawIndexedIndirectCount)load(context, "vkCmdDrawIndexedIndirectCount"); + table->vkCmdDrawIndirectCount = (PFN_vkCmdDrawIndirectCount)load(context, "vkCmdDrawIndirectCount"); + table->vkCmdEndRenderPass2 = (PFN_vkCmdEndRenderPass2)load(context, "vkCmdEndRenderPass2"); + table->vkCmdNextSubpass2 = (PFN_vkCmdNextSubpass2)load(context, "vkCmdNextSubpass2"); + table->vkCreateRenderPass2 = (PFN_vkCreateRenderPass2)load(context, "vkCreateRenderPass2"); + table->vkGetBufferDeviceAddress = (PFN_vkGetBufferDeviceAddress)load(context, "vkGetBufferDeviceAddress"); + table->vkGetBufferOpaqueCaptureAddress = (PFN_vkGetBufferOpaqueCaptureAddress)load(context, "vkGetBufferOpaqueCaptureAddress"); + table->vkGetDeviceMemoryOpaqueCaptureAddress = (PFN_vkGetDeviceMemoryOpaqueCaptureAddress)load(context, "vkGetDeviceMemoryOpaqueCaptureAddress"); + table->vkGetSemaphoreCounterValue = (PFN_vkGetSemaphoreCounterValue)load(context, "vkGetSemaphoreCounterValue"); + table->vkResetQueryPool = (PFN_vkResetQueryPool)load(context, "vkResetQueryPool"); + table->vkSignalSemaphore = (PFN_vkSignalSemaphore)load(context, "vkSignalSemaphore"); + table->vkWaitSemaphores = (PFN_vkWaitSemaphores)load(context, "vkWaitSemaphores"); +#endif /* defined(VK_VERSION_1_2) */ +#if defined(VK_VERSION_1_3) + table->vkCmdBeginRendering = (PFN_vkCmdBeginRendering)load(context, "vkCmdBeginRendering"); + table->vkCmdBindVertexBuffers2 = (PFN_vkCmdBindVertexBuffers2)load(context, "vkCmdBindVertexBuffers2"); + table->vkCmdBlitImage2 = (PFN_vkCmdBlitImage2)load(context, "vkCmdBlitImage2"); + table->vkCmdCopyBuffer2 = (PFN_vkCmdCopyBuffer2)load(context, "vkCmdCopyBuffer2"); + table->vkCmdCopyBufferToImage2 = (PFN_vkCmdCopyBufferToImage2)load(context, "vkCmdCopyBufferToImage2"); + table->vkCmdCopyImage2 = (PFN_vkCmdCopyImage2)load(context, "vkCmdCopyImage2"); + table->vkCmdCopyImageToBuffer2 = 
(PFN_vkCmdCopyImageToBuffer2)load(context, "vkCmdCopyImageToBuffer2"); + table->vkCmdEndRendering = (PFN_vkCmdEndRendering)load(context, "vkCmdEndRendering"); + table->vkCmdPipelineBarrier2 = (PFN_vkCmdPipelineBarrier2)load(context, "vkCmdPipelineBarrier2"); + table->vkCmdResetEvent2 = (PFN_vkCmdResetEvent2)load(context, "vkCmdResetEvent2"); + table->vkCmdResolveImage2 = (PFN_vkCmdResolveImage2)load(context, "vkCmdResolveImage2"); + table->vkCmdSetCullMode = (PFN_vkCmdSetCullMode)load(context, "vkCmdSetCullMode"); + table->vkCmdSetDepthBiasEnable = (PFN_vkCmdSetDepthBiasEnable)load(context, "vkCmdSetDepthBiasEnable"); + table->vkCmdSetDepthBoundsTestEnable = (PFN_vkCmdSetDepthBoundsTestEnable)load(context, "vkCmdSetDepthBoundsTestEnable"); + table->vkCmdSetDepthCompareOp = (PFN_vkCmdSetDepthCompareOp)load(context, "vkCmdSetDepthCompareOp"); + table->vkCmdSetDepthTestEnable = (PFN_vkCmdSetDepthTestEnable)load(context, "vkCmdSetDepthTestEnable"); + table->vkCmdSetDepthWriteEnable = (PFN_vkCmdSetDepthWriteEnable)load(context, "vkCmdSetDepthWriteEnable"); + table->vkCmdSetEvent2 = (PFN_vkCmdSetEvent2)load(context, "vkCmdSetEvent2"); + table->vkCmdSetFrontFace = (PFN_vkCmdSetFrontFace)load(context, "vkCmdSetFrontFace"); + table->vkCmdSetPrimitiveRestartEnable = (PFN_vkCmdSetPrimitiveRestartEnable)load(context, "vkCmdSetPrimitiveRestartEnable"); + table->vkCmdSetPrimitiveTopology = (PFN_vkCmdSetPrimitiveTopology)load(context, "vkCmdSetPrimitiveTopology"); + table->vkCmdSetRasterizerDiscardEnable = (PFN_vkCmdSetRasterizerDiscardEnable)load(context, "vkCmdSetRasterizerDiscardEnable"); + table->vkCmdSetScissorWithCount = (PFN_vkCmdSetScissorWithCount)load(context, "vkCmdSetScissorWithCount"); + table->vkCmdSetStencilOp = (PFN_vkCmdSetStencilOp)load(context, "vkCmdSetStencilOp"); + table->vkCmdSetStencilTestEnable = (PFN_vkCmdSetStencilTestEnable)load(context, "vkCmdSetStencilTestEnable"); + table->vkCmdSetViewportWithCount = (PFN_vkCmdSetViewportWithCount)load(context, 
"vkCmdSetViewportWithCount"); + table->vkCmdWaitEvents2 = (PFN_vkCmdWaitEvents2)load(context, "vkCmdWaitEvents2"); + table->vkCmdWriteTimestamp2 = (PFN_vkCmdWriteTimestamp2)load(context, "vkCmdWriteTimestamp2"); + table->vkCreatePrivateDataSlot = (PFN_vkCreatePrivateDataSlot)load(context, "vkCreatePrivateDataSlot"); + table->vkDestroyPrivateDataSlot = (PFN_vkDestroyPrivateDataSlot)load(context, "vkDestroyPrivateDataSlot"); + table->vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)load(context, "vkGetDeviceBufferMemoryRequirements"); + table->vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)load(context, "vkGetDeviceImageMemoryRequirements"); + table->vkGetDeviceImageSparseMemoryRequirements = (PFN_vkGetDeviceImageSparseMemoryRequirements)load(context, "vkGetDeviceImageSparseMemoryRequirements"); + table->vkGetPrivateData = (PFN_vkGetPrivateData)load(context, "vkGetPrivateData"); + table->vkQueueSubmit2 = (PFN_vkQueueSubmit2)load(context, "vkQueueSubmit2"); + table->vkSetPrivateData = (PFN_vkSetPrivateData)load(context, "vkSetPrivateData"); +#endif /* defined(VK_VERSION_1_3) */ +#if defined(VK_VERSION_1_4) + table->vkCmdBindDescriptorSets2 = (PFN_vkCmdBindDescriptorSets2)load(context, "vkCmdBindDescriptorSets2"); + table->vkCmdBindIndexBuffer2 = (PFN_vkCmdBindIndexBuffer2)load(context, "vkCmdBindIndexBuffer2"); + table->vkCmdPushConstants2 = (PFN_vkCmdPushConstants2)load(context, "vkCmdPushConstants2"); + table->vkCmdPushDescriptorSet = (PFN_vkCmdPushDescriptorSet)load(context, "vkCmdPushDescriptorSet"); + table->vkCmdPushDescriptorSet2 = (PFN_vkCmdPushDescriptorSet2)load(context, "vkCmdPushDescriptorSet2"); + table->vkCmdPushDescriptorSetWithTemplate = (PFN_vkCmdPushDescriptorSetWithTemplate)load(context, "vkCmdPushDescriptorSetWithTemplate"); + table->vkCmdPushDescriptorSetWithTemplate2 = (PFN_vkCmdPushDescriptorSetWithTemplate2)load(context, "vkCmdPushDescriptorSetWithTemplate2"); + 
table->vkCmdSetLineStipple = (PFN_vkCmdSetLineStipple)load(context, "vkCmdSetLineStipple"); + table->vkCmdSetRenderingAttachmentLocations = (PFN_vkCmdSetRenderingAttachmentLocations)load(context, "vkCmdSetRenderingAttachmentLocations"); + table->vkCmdSetRenderingInputAttachmentIndices = (PFN_vkCmdSetRenderingInputAttachmentIndices)load(context, "vkCmdSetRenderingInputAttachmentIndices"); + table->vkCopyImageToImage = (PFN_vkCopyImageToImage)load(context, "vkCopyImageToImage"); + table->vkCopyImageToMemory = (PFN_vkCopyImageToMemory)load(context, "vkCopyImageToMemory"); + table->vkCopyMemoryToImage = (PFN_vkCopyMemoryToImage)load(context, "vkCopyMemoryToImage"); + table->vkGetDeviceImageSubresourceLayout = (PFN_vkGetDeviceImageSubresourceLayout)load(context, "vkGetDeviceImageSubresourceLayout"); + table->vkGetImageSubresourceLayout2 = (PFN_vkGetImageSubresourceLayout2)load(context, "vkGetImageSubresourceLayout2"); + table->vkGetRenderingAreaGranularity = (PFN_vkGetRenderingAreaGranularity)load(context, "vkGetRenderingAreaGranularity"); + table->vkMapMemory2 = (PFN_vkMapMemory2)load(context, "vkMapMemory2"); + table->vkTransitionImageLayout = (PFN_vkTransitionImageLayout)load(context, "vkTransitionImageLayout"); + table->vkUnmapMemory2 = (PFN_vkUnmapMemory2)load(context, "vkUnmapMemory2"); +#endif /* defined(VK_VERSION_1_4) */ +#if defined(VK_AMDX_shader_enqueue) + table->vkCmdDispatchGraphAMDX = (PFN_vkCmdDispatchGraphAMDX)load(context, "vkCmdDispatchGraphAMDX"); + table->vkCmdDispatchGraphIndirectAMDX = (PFN_vkCmdDispatchGraphIndirectAMDX)load(context, "vkCmdDispatchGraphIndirectAMDX"); + table->vkCmdDispatchGraphIndirectCountAMDX = (PFN_vkCmdDispatchGraphIndirectCountAMDX)load(context, "vkCmdDispatchGraphIndirectCountAMDX"); + table->vkCmdInitializeGraphScratchMemoryAMDX = (PFN_vkCmdInitializeGraphScratchMemoryAMDX)load(context, "vkCmdInitializeGraphScratchMemoryAMDX"); + table->vkCreateExecutionGraphPipelinesAMDX = 
(PFN_vkCreateExecutionGraphPipelinesAMDX)load(context, "vkCreateExecutionGraphPipelinesAMDX"); + table->vkGetExecutionGraphPipelineNodeIndexAMDX = (PFN_vkGetExecutionGraphPipelineNodeIndexAMDX)load(context, "vkGetExecutionGraphPipelineNodeIndexAMDX"); + table->vkGetExecutionGraphPipelineScratchSizeAMDX = (PFN_vkGetExecutionGraphPipelineScratchSizeAMDX)load(context, "vkGetExecutionGraphPipelineScratchSizeAMDX"); +#endif /* defined(VK_AMDX_shader_enqueue) */ +#if defined(VK_AMD_anti_lag) + table->vkAntiLagUpdateAMD = (PFN_vkAntiLagUpdateAMD)load(context, "vkAntiLagUpdateAMD"); +#endif /* defined(VK_AMD_anti_lag) */ #if defined(VK_AMD_buffer_marker) table->vkCmdWriteBufferMarkerAMD = (PFN_vkCmdWriteBufferMarkerAMD)load(context, "vkCmdWriteBufferMarkerAMD"); #endif /* defined(VK_AMD_buffer_marker) */ +#if defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) + table->vkCmdWriteBufferMarker2AMD = (PFN_vkCmdWriteBufferMarker2AMD)load(context, "vkCmdWriteBufferMarker2AMD"); +#endif /* defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_AMD_display_native_hdr) + table->vkSetLocalDimmingAMD = (PFN_vkSetLocalDimmingAMD)load(context, "vkSetLocalDimmingAMD"); +#endif /* defined(VK_AMD_display_native_hdr) */ #if defined(VK_AMD_draw_indirect_count) table->vkCmdDrawIndexedIndirectCountAMD = (PFN_vkCmdDrawIndexedIndirectCountAMD)load(context, "vkCmdDrawIndexedIndirectCountAMD"); table->vkCmdDrawIndirectCountAMD = (PFN_vkCmdDrawIndirectCountAMD)load(context, "vkCmdDrawIndirectCountAMD"); @@ -776,12 +1584,18 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkGetAndroidHardwareBufferPropertiesANDROID = (PFN_vkGetAndroidHardwareBufferPropertiesANDROID)load(context, "vkGetAndroidHardwareBufferPropertiesANDROID"); table->vkGetMemoryAndroidHardwareBufferANDROID = (PFN_vkGetMemoryAndroidHardwareBufferANDROID)load(context, 
"vkGetMemoryAndroidHardwareBufferANDROID"); #endif /* defined(VK_ANDROID_external_memory_android_hardware_buffer) */ +#if defined(VK_EXT_attachment_feedback_loop_dynamic_state) + table->vkCmdSetAttachmentFeedbackLoopEnableEXT = (PFN_vkCmdSetAttachmentFeedbackLoopEnableEXT)load(context, "vkCmdSetAttachmentFeedbackLoopEnableEXT"); +#endif /* defined(VK_EXT_attachment_feedback_loop_dynamic_state) */ #if defined(VK_EXT_buffer_device_address) table->vkGetBufferDeviceAddressEXT = (PFN_vkGetBufferDeviceAddressEXT)load(context, "vkGetBufferDeviceAddressEXT"); #endif /* defined(VK_EXT_buffer_device_address) */ #if defined(VK_EXT_calibrated_timestamps) table->vkGetCalibratedTimestampsEXT = (PFN_vkGetCalibratedTimestampsEXT)load(context, "vkGetCalibratedTimestampsEXT"); #endif /* defined(VK_EXT_calibrated_timestamps) */ +#if defined(VK_EXT_color_write_enable) + table->vkCmdSetColorWriteEnableEXT = (PFN_vkCmdSetColorWriteEnableEXT)load(context, "vkCmdSetColorWriteEnableEXT"); +#endif /* defined(VK_EXT_color_write_enable) */ #if defined(VK_EXT_conditional_rendering) table->vkCmdBeginConditionalRenderingEXT = (PFN_vkCmdBeginConditionalRenderingEXT)load(context, "vkCmdBeginConditionalRenderingEXT"); table->vkCmdEndConditionalRenderingEXT = (PFN_vkCmdEndConditionalRenderingEXT)load(context, "vkCmdEndConditionalRenderingEXT"); @@ -793,19 +1607,45 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkDebugMarkerSetObjectNameEXT = (PFN_vkDebugMarkerSetObjectNameEXT)load(context, "vkDebugMarkerSetObjectNameEXT"); table->vkDebugMarkerSetObjectTagEXT = (PFN_vkDebugMarkerSetObjectTagEXT)load(context, "vkDebugMarkerSetObjectTagEXT"); #endif /* defined(VK_EXT_debug_marker) */ -#if defined(VK_EXT_debug_utils) - table->vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT)load(context, "vkCmdBeginDebugUtilsLabelEXT"); - table->vkCmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT)load(context, "vkCmdEndDebugUtilsLabelEXT"); - 
table->vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT)load(context, "vkCmdInsertDebugUtilsLabelEXT"); - table->vkQueueBeginDebugUtilsLabelEXT = (PFN_vkQueueBeginDebugUtilsLabelEXT)load(context, "vkQueueBeginDebugUtilsLabelEXT"); - table->vkQueueEndDebugUtilsLabelEXT = (PFN_vkQueueEndDebugUtilsLabelEXT)load(context, "vkQueueEndDebugUtilsLabelEXT"); - table->vkQueueInsertDebugUtilsLabelEXT = (PFN_vkQueueInsertDebugUtilsLabelEXT)load(context, "vkQueueInsertDebugUtilsLabelEXT"); - table->vkSetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT)load(context, "vkSetDebugUtilsObjectNameEXT"); - table->vkSetDebugUtilsObjectTagEXT = (PFN_vkSetDebugUtilsObjectTagEXT)load(context, "vkSetDebugUtilsObjectTagEXT"); -#endif /* defined(VK_EXT_debug_utils) */ +#if defined(VK_EXT_depth_bias_control) + table->vkCmdSetDepthBias2EXT = (PFN_vkCmdSetDepthBias2EXT)load(context, "vkCmdSetDepthBias2EXT"); +#endif /* defined(VK_EXT_depth_bias_control) */ +#if defined(VK_EXT_descriptor_buffer) + table->vkCmdBindDescriptorBufferEmbeddedSamplersEXT = (PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT)load(context, "vkCmdBindDescriptorBufferEmbeddedSamplersEXT"); + table->vkCmdBindDescriptorBuffersEXT = (PFN_vkCmdBindDescriptorBuffersEXT)load(context, "vkCmdBindDescriptorBuffersEXT"); + table->vkCmdSetDescriptorBufferOffsetsEXT = (PFN_vkCmdSetDescriptorBufferOffsetsEXT)load(context, "vkCmdSetDescriptorBufferOffsetsEXT"); + table->vkGetBufferOpaqueCaptureDescriptorDataEXT = (PFN_vkGetBufferOpaqueCaptureDescriptorDataEXT)load(context, "vkGetBufferOpaqueCaptureDescriptorDataEXT"); + table->vkGetDescriptorEXT = (PFN_vkGetDescriptorEXT)load(context, "vkGetDescriptorEXT"); + table->vkGetDescriptorSetLayoutBindingOffsetEXT = (PFN_vkGetDescriptorSetLayoutBindingOffsetEXT)load(context, "vkGetDescriptorSetLayoutBindingOffsetEXT"); + table->vkGetDescriptorSetLayoutSizeEXT = (PFN_vkGetDescriptorSetLayoutSizeEXT)load(context, "vkGetDescriptorSetLayoutSizeEXT"); + 
table->vkGetImageOpaqueCaptureDescriptorDataEXT = (PFN_vkGetImageOpaqueCaptureDescriptorDataEXT)load(context, "vkGetImageOpaqueCaptureDescriptorDataEXT"); + table->vkGetImageViewOpaqueCaptureDescriptorDataEXT = (PFN_vkGetImageViewOpaqueCaptureDescriptorDataEXT)load(context, "vkGetImageViewOpaqueCaptureDescriptorDataEXT"); + table->vkGetSamplerOpaqueCaptureDescriptorDataEXT = (PFN_vkGetSamplerOpaqueCaptureDescriptorDataEXT)load(context, "vkGetSamplerOpaqueCaptureDescriptorDataEXT"); +#endif /* defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) + table->vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT = (PFN_vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT)load(context, "vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT"); +#endif /* defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) */ +#if defined(VK_EXT_device_fault) + table->vkGetDeviceFaultInfoEXT = (PFN_vkGetDeviceFaultInfoEXT)load(context, "vkGetDeviceFaultInfoEXT"); +#endif /* defined(VK_EXT_device_fault) */ +#if defined(VK_EXT_device_generated_commands) + table->vkCmdExecuteGeneratedCommandsEXT = (PFN_vkCmdExecuteGeneratedCommandsEXT)load(context, "vkCmdExecuteGeneratedCommandsEXT"); + table->vkCmdPreprocessGeneratedCommandsEXT = (PFN_vkCmdPreprocessGeneratedCommandsEXT)load(context, "vkCmdPreprocessGeneratedCommandsEXT"); + table->vkCreateIndirectCommandsLayoutEXT = (PFN_vkCreateIndirectCommandsLayoutEXT)load(context, "vkCreateIndirectCommandsLayoutEXT"); + table->vkCreateIndirectExecutionSetEXT = (PFN_vkCreateIndirectExecutionSetEXT)load(context, "vkCreateIndirectExecutionSetEXT"); + table->vkDestroyIndirectCommandsLayoutEXT = (PFN_vkDestroyIndirectCommandsLayoutEXT)load(context, "vkDestroyIndirectCommandsLayoutEXT"); + table->vkDestroyIndirectExecutionSetEXT = (PFN_vkDestroyIndirectExecutionSetEXT)load(context, 
"vkDestroyIndirectExecutionSetEXT"); + table->vkGetGeneratedCommandsMemoryRequirementsEXT = (PFN_vkGetGeneratedCommandsMemoryRequirementsEXT)load(context, "vkGetGeneratedCommandsMemoryRequirementsEXT"); + table->vkUpdateIndirectExecutionSetPipelineEXT = (PFN_vkUpdateIndirectExecutionSetPipelineEXT)load(context, "vkUpdateIndirectExecutionSetPipelineEXT"); + table->vkUpdateIndirectExecutionSetShaderEXT = (PFN_vkUpdateIndirectExecutionSetShaderEXT)load(context, "vkUpdateIndirectExecutionSetShaderEXT"); +#endif /* defined(VK_EXT_device_generated_commands) */ #if defined(VK_EXT_discard_rectangles) table->vkCmdSetDiscardRectangleEXT = (PFN_vkCmdSetDiscardRectangleEXT)load(context, "vkCmdSetDiscardRectangleEXT"); #endif /* defined(VK_EXT_discard_rectangles) */ +#if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 + table->vkCmdSetDiscardRectangleEnableEXT = (PFN_vkCmdSetDiscardRectangleEnableEXT)load(context, "vkCmdSetDiscardRectangleEnableEXT"); + table->vkCmdSetDiscardRectangleModeEXT = (PFN_vkCmdSetDiscardRectangleModeEXT)load(context, "vkCmdSetDiscardRectangleModeEXT"); +#endif /* defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 */ #if defined(VK_EXT_display_control) table->vkDisplayPowerControlEXT = (PFN_vkDisplayPowerControlEXT)load(context, "vkDisplayPowerControlEXT"); table->vkGetSwapchainCounterEXT = (PFN_vkGetSwapchainCounterEXT)load(context, "vkGetSwapchainCounterEXT"); @@ -815,15 +1655,89 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, #if defined(VK_EXT_external_memory_host) table->vkGetMemoryHostPointerPropertiesEXT = (PFN_vkGetMemoryHostPointerPropertiesEXT)load(context, "vkGetMemoryHostPointerPropertiesEXT"); #endif /* defined(VK_EXT_external_memory_host) */ +#if defined(VK_EXT_full_screen_exclusive) + table->vkAcquireFullScreenExclusiveModeEXT = (PFN_vkAcquireFullScreenExclusiveModeEXT)load(context, "vkAcquireFullScreenExclusiveModeEXT"); + 
table->vkReleaseFullScreenExclusiveModeEXT = (PFN_vkReleaseFullScreenExclusiveModeEXT)load(context, "vkReleaseFullScreenExclusiveModeEXT"); +#endif /* defined(VK_EXT_full_screen_exclusive) */ +#if defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) + table->vkGetDeviceGroupSurfacePresentModes2EXT = (PFN_vkGetDeviceGroupSurfacePresentModes2EXT)load(context, "vkGetDeviceGroupSurfacePresentModes2EXT"); +#endif /* defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) */ #if defined(VK_EXT_hdr_metadata) table->vkSetHdrMetadataEXT = (PFN_vkSetHdrMetadataEXT)load(context, "vkSetHdrMetadataEXT"); #endif /* defined(VK_EXT_hdr_metadata) */ +#if defined(VK_EXT_host_image_copy) + table->vkCopyImageToImageEXT = (PFN_vkCopyImageToImageEXT)load(context, "vkCopyImageToImageEXT"); + table->vkCopyImageToMemoryEXT = (PFN_vkCopyImageToMemoryEXT)load(context, "vkCopyImageToMemoryEXT"); + table->vkCopyMemoryToImageEXT = (PFN_vkCopyMemoryToImageEXT)load(context, "vkCopyMemoryToImageEXT"); + table->vkTransitionImageLayoutEXT = (PFN_vkTransitionImageLayoutEXT)load(context, "vkTransitionImageLayoutEXT"); +#endif /* defined(VK_EXT_host_image_copy) */ +#if defined(VK_EXT_host_query_reset) + table->vkResetQueryPoolEXT = (PFN_vkResetQueryPoolEXT)load(context, "vkResetQueryPoolEXT"); +#endif /* defined(VK_EXT_host_query_reset) */ #if defined(VK_EXT_image_drm_format_modifier) table->vkGetImageDrmFormatModifierPropertiesEXT = (PFN_vkGetImageDrmFormatModifierPropertiesEXT)load(context, "vkGetImageDrmFormatModifierPropertiesEXT"); #endif /* defined(VK_EXT_image_drm_format_modifier) */ +#if defined(VK_EXT_line_rasterization) + table->vkCmdSetLineStippleEXT = (PFN_vkCmdSetLineStippleEXT)load(context, "vkCmdSetLineStippleEXT"); +#endif /* defined(VK_EXT_line_rasterization) */ +#if defined(VK_EXT_mesh_shader) + table->vkCmdDrawMeshTasksEXT = (PFN_vkCmdDrawMeshTasksEXT)load(context, "vkCmdDrawMeshTasksEXT"); + 
table->vkCmdDrawMeshTasksIndirectEXT = (PFN_vkCmdDrawMeshTasksIndirectEXT)load(context, "vkCmdDrawMeshTasksIndirectEXT"); +#endif /* defined(VK_EXT_mesh_shader) */ +#if defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) + table->vkCmdDrawMeshTasksIndirectCountEXT = (PFN_vkCmdDrawMeshTasksIndirectCountEXT)load(context, "vkCmdDrawMeshTasksIndirectCountEXT"); +#endif /* defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_EXT_metal_objects) + table->vkExportMetalObjectsEXT = (PFN_vkExportMetalObjectsEXT)load(context, "vkExportMetalObjectsEXT"); +#endif /* defined(VK_EXT_metal_objects) */ +#if defined(VK_EXT_multi_draw) + table->vkCmdDrawMultiEXT = (PFN_vkCmdDrawMultiEXT)load(context, "vkCmdDrawMultiEXT"); + table->vkCmdDrawMultiIndexedEXT = (PFN_vkCmdDrawMultiIndexedEXT)load(context, "vkCmdDrawMultiIndexedEXT"); +#endif /* defined(VK_EXT_multi_draw) */ +#if defined(VK_EXT_opacity_micromap) + table->vkBuildMicromapsEXT = (PFN_vkBuildMicromapsEXT)load(context, "vkBuildMicromapsEXT"); + table->vkCmdBuildMicromapsEXT = (PFN_vkCmdBuildMicromapsEXT)load(context, "vkCmdBuildMicromapsEXT"); + table->vkCmdCopyMemoryToMicromapEXT = (PFN_vkCmdCopyMemoryToMicromapEXT)load(context, "vkCmdCopyMemoryToMicromapEXT"); + table->vkCmdCopyMicromapEXT = (PFN_vkCmdCopyMicromapEXT)load(context, "vkCmdCopyMicromapEXT"); + table->vkCmdCopyMicromapToMemoryEXT = (PFN_vkCmdCopyMicromapToMemoryEXT)load(context, "vkCmdCopyMicromapToMemoryEXT"); + table->vkCmdWriteMicromapsPropertiesEXT = (PFN_vkCmdWriteMicromapsPropertiesEXT)load(context, "vkCmdWriteMicromapsPropertiesEXT"); + table->vkCopyMemoryToMicromapEXT = (PFN_vkCopyMemoryToMicromapEXT)load(context, "vkCopyMemoryToMicromapEXT"); + table->vkCopyMicromapEXT = (PFN_vkCopyMicromapEXT)load(context, "vkCopyMicromapEXT"); + table->vkCopyMicromapToMemoryEXT = (PFN_vkCopyMicromapToMemoryEXT)load(context, "vkCopyMicromapToMemoryEXT"); + 
table->vkCreateMicromapEXT = (PFN_vkCreateMicromapEXT)load(context, "vkCreateMicromapEXT"); + table->vkDestroyMicromapEXT = (PFN_vkDestroyMicromapEXT)load(context, "vkDestroyMicromapEXT"); + table->vkGetDeviceMicromapCompatibilityEXT = (PFN_vkGetDeviceMicromapCompatibilityEXT)load(context, "vkGetDeviceMicromapCompatibilityEXT"); + table->vkGetMicromapBuildSizesEXT = (PFN_vkGetMicromapBuildSizesEXT)load(context, "vkGetMicromapBuildSizesEXT"); + table->vkWriteMicromapsPropertiesEXT = (PFN_vkWriteMicromapsPropertiesEXT)load(context, "vkWriteMicromapsPropertiesEXT"); +#endif /* defined(VK_EXT_opacity_micromap) */ +#if defined(VK_EXT_pageable_device_local_memory) + table->vkSetDeviceMemoryPriorityEXT = (PFN_vkSetDeviceMemoryPriorityEXT)load(context, "vkSetDeviceMemoryPriorityEXT"); +#endif /* defined(VK_EXT_pageable_device_local_memory) */ +#if defined(VK_EXT_pipeline_properties) + table->vkGetPipelinePropertiesEXT = (PFN_vkGetPipelinePropertiesEXT)load(context, "vkGetPipelinePropertiesEXT"); +#endif /* defined(VK_EXT_pipeline_properties) */ +#if defined(VK_EXT_private_data) + table->vkCreatePrivateDataSlotEXT = (PFN_vkCreatePrivateDataSlotEXT)load(context, "vkCreatePrivateDataSlotEXT"); + table->vkDestroyPrivateDataSlotEXT = (PFN_vkDestroyPrivateDataSlotEXT)load(context, "vkDestroyPrivateDataSlotEXT"); + table->vkGetPrivateDataEXT = (PFN_vkGetPrivateDataEXT)load(context, "vkGetPrivateDataEXT"); + table->vkSetPrivateDataEXT = (PFN_vkSetPrivateDataEXT)load(context, "vkSetPrivateDataEXT"); +#endif /* defined(VK_EXT_private_data) */ #if defined(VK_EXT_sample_locations) table->vkCmdSetSampleLocationsEXT = (PFN_vkCmdSetSampleLocationsEXT)load(context, "vkCmdSetSampleLocationsEXT"); #endif /* defined(VK_EXT_sample_locations) */ +#if defined(VK_EXT_shader_module_identifier) + table->vkGetShaderModuleCreateInfoIdentifierEXT = (PFN_vkGetShaderModuleCreateInfoIdentifierEXT)load(context, "vkGetShaderModuleCreateInfoIdentifierEXT"); + table->vkGetShaderModuleIdentifierEXT = 
(PFN_vkGetShaderModuleIdentifierEXT)load(context, "vkGetShaderModuleIdentifierEXT"); +#endif /* defined(VK_EXT_shader_module_identifier) */ +#if defined(VK_EXT_shader_object) + table->vkCmdBindShadersEXT = (PFN_vkCmdBindShadersEXT)load(context, "vkCmdBindShadersEXT"); + table->vkCreateShadersEXT = (PFN_vkCreateShadersEXT)load(context, "vkCreateShadersEXT"); + table->vkDestroyShaderEXT = (PFN_vkDestroyShaderEXT)load(context, "vkDestroyShaderEXT"); + table->vkGetShaderBinaryDataEXT = (PFN_vkGetShaderBinaryDataEXT)load(context, "vkGetShaderBinaryDataEXT"); +#endif /* defined(VK_EXT_shader_object) */ +#if defined(VK_EXT_swapchain_maintenance1) + table->vkReleaseSwapchainImagesEXT = (PFN_vkReleaseSwapchainImagesEXT)load(context, "vkReleaseSwapchainImagesEXT"); +#endif /* defined(VK_EXT_swapchain_maintenance1) */ #if defined(VK_EXT_transform_feedback) table->vkCmdBeginQueryIndexedEXT = (PFN_vkCmdBeginQueryIndexedEXT)load(context, "vkCmdBeginQueryIndexedEXT"); table->vkCmdBeginTransformFeedbackEXT = (PFN_vkCmdBeginTransformFeedbackEXT)load(context, "vkCmdBeginTransformFeedbackEXT"); @@ -838,20 +1752,100 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkGetValidationCacheDataEXT = (PFN_vkGetValidationCacheDataEXT)load(context, "vkGetValidationCacheDataEXT"); table->vkMergeValidationCachesEXT = (PFN_vkMergeValidationCachesEXT)load(context, "vkMergeValidationCachesEXT"); #endif /* defined(VK_EXT_validation_cache) */ +#if defined(VK_FUCHSIA_buffer_collection) + table->vkCreateBufferCollectionFUCHSIA = (PFN_vkCreateBufferCollectionFUCHSIA)load(context, "vkCreateBufferCollectionFUCHSIA"); + table->vkDestroyBufferCollectionFUCHSIA = (PFN_vkDestroyBufferCollectionFUCHSIA)load(context, "vkDestroyBufferCollectionFUCHSIA"); + table->vkGetBufferCollectionPropertiesFUCHSIA = (PFN_vkGetBufferCollectionPropertiesFUCHSIA)load(context, "vkGetBufferCollectionPropertiesFUCHSIA"); + table->vkSetBufferCollectionBufferConstraintsFUCHSIA = 
(PFN_vkSetBufferCollectionBufferConstraintsFUCHSIA)load(context, "vkSetBufferCollectionBufferConstraintsFUCHSIA"); + table->vkSetBufferCollectionImageConstraintsFUCHSIA = (PFN_vkSetBufferCollectionImageConstraintsFUCHSIA)load(context, "vkSetBufferCollectionImageConstraintsFUCHSIA"); +#endif /* defined(VK_FUCHSIA_buffer_collection) */ +#if defined(VK_FUCHSIA_external_memory) + table->vkGetMemoryZirconHandleFUCHSIA = (PFN_vkGetMemoryZirconHandleFUCHSIA)load(context, "vkGetMemoryZirconHandleFUCHSIA"); + table->vkGetMemoryZirconHandlePropertiesFUCHSIA = (PFN_vkGetMemoryZirconHandlePropertiesFUCHSIA)load(context, "vkGetMemoryZirconHandlePropertiesFUCHSIA"); +#endif /* defined(VK_FUCHSIA_external_memory) */ +#if defined(VK_FUCHSIA_external_semaphore) + table->vkGetSemaphoreZirconHandleFUCHSIA = (PFN_vkGetSemaphoreZirconHandleFUCHSIA)load(context, "vkGetSemaphoreZirconHandleFUCHSIA"); + table->vkImportSemaphoreZirconHandleFUCHSIA = (PFN_vkImportSemaphoreZirconHandleFUCHSIA)load(context, "vkImportSemaphoreZirconHandleFUCHSIA"); +#endif /* defined(VK_FUCHSIA_external_semaphore) */ #if defined(VK_GOOGLE_display_timing) table->vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE)load(context, "vkGetPastPresentationTimingGOOGLE"); table->vkGetRefreshCycleDurationGOOGLE = (PFN_vkGetRefreshCycleDurationGOOGLE)load(context, "vkGetRefreshCycleDurationGOOGLE"); #endif /* defined(VK_GOOGLE_display_timing) */ +#if defined(VK_HUAWEI_cluster_culling_shader) + table->vkCmdDrawClusterHUAWEI = (PFN_vkCmdDrawClusterHUAWEI)load(context, "vkCmdDrawClusterHUAWEI"); + table->vkCmdDrawClusterIndirectHUAWEI = (PFN_vkCmdDrawClusterIndirectHUAWEI)load(context, "vkCmdDrawClusterIndirectHUAWEI"); +#endif /* defined(VK_HUAWEI_cluster_culling_shader) */ +#if defined(VK_HUAWEI_invocation_mask) + table->vkCmdBindInvocationMaskHUAWEI = (PFN_vkCmdBindInvocationMaskHUAWEI)load(context, "vkCmdBindInvocationMaskHUAWEI"); +#endif /* defined(VK_HUAWEI_invocation_mask) */ +#if 
defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 + table->vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI = (PFN_vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI)load(context, "vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI"); +#endif /* defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 */ +#if defined(VK_HUAWEI_subpass_shading) + table->vkCmdSubpassShadingHUAWEI = (PFN_vkCmdSubpassShadingHUAWEI)load(context, "vkCmdSubpassShadingHUAWEI"); +#endif /* defined(VK_HUAWEI_subpass_shading) */ +#if defined(VK_INTEL_performance_query) + table->vkAcquirePerformanceConfigurationINTEL = (PFN_vkAcquirePerformanceConfigurationINTEL)load(context, "vkAcquirePerformanceConfigurationINTEL"); + table->vkCmdSetPerformanceMarkerINTEL = (PFN_vkCmdSetPerformanceMarkerINTEL)load(context, "vkCmdSetPerformanceMarkerINTEL"); + table->vkCmdSetPerformanceOverrideINTEL = (PFN_vkCmdSetPerformanceOverrideINTEL)load(context, "vkCmdSetPerformanceOverrideINTEL"); + table->vkCmdSetPerformanceStreamMarkerINTEL = (PFN_vkCmdSetPerformanceStreamMarkerINTEL)load(context, "vkCmdSetPerformanceStreamMarkerINTEL"); + table->vkGetPerformanceParameterINTEL = (PFN_vkGetPerformanceParameterINTEL)load(context, "vkGetPerformanceParameterINTEL"); + table->vkInitializePerformanceApiINTEL = (PFN_vkInitializePerformanceApiINTEL)load(context, "vkInitializePerformanceApiINTEL"); + table->vkQueueSetPerformanceConfigurationINTEL = (PFN_vkQueueSetPerformanceConfigurationINTEL)load(context, "vkQueueSetPerformanceConfigurationINTEL"); + table->vkReleasePerformanceConfigurationINTEL = (PFN_vkReleasePerformanceConfigurationINTEL)load(context, "vkReleasePerformanceConfigurationINTEL"); + table->vkUninitializePerformanceApiINTEL = (PFN_vkUninitializePerformanceApiINTEL)load(context, "vkUninitializePerformanceApiINTEL"); +#endif /* defined(VK_INTEL_performance_query) */ +#if defined(VK_KHR_acceleration_structure) + table->vkBuildAccelerationStructuresKHR = 
(PFN_vkBuildAccelerationStructuresKHR)load(context, "vkBuildAccelerationStructuresKHR"); + table->vkCmdBuildAccelerationStructuresIndirectKHR = (PFN_vkCmdBuildAccelerationStructuresIndirectKHR)load(context, "vkCmdBuildAccelerationStructuresIndirectKHR"); + table->vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)load(context, "vkCmdBuildAccelerationStructuresKHR"); + table->vkCmdCopyAccelerationStructureKHR = (PFN_vkCmdCopyAccelerationStructureKHR)load(context, "vkCmdCopyAccelerationStructureKHR"); + table->vkCmdCopyAccelerationStructureToMemoryKHR = (PFN_vkCmdCopyAccelerationStructureToMemoryKHR)load(context, "vkCmdCopyAccelerationStructureToMemoryKHR"); + table->vkCmdCopyMemoryToAccelerationStructureKHR = (PFN_vkCmdCopyMemoryToAccelerationStructureKHR)load(context, "vkCmdCopyMemoryToAccelerationStructureKHR"); + table->vkCmdWriteAccelerationStructuresPropertiesKHR = (PFN_vkCmdWriteAccelerationStructuresPropertiesKHR)load(context, "vkCmdWriteAccelerationStructuresPropertiesKHR"); + table->vkCopyAccelerationStructureKHR = (PFN_vkCopyAccelerationStructureKHR)load(context, "vkCopyAccelerationStructureKHR"); + table->vkCopyAccelerationStructureToMemoryKHR = (PFN_vkCopyAccelerationStructureToMemoryKHR)load(context, "vkCopyAccelerationStructureToMemoryKHR"); + table->vkCopyMemoryToAccelerationStructureKHR = (PFN_vkCopyMemoryToAccelerationStructureKHR)load(context, "vkCopyMemoryToAccelerationStructureKHR"); + table->vkCreateAccelerationStructureKHR = (PFN_vkCreateAccelerationStructureKHR)load(context, "vkCreateAccelerationStructureKHR"); + table->vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)load(context, "vkDestroyAccelerationStructureKHR"); + table->vkGetAccelerationStructureBuildSizesKHR = (PFN_vkGetAccelerationStructureBuildSizesKHR)load(context, "vkGetAccelerationStructureBuildSizesKHR"); + table->vkGetAccelerationStructureDeviceAddressKHR = (PFN_vkGetAccelerationStructureDeviceAddressKHR)load(context, 
"vkGetAccelerationStructureDeviceAddressKHR"); + table->vkGetDeviceAccelerationStructureCompatibilityKHR = (PFN_vkGetDeviceAccelerationStructureCompatibilityKHR)load(context, "vkGetDeviceAccelerationStructureCompatibilityKHR"); + table->vkWriteAccelerationStructuresPropertiesKHR = (PFN_vkWriteAccelerationStructuresPropertiesKHR)load(context, "vkWriteAccelerationStructuresPropertiesKHR"); +#endif /* defined(VK_KHR_acceleration_structure) */ #if defined(VK_KHR_bind_memory2) table->vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2KHR)load(context, "vkBindBufferMemory2KHR"); table->vkBindImageMemory2KHR = (PFN_vkBindImageMemory2KHR)load(context, "vkBindImageMemory2KHR"); #endif /* defined(VK_KHR_bind_memory2) */ +#if defined(VK_KHR_buffer_device_address) + table->vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)load(context, "vkGetBufferDeviceAddressKHR"); + table->vkGetBufferOpaqueCaptureAddressKHR = (PFN_vkGetBufferOpaqueCaptureAddressKHR)load(context, "vkGetBufferOpaqueCaptureAddressKHR"); + table->vkGetDeviceMemoryOpaqueCaptureAddressKHR = (PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)load(context, "vkGetDeviceMemoryOpaqueCaptureAddressKHR"); +#endif /* defined(VK_KHR_buffer_device_address) */ +#if defined(VK_KHR_calibrated_timestamps) + table->vkGetCalibratedTimestampsKHR = (PFN_vkGetCalibratedTimestampsKHR)load(context, "vkGetCalibratedTimestampsKHR"); +#endif /* defined(VK_KHR_calibrated_timestamps) */ +#if defined(VK_KHR_copy_commands2) + table->vkCmdBlitImage2KHR = (PFN_vkCmdBlitImage2KHR)load(context, "vkCmdBlitImage2KHR"); + table->vkCmdCopyBuffer2KHR = (PFN_vkCmdCopyBuffer2KHR)load(context, "vkCmdCopyBuffer2KHR"); + table->vkCmdCopyBufferToImage2KHR = (PFN_vkCmdCopyBufferToImage2KHR)load(context, "vkCmdCopyBufferToImage2KHR"); + table->vkCmdCopyImage2KHR = (PFN_vkCmdCopyImage2KHR)load(context, "vkCmdCopyImage2KHR"); + table->vkCmdCopyImageToBuffer2KHR = (PFN_vkCmdCopyImageToBuffer2KHR)load(context, "vkCmdCopyImageToBuffer2KHR"); + 
table->vkCmdResolveImage2KHR = (PFN_vkCmdResolveImage2KHR)load(context, "vkCmdResolveImage2KHR"); +#endif /* defined(VK_KHR_copy_commands2) */ #if defined(VK_KHR_create_renderpass2) table->vkCmdBeginRenderPass2KHR = (PFN_vkCmdBeginRenderPass2KHR)load(context, "vkCmdBeginRenderPass2KHR"); table->vkCmdEndRenderPass2KHR = (PFN_vkCmdEndRenderPass2KHR)load(context, "vkCmdEndRenderPass2KHR"); table->vkCmdNextSubpass2KHR = (PFN_vkCmdNextSubpass2KHR)load(context, "vkCmdNextSubpass2KHR"); table->vkCreateRenderPass2KHR = (PFN_vkCreateRenderPass2KHR)load(context, "vkCreateRenderPass2KHR"); #endif /* defined(VK_KHR_create_renderpass2) */ +#if defined(VK_KHR_deferred_host_operations) + table->vkCreateDeferredOperationKHR = (PFN_vkCreateDeferredOperationKHR)load(context, "vkCreateDeferredOperationKHR"); + table->vkDeferredOperationJoinKHR = (PFN_vkDeferredOperationJoinKHR)load(context, "vkDeferredOperationJoinKHR"); + table->vkDestroyDeferredOperationKHR = (PFN_vkDestroyDeferredOperationKHR)load(context, "vkDestroyDeferredOperationKHR"); + table->vkGetDeferredOperationMaxConcurrencyKHR = (PFN_vkGetDeferredOperationMaxConcurrencyKHR)load(context, "vkGetDeferredOperationMaxConcurrencyKHR"); + table->vkGetDeferredOperationResultKHR = (PFN_vkGetDeferredOperationResultKHR)load(context, "vkGetDeferredOperationResultKHR"); +#endif /* defined(VK_KHR_deferred_host_operations) */ #if defined(VK_KHR_descriptor_update_template) table->vkCreateDescriptorUpdateTemplateKHR = (PFN_vkCreateDescriptorUpdateTemplateKHR)load(context, "vkCreateDescriptorUpdateTemplateKHR"); table->vkDestroyDescriptorUpdateTemplateKHR = (PFN_vkDestroyDescriptorUpdateTemplateKHR)load(context, "vkDestroyDescriptorUpdateTemplateKHR"); @@ -869,6 +1863,14 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkCmdDrawIndexedIndirectCountKHR = (PFN_vkCmdDrawIndexedIndirectCountKHR)load(context, "vkCmdDrawIndexedIndirectCountKHR"); table->vkCmdDrawIndirectCountKHR = 
(PFN_vkCmdDrawIndirectCountKHR)load(context, "vkCmdDrawIndirectCountKHR"); #endif /* defined(VK_KHR_draw_indirect_count) */ +#if defined(VK_KHR_dynamic_rendering) + table->vkCmdBeginRenderingKHR = (PFN_vkCmdBeginRenderingKHR)load(context, "vkCmdBeginRenderingKHR"); + table->vkCmdEndRenderingKHR = (PFN_vkCmdEndRenderingKHR)load(context, "vkCmdEndRenderingKHR"); +#endif /* defined(VK_KHR_dynamic_rendering) */ +#if defined(VK_KHR_dynamic_rendering_local_read) + table->vkCmdSetRenderingAttachmentLocationsKHR = (PFN_vkCmdSetRenderingAttachmentLocationsKHR)load(context, "vkCmdSetRenderingAttachmentLocationsKHR"); + table->vkCmdSetRenderingInputAttachmentIndicesKHR = (PFN_vkCmdSetRenderingInputAttachmentIndicesKHR)load(context, "vkCmdSetRenderingInputAttachmentIndicesKHR"); +#endif /* defined(VK_KHR_dynamic_rendering_local_read) */ #if defined(VK_KHR_external_fence_fd) table->vkGetFenceFdKHR = (PFN_vkGetFenceFdKHR)load(context, "vkGetFenceFdKHR"); table->vkImportFenceFdKHR = (PFN_vkImportFenceFdKHR)load(context, "vkImportFenceFdKHR"); @@ -893,20 +1895,84 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkGetSemaphoreWin32HandleKHR = (PFN_vkGetSemaphoreWin32HandleKHR)load(context, "vkGetSemaphoreWin32HandleKHR"); table->vkImportSemaphoreWin32HandleKHR = (PFN_vkImportSemaphoreWin32HandleKHR)load(context, "vkImportSemaphoreWin32HandleKHR"); #endif /* defined(VK_KHR_external_semaphore_win32) */ +#if defined(VK_KHR_fragment_shading_rate) + table->vkCmdSetFragmentShadingRateKHR = (PFN_vkCmdSetFragmentShadingRateKHR)load(context, "vkCmdSetFragmentShadingRateKHR"); +#endif /* defined(VK_KHR_fragment_shading_rate) */ #if defined(VK_KHR_get_memory_requirements2) table->vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2KHR)load(context, "vkGetBufferMemoryRequirements2KHR"); table->vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2KHR)load(context, "vkGetImageMemoryRequirements2KHR"); 
table->vkGetImageSparseMemoryRequirements2KHR = (PFN_vkGetImageSparseMemoryRequirements2KHR)load(context, "vkGetImageSparseMemoryRequirements2KHR"); #endif /* defined(VK_KHR_get_memory_requirements2) */ +#if defined(VK_KHR_line_rasterization) + table->vkCmdSetLineStippleKHR = (PFN_vkCmdSetLineStippleKHR)load(context, "vkCmdSetLineStippleKHR"); +#endif /* defined(VK_KHR_line_rasterization) */ #if defined(VK_KHR_maintenance1) table->vkTrimCommandPoolKHR = (PFN_vkTrimCommandPoolKHR)load(context, "vkTrimCommandPoolKHR"); #endif /* defined(VK_KHR_maintenance1) */ #if defined(VK_KHR_maintenance3) table->vkGetDescriptorSetLayoutSupportKHR = (PFN_vkGetDescriptorSetLayoutSupportKHR)load(context, "vkGetDescriptorSetLayoutSupportKHR"); #endif /* defined(VK_KHR_maintenance3) */ +#if defined(VK_KHR_maintenance4) + table->vkGetDeviceBufferMemoryRequirementsKHR = (PFN_vkGetDeviceBufferMemoryRequirementsKHR)load(context, "vkGetDeviceBufferMemoryRequirementsKHR"); + table->vkGetDeviceImageMemoryRequirementsKHR = (PFN_vkGetDeviceImageMemoryRequirementsKHR)load(context, "vkGetDeviceImageMemoryRequirementsKHR"); + table->vkGetDeviceImageSparseMemoryRequirementsKHR = (PFN_vkGetDeviceImageSparseMemoryRequirementsKHR)load(context, "vkGetDeviceImageSparseMemoryRequirementsKHR"); +#endif /* defined(VK_KHR_maintenance4) */ +#if defined(VK_KHR_maintenance5) + table->vkCmdBindIndexBuffer2KHR = (PFN_vkCmdBindIndexBuffer2KHR)load(context, "vkCmdBindIndexBuffer2KHR"); + table->vkGetDeviceImageSubresourceLayoutKHR = (PFN_vkGetDeviceImageSubresourceLayoutKHR)load(context, "vkGetDeviceImageSubresourceLayoutKHR"); + table->vkGetImageSubresourceLayout2KHR = (PFN_vkGetImageSubresourceLayout2KHR)load(context, "vkGetImageSubresourceLayout2KHR"); + table->vkGetRenderingAreaGranularityKHR = (PFN_vkGetRenderingAreaGranularityKHR)load(context, "vkGetRenderingAreaGranularityKHR"); +#endif /* defined(VK_KHR_maintenance5) */ +#if defined(VK_KHR_maintenance6) + table->vkCmdBindDescriptorSets2KHR = 
(PFN_vkCmdBindDescriptorSets2KHR)load(context, "vkCmdBindDescriptorSets2KHR"); + table->vkCmdPushConstants2KHR = (PFN_vkCmdPushConstants2KHR)load(context, "vkCmdPushConstants2KHR"); +#endif /* defined(VK_KHR_maintenance6) */ +#if defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) + table->vkCmdPushDescriptorSet2KHR = (PFN_vkCmdPushDescriptorSet2KHR)load(context, "vkCmdPushDescriptorSet2KHR"); + table->vkCmdPushDescriptorSetWithTemplate2KHR = (PFN_vkCmdPushDescriptorSetWithTemplate2KHR)load(context, "vkCmdPushDescriptorSetWithTemplate2KHR"); +#endif /* defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) + table->vkCmdBindDescriptorBufferEmbeddedSamplers2EXT = (PFN_vkCmdBindDescriptorBufferEmbeddedSamplers2EXT)load(context, "vkCmdBindDescriptorBufferEmbeddedSamplers2EXT"); + table->vkCmdSetDescriptorBufferOffsets2EXT = (PFN_vkCmdSetDescriptorBufferOffsets2EXT)load(context, "vkCmdSetDescriptorBufferOffsets2EXT"); +#endif /* defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_KHR_map_memory2) + table->vkMapMemory2KHR = (PFN_vkMapMemory2KHR)load(context, "vkMapMemory2KHR"); + table->vkUnmapMemory2KHR = (PFN_vkUnmapMemory2KHR)load(context, "vkUnmapMemory2KHR"); +#endif /* defined(VK_KHR_map_memory2) */ +#if defined(VK_KHR_performance_query) + table->vkAcquireProfilingLockKHR = (PFN_vkAcquireProfilingLockKHR)load(context, "vkAcquireProfilingLockKHR"); + table->vkReleaseProfilingLockKHR = (PFN_vkReleaseProfilingLockKHR)load(context, "vkReleaseProfilingLockKHR"); +#endif /* defined(VK_KHR_performance_query) */ +#if defined(VK_KHR_pipeline_binary) + table->vkCreatePipelineBinariesKHR = (PFN_vkCreatePipelineBinariesKHR)load(context, "vkCreatePipelineBinariesKHR"); + table->vkDestroyPipelineBinaryKHR = (PFN_vkDestroyPipelineBinaryKHR)load(context, "vkDestroyPipelineBinaryKHR"); + table->vkGetPipelineBinaryDataKHR = 
(PFN_vkGetPipelineBinaryDataKHR)load(context, "vkGetPipelineBinaryDataKHR"); + table->vkGetPipelineKeyKHR = (PFN_vkGetPipelineKeyKHR)load(context, "vkGetPipelineKeyKHR"); + table->vkReleaseCapturedPipelineDataKHR = (PFN_vkReleaseCapturedPipelineDataKHR)load(context, "vkReleaseCapturedPipelineDataKHR"); +#endif /* defined(VK_KHR_pipeline_binary) */ +#if defined(VK_KHR_pipeline_executable_properties) + table->vkGetPipelineExecutableInternalRepresentationsKHR = (PFN_vkGetPipelineExecutableInternalRepresentationsKHR)load(context, "vkGetPipelineExecutableInternalRepresentationsKHR"); + table->vkGetPipelineExecutablePropertiesKHR = (PFN_vkGetPipelineExecutablePropertiesKHR)load(context, "vkGetPipelineExecutablePropertiesKHR"); + table->vkGetPipelineExecutableStatisticsKHR = (PFN_vkGetPipelineExecutableStatisticsKHR)load(context, "vkGetPipelineExecutableStatisticsKHR"); +#endif /* defined(VK_KHR_pipeline_executable_properties) */ +#if defined(VK_KHR_present_wait) + table->vkWaitForPresentKHR = (PFN_vkWaitForPresentKHR)load(context, "vkWaitForPresentKHR"); +#endif /* defined(VK_KHR_present_wait) */ #if defined(VK_KHR_push_descriptor) table->vkCmdPushDescriptorSetKHR = (PFN_vkCmdPushDescriptorSetKHR)load(context, "vkCmdPushDescriptorSetKHR"); #endif /* defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) + table->vkCmdTraceRaysIndirect2KHR = (PFN_vkCmdTraceRaysIndirect2KHR)load(context, "vkCmdTraceRaysIndirect2KHR"); +#endif /* defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) */ +#if defined(VK_KHR_ray_tracing_pipeline) + table->vkCmdSetRayTracingPipelineStackSizeKHR = (PFN_vkCmdSetRayTracingPipelineStackSizeKHR)load(context, "vkCmdSetRayTracingPipelineStackSizeKHR"); + table->vkCmdTraceRaysIndirectKHR = (PFN_vkCmdTraceRaysIndirectKHR)load(context, "vkCmdTraceRaysIndirectKHR"); + table->vkCmdTraceRaysKHR = (PFN_vkCmdTraceRaysKHR)load(context, "vkCmdTraceRaysKHR"); + 
table->vkCreateRayTracingPipelinesKHR = (PFN_vkCreateRayTracingPipelinesKHR)load(context, "vkCreateRayTracingPipelinesKHR"); + table->vkGetRayTracingCaptureReplayShaderGroupHandlesKHR = (PFN_vkGetRayTracingCaptureReplayShaderGroupHandlesKHR)load(context, "vkGetRayTracingCaptureReplayShaderGroupHandlesKHR"); + table->vkGetRayTracingShaderGroupHandlesKHR = (PFN_vkGetRayTracingShaderGroupHandlesKHR)load(context, "vkGetRayTracingShaderGroupHandlesKHR"); + table->vkGetRayTracingShaderGroupStackSizeKHR = (PFN_vkGetRayTracingShaderGroupStackSizeKHR)load(context, "vkGetRayTracingShaderGroupStackSizeKHR"); +#endif /* defined(VK_KHR_ray_tracing_pipeline) */ #if defined(VK_KHR_sampler_ycbcr_conversion) table->vkCreateSamplerYcbcrConversionKHR = (PFN_vkCreateSamplerYcbcrConversionKHR)load(context, "vkCreateSamplerYcbcrConversionKHR"); table->vkDestroySamplerYcbcrConversionKHR = (PFN_vkDestroySamplerYcbcrConversionKHR)load(context, "vkDestroySamplerYcbcrConversionKHR"); @@ -921,31 +1987,122 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkGetSwapchainImagesKHR = (PFN_vkGetSwapchainImagesKHR)load(context, "vkGetSwapchainImagesKHR"); table->vkQueuePresentKHR = (PFN_vkQueuePresentKHR)load(context, "vkQueuePresentKHR"); #endif /* defined(VK_KHR_swapchain) */ -#if defined(VK_NVX_device_generated_commands) - table->vkCmdProcessCommandsNVX = (PFN_vkCmdProcessCommandsNVX)load(context, "vkCmdProcessCommandsNVX"); - table->vkCmdReserveSpaceForCommandsNVX = (PFN_vkCmdReserveSpaceForCommandsNVX)load(context, "vkCmdReserveSpaceForCommandsNVX"); - table->vkCreateIndirectCommandsLayoutNVX = (PFN_vkCreateIndirectCommandsLayoutNVX)load(context, "vkCreateIndirectCommandsLayoutNVX"); - table->vkCreateObjectTableNVX = (PFN_vkCreateObjectTableNVX)load(context, "vkCreateObjectTableNVX"); - table->vkDestroyIndirectCommandsLayoutNVX = (PFN_vkDestroyIndirectCommandsLayoutNVX)load(context, "vkDestroyIndirectCommandsLayoutNVX"); - table->vkDestroyObjectTableNVX 
= (PFN_vkDestroyObjectTableNVX)load(context, "vkDestroyObjectTableNVX"); - table->vkRegisterObjectsNVX = (PFN_vkRegisterObjectsNVX)load(context, "vkRegisterObjectsNVX"); - table->vkUnregisterObjectsNVX = (PFN_vkUnregisterObjectsNVX)load(context, "vkUnregisterObjectsNVX"); -#endif /* defined(VK_NVX_device_generated_commands) */ +#if defined(VK_KHR_synchronization2) + table->vkCmdPipelineBarrier2KHR = (PFN_vkCmdPipelineBarrier2KHR)load(context, "vkCmdPipelineBarrier2KHR"); + table->vkCmdResetEvent2KHR = (PFN_vkCmdResetEvent2KHR)load(context, "vkCmdResetEvent2KHR"); + table->vkCmdSetEvent2KHR = (PFN_vkCmdSetEvent2KHR)load(context, "vkCmdSetEvent2KHR"); + table->vkCmdWaitEvents2KHR = (PFN_vkCmdWaitEvents2KHR)load(context, "vkCmdWaitEvents2KHR"); + table->vkCmdWriteTimestamp2KHR = (PFN_vkCmdWriteTimestamp2KHR)load(context, "vkCmdWriteTimestamp2KHR"); + table->vkQueueSubmit2KHR = (PFN_vkQueueSubmit2KHR)load(context, "vkQueueSubmit2KHR"); +#endif /* defined(VK_KHR_synchronization2) */ +#if defined(VK_KHR_timeline_semaphore) + table->vkGetSemaphoreCounterValueKHR = (PFN_vkGetSemaphoreCounterValueKHR)load(context, "vkGetSemaphoreCounterValueKHR"); + table->vkSignalSemaphoreKHR = (PFN_vkSignalSemaphoreKHR)load(context, "vkSignalSemaphoreKHR"); + table->vkWaitSemaphoresKHR = (PFN_vkWaitSemaphoresKHR)load(context, "vkWaitSemaphoresKHR"); +#endif /* defined(VK_KHR_timeline_semaphore) */ +#if defined(VK_KHR_video_decode_queue) + table->vkCmdDecodeVideoKHR = (PFN_vkCmdDecodeVideoKHR)load(context, "vkCmdDecodeVideoKHR"); +#endif /* defined(VK_KHR_video_decode_queue) */ +#if defined(VK_KHR_video_encode_queue) + table->vkCmdEncodeVideoKHR = (PFN_vkCmdEncodeVideoKHR)load(context, "vkCmdEncodeVideoKHR"); + table->vkGetEncodedVideoSessionParametersKHR = (PFN_vkGetEncodedVideoSessionParametersKHR)load(context, "vkGetEncodedVideoSessionParametersKHR"); +#endif /* defined(VK_KHR_video_encode_queue) */ +#if defined(VK_KHR_video_queue) + table->vkBindVideoSessionMemoryKHR = 
(PFN_vkBindVideoSessionMemoryKHR)load(context, "vkBindVideoSessionMemoryKHR"); + table->vkCmdBeginVideoCodingKHR = (PFN_vkCmdBeginVideoCodingKHR)load(context, "vkCmdBeginVideoCodingKHR"); + table->vkCmdControlVideoCodingKHR = (PFN_vkCmdControlVideoCodingKHR)load(context, "vkCmdControlVideoCodingKHR"); + table->vkCmdEndVideoCodingKHR = (PFN_vkCmdEndVideoCodingKHR)load(context, "vkCmdEndVideoCodingKHR"); + table->vkCreateVideoSessionKHR = (PFN_vkCreateVideoSessionKHR)load(context, "vkCreateVideoSessionKHR"); + table->vkCreateVideoSessionParametersKHR = (PFN_vkCreateVideoSessionParametersKHR)load(context, "vkCreateVideoSessionParametersKHR"); + table->vkDestroyVideoSessionKHR = (PFN_vkDestroyVideoSessionKHR)load(context, "vkDestroyVideoSessionKHR"); + table->vkDestroyVideoSessionParametersKHR = (PFN_vkDestroyVideoSessionParametersKHR)load(context, "vkDestroyVideoSessionParametersKHR"); + table->vkGetVideoSessionMemoryRequirementsKHR = (PFN_vkGetVideoSessionMemoryRequirementsKHR)load(context, "vkGetVideoSessionMemoryRequirementsKHR"); + table->vkUpdateVideoSessionParametersKHR = (PFN_vkUpdateVideoSessionParametersKHR)load(context, "vkUpdateVideoSessionParametersKHR"); +#endif /* defined(VK_KHR_video_queue) */ +#if defined(VK_NVX_binary_import) + table->vkCmdCuLaunchKernelNVX = (PFN_vkCmdCuLaunchKernelNVX)load(context, "vkCmdCuLaunchKernelNVX"); + table->vkCreateCuFunctionNVX = (PFN_vkCreateCuFunctionNVX)load(context, "vkCreateCuFunctionNVX"); + table->vkCreateCuModuleNVX = (PFN_vkCreateCuModuleNVX)load(context, "vkCreateCuModuleNVX"); + table->vkDestroyCuFunctionNVX = (PFN_vkDestroyCuFunctionNVX)load(context, "vkDestroyCuFunctionNVX"); + table->vkDestroyCuModuleNVX = (PFN_vkDestroyCuModuleNVX)load(context, "vkDestroyCuModuleNVX"); +#endif /* defined(VK_NVX_binary_import) */ +#if defined(VK_NVX_image_view_handle) + table->vkGetImageViewHandleNVX = (PFN_vkGetImageViewHandleNVX)load(context, "vkGetImageViewHandleNVX"); +#endif /* defined(VK_NVX_image_view_handle) */ +#if 
defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 + table->vkGetImageViewHandle64NVX = (PFN_vkGetImageViewHandle64NVX)load(context, "vkGetImageViewHandle64NVX"); +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 + table->vkGetImageViewAddressNVX = (PFN_vkGetImageViewAddressNVX)load(context, "vkGetImageViewAddressNVX"); +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 */ #if defined(VK_NV_clip_space_w_scaling) table->vkCmdSetViewportWScalingNV = (PFN_vkCmdSetViewportWScalingNV)load(context, "vkCmdSetViewportWScalingNV"); #endif /* defined(VK_NV_clip_space_w_scaling) */ +#if defined(VK_NV_copy_memory_indirect) + table->vkCmdCopyMemoryIndirectNV = (PFN_vkCmdCopyMemoryIndirectNV)load(context, "vkCmdCopyMemoryIndirectNV"); + table->vkCmdCopyMemoryToImageIndirectNV = (PFN_vkCmdCopyMemoryToImageIndirectNV)load(context, "vkCmdCopyMemoryToImageIndirectNV"); +#endif /* defined(VK_NV_copy_memory_indirect) */ +#if defined(VK_NV_cuda_kernel_launch) + table->vkCmdCudaLaunchKernelNV = (PFN_vkCmdCudaLaunchKernelNV)load(context, "vkCmdCudaLaunchKernelNV"); + table->vkCreateCudaFunctionNV = (PFN_vkCreateCudaFunctionNV)load(context, "vkCreateCudaFunctionNV"); + table->vkCreateCudaModuleNV = (PFN_vkCreateCudaModuleNV)load(context, "vkCreateCudaModuleNV"); + table->vkDestroyCudaFunctionNV = (PFN_vkDestroyCudaFunctionNV)load(context, "vkDestroyCudaFunctionNV"); + table->vkDestroyCudaModuleNV = (PFN_vkDestroyCudaModuleNV)load(context, "vkDestroyCudaModuleNV"); + table->vkGetCudaModuleCacheNV = (PFN_vkGetCudaModuleCacheNV)load(context, "vkGetCudaModuleCacheNV"); +#endif /* defined(VK_NV_cuda_kernel_launch) */ #if defined(VK_NV_device_diagnostic_checkpoints) table->vkCmdSetCheckpointNV = (PFN_vkCmdSetCheckpointNV)load(context, "vkCmdSetCheckpointNV"); table->vkGetQueueCheckpointDataNV = 
(PFN_vkGetQueueCheckpointDataNV)load(context, "vkGetQueueCheckpointDataNV"); #endif /* defined(VK_NV_device_diagnostic_checkpoints) */ +#if defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) + table->vkGetQueueCheckpointData2NV = (PFN_vkGetQueueCheckpointData2NV)load(context, "vkGetQueueCheckpointData2NV"); +#endif /* defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_NV_device_generated_commands) + table->vkCmdBindPipelineShaderGroupNV = (PFN_vkCmdBindPipelineShaderGroupNV)load(context, "vkCmdBindPipelineShaderGroupNV"); + table->vkCmdExecuteGeneratedCommandsNV = (PFN_vkCmdExecuteGeneratedCommandsNV)load(context, "vkCmdExecuteGeneratedCommandsNV"); + table->vkCmdPreprocessGeneratedCommandsNV = (PFN_vkCmdPreprocessGeneratedCommandsNV)load(context, "vkCmdPreprocessGeneratedCommandsNV"); + table->vkCreateIndirectCommandsLayoutNV = (PFN_vkCreateIndirectCommandsLayoutNV)load(context, "vkCreateIndirectCommandsLayoutNV"); + table->vkDestroyIndirectCommandsLayoutNV = (PFN_vkDestroyIndirectCommandsLayoutNV)load(context, "vkDestroyIndirectCommandsLayoutNV"); + table->vkGetGeneratedCommandsMemoryRequirementsNV = (PFN_vkGetGeneratedCommandsMemoryRequirementsNV)load(context, "vkGetGeneratedCommandsMemoryRequirementsNV"); +#endif /* defined(VK_NV_device_generated_commands) */ +#if defined(VK_NV_device_generated_commands_compute) + table->vkCmdUpdatePipelineIndirectBufferNV = (PFN_vkCmdUpdatePipelineIndirectBufferNV)load(context, "vkCmdUpdatePipelineIndirectBufferNV"); + table->vkGetPipelineIndirectDeviceAddressNV = (PFN_vkGetPipelineIndirectDeviceAddressNV)load(context, "vkGetPipelineIndirectDeviceAddressNV"); + table->vkGetPipelineIndirectMemoryRequirementsNV = (PFN_vkGetPipelineIndirectMemoryRequirementsNV)load(context, "vkGetPipelineIndirectMemoryRequirementsNV"); +#endif /* defined(VK_NV_device_generated_commands_compute) */ +#if 
defined(VK_NV_external_memory_rdma) + table->vkGetMemoryRemoteAddressNV = (PFN_vkGetMemoryRemoteAddressNV)load(context, "vkGetMemoryRemoteAddressNV"); +#endif /* defined(VK_NV_external_memory_rdma) */ #if defined(VK_NV_external_memory_win32) table->vkGetMemoryWin32HandleNV = (PFN_vkGetMemoryWin32HandleNV)load(context, "vkGetMemoryWin32HandleNV"); #endif /* defined(VK_NV_external_memory_win32) */ +#if defined(VK_NV_fragment_shading_rate_enums) + table->vkCmdSetFragmentShadingRateEnumNV = (PFN_vkCmdSetFragmentShadingRateEnumNV)load(context, "vkCmdSetFragmentShadingRateEnumNV"); +#endif /* defined(VK_NV_fragment_shading_rate_enums) */ +#if defined(VK_NV_low_latency2) + table->vkGetLatencyTimingsNV = (PFN_vkGetLatencyTimingsNV)load(context, "vkGetLatencyTimingsNV"); + table->vkLatencySleepNV = (PFN_vkLatencySleepNV)load(context, "vkLatencySleepNV"); + table->vkQueueNotifyOutOfBandNV = (PFN_vkQueueNotifyOutOfBandNV)load(context, "vkQueueNotifyOutOfBandNV"); + table->vkSetLatencyMarkerNV = (PFN_vkSetLatencyMarkerNV)load(context, "vkSetLatencyMarkerNV"); + table->vkSetLatencySleepModeNV = (PFN_vkSetLatencySleepModeNV)load(context, "vkSetLatencySleepModeNV"); +#endif /* defined(VK_NV_low_latency2) */ +#if defined(VK_NV_memory_decompression) + table->vkCmdDecompressMemoryIndirectCountNV = (PFN_vkCmdDecompressMemoryIndirectCountNV)load(context, "vkCmdDecompressMemoryIndirectCountNV"); + table->vkCmdDecompressMemoryNV = (PFN_vkCmdDecompressMemoryNV)load(context, "vkCmdDecompressMemoryNV"); +#endif /* defined(VK_NV_memory_decompression) */ #if defined(VK_NV_mesh_shader) - table->vkCmdDrawMeshTasksIndirectCountNV = (PFN_vkCmdDrawMeshTasksIndirectCountNV)load(context, "vkCmdDrawMeshTasksIndirectCountNV"); table->vkCmdDrawMeshTasksIndirectNV = (PFN_vkCmdDrawMeshTasksIndirectNV)load(context, "vkCmdDrawMeshTasksIndirectNV"); table->vkCmdDrawMeshTasksNV = (PFN_vkCmdDrawMeshTasksNV)load(context, "vkCmdDrawMeshTasksNV"); #endif /* defined(VK_NV_mesh_shader) */ +#if 
defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) + table->vkCmdDrawMeshTasksIndirectCountNV = (PFN_vkCmdDrawMeshTasksIndirectCountNV)load(context, "vkCmdDrawMeshTasksIndirectCountNV"); +#endif /* defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_NV_optical_flow) + table->vkBindOpticalFlowSessionImageNV = (PFN_vkBindOpticalFlowSessionImageNV)load(context, "vkBindOpticalFlowSessionImageNV"); + table->vkCmdOpticalFlowExecuteNV = (PFN_vkCmdOpticalFlowExecuteNV)load(context, "vkCmdOpticalFlowExecuteNV"); + table->vkCreateOpticalFlowSessionNV = (PFN_vkCreateOpticalFlowSessionNV)load(context, "vkCreateOpticalFlowSessionNV"); + table->vkDestroyOpticalFlowSessionNV = (PFN_vkDestroyOpticalFlowSessionNV)load(context, "vkDestroyOpticalFlowSessionNV"); +#endif /* defined(VK_NV_optical_flow) */ #if defined(VK_NV_ray_tracing) table->vkBindAccelerationStructureMemoryNV = (PFN_vkBindAccelerationStructureMemoryNV)load(context, "vkBindAccelerationStructureMemoryNV"); table->vkCmdBuildAccelerationStructureNV = (PFN_vkCmdBuildAccelerationStructureNV)load(context, "vkCmdBuildAccelerationStructureNV"); @@ -960,6 +2117,9 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkGetAccelerationStructureMemoryRequirementsNV = (PFN_vkGetAccelerationStructureMemoryRequirementsNV)load(context, "vkGetAccelerationStructureMemoryRequirementsNV"); table->vkGetRayTracingShaderGroupHandlesNV = (PFN_vkGetRayTracingShaderGroupHandlesNV)load(context, "vkGetRayTracingShaderGroupHandlesNV"); #endif /* defined(VK_NV_ray_tracing) */ +#if defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 + table->vkCmdSetExclusiveScissorEnableNV = (PFN_vkCmdSetExclusiveScissorEnableNV)load(context, "vkCmdSetExclusiveScissorEnableNV"); +#endif /* defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 */ #if 
defined(VK_NV_scissor_exclusive) table->vkCmdSetExclusiveScissorNV = (PFN_vkCmdSetExclusiveScissorNV)load(context, "vkCmdSetExclusiveScissorNV"); #endif /* defined(VK_NV_scissor_exclusive) */ @@ -968,9 +2128,115 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, table->vkCmdSetCoarseSampleOrderNV = (PFN_vkCmdSetCoarseSampleOrderNV)load(context, "vkCmdSetCoarseSampleOrderNV"); table->vkCmdSetViewportShadingRatePaletteNV = (PFN_vkCmdSetViewportShadingRatePaletteNV)load(context, "vkCmdSetViewportShadingRatePaletteNV"); #endif /* defined(VK_NV_shading_rate_image) */ -#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) +#if defined(VK_QCOM_tile_properties) + table->vkGetDynamicRenderingTilePropertiesQCOM = (PFN_vkGetDynamicRenderingTilePropertiesQCOM)load(context, "vkGetDynamicRenderingTilePropertiesQCOM"); + table->vkGetFramebufferTilePropertiesQCOM = (PFN_vkGetFramebufferTilePropertiesQCOM)load(context, "vkGetFramebufferTilePropertiesQCOM"); +#endif /* defined(VK_QCOM_tile_properties) */ +#if defined(VK_QNX_external_memory_screen_buffer) + table->vkGetScreenBufferPropertiesQNX = (PFN_vkGetScreenBufferPropertiesQNX)load(context, "vkGetScreenBufferPropertiesQNX"); +#endif /* defined(VK_QNX_external_memory_screen_buffer) */ +#if defined(VK_VALVE_descriptor_set_host_mapping) + table->vkGetDescriptorSetHostMappingVALVE = (PFN_vkGetDescriptorSetHostMappingVALVE)load(context, "vkGetDescriptorSetHostMappingVALVE"); + table->vkGetDescriptorSetLayoutHostMappingInfoVALVE = (PFN_vkGetDescriptorSetLayoutHostMappingInfoVALVE)load(context, "vkGetDescriptorSetLayoutHostMappingInfoVALVE"); +#endif /* defined(VK_VALVE_descriptor_set_host_mapping) */ +#if (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) + table->vkCmdSetDepthClampRangeEXT = (PFN_vkCmdSetDepthClampRangeEXT)load(context, 
"vkCmdSetDepthClampRangeEXT"); +#endif /* (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) */ +#if (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) + table->vkCmdBindVertexBuffers2EXT = (PFN_vkCmdBindVertexBuffers2EXT)load(context, "vkCmdBindVertexBuffers2EXT"); + table->vkCmdSetCullModeEXT = (PFN_vkCmdSetCullModeEXT)load(context, "vkCmdSetCullModeEXT"); + table->vkCmdSetDepthBoundsTestEnableEXT = (PFN_vkCmdSetDepthBoundsTestEnableEXT)load(context, "vkCmdSetDepthBoundsTestEnableEXT"); + table->vkCmdSetDepthCompareOpEXT = (PFN_vkCmdSetDepthCompareOpEXT)load(context, "vkCmdSetDepthCompareOpEXT"); + table->vkCmdSetDepthTestEnableEXT = (PFN_vkCmdSetDepthTestEnableEXT)load(context, "vkCmdSetDepthTestEnableEXT"); + table->vkCmdSetDepthWriteEnableEXT = (PFN_vkCmdSetDepthWriteEnableEXT)load(context, "vkCmdSetDepthWriteEnableEXT"); + table->vkCmdSetFrontFaceEXT = (PFN_vkCmdSetFrontFaceEXT)load(context, "vkCmdSetFrontFaceEXT"); + table->vkCmdSetPrimitiveTopologyEXT = (PFN_vkCmdSetPrimitiveTopologyEXT)load(context, "vkCmdSetPrimitiveTopologyEXT"); + table->vkCmdSetScissorWithCountEXT = (PFN_vkCmdSetScissorWithCountEXT)load(context, "vkCmdSetScissorWithCountEXT"); + table->vkCmdSetStencilOpEXT = (PFN_vkCmdSetStencilOpEXT)load(context, "vkCmdSetStencilOpEXT"); + table->vkCmdSetStencilTestEnableEXT = (PFN_vkCmdSetStencilTestEnableEXT)load(context, "vkCmdSetStencilTestEnableEXT"); + table->vkCmdSetViewportWithCountEXT = (PFN_vkCmdSetViewportWithCountEXT)load(context, "vkCmdSetViewportWithCountEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) + table->vkCmdSetDepthBiasEnableEXT = (PFN_vkCmdSetDepthBiasEnableEXT)load(context, "vkCmdSetDepthBiasEnableEXT"); + table->vkCmdSetLogicOpEXT = (PFN_vkCmdSetLogicOpEXT)load(context, "vkCmdSetLogicOpEXT"); + 
table->vkCmdSetPatchControlPointsEXT = (PFN_vkCmdSetPatchControlPointsEXT)load(context, "vkCmdSetPatchControlPointsEXT"); + table->vkCmdSetPrimitiveRestartEnableEXT = (PFN_vkCmdSetPrimitiveRestartEnableEXT)load(context, "vkCmdSetPrimitiveRestartEnableEXT"); + table->vkCmdSetRasterizerDiscardEnableEXT = (PFN_vkCmdSetRasterizerDiscardEnableEXT)load(context, "vkCmdSetRasterizerDiscardEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) + table->vkCmdSetAlphaToCoverageEnableEXT = (PFN_vkCmdSetAlphaToCoverageEnableEXT)load(context, "vkCmdSetAlphaToCoverageEnableEXT"); + table->vkCmdSetAlphaToOneEnableEXT = (PFN_vkCmdSetAlphaToOneEnableEXT)load(context, "vkCmdSetAlphaToOneEnableEXT"); + table->vkCmdSetColorBlendEnableEXT = (PFN_vkCmdSetColorBlendEnableEXT)load(context, "vkCmdSetColorBlendEnableEXT"); + table->vkCmdSetColorBlendEquationEXT = (PFN_vkCmdSetColorBlendEquationEXT)load(context, "vkCmdSetColorBlendEquationEXT"); + table->vkCmdSetColorWriteMaskEXT = (PFN_vkCmdSetColorWriteMaskEXT)load(context, "vkCmdSetColorWriteMaskEXT"); + table->vkCmdSetDepthClampEnableEXT = (PFN_vkCmdSetDepthClampEnableEXT)load(context, "vkCmdSetDepthClampEnableEXT"); + table->vkCmdSetLogicOpEnableEXT = (PFN_vkCmdSetLogicOpEnableEXT)load(context, "vkCmdSetLogicOpEnableEXT"); + table->vkCmdSetPolygonModeEXT = (PFN_vkCmdSetPolygonModeEXT)load(context, "vkCmdSetPolygonModeEXT"); + table->vkCmdSetRasterizationSamplesEXT = (PFN_vkCmdSetRasterizationSamplesEXT)load(context, "vkCmdSetRasterizationSamplesEXT"); + table->vkCmdSetSampleMaskEXT = (PFN_vkCmdSetSampleMaskEXT)load(context, "vkCmdSetSampleMaskEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) + 
table->vkCmdSetTessellationDomainOriginEXT = (PFN_vkCmdSetTessellationDomainOriginEXT)load(context, "vkCmdSetTessellationDomainOriginEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) + table->vkCmdSetRasterizationStreamEXT = (PFN_vkCmdSetRasterizationStreamEXT)load(context, "vkCmdSetRasterizationStreamEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) + table->vkCmdSetConservativeRasterizationModeEXT = (PFN_vkCmdSetConservativeRasterizationModeEXT)load(context, "vkCmdSetConservativeRasterizationModeEXT"); + table->vkCmdSetExtraPrimitiveOverestimationSizeEXT = (PFN_vkCmdSetExtraPrimitiveOverestimationSizeEXT)load(context, "vkCmdSetExtraPrimitiveOverestimationSizeEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) + table->vkCmdSetDepthClipEnableEXT = (PFN_vkCmdSetDepthClipEnableEXT)load(context, "vkCmdSetDepthClipEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && 
defined(VK_EXT_sample_locations)) + table->vkCmdSetSampleLocationsEnableEXT = (PFN_vkCmdSetSampleLocationsEnableEXT)load(context, "vkCmdSetSampleLocationsEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) + table->vkCmdSetColorBlendAdvancedEXT = (PFN_vkCmdSetColorBlendAdvancedEXT)load(context, "vkCmdSetColorBlendAdvancedEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) + table->vkCmdSetProvokingVertexModeEXT = (PFN_vkCmdSetProvokingVertexModeEXT)load(context, "vkCmdSetProvokingVertexModeEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) + table->vkCmdSetLineRasterizationModeEXT = (PFN_vkCmdSetLineRasterizationModeEXT)load(context, "vkCmdSetLineRasterizationModeEXT"); + table->vkCmdSetLineStippleEnableEXT = (PFN_vkCmdSetLineStippleEnableEXT)load(context, "vkCmdSetLineStippleEnableEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) + 
table->vkCmdSetDepthClipNegativeOneToOneEXT = (PFN_vkCmdSetDepthClipNegativeOneToOneEXT)load(context, "vkCmdSetDepthClipNegativeOneToOneEXT"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) + table->vkCmdSetViewportWScalingEnableNV = (PFN_vkCmdSetViewportWScalingEnableNV)load(context, "vkCmdSetViewportWScalingEnableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) + table->vkCmdSetViewportSwizzleNV = (PFN_vkCmdSetViewportSwizzleNV)load(context, "vkCmdSetViewportSwizzleNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) + table->vkCmdSetCoverageToColorEnableNV = (PFN_vkCmdSetCoverageToColorEnableNV)load(context, "vkCmdSetCoverageToColorEnableNV"); + table->vkCmdSetCoverageToColorLocationNV = (PFN_vkCmdSetCoverageToColorLocationNV)load(context, "vkCmdSetCoverageToColorLocationNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) + 
table->vkCmdSetCoverageModulationModeNV = (PFN_vkCmdSetCoverageModulationModeNV)load(context, "vkCmdSetCoverageModulationModeNV"); + table->vkCmdSetCoverageModulationTableEnableNV = (PFN_vkCmdSetCoverageModulationTableEnableNV)load(context, "vkCmdSetCoverageModulationTableEnableNV"); + table->vkCmdSetCoverageModulationTableNV = (PFN_vkCmdSetCoverageModulationTableNV)load(context, "vkCmdSetCoverageModulationTableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) + table->vkCmdSetShadingRateImageEnableNV = (PFN_vkCmdSetShadingRateImageEnableNV)load(context, "vkCmdSetShadingRateImageEnableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) + table->vkCmdSetRepresentativeFragmentTestEnableNV = (PFN_vkCmdSetRepresentativeFragmentTestEnableNV)load(context, "vkCmdSetRepresentativeFragmentTestEnableNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) + table->vkCmdSetCoverageReductionModeNV = (PFN_vkCmdSetCoverageReductionModeNV)load(context, "vkCmdSetCoverageReductionModeNV"); +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || 
(defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) */ +#if (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) + table->vkGetImageSubresourceLayout2EXT = (PFN_vkGetImageSubresourceLayout2EXT)load(context, "vkGetImageSubresourceLayout2EXT"); +#endif /* (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) */ +#if (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) + table->vkCmdSetVertexInputEXT = (PFN_vkCmdSetVertexInputEXT)load(context, "vkCmdSetVertexInputEXT"); +#endif /* (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) */ +#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) table->vkCmdPushDescriptorSetWithTemplateKHR = (PFN_vkCmdPushDescriptorSetWithTemplateKHR)load(context, "vkCmdPushDescriptorSetWithTemplateKHR"); -#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) */ +#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) */ #if (defined(VK_KHR_device_group) && defined(VK_KHR_surface)) || (defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)) table->vkGetDeviceGroupPresentCapabilitiesKHR = (PFN_vkGetDeviceGroupPresentCapabilitiesKHR)load(context, "vkGetDeviceGroupPresentCapabilitiesKHR"); table->vkGetDeviceGroupSurfacePresentModesKHR = (PFN_vkGetDeviceGroupSurfacePresentModesKHR)load(context, "vkGetDeviceGroupSurfacePresentModesKHR"); @@ -982,8 +2248,12 @@ static void volkGenLoadDeviceTable(struct VolkDeviceTable* table, void* context, } #ifdef __GNUC__ +#ifdef VOLK_DEFAULT_VISIBILITY +# pragma GCC visibility push(default) +#else # 
pragma GCC visibility push(hidden) #endif +#endif /* VOLK_GENERATE_PROTOTYPES_C */ #if defined(VK_VERSION_1_0) @@ -1155,9 +2425,102 @@ PFN_vkGetPhysicalDeviceSparseImageFormatProperties2 vkGetPhysicalDeviceSparseIma PFN_vkTrimCommandPool vkTrimCommandPool; PFN_vkUpdateDescriptorSetWithTemplate vkUpdateDescriptorSetWithTemplate; #endif /* defined(VK_VERSION_1_1) */ +#if defined(VK_VERSION_1_2) +PFN_vkCmdBeginRenderPass2 vkCmdBeginRenderPass2; +PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount; +PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount; +PFN_vkCmdEndRenderPass2 vkCmdEndRenderPass2; +PFN_vkCmdNextSubpass2 vkCmdNextSubpass2; +PFN_vkCreateRenderPass2 vkCreateRenderPass2; +PFN_vkGetBufferDeviceAddress vkGetBufferDeviceAddress; +PFN_vkGetBufferOpaqueCaptureAddress vkGetBufferOpaqueCaptureAddress; +PFN_vkGetDeviceMemoryOpaqueCaptureAddress vkGetDeviceMemoryOpaqueCaptureAddress; +PFN_vkGetSemaphoreCounterValue vkGetSemaphoreCounterValue; +PFN_vkResetQueryPool vkResetQueryPool; +PFN_vkSignalSemaphore vkSignalSemaphore; +PFN_vkWaitSemaphores vkWaitSemaphores; +#endif /* defined(VK_VERSION_1_2) */ +#if defined(VK_VERSION_1_3) +PFN_vkCmdBeginRendering vkCmdBeginRendering; +PFN_vkCmdBindVertexBuffers2 vkCmdBindVertexBuffers2; +PFN_vkCmdBlitImage2 vkCmdBlitImage2; +PFN_vkCmdCopyBuffer2 vkCmdCopyBuffer2; +PFN_vkCmdCopyBufferToImage2 vkCmdCopyBufferToImage2; +PFN_vkCmdCopyImage2 vkCmdCopyImage2; +PFN_vkCmdCopyImageToBuffer2 vkCmdCopyImageToBuffer2; +PFN_vkCmdEndRendering vkCmdEndRendering; +PFN_vkCmdPipelineBarrier2 vkCmdPipelineBarrier2; +PFN_vkCmdResetEvent2 vkCmdResetEvent2; +PFN_vkCmdResolveImage2 vkCmdResolveImage2; +PFN_vkCmdSetCullMode vkCmdSetCullMode; +PFN_vkCmdSetDepthBiasEnable vkCmdSetDepthBiasEnable; +PFN_vkCmdSetDepthBoundsTestEnable vkCmdSetDepthBoundsTestEnable; +PFN_vkCmdSetDepthCompareOp vkCmdSetDepthCompareOp; +PFN_vkCmdSetDepthTestEnable vkCmdSetDepthTestEnable; +PFN_vkCmdSetDepthWriteEnable vkCmdSetDepthWriteEnable; +PFN_vkCmdSetEvent2 
vkCmdSetEvent2; +PFN_vkCmdSetFrontFace vkCmdSetFrontFace; +PFN_vkCmdSetPrimitiveRestartEnable vkCmdSetPrimitiveRestartEnable; +PFN_vkCmdSetPrimitiveTopology vkCmdSetPrimitiveTopology; +PFN_vkCmdSetRasterizerDiscardEnable vkCmdSetRasterizerDiscardEnable; +PFN_vkCmdSetScissorWithCount vkCmdSetScissorWithCount; +PFN_vkCmdSetStencilOp vkCmdSetStencilOp; +PFN_vkCmdSetStencilTestEnable vkCmdSetStencilTestEnable; +PFN_vkCmdSetViewportWithCount vkCmdSetViewportWithCount; +PFN_vkCmdWaitEvents2 vkCmdWaitEvents2; +PFN_vkCmdWriteTimestamp2 vkCmdWriteTimestamp2; +PFN_vkCreatePrivateDataSlot vkCreatePrivateDataSlot; +PFN_vkDestroyPrivateDataSlot vkDestroyPrivateDataSlot; +PFN_vkGetDeviceBufferMemoryRequirements vkGetDeviceBufferMemoryRequirements; +PFN_vkGetDeviceImageMemoryRequirements vkGetDeviceImageMemoryRequirements; +PFN_vkGetDeviceImageSparseMemoryRequirements vkGetDeviceImageSparseMemoryRequirements; +PFN_vkGetPhysicalDeviceToolProperties vkGetPhysicalDeviceToolProperties; +PFN_vkGetPrivateData vkGetPrivateData; +PFN_vkQueueSubmit2 vkQueueSubmit2; +PFN_vkSetPrivateData vkSetPrivateData; +#endif /* defined(VK_VERSION_1_3) */ +#if defined(VK_VERSION_1_4) +PFN_vkCmdBindDescriptorSets2 vkCmdBindDescriptorSets2; +PFN_vkCmdBindIndexBuffer2 vkCmdBindIndexBuffer2; +PFN_vkCmdPushConstants2 vkCmdPushConstants2; +PFN_vkCmdPushDescriptorSet vkCmdPushDescriptorSet; +PFN_vkCmdPushDescriptorSet2 vkCmdPushDescriptorSet2; +PFN_vkCmdPushDescriptorSetWithTemplate vkCmdPushDescriptorSetWithTemplate; +PFN_vkCmdPushDescriptorSetWithTemplate2 vkCmdPushDescriptorSetWithTemplate2; +PFN_vkCmdSetLineStipple vkCmdSetLineStipple; +PFN_vkCmdSetRenderingAttachmentLocations vkCmdSetRenderingAttachmentLocations; +PFN_vkCmdSetRenderingInputAttachmentIndices vkCmdSetRenderingInputAttachmentIndices; +PFN_vkCopyImageToImage vkCopyImageToImage; +PFN_vkCopyImageToMemory vkCopyImageToMemory; +PFN_vkCopyMemoryToImage vkCopyMemoryToImage; +PFN_vkGetDeviceImageSubresourceLayout vkGetDeviceImageSubresourceLayout; 
+PFN_vkGetImageSubresourceLayout2 vkGetImageSubresourceLayout2; +PFN_vkGetRenderingAreaGranularity vkGetRenderingAreaGranularity; +PFN_vkMapMemory2 vkMapMemory2; +PFN_vkTransitionImageLayout vkTransitionImageLayout; +PFN_vkUnmapMemory2 vkUnmapMemory2; +#endif /* defined(VK_VERSION_1_4) */ +#if defined(VK_AMDX_shader_enqueue) +PFN_vkCmdDispatchGraphAMDX vkCmdDispatchGraphAMDX; +PFN_vkCmdDispatchGraphIndirectAMDX vkCmdDispatchGraphIndirectAMDX; +PFN_vkCmdDispatchGraphIndirectCountAMDX vkCmdDispatchGraphIndirectCountAMDX; +PFN_vkCmdInitializeGraphScratchMemoryAMDX vkCmdInitializeGraphScratchMemoryAMDX; +PFN_vkCreateExecutionGraphPipelinesAMDX vkCreateExecutionGraphPipelinesAMDX; +PFN_vkGetExecutionGraphPipelineNodeIndexAMDX vkGetExecutionGraphPipelineNodeIndexAMDX; +PFN_vkGetExecutionGraphPipelineScratchSizeAMDX vkGetExecutionGraphPipelineScratchSizeAMDX; +#endif /* defined(VK_AMDX_shader_enqueue) */ +#if defined(VK_AMD_anti_lag) +PFN_vkAntiLagUpdateAMD vkAntiLagUpdateAMD; +#endif /* defined(VK_AMD_anti_lag) */ #if defined(VK_AMD_buffer_marker) PFN_vkCmdWriteBufferMarkerAMD vkCmdWriteBufferMarkerAMD; #endif /* defined(VK_AMD_buffer_marker) */ +#if defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) +PFN_vkCmdWriteBufferMarker2AMD vkCmdWriteBufferMarker2AMD; +#endif /* defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_AMD_display_native_hdr) +PFN_vkSetLocalDimmingAMD vkSetLocalDimmingAMD; +#endif /* defined(VK_AMD_display_native_hdr) */ #if defined(VK_AMD_draw_indirect_count) PFN_vkCmdDrawIndexedIndirectCountAMD vkCmdDrawIndexedIndirectCountAMD; PFN_vkCmdDrawIndirectCountAMD vkCmdDrawIndirectCountAMD; @@ -1169,10 +2532,17 @@ PFN_vkGetShaderInfoAMD vkGetShaderInfoAMD; PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; #endif /* 
defined(VK_ANDROID_external_memory_android_hardware_buffer) */ +#if defined(VK_EXT_acquire_drm_display) +PFN_vkAcquireDrmDisplayEXT vkAcquireDrmDisplayEXT; +PFN_vkGetDrmDisplayEXT vkGetDrmDisplayEXT; +#endif /* defined(VK_EXT_acquire_drm_display) */ #if defined(VK_EXT_acquire_xlib_display) PFN_vkAcquireXlibDisplayEXT vkAcquireXlibDisplayEXT; PFN_vkGetRandROutputDisplayEXT vkGetRandROutputDisplayEXT; #endif /* defined(VK_EXT_acquire_xlib_display) */ +#if defined(VK_EXT_attachment_feedback_loop_dynamic_state) +PFN_vkCmdSetAttachmentFeedbackLoopEnableEXT vkCmdSetAttachmentFeedbackLoopEnableEXT; +#endif /* defined(VK_EXT_attachment_feedback_loop_dynamic_state) */ #if defined(VK_EXT_buffer_device_address) PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; #endif /* defined(VK_EXT_buffer_device_address) */ @@ -1180,6 +2550,9 @@ PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT; PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT; #endif /* defined(VK_EXT_calibrated_timestamps) */ +#if defined(VK_EXT_color_write_enable) +PFN_vkCmdSetColorWriteEnableEXT vkCmdSetColorWriteEnableEXT; +#endif /* defined(VK_EXT_color_write_enable) */ #if defined(VK_EXT_conditional_rendering) PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT; PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT; @@ -1209,12 +2582,52 @@ PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; PFN_vkSubmitDebugUtilsMessageEXT vkSubmitDebugUtilsMessageEXT; #endif /* defined(VK_EXT_debug_utils) */ +#if defined(VK_EXT_depth_bias_control) +PFN_vkCmdSetDepthBias2EXT vkCmdSetDepthBias2EXT; +#endif /* defined(VK_EXT_depth_bias_control) */ +#if defined(VK_EXT_descriptor_buffer) +PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT vkCmdBindDescriptorBufferEmbeddedSamplersEXT; 
+PFN_vkCmdBindDescriptorBuffersEXT vkCmdBindDescriptorBuffersEXT; +PFN_vkCmdSetDescriptorBufferOffsetsEXT vkCmdSetDescriptorBufferOffsetsEXT; +PFN_vkGetBufferOpaqueCaptureDescriptorDataEXT vkGetBufferOpaqueCaptureDescriptorDataEXT; +PFN_vkGetDescriptorEXT vkGetDescriptorEXT; +PFN_vkGetDescriptorSetLayoutBindingOffsetEXT vkGetDescriptorSetLayoutBindingOffsetEXT; +PFN_vkGetDescriptorSetLayoutSizeEXT vkGetDescriptorSetLayoutSizeEXT; +PFN_vkGetImageOpaqueCaptureDescriptorDataEXT vkGetImageOpaqueCaptureDescriptorDataEXT; +PFN_vkGetImageViewOpaqueCaptureDescriptorDataEXT vkGetImageViewOpaqueCaptureDescriptorDataEXT; +PFN_vkGetSamplerOpaqueCaptureDescriptorDataEXT vkGetSamplerOpaqueCaptureDescriptorDataEXT; +#endif /* defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) +PFN_vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT; +#endif /* defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) */ +#if defined(VK_EXT_device_fault) +PFN_vkGetDeviceFaultInfoEXT vkGetDeviceFaultInfoEXT; +#endif /* defined(VK_EXT_device_fault) */ +#if defined(VK_EXT_device_generated_commands) +PFN_vkCmdExecuteGeneratedCommandsEXT vkCmdExecuteGeneratedCommandsEXT; +PFN_vkCmdPreprocessGeneratedCommandsEXT vkCmdPreprocessGeneratedCommandsEXT; +PFN_vkCreateIndirectCommandsLayoutEXT vkCreateIndirectCommandsLayoutEXT; +PFN_vkCreateIndirectExecutionSetEXT vkCreateIndirectExecutionSetEXT; +PFN_vkDestroyIndirectCommandsLayoutEXT vkDestroyIndirectCommandsLayoutEXT; +PFN_vkDestroyIndirectExecutionSetEXT vkDestroyIndirectExecutionSetEXT; +PFN_vkGetGeneratedCommandsMemoryRequirementsEXT vkGetGeneratedCommandsMemoryRequirementsEXT; +PFN_vkUpdateIndirectExecutionSetPipelineEXT vkUpdateIndirectExecutionSetPipelineEXT; +PFN_vkUpdateIndirectExecutionSetShaderEXT vkUpdateIndirectExecutionSetShaderEXT; 
+#endif /* defined(VK_EXT_device_generated_commands) */ #if defined(VK_EXT_direct_mode_display) PFN_vkReleaseDisplayEXT vkReleaseDisplayEXT; #endif /* defined(VK_EXT_direct_mode_display) */ +#if defined(VK_EXT_directfb_surface) +PFN_vkCreateDirectFBSurfaceEXT vkCreateDirectFBSurfaceEXT; +PFN_vkGetPhysicalDeviceDirectFBPresentationSupportEXT vkGetPhysicalDeviceDirectFBPresentationSupportEXT; +#endif /* defined(VK_EXT_directfb_surface) */ #if defined(VK_EXT_discard_rectangles) PFN_vkCmdSetDiscardRectangleEXT vkCmdSetDiscardRectangleEXT; #endif /* defined(VK_EXT_discard_rectangles) */ +#if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 +PFN_vkCmdSetDiscardRectangleEnableEXT vkCmdSetDiscardRectangleEnableEXT; +PFN_vkCmdSetDiscardRectangleModeEXT vkCmdSetDiscardRectangleModeEXT; +#endif /* defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 */ #if defined(VK_EXT_display_control) PFN_vkDisplayPowerControlEXT vkDisplayPowerControlEXT; PFN_vkGetSwapchainCounterEXT vkGetSwapchainCounterEXT; @@ -1227,16 +2640,100 @@ PFN_vkGetPhysicalDeviceSurfaceCapabilities2EXT vkGetPhysicalDeviceSurfaceCapabil #if defined(VK_EXT_external_memory_host) PFN_vkGetMemoryHostPointerPropertiesEXT vkGetMemoryHostPointerPropertiesEXT; #endif /* defined(VK_EXT_external_memory_host) */ +#if defined(VK_EXT_full_screen_exclusive) +PFN_vkAcquireFullScreenExclusiveModeEXT vkAcquireFullScreenExclusiveModeEXT; +PFN_vkGetPhysicalDeviceSurfacePresentModes2EXT vkGetPhysicalDeviceSurfacePresentModes2EXT; +PFN_vkReleaseFullScreenExclusiveModeEXT vkReleaseFullScreenExclusiveModeEXT; +#endif /* defined(VK_EXT_full_screen_exclusive) */ +#if defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) +PFN_vkGetDeviceGroupSurfacePresentModes2EXT vkGetDeviceGroupSurfacePresentModes2EXT; +#endif /* defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) */ #if 
defined(VK_EXT_hdr_metadata) PFN_vkSetHdrMetadataEXT vkSetHdrMetadataEXT; #endif /* defined(VK_EXT_hdr_metadata) */ +#if defined(VK_EXT_headless_surface) +PFN_vkCreateHeadlessSurfaceEXT vkCreateHeadlessSurfaceEXT; +#endif /* defined(VK_EXT_headless_surface) */ +#if defined(VK_EXT_host_image_copy) +PFN_vkCopyImageToImageEXT vkCopyImageToImageEXT; +PFN_vkCopyImageToMemoryEXT vkCopyImageToMemoryEXT; +PFN_vkCopyMemoryToImageEXT vkCopyMemoryToImageEXT; +PFN_vkTransitionImageLayoutEXT vkTransitionImageLayoutEXT; +#endif /* defined(VK_EXT_host_image_copy) */ +#if defined(VK_EXT_host_query_reset) +PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; +#endif /* defined(VK_EXT_host_query_reset) */ #if defined(VK_EXT_image_drm_format_modifier) PFN_vkGetImageDrmFormatModifierPropertiesEXT vkGetImageDrmFormatModifierPropertiesEXT; #endif /* defined(VK_EXT_image_drm_format_modifier) */ +#if defined(VK_EXT_line_rasterization) +PFN_vkCmdSetLineStippleEXT vkCmdSetLineStippleEXT; +#endif /* defined(VK_EXT_line_rasterization) */ +#if defined(VK_EXT_mesh_shader) +PFN_vkCmdDrawMeshTasksEXT vkCmdDrawMeshTasksEXT; +PFN_vkCmdDrawMeshTasksIndirectEXT vkCmdDrawMeshTasksIndirectEXT; +#endif /* defined(VK_EXT_mesh_shader) */ +#if defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) +PFN_vkCmdDrawMeshTasksIndirectCountEXT vkCmdDrawMeshTasksIndirectCountEXT; +#endif /* defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_EXT_metal_objects) +PFN_vkExportMetalObjectsEXT vkExportMetalObjectsEXT; +#endif /* defined(VK_EXT_metal_objects) */ +#if defined(VK_EXT_metal_surface) +PFN_vkCreateMetalSurfaceEXT vkCreateMetalSurfaceEXT; +#endif /* defined(VK_EXT_metal_surface) */ +#if defined(VK_EXT_multi_draw) +PFN_vkCmdDrawMultiEXT vkCmdDrawMultiEXT; +PFN_vkCmdDrawMultiIndexedEXT vkCmdDrawMultiIndexedEXT; +#endif /* defined(VK_EXT_multi_draw) */ +#if defined(VK_EXT_opacity_micromap) +PFN_vkBuildMicromapsEXT 
vkBuildMicromapsEXT; +PFN_vkCmdBuildMicromapsEXT vkCmdBuildMicromapsEXT; +PFN_vkCmdCopyMemoryToMicromapEXT vkCmdCopyMemoryToMicromapEXT; +PFN_vkCmdCopyMicromapEXT vkCmdCopyMicromapEXT; +PFN_vkCmdCopyMicromapToMemoryEXT vkCmdCopyMicromapToMemoryEXT; +PFN_vkCmdWriteMicromapsPropertiesEXT vkCmdWriteMicromapsPropertiesEXT; +PFN_vkCopyMemoryToMicromapEXT vkCopyMemoryToMicromapEXT; +PFN_vkCopyMicromapEXT vkCopyMicromapEXT; +PFN_vkCopyMicromapToMemoryEXT vkCopyMicromapToMemoryEXT; +PFN_vkCreateMicromapEXT vkCreateMicromapEXT; +PFN_vkDestroyMicromapEXT vkDestroyMicromapEXT; +PFN_vkGetDeviceMicromapCompatibilityEXT vkGetDeviceMicromapCompatibilityEXT; +PFN_vkGetMicromapBuildSizesEXT vkGetMicromapBuildSizesEXT; +PFN_vkWriteMicromapsPropertiesEXT vkWriteMicromapsPropertiesEXT; +#endif /* defined(VK_EXT_opacity_micromap) */ +#if defined(VK_EXT_pageable_device_local_memory) +PFN_vkSetDeviceMemoryPriorityEXT vkSetDeviceMemoryPriorityEXT; +#endif /* defined(VK_EXT_pageable_device_local_memory) */ +#if defined(VK_EXT_pipeline_properties) +PFN_vkGetPipelinePropertiesEXT vkGetPipelinePropertiesEXT; +#endif /* defined(VK_EXT_pipeline_properties) */ +#if defined(VK_EXT_private_data) +PFN_vkCreatePrivateDataSlotEXT vkCreatePrivateDataSlotEXT; +PFN_vkDestroyPrivateDataSlotEXT vkDestroyPrivateDataSlotEXT; +PFN_vkGetPrivateDataEXT vkGetPrivateDataEXT; +PFN_vkSetPrivateDataEXT vkSetPrivateDataEXT; +#endif /* defined(VK_EXT_private_data) */ #if defined(VK_EXT_sample_locations) PFN_vkCmdSetSampleLocationsEXT vkCmdSetSampleLocationsEXT; PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT vkGetPhysicalDeviceMultisamplePropertiesEXT; #endif /* defined(VK_EXT_sample_locations) */ +#if defined(VK_EXT_shader_module_identifier) +PFN_vkGetShaderModuleCreateInfoIdentifierEXT vkGetShaderModuleCreateInfoIdentifierEXT; +PFN_vkGetShaderModuleIdentifierEXT vkGetShaderModuleIdentifierEXT; +#endif /* defined(VK_EXT_shader_module_identifier) */ +#if defined(VK_EXT_shader_object) +PFN_vkCmdBindShadersEXT 
vkCmdBindShadersEXT; +PFN_vkCreateShadersEXT vkCreateShadersEXT; +PFN_vkDestroyShaderEXT vkDestroyShaderEXT; +PFN_vkGetShaderBinaryDataEXT vkGetShaderBinaryDataEXT; +#endif /* defined(VK_EXT_shader_object) */ +#if defined(VK_EXT_swapchain_maintenance1) +PFN_vkReleaseSwapchainImagesEXT vkReleaseSwapchainImagesEXT; +#endif /* defined(VK_EXT_swapchain_maintenance1) */ +#if defined(VK_EXT_tooling_info) +PFN_vkGetPhysicalDeviceToolPropertiesEXT vkGetPhysicalDeviceToolPropertiesEXT; +#endif /* defined(VK_EXT_tooling_info) */ #if defined(VK_EXT_transform_feedback) PFN_vkCmdBeginQueryIndexedEXT vkCmdBeginQueryIndexedEXT; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; @@ -1251,13 +2748,73 @@ PFN_vkDestroyValidationCacheEXT vkDestroyValidationCacheEXT; PFN_vkGetValidationCacheDataEXT vkGetValidationCacheDataEXT; PFN_vkMergeValidationCachesEXT vkMergeValidationCachesEXT; #endif /* defined(VK_EXT_validation_cache) */ +#if defined(VK_FUCHSIA_buffer_collection) +PFN_vkCreateBufferCollectionFUCHSIA vkCreateBufferCollectionFUCHSIA; +PFN_vkDestroyBufferCollectionFUCHSIA vkDestroyBufferCollectionFUCHSIA; +PFN_vkGetBufferCollectionPropertiesFUCHSIA vkGetBufferCollectionPropertiesFUCHSIA; +PFN_vkSetBufferCollectionBufferConstraintsFUCHSIA vkSetBufferCollectionBufferConstraintsFUCHSIA; +PFN_vkSetBufferCollectionImageConstraintsFUCHSIA vkSetBufferCollectionImageConstraintsFUCHSIA; +#endif /* defined(VK_FUCHSIA_buffer_collection) */ +#if defined(VK_FUCHSIA_external_memory) +PFN_vkGetMemoryZirconHandleFUCHSIA vkGetMemoryZirconHandleFUCHSIA; +PFN_vkGetMemoryZirconHandlePropertiesFUCHSIA vkGetMemoryZirconHandlePropertiesFUCHSIA; +#endif /* defined(VK_FUCHSIA_external_memory) */ +#if defined(VK_FUCHSIA_external_semaphore) +PFN_vkGetSemaphoreZirconHandleFUCHSIA vkGetSemaphoreZirconHandleFUCHSIA; +PFN_vkImportSemaphoreZirconHandleFUCHSIA vkImportSemaphoreZirconHandleFUCHSIA; +#endif /* defined(VK_FUCHSIA_external_semaphore) */ #if defined(VK_FUCHSIA_imagepipe_surface) 
PFN_vkCreateImagePipeSurfaceFUCHSIA vkCreateImagePipeSurfaceFUCHSIA; #endif /* defined(VK_FUCHSIA_imagepipe_surface) */ +#if defined(VK_GGP_stream_descriptor_surface) +PFN_vkCreateStreamDescriptorSurfaceGGP vkCreateStreamDescriptorSurfaceGGP; +#endif /* defined(VK_GGP_stream_descriptor_surface) */ #if defined(VK_GOOGLE_display_timing) PFN_vkGetPastPresentationTimingGOOGLE vkGetPastPresentationTimingGOOGLE; PFN_vkGetRefreshCycleDurationGOOGLE vkGetRefreshCycleDurationGOOGLE; #endif /* defined(VK_GOOGLE_display_timing) */ +#if defined(VK_HUAWEI_cluster_culling_shader) +PFN_vkCmdDrawClusterHUAWEI vkCmdDrawClusterHUAWEI; +PFN_vkCmdDrawClusterIndirectHUAWEI vkCmdDrawClusterIndirectHUAWEI; +#endif /* defined(VK_HUAWEI_cluster_culling_shader) */ +#if defined(VK_HUAWEI_invocation_mask) +PFN_vkCmdBindInvocationMaskHUAWEI vkCmdBindInvocationMaskHUAWEI; +#endif /* defined(VK_HUAWEI_invocation_mask) */ +#if defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 +PFN_vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI; +#endif /* defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 */ +#if defined(VK_HUAWEI_subpass_shading) +PFN_vkCmdSubpassShadingHUAWEI vkCmdSubpassShadingHUAWEI; +#endif /* defined(VK_HUAWEI_subpass_shading) */ +#if defined(VK_INTEL_performance_query) +PFN_vkAcquirePerformanceConfigurationINTEL vkAcquirePerformanceConfigurationINTEL; +PFN_vkCmdSetPerformanceMarkerINTEL vkCmdSetPerformanceMarkerINTEL; +PFN_vkCmdSetPerformanceOverrideINTEL vkCmdSetPerformanceOverrideINTEL; +PFN_vkCmdSetPerformanceStreamMarkerINTEL vkCmdSetPerformanceStreamMarkerINTEL; +PFN_vkGetPerformanceParameterINTEL vkGetPerformanceParameterINTEL; +PFN_vkInitializePerformanceApiINTEL vkInitializePerformanceApiINTEL; +PFN_vkQueueSetPerformanceConfigurationINTEL vkQueueSetPerformanceConfigurationINTEL; +PFN_vkReleasePerformanceConfigurationINTEL vkReleasePerformanceConfigurationINTEL; 
+PFN_vkUninitializePerformanceApiINTEL vkUninitializePerformanceApiINTEL; +#endif /* defined(VK_INTEL_performance_query) */ +#if defined(VK_KHR_acceleration_structure) +PFN_vkBuildAccelerationStructuresKHR vkBuildAccelerationStructuresKHR; +PFN_vkCmdBuildAccelerationStructuresIndirectKHR vkCmdBuildAccelerationStructuresIndirectKHR; +PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR; +PFN_vkCmdCopyAccelerationStructureKHR vkCmdCopyAccelerationStructureKHR; +PFN_vkCmdCopyAccelerationStructureToMemoryKHR vkCmdCopyAccelerationStructureToMemoryKHR; +PFN_vkCmdCopyMemoryToAccelerationStructureKHR vkCmdCopyMemoryToAccelerationStructureKHR; +PFN_vkCmdWriteAccelerationStructuresPropertiesKHR vkCmdWriteAccelerationStructuresPropertiesKHR; +PFN_vkCopyAccelerationStructureKHR vkCopyAccelerationStructureKHR; +PFN_vkCopyAccelerationStructureToMemoryKHR vkCopyAccelerationStructureToMemoryKHR; +PFN_vkCopyMemoryToAccelerationStructureKHR vkCopyMemoryToAccelerationStructureKHR; +PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR; +PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR; +PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR; +PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR; +PFN_vkGetDeviceAccelerationStructureCompatibilityKHR vkGetDeviceAccelerationStructureCompatibilityKHR; +PFN_vkWriteAccelerationStructuresPropertiesKHR vkWriteAccelerationStructuresPropertiesKHR; +#endif /* defined(VK_KHR_acceleration_structure) */ #if defined(VK_KHR_android_surface) PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; #endif /* defined(VK_KHR_android_surface) */ @@ -1265,12 +2822,39 @@ PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; #endif /* defined(VK_KHR_bind_memory2) */ +#if defined(VK_KHR_buffer_device_address) 
+PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; +PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; +PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; +#endif /* defined(VK_KHR_buffer_device_address) */ +#if defined(VK_KHR_calibrated_timestamps) +PFN_vkGetCalibratedTimestampsKHR vkGetCalibratedTimestampsKHR; +PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsKHR vkGetPhysicalDeviceCalibrateableTimeDomainsKHR; +#endif /* defined(VK_KHR_calibrated_timestamps) */ +#if defined(VK_KHR_cooperative_matrix) +PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR; +#endif /* defined(VK_KHR_cooperative_matrix) */ +#if defined(VK_KHR_copy_commands2) +PFN_vkCmdBlitImage2KHR vkCmdBlitImage2KHR; +PFN_vkCmdCopyBuffer2KHR vkCmdCopyBuffer2KHR; +PFN_vkCmdCopyBufferToImage2KHR vkCmdCopyBufferToImage2KHR; +PFN_vkCmdCopyImage2KHR vkCmdCopyImage2KHR; +PFN_vkCmdCopyImageToBuffer2KHR vkCmdCopyImageToBuffer2KHR; +PFN_vkCmdResolveImage2KHR vkCmdResolveImage2KHR; +#endif /* defined(VK_KHR_copy_commands2) */ #if defined(VK_KHR_create_renderpass2) PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR; PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR; #endif /* defined(VK_KHR_create_renderpass2) */ +#if defined(VK_KHR_deferred_host_operations) +PFN_vkCreateDeferredOperationKHR vkCreateDeferredOperationKHR; +PFN_vkDeferredOperationJoinKHR vkDeferredOperationJoinKHR; +PFN_vkDestroyDeferredOperationKHR vkDestroyDeferredOperationKHR; +PFN_vkGetDeferredOperationMaxConcurrencyKHR vkGetDeferredOperationMaxConcurrencyKHR; +PFN_vkGetDeferredOperationResultKHR vkGetDeferredOperationResultKHR; +#endif /* defined(VK_KHR_deferred_host_operations) */ #if defined(VK_KHR_descriptor_update_template) PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; 
PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; @@ -1300,6 +2884,14 @@ PFN_vkCreateSharedSwapchainsKHR vkCreateSharedSwapchainsKHR; PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR; PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR; #endif /* defined(VK_KHR_draw_indirect_count) */ +#if defined(VK_KHR_dynamic_rendering) +PFN_vkCmdBeginRenderingKHR vkCmdBeginRenderingKHR; +PFN_vkCmdEndRenderingKHR vkCmdEndRenderingKHR; +#endif /* defined(VK_KHR_dynamic_rendering) */ +#if defined(VK_KHR_dynamic_rendering_local_read) +PFN_vkCmdSetRenderingAttachmentLocationsKHR vkCmdSetRenderingAttachmentLocationsKHR; +PFN_vkCmdSetRenderingInputAttachmentIndicesKHR vkCmdSetRenderingInputAttachmentIndicesKHR; +#endif /* defined(VK_KHR_dynamic_rendering_local_read) */ #if defined(VK_KHR_external_fence_capabilities) PFN_vkGetPhysicalDeviceExternalFencePropertiesKHR vkGetPhysicalDeviceExternalFencePropertiesKHR; #endif /* defined(VK_KHR_external_fence_capabilities) */ @@ -1333,6 +2925,10 @@ PFN_vkImportSemaphoreFdKHR vkImportSemaphoreFdKHR; PFN_vkGetSemaphoreWin32HandleKHR vkGetSemaphoreWin32HandleKHR; PFN_vkImportSemaphoreWin32HandleKHR vkImportSemaphoreWin32HandleKHR; #endif /* defined(VK_KHR_external_semaphore_win32) */ +#if defined(VK_KHR_fragment_shading_rate) +PFN_vkCmdSetFragmentShadingRateKHR vkCmdSetFragmentShadingRateKHR; +PFN_vkGetPhysicalDeviceFragmentShadingRatesKHR vkGetPhysicalDeviceFragmentShadingRatesKHR; +#endif /* defined(VK_KHR_fragment_shading_rate) */ #if defined(VK_KHR_get_display_properties2) PFN_vkGetDisplayModeProperties2KHR vkGetDisplayModeProperties2KHR; PFN_vkGetDisplayPlaneCapabilities2KHR vkGetDisplayPlaneCapabilities2KHR; @@ -1357,15 +2953,78 @@ PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparse PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR; PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR; #endif 
/* defined(VK_KHR_get_surface_capabilities2) */ +#if defined(VK_KHR_line_rasterization) +PFN_vkCmdSetLineStippleKHR vkCmdSetLineStippleKHR; +#endif /* defined(VK_KHR_line_rasterization) */ #if defined(VK_KHR_maintenance1) PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; #endif /* defined(VK_KHR_maintenance1) */ #if defined(VK_KHR_maintenance3) PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; #endif /* defined(VK_KHR_maintenance3) */ +#if defined(VK_KHR_maintenance4) +PFN_vkGetDeviceBufferMemoryRequirementsKHR vkGetDeviceBufferMemoryRequirementsKHR; +PFN_vkGetDeviceImageMemoryRequirementsKHR vkGetDeviceImageMemoryRequirementsKHR; +PFN_vkGetDeviceImageSparseMemoryRequirementsKHR vkGetDeviceImageSparseMemoryRequirementsKHR; +#endif /* defined(VK_KHR_maintenance4) */ +#if defined(VK_KHR_maintenance5) +PFN_vkCmdBindIndexBuffer2KHR vkCmdBindIndexBuffer2KHR; +PFN_vkGetDeviceImageSubresourceLayoutKHR vkGetDeviceImageSubresourceLayoutKHR; +PFN_vkGetImageSubresourceLayout2KHR vkGetImageSubresourceLayout2KHR; +PFN_vkGetRenderingAreaGranularityKHR vkGetRenderingAreaGranularityKHR; +#endif /* defined(VK_KHR_maintenance5) */ +#if defined(VK_KHR_maintenance6) +PFN_vkCmdBindDescriptorSets2KHR vkCmdBindDescriptorSets2KHR; +PFN_vkCmdPushConstants2KHR vkCmdPushConstants2KHR; +#endif /* defined(VK_KHR_maintenance6) */ +#if defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) +PFN_vkCmdPushDescriptorSet2KHR vkCmdPushDescriptorSet2KHR; +PFN_vkCmdPushDescriptorSetWithTemplate2KHR vkCmdPushDescriptorSetWithTemplate2KHR; +#endif /* defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) +PFN_vkCmdBindDescriptorBufferEmbeddedSamplers2EXT vkCmdBindDescriptorBufferEmbeddedSamplers2EXT; +PFN_vkCmdSetDescriptorBufferOffsets2EXT vkCmdSetDescriptorBufferOffsets2EXT; +#endif /* defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_KHR_map_memory2) 
+PFN_vkMapMemory2KHR vkMapMemory2KHR; +PFN_vkUnmapMemory2KHR vkUnmapMemory2KHR; +#endif /* defined(VK_KHR_map_memory2) */ +#if defined(VK_KHR_performance_query) +PFN_vkAcquireProfilingLockKHR vkAcquireProfilingLockKHR; +PFN_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR; +PFN_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR; +PFN_vkReleaseProfilingLockKHR vkReleaseProfilingLockKHR; +#endif /* defined(VK_KHR_performance_query) */ +#if defined(VK_KHR_pipeline_binary) +PFN_vkCreatePipelineBinariesKHR vkCreatePipelineBinariesKHR; +PFN_vkDestroyPipelineBinaryKHR vkDestroyPipelineBinaryKHR; +PFN_vkGetPipelineBinaryDataKHR vkGetPipelineBinaryDataKHR; +PFN_vkGetPipelineKeyKHR vkGetPipelineKeyKHR; +PFN_vkReleaseCapturedPipelineDataKHR vkReleaseCapturedPipelineDataKHR; +#endif /* defined(VK_KHR_pipeline_binary) */ +#if defined(VK_KHR_pipeline_executable_properties) +PFN_vkGetPipelineExecutableInternalRepresentationsKHR vkGetPipelineExecutableInternalRepresentationsKHR; +PFN_vkGetPipelineExecutablePropertiesKHR vkGetPipelineExecutablePropertiesKHR; +PFN_vkGetPipelineExecutableStatisticsKHR vkGetPipelineExecutableStatisticsKHR; +#endif /* defined(VK_KHR_pipeline_executable_properties) */ +#if defined(VK_KHR_present_wait) +PFN_vkWaitForPresentKHR vkWaitForPresentKHR; +#endif /* defined(VK_KHR_present_wait) */ #if defined(VK_KHR_push_descriptor) PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; #endif /* defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) +PFN_vkCmdTraceRaysIndirect2KHR vkCmdTraceRaysIndirect2KHR; +#endif /* defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) */ +#if defined(VK_KHR_ray_tracing_pipeline) +PFN_vkCmdSetRayTracingPipelineStackSizeKHR vkCmdSetRayTracingPipelineStackSizeKHR; +PFN_vkCmdTraceRaysIndirectKHR 
vkCmdTraceRaysIndirectKHR; +PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR; +PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR; +PFN_vkGetRayTracingCaptureReplayShaderGroupHandlesKHR vkGetRayTracingCaptureReplayShaderGroupHandlesKHR; +PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR; +PFN_vkGetRayTracingShaderGroupStackSizeKHR vkGetRayTracingShaderGroupStackSizeKHR; +#endif /* defined(VK_KHR_ray_tracing_pipeline) */ #if defined(VK_KHR_sampler_ycbcr_conversion) PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; @@ -1387,6 +3046,41 @@ PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; PFN_vkQueuePresentKHR vkQueuePresentKHR; #endif /* defined(VK_KHR_swapchain) */ +#if defined(VK_KHR_synchronization2) +PFN_vkCmdPipelineBarrier2KHR vkCmdPipelineBarrier2KHR; +PFN_vkCmdResetEvent2KHR vkCmdResetEvent2KHR; +PFN_vkCmdSetEvent2KHR vkCmdSetEvent2KHR; +PFN_vkCmdWaitEvents2KHR vkCmdWaitEvents2KHR; +PFN_vkCmdWriteTimestamp2KHR vkCmdWriteTimestamp2KHR; +PFN_vkQueueSubmit2KHR vkQueueSubmit2KHR; +#endif /* defined(VK_KHR_synchronization2) */ +#if defined(VK_KHR_timeline_semaphore) +PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR; +PFN_vkSignalSemaphoreKHR vkSignalSemaphoreKHR; +PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR; +#endif /* defined(VK_KHR_timeline_semaphore) */ +#if defined(VK_KHR_video_decode_queue) +PFN_vkCmdDecodeVideoKHR vkCmdDecodeVideoKHR; +#endif /* defined(VK_KHR_video_decode_queue) */ +#if defined(VK_KHR_video_encode_queue) +PFN_vkCmdEncodeVideoKHR vkCmdEncodeVideoKHR; +PFN_vkGetEncodedVideoSessionParametersKHR vkGetEncodedVideoSessionParametersKHR; +PFN_vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR; +#endif /* defined(VK_KHR_video_encode_queue) */ +#if defined(VK_KHR_video_queue) 
+PFN_vkBindVideoSessionMemoryKHR vkBindVideoSessionMemoryKHR; +PFN_vkCmdBeginVideoCodingKHR vkCmdBeginVideoCodingKHR; +PFN_vkCmdControlVideoCodingKHR vkCmdControlVideoCodingKHR; +PFN_vkCmdEndVideoCodingKHR vkCmdEndVideoCodingKHR; +PFN_vkCreateVideoSessionKHR vkCreateVideoSessionKHR; +PFN_vkCreateVideoSessionParametersKHR vkCreateVideoSessionParametersKHR; +PFN_vkDestroyVideoSessionKHR vkDestroyVideoSessionKHR; +PFN_vkDestroyVideoSessionParametersKHR vkDestroyVideoSessionParametersKHR; +PFN_vkGetPhysicalDeviceVideoCapabilitiesKHR vkGetPhysicalDeviceVideoCapabilitiesKHR; +PFN_vkGetPhysicalDeviceVideoFormatPropertiesKHR vkGetPhysicalDeviceVideoFormatPropertiesKHR; +PFN_vkGetVideoSessionMemoryRequirementsKHR vkGetVideoSessionMemoryRequirementsKHR; +PFN_vkUpdateVideoSessionParametersKHR vkUpdateVideoSessionParametersKHR; +#endif /* defined(VK_KHR_video_queue) */ #if defined(VK_KHR_wayland_surface) PFN_vkCreateWaylandSurfaceKHR vkCreateWaylandSurfaceKHR; PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR vkGetPhysicalDeviceWaylandPresentationSupportKHR; @@ -1412,35 +3106,107 @@ PFN_vkCreateMacOSSurfaceMVK vkCreateMacOSSurfaceMVK; #if defined(VK_NN_vi_surface) PFN_vkCreateViSurfaceNN vkCreateViSurfaceNN; #endif /* defined(VK_NN_vi_surface) */ -#if defined(VK_NVX_device_generated_commands) -PFN_vkCmdProcessCommandsNVX vkCmdProcessCommandsNVX; -PFN_vkCmdReserveSpaceForCommandsNVX vkCmdReserveSpaceForCommandsNVX; -PFN_vkCreateIndirectCommandsLayoutNVX vkCreateIndirectCommandsLayoutNVX; -PFN_vkCreateObjectTableNVX vkCreateObjectTableNVX; -PFN_vkDestroyIndirectCommandsLayoutNVX vkDestroyIndirectCommandsLayoutNVX; -PFN_vkDestroyObjectTableNVX vkDestroyObjectTableNVX; -PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX; -PFN_vkRegisterObjectsNVX vkRegisterObjectsNVX; -PFN_vkUnregisterObjectsNVX vkUnregisterObjectsNVX; -#endif /* defined(VK_NVX_device_generated_commands) */ +#if defined(VK_NVX_binary_import) 
+PFN_vkCmdCuLaunchKernelNVX vkCmdCuLaunchKernelNVX; +PFN_vkCreateCuFunctionNVX vkCreateCuFunctionNVX; +PFN_vkCreateCuModuleNVX vkCreateCuModuleNVX; +PFN_vkDestroyCuFunctionNVX vkDestroyCuFunctionNVX; +PFN_vkDestroyCuModuleNVX vkDestroyCuModuleNVX; +#endif /* defined(VK_NVX_binary_import) */ +#if defined(VK_NVX_image_view_handle) +PFN_vkGetImageViewHandleNVX vkGetImageViewHandleNVX; +#endif /* defined(VK_NVX_image_view_handle) */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 +PFN_vkGetImageViewHandle64NVX vkGetImageViewHandle64NVX; +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 +PFN_vkGetImageViewAddressNVX vkGetImageViewAddressNVX; +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 */ +#if defined(VK_NV_acquire_winrt_display) +PFN_vkAcquireWinrtDisplayNV vkAcquireWinrtDisplayNV; +PFN_vkGetWinrtDisplayNV vkGetWinrtDisplayNV; +#endif /* defined(VK_NV_acquire_winrt_display) */ #if defined(VK_NV_clip_space_w_scaling) PFN_vkCmdSetViewportWScalingNV vkCmdSetViewportWScalingNV; #endif /* defined(VK_NV_clip_space_w_scaling) */ +#if defined(VK_NV_cooperative_matrix) +PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV; +#endif /* defined(VK_NV_cooperative_matrix) */ +#if defined(VK_NV_cooperative_matrix2) +PFN_vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV; +#endif /* defined(VK_NV_cooperative_matrix2) */ +#if defined(VK_NV_copy_memory_indirect) +PFN_vkCmdCopyMemoryIndirectNV vkCmdCopyMemoryIndirectNV; +PFN_vkCmdCopyMemoryToImageIndirectNV vkCmdCopyMemoryToImageIndirectNV; +#endif /* defined(VK_NV_copy_memory_indirect) */ +#if defined(VK_NV_coverage_reduction_mode) 
+PFN_vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV; +#endif /* defined(VK_NV_coverage_reduction_mode) */ +#if defined(VK_NV_cuda_kernel_launch) +PFN_vkCmdCudaLaunchKernelNV vkCmdCudaLaunchKernelNV; +PFN_vkCreateCudaFunctionNV vkCreateCudaFunctionNV; +PFN_vkCreateCudaModuleNV vkCreateCudaModuleNV; +PFN_vkDestroyCudaFunctionNV vkDestroyCudaFunctionNV; +PFN_vkDestroyCudaModuleNV vkDestroyCudaModuleNV; +PFN_vkGetCudaModuleCacheNV vkGetCudaModuleCacheNV; +#endif /* defined(VK_NV_cuda_kernel_launch) */ #if defined(VK_NV_device_diagnostic_checkpoints) PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV; PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV; #endif /* defined(VK_NV_device_diagnostic_checkpoints) */ +#if defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) +PFN_vkGetQueueCheckpointData2NV vkGetQueueCheckpointData2NV; +#endif /* defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_NV_device_generated_commands) +PFN_vkCmdBindPipelineShaderGroupNV vkCmdBindPipelineShaderGroupNV; +PFN_vkCmdExecuteGeneratedCommandsNV vkCmdExecuteGeneratedCommandsNV; +PFN_vkCmdPreprocessGeneratedCommandsNV vkCmdPreprocessGeneratedCommandsNV; +PFN_vkCreateIndirectCommandsLayoutNV vkCreateIndirectCommandsLayoutNV; +PFN_vkDestroyIndirectCommandsLayoutNV vkDestroyIndirectCommandsLayoutNV; +PFN_vkGetGeneratedCommandsMemoryRequirementsNV vkGetGeneratedCommandsMemoryRequirementsNV; +#endif /* defined(VK_NV_device_generated_commands) */ +#if defined(VK_NV_device_generated_commands_compute) +PFN_vkCmdUpdatePipelineIndirectBufferNV vkCmdUpdatePipelineIndirectBufferNV; +PFN_vkGetPipelineIndirectDeviceAddressNV vkGetPipelineIndirectDeviceAddressNV; +PFN_vkGetPipelineIndirectMemoryRequirementsNV vkGetPipelineIndirectMemoryRequirementsNV; +#endif /* 
defined(VK_NV_device_generated_commands_compute) */ #if defined(VK_NV_external_memory_capabilities) PFN_vkGetPhysicalDeviceExternalImageFormatPropertiesNV vkGetPhysicalDeviceExternalImageFormatPropertiesNV; #endif /* defined(VK_NV_external_memory_capabilities) */ +#if defined(VK_NV_external_memory_rdma) +PFN_vkGetMemoryRemoteAddressNV vkGetMemoryRemoteAddressNV; +#endif /* defined(VK_NV_external_memory_rdma) */ #if defined(VK_NV_external_memory_win32) PFN_vkGetMemoryWin32HandleNV vkGetMemoryWin32HandleNV; #endif /* defined(VK_NV_external_memory_win32) */ +#if defined(VK_NV_fragment_shading_rate_enums) +PFN_vkCmdSetFragmentShadingRateEnumNV vkCmdSetFragmentShadingRateEnumNV; +#endif /* defined(VK_NV_fragment_shading_rate_enums) */ +#if defined(VK_NV_low_latency2) +PFN_vkGetLatencyTimingsNV vkGetLatencyTimingsNV; +PFN_vkLatencySleepNV vkLatencySleepNV; +PFN_vkQueueNotifyOutOfBandNV vkQueueNotifyOutOfBandNV; +PFN_vkSetLatencyMarkerNV vkSetLatencyMarkerNV; +PFN_vkSetLatencySleepModeNV vkSetLatencySleepModeNV; +#endif /* defined(VK_NV_low_latency2) */ +#if defined(VK_NV_memory_decompression) +PFN_vkCmdDecompressMemoryIndirectCountNV vkCmdDecompressMemoryIndirectCountNV; +PFN_vkCmdDecompressMemoryNV vkCmdDecompressMemoryNV; +#endif /* defined(VK_NV_memory_decompression) */ #if defined(VK_NV_mesh_shader) -PFN_vkCmdDrawMeshTasksIndirectCountNV vkCmdDrawMeshTasksIndirectCountNV; PFN_vkCmdDrawMeshTasksIndirectNV vkCmdDrawMeshTasksIndirectNV; PFN_vkCmdDrawMeshTasksNV vkCmdDrawMeshTasksNV; #endif /* defined(VK_NV_mesh_shader) */ +#if defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) +PFN_vkCmdDrawMeshTasksIndirectCountNV vkCmdDrawMeshTasksIndirectCountNV; +#endif /* defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_NV_optical_flow) +PFN_vkBindOpticalFlowSessionImageNV vkBindOpticalFlowSessionImageNV; +PFN_vkCmdOpticalFlowExecuteNV vkCmdOpticalFlowExecuteNV; 
+PFN_vkCreateOpticalFlowSessionNV vkCreateOpticalFlowSessionNV; +PFN_vkDestroyOpticalFlowSessionNV vkDestroyOpticalFlowSessionNV; +PFN_vkGetPhysicalDeviceOpticalFlowImageFormatsNV vkGetPhysicalDeviceOpticalFlowImageFormatsNV; +#endif /* defined(VK_NV_optical_flow) */ #if defined(VK_NV_ray_tracing) PFN_vkBindAccelerationStructureMemoryNV vkBindAccelerationStructureMemoryNV; PFN_vkCmdBuildAccelerationStructureNV vkCmdBuildAccelerationStructureNV; @@ -1455,6 +3221,9 @@ PFN_vkGetAccelerationStructureHandleNV vkGetAccelerationStructureHandleNV; PFN_vkGetAccelerationStructureMemoryRequirementsNV vkGetAccelerationStructureMemoryRequirementsNV; PFN_vkGetRayTracingShaderGroupHandlesNV vkGetRayTracingShaderGroupHandlesNV; #endif /* defined(VK_NV_ray_tracing) */ +#if defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 +PFN_vkCmdSetExclusiveScissorEnableNV vkCmdSetExclusiveScissorEnableNV; +#endif /* defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 */ #if defined(VK_NV_scissor_exclusive) PFN_vkCmdSetExclusiveScissorNV vkCmdSetExclusiveScissorNV; #endif /* defined(VK_NV_scissor_exclusive) */ @@ -1463,9 +3232,119 @@ PFN_vkCmdBindShadingRateImageNV vkCmdBindShadingRateImageNV; PFN_vkCmdSetCoarseSampleOrderNV vkCmdSetCoarseSampleOrderNV; PFN_vkCmdSetViewportShadingRatePaletteNV vkCmdSetViewportShadingRatePaletteNV; #endif /* defined(VK_NV_shading_rate_image) */ -#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) +#if defined(VK_QCOM_tile_properties) +PFN_vkGetDynamicRenderingTilePropertiesQCOM vkGetDynamicRenderingTilePropertiesQCOM; +PFN_vkGetFramebufferTilePropertiesQCOM vkGetFramebufferTilePropertiesQCOM; +#endif /* defined(VK_QCOM_tile_properties) */ +#if defined(VK_QNX_external_memory_screen_buffer) +PFN_vkGetScreenBufferPropertiesQNX vkGetScreenBufferPropertiesQNX; +#endif /* defined(VK_QNX_external_memory_screen_buffer) */ 
+#if defined(VK_QNX_screen_surface) +PFN_vkCreateScreenSurfaceQNX vkCreateScreenSurfaceQNX; +PFN_vkGetPhysicalDeviceScreenPresentationSupportQNX vkGetPhysicalDeviceScreenPresentationSupportQNX; +#endif /* defined(VK_QNX_screen_surface) */ +#if defined(VK_VALVE_descriptor_set_host_mapping) +PFN_vkGetDescriptorSetHostMappingVALVE vkGetDescriptorSetHostMappingVALVE; +PFN_vkGetDescriptorSetLayoutHostMappingInfoVALVE vkGetDescriptorSetLayoutHostMappingInfoVALVE; +#endif /* defined(VK_VALVE_descriptor_set_host_mapping) */ +#if (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) +PFN_vkCmdSetDepthClampRangeEXT vkCmdSetDepthClampRangeEXT; +#endif /* (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) */ +#if (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) +PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; +PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; +PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; +PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; +PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; +PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; +PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; +PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; +PFN_vkCmdSetScissorWithCountEXT vkCmdSetScissorWithCountEXT; +PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; +PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; +PFN_vkCmdSetViewportWithCountEXT vkCmdSetViewportWithCountEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) +PFN_vkCmdSetDepthBiasEnableEXT vkCmdSetDepthBiasEnableEXT; +PFN_vkCmdSetLogicOpEXT vkCmdSetLogicOpEXT; +PFN_vkCmdSetPatchControlPointsEXT vkCmdSetPatchControlPointsEXT; +PFN_vkCmdSetPrimitiveRestartEnableEXT 
vkCmdSetPrimitiveRestartEnableEXT; +PFN_vkCmdSetRasterizerDiscardEnableEXT vkCmdSetRasterizerDiscardEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) +PFN_vkCmdSetAlphaToCoverageEnableEXT vkCmdSetAlphaToCoverageEnableEXT; +PFN_vkCmdSetAlphaToOneEnableEXT vkCmdSetAlphaToOneEnableEXT; +PFN_vkCmdSetColorBlendEnableEXT vkCmdSetColorBlendEnableEXT; +PFN_vkCmdSetColorBlendEquationEXT vkCmdSetColorBlendEquationEXT; +PFN_vkCmdSetColorWriteMaskEXT vkCmdSetColorWriteMaskEXT; +PFN_vkCmdSetDepthClampEnableEXT vkCmdSetDepthClampEnableEXT; +PFN_vkCmdSetLogicOpEnableEXT vkCmdSetLogicOpEnableEXT; +PFN_vkCmdSetPolygonModeEXT vkCmdSetPolygonModeEXT; +PFN_vkCmdSetRasterizationSamplesEXT vkCmdSetRasterizationSamplesEXT; +PFN_vkCmdSetSampleMaskEXT vkCmdSetSampleMaskEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) +PFN_vkCmdSetTessellationDomainOriginEXT vkCmdSetTessellationDomainOriginEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) +PFN_vkCmdSetRasterizationStreamEXT vkCmdSetRasterizationStreamEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) +PFN_vkCmdSetConservativeRasterizationModeEXT 
vkCmdSetConservativeRasterizationModeEXT; +PFN_vkCmdSetExtraPrimitiveOverestimationSizeEXT vkCmdSetExtraPrimitiveOverestimationSizeEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) +PFN_vkCmdSetDepthClipEnableEXT vkCmdSetDepthClipEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) +PFN_vkCmdSetSampleLocationsEnableEXT vkCmdSetSampleLocationsEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) +PFN_vkCmdSetColorBlendAdvancedEXT vkCmdSetColorBlendAdvancedEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) +PFN_vkCmdSetProvokingVertexModeEXT vkCmdSetProvokingVertexModeEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || 
(defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) +PFN_vkCmdSetLineRasterizationModeEXT vkCmdSetLineRasterizationModeEXT; +PFN_vkCmdSetLineStippleEnableEXT vkCmdSetLineStippleEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) +PFN_vkCmdSetDepthClipNegativeOneToOneEXT vkCmdSetDepthClipNegativeOneToOneEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) +PFN_vkCmdSetViewportWScalingEnableNV vkCmdSetViewportWScalingEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) +PFN_vkCmdSetViewportSwizzleNV vkCmdSetViewportSwizzleNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) +PFN_vkCmdSetCoverageToColorEnableNV vkCmdSetCoverageToColorEnableNV; +PFN_vkCmdSetCoverageToColorLocationNV vkCmdSetCoverageToColorLocationNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) 
&& defined(VK_NV_fragment_coverage_to_color)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) +PFN_vkCmdSetCoverageModulationModeNV vkCmdSetCoverageModulationModeNV; +PFN_vkCmdSetCoverageModulationTableEnableNV vkCmdSetCoverageModulationTableEnableNV; +PFN_vkCmdSetCoverageModulationTableNV vkCmdSetCoverageModulationTableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) +PFN_vkCmdSetShadingRateImageEnableNV vkCmdSetShadingRateImageEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) +PFN_vkCmdSetRepresentativeFragmentTestEnableNV vkCmdSetRepresentativeFragmentTestEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) +PFN_vkCmdSetCoverageReductionModeNV vkCmdSetCoverageReductionModeNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) */ +#if (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) 
+PFN_vkGetImageSubresourceLayout2EXT vkGetImageSubresourceLayout2EXT; +#endif /* (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) */ +#if (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) +PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT; +#endif /* (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) */ +#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; -#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) */ +#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) */ #if (defined(VK_KHR_device_group) && defined(VK_KHR_surface)) || (defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)) PFN_vkGetDeviceGroupPresentCapabilitiesKHR vkGetDeviceGroupPresentCapabilitiesKHR; PFN_vkGetDeviceGroupSurfacePresentModesKHR vkGetDeviceGroupSurfacePresentModesKHR; @@ -1483,3 +3362,4 @@ PFN_vkAcquireNextImage2KHR vkAcquireNextImage2KHR; #ifdef __cplusplus } #endif +/* clang-format on */ diff --git a/Source/ThirdParty/volk/volk.h b/Source/ThirdParty/volk/volk.h index 8909b2fe8..20a5bb4b6 100644 --- a/Source/ThirdParty/volk/volk.h +++ b/Source/ThirdParty/volk/volk.h @@ -1,11 +1,12 @@ /** * volk * - * Copyright (C) 2018-2019, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2018-2024, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/volk * * This library is distributed under the MIT License. See notice at the end of this file. 
*/ +/* clang-format off */ #ifndef VOLK_H_ #define VOLK_H_ @@ -13,16 +14,41 @@ # error To use volk, you need to define VK_NO_PROTOTYPES before including vulkan.h #endif -/* VOLK_GENERATE_VERSION */ -#define VOLK_HEADER_VERSION 97 -/* VOLK_GENERATE_VERSION */ +/* VOLK_GENERATE_VERSION_DEFINE */ +#define VOLK_HEADER_VERSION 304 +/* VOLK_GENERATE_VERSION_DEFINE */ #ifndef VK_NO_PROTOTYPES # define VK_NO_PROTOTYPES #endif #ifndef VULKAN_H_ -# include +# ifdef VOLK_VULKAN_H_PATH +# include VOLK_VULKAN_H_PATH +# elif defined(VK_USE_PLATFORM_WIN32_KHR) +# include +# include + + /* When VK_USE_PLATFORM_WIN32_KHR is defined, instead of including vulkan.h directly, we include individual parts of the SDK + * This is necessary to avoid including which is very heavy - it takes 200ms to parse without WIN32_LEAN_AND_MEAN + * and 100ms to parse with it. vulkan_win32.h only needs a few symbols that are easy to redefine ourselves. + */ + typedef unsigned long DWORD; + typedef const wchar_t* LPCWSTR; + typedef void* HANDLE; + typedef struct HINSTANCE__* HINSTANCE; + typedef struct HWND__* HWND; + typedef struct HMONITOR__* HMONITOR; + typedef struct _SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES; + +# include + +# ifdef VK_ENABLE_BETA_EXTENSIONS +# include +# endif +# else +# include +# endif #endif #ifdef __cplusplus @@ -47,6 +73,14 @@ VkResult volkInitialize(void); */ void volkInitializeCustom(PFN_vkGetInstanceProcAddr handler); +/** + * Finalize library by unloading Vulkan loader and resetting global symbols to NULL. + * + * This function does not need to be called on process exit (as loader will be unloaded automatically) or if volkInitialize failed. + * In general this function is optional to call but may be useful in rare cases eg if volk needs to be reinitialized multiple times. 
+ */ +void volkFinalize(void); + /** * Get Vulkan instance version supported by the Vulkan loader, or 0 if Vulkan isn't supported * @@ -59,6 +93,12 @@ uint32_t volkGetInstanceVersion(void); */ void volkLoadInstance(VkInstance instance); +/** + * Load global function pointers using application-created VkInstance; call this function after creating the Vulkan instance. + * Skips loading device-based function pointers, requires usage of volkLoadDevice afterwards. + */ +void volkLoadInstanceOnly(VkInstance instance); + /** * Load global function pointers using application-created VkDevice; call this function after creating the Vulkan device. * @@ -66,6 +106,18 @@ void volkLoadInstance(VkInstance instance); */ void volkLoadDevice(VkDevice device); +/** + * Return last VkInstance for which global function pointers have been loaded via volkLoadInstance(), + * or VK_NULL_HANDLE if volkLoadInstance() has not been called. + */ +VkInstance volkGetLoadedInstance(void); + +/** + * Return last VkDevice for which global function pointers have been loaded via volkLoadDevice(), + * or VK_NULL_HANDLE if volkLoadDevice() has not been called. + */ +VkDevice volkGetLoadedDevice(void); + /** * Load function pointers using application-created VkDevice into a table. * Application should use function pointers from that table instead of using global function pointers. 
@@ -218,9 +270,101 @@ struct VolkDeviceTable PFN_vkTrimCommandPool vkTrimCommandPool; PFN_vkUpdateDescriptorSetWithTemplate vkUpdateDescriptorSetWithTemplate; #endif /* defined(VK_VERSION_1_1) */ +#if defined(VK_VERSION_1_2) + PFN_vkCmdBeginRenderPass2 vkCmdBeginRenderPass2; + PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount; + PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount; + PFN_vkCmdEndRenderPass2 vkCmdEndRenderPass2; + PFN_vkCmdNextSubpass2 vkCmdNextSubpass2; + PFN_vkCreateRenderPass2 vkCreateRenderPass2; + PFN_vkGetBufferDeviceAddress vkGetBufferDeviceAddress; + PFN_vkGetBufferOpaqueCaptureAddress vkGetBufferOpaqueCaptureAddress; + PFN_vkGetDeviceMemoryOpaqueCaptureAddress vkGetDeviceMemoryOpaqueCaptureAddress; + PFN_vkGetSemaphoreCounterValue vkGetSemaphoreCounterValue; + PFN_vkResetQueryPool vkResetQueryPool; + PFN_vkSignalSemaphore vkSignalSemaphore; + PFN_vkWaitSemaphores vkWaitSemaphores; +#endif /* defined(VK_VERSION_1_2) */ +#if defined(VK_VERSION_1_3) + PFN_vkCmdBeginRendering vkCmdBeginRendering; + PFN_vkCmdBindVertexBuffers2 vkCmdBindVertexBuffers2; + PFN_vkCmdBlitImage2 vkCmdBlitImage2; + PFN_vkCmdCopyBuffer2 vkCmdCopyBuffer2; + PFN_vkCmdCopyBufferToImage2 vkCmdCopyBufferToImage2; + PFN_vkCmdCopyImage2 vkCmdCopyImage2; + PFN_vkCmdCopyImageToBuffer2 vkCmdCopyImageToBuffer2; + PFN_vkCmdEndRendering vkCmdEndRendering; + PFN_vkCmdPipelineBarrier2 vkCmdPipelineBarrier2; + PFN_vkCmdResetEvent2 vkCmdResetEvent2; + PFN_vkCmdResolveImage2 vkCmdResolveImage2; + PFN_vkCmdSetCullMode vkCmdSetCullMode; + PFN_vkCmdSetDepthBiasEnable vkCmdSetDepthBiasEnable; + PFN_vkCmdSetDepthBoundsTestEnable vkCmdSetDepthBoundsTestEnable; + PFN_vkCmdSetDepthCompareOp vkCmdSetDepthCompareOp; + PFN_vkCmdSetDepthTestEnable vkCmdSetDepthTestEnable; + PFN_vkCmdSetDepthWriteEnable vkCmdSetDepthWriteEnable; + PFN_vkCmdSetEvent2 vkCmdSetEvent2; + PFN_vkCmdSetFrontFace vkCmdSetFrontFace; + PFN_vkCmdSetPrimitiveRestartEnable vkCmdSetPrimitiveRestartEnable; + 
PFN_vkCmdSetPrimitiveTopology vkCmdSetPrimitiveTopology; + PFN_vkCmdSetRasterizerDiscardEnable vkCmdSetRasterizerDiscardEnable; + PFN_vkCmdSetScissorWithCount vkCmdSetScissorWithCount; + PFN_vkCmdSetStencilOp vkCmdSetStencilOp; + PFN_vkCmdSetStencilTestEnable vkCmdSetStencilTestEnable; + PFN_vkCmdSetViewportWithCount vkCmdSetViewportWithCount; + PFN_vkCmdWaitEvents2 vkCmdWaitEvents2; + PFN_vkCmdWriteTimestamp2 vkCmdWriteTimestamp2; + PFN_vkCreatePrivateDataSlot vkCreatePrivateDataSlot; + PFN_vkDestroyPrivateDataSlot vkDestroyPrivateDataSlot; + PFN_vkGetDeviceBufferMemoryRequirements vkGetDeviceBufferMemoryRequirements; + PFN_vkGetDeviceImageMemoryRequirements vkGetDeviceImageMemoryRequirements; + PFN_vkGetDeviceImageSparseMemoryRequirements vkGetDeviceImageSparseMemoryRequirements; + PFN_vkGetPrivateData vkGetPrivateData; + PFN_vkQueueSubmit2 vkQueueSubmit2; + PFN_vkSetPrivateData vkSetPrivateData; +#endif /* defined(VK_VERSION_1_3) */ +#if defined(VK_VERSION_1_4) + PFN_vkCmdBindDescriptorSets2 vkCmdBindDescriptorSets2; + PFN_vkCmdBindIndexBuffer2 vkCmdBindIndexBuffer2; + PFN_vkCmdPushConstants2 vkCmdPushConstants2; + PFN_vkCmdPushDescriptorSet vkCmdPushDescriptorSet; + PFN_vkCmdPushDescriptorSet2 vkCmdPushDescriptorSet2; + PFN_vkCmdPushDescriptorSetWithTemplate vkCmdPushDescriptorSetWithTemplate; + PFN_vkCmdPushDescriptorSetWithTemplate2 vkCmdPushDescriptorSetWithTemplate2; + PFN_vkCmdSetLineStipple vkCmdSetLineStipple; + PFN_vkCmdSetRenderingAttachmentLocations vkCmdSetRenderingAttachmentLocations; + PFN_vkCmdSetRenderingInputAttachmentIndices vkCmdSetRenderingInputAttachmentIndices; + PFN_vkCopyImageToImage vkCopyImageToImage; + PFN_vkCopyImageToMemory vkCopyImageToMemory; + PFN_vkCopyMemoryToImage vkCopyMemoryToImage; + PFN_vkGetDeviceImageSubresourceLayout vkGetDeviceImageSubresourceLayout; + PFN_vkGetImageSubresourceLayout2 vkGetImageSubresourceLayout2; + PFN_vkGetRenderingAreaGranularity vkGetRenderingAreaGranularity; + PFN_vkMapMemory2 vkMapMemory2; + 
PFN_vkTransitionImageLayout vkTransitionImageLayout; + PFN_vkUnmapMemory2 vkUnmapMemory2; +#endif /* defined(VK_VERSION_1_4) */ +#if defined(VK_AMDX_shader_enqueue) + PFN_vkCmdDispatchGraphAMDX vkCmdDispatchGraphAMDX; + PFN_vkCmdDispatchGraphIndirectAMDX vkCmdDispatchGraphIndirectAMDX; + PFN_vkCmdDispatchGraphIndirectCountAMDX vkCmdDispatchGraphIndirectCountAMDX; + PFN_vkCmdInitializeGraphScratchMemoryAMDX vkCmdInitializeGraphScratchMemoryAMDX; + PFN_vkCreateExecutionGraphPipelinesAMDX vkCreateExecutionGraphPipelinesAMDX; + PFN_vkGetExecutionGraphPipelineNodeIndexAMDX vkGetExecutionGraphPipelineNodeIndexAMDX; + PFN_vkGetExecutionGraphPipelineScratchSizeAMDX vkGetExecutionGraphPipelineScratchSizeAMDX; +#endif /* defined(VK_AMDX_shader_enqueue) */ +#if defined(VK_AMD_anti_lag) + PFN_vkAntiLagUpdateAMD vkAntiLagUpdateAMD; +#endif /* defined(VK_AMD_anti_lag) */ #if defined(VK_AMD_buffer_marker) PFN_vkCmdWriteBufferMarkerAMD vkCmdWriteBufferMarkerAMD; #endif /* defined(VK_AMD_buffer_marker) */ +#if defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) + PFN_vkCmdWriteBufferMarker2AMD vkCmdWriteBufferMarker2AMD; +#endif /* defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_AMD_display_native_hdr) + PFN_vkSetLocalDimmingAMD vkSetLocalDimmingAMD; +#endif /* defined(VK_AMD_display_native_hdr) */ #if defined(VK_AMD_draw_indirect_count) PFN_vkCmdDrawIndexedIndirectCountAMD vkCmdDrawIndexedIndirectCountAMD; PFN_vkCmdDrawIndirectCountAMD vkCmdDrawIndirectCountAMD; @@ -232,12 +376,18 @@ struct VolkDeviceTable PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; #endif /* defined(VK_ANDROID_external_memory_android_hardware_buffer) */ +#if defined(VK_EXT_attachment_feedback_loop_dynamic_state) + PFN_vkCmdSetAttachmentFeedbackLoopEnableEXT 
vkCmdSetAttachmentFeedbackLoopEnableEXT; +#endif /* defined(VK_EXT_attachment_feedback_loop_dynamic_state) */ #if defined(VK_EXT_buffer_device_address) PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; #endif /* defined(VK_EXT_buffer_device_address) */ #if defined(VK_EXT_calibrated_timestamps) PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT; #endif /* defined(VK_EXT_calibrated_timestamps) */ +#if defined(VK_EXT_color_write_enable) + PFN_vkCmdSetColorWriteEnableEXT vkCmdSetColorWriteEnableEXT; +#endif /* defined(VK_EXT_color_write_enable) */ #if defined(VK_EXT_conditional_rendering) PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT; PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT; @@ -249,19 +399,45 @@ struct VolkDeviceTable PFN_vkDebugMarkerSetObjectNameEXT vkDebugMarkerSetObjectNameEXT; PFN_vkDebugMarkerSetObjectTagEXT vkDebugMarkerSetObjectTagEXT; #endif /* defined(VK_EXT_debug_marker) */ -#if defined(VK_EXT_debug_utils) - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; - PFN_vkCmdInsertDebugUtilsLabelEXT vkCmdInsertDebugUtilsLabelEXT; - PFN_vkQueueBeginDebugUtilsLabelEXT vkQueueBeginDebugUtilsLabelEXT; - PFN_vkQueueEndDebugUtilsLabelEXT vkQueueEndDebugUtilsLabelEXT; - PFN_vkQueueInsertDebugUtilsLabelEXT vkQueueInsertDebugUtilsLabelEXT; - PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; - PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; -#endif /* defined(VK_EXT_debug_utils) */ +#if defined(VK_EXT_depth_bias_control) + PFN_vkCmdSetDepthBias2EXT vkCmdSetDepthBias2EXT; +#endif /* defined(VK_EXT_depth_bias_control) */ +#if defined(VK_EXT_descriptor_buffer) + PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT vkCmdBindDescriptorBufferEmbeddedSamplersEXT; + PFN_vkCmdBindDescriptorBuffersEXT vkCmdBindDescriptorBuffersEXT; + PFN_vkCmdSetDescriptorBufferOffsetsEXT vkCmdSetDescriptorBufferOffsetsEXT; + 
PFN_vkGetBufferOpaqueCaptureDescriptorDataEXT vkGetBufferOpaqueCaptureDescriptorDataEXT; + PFN_vkGetDescriptorEXT vkGetDescriptorEXT; + PFN_vkGetDescriptorSetLayoutBindingOffsetEXT vkGetDescriptorSetLayoutBindingOffsetEXT; + PFN_vkGetDescriptorSetLayoutSizeEXT vkGetDescriptorSetLayoutSizeEXT; + PFN_vkGetImageOpaqueCaptureDescriptorDataEXT vkGetImageOpaqueCaptureDescriptorDataEXT; + PFN_vkGetImageViewOpaqueCaptureDescriptorDataEXT vkGetImageViewOpaqueCaptureDescriptorDataEXT; + PFN_vkGetSamplerOpaqueCaptureDescriptorDataEXT vkGetSamplerOpaqueCaptureDescriptorDataEXT; +#endif /* defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) + PFN_vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT; +#endif /* defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) */ +#if defined(VK_EXT_device_fault) + PFN_vkGetDeviceFaultInfoEXT vkGetDeviceFaultInfoEXT; +#endif /* defined(VK_EXT_device_fault) */ +#if defined(VK_EXT_device_generated_commands) + PFN_vkCmdExecuteGeneratedCommandsEXT vkCmdExecuteGeneratedCommandsEXT; + PFN_vkCmdPreprocessGeneratedCommandsEXT vkCmdPreprocessGeneratedCommandsEXT; + PFN_vkCreateIndirectCommandsLayoutEXT vkCreateIndirectCommandsLayoutEXT; + PFN_vkCreateIndirectExecutionSetEXT vkCreateIndirectExecutionSetEXT; + PFN_vkDestroyIndirectCommandsLayoutEXT vkDestroyIndirectCommandsLayoutEXT; + PFN_vkDestroyIndirectExecutionSetEXT vkDestroyIndirectExecutionSetEXT; + PFN_vkGetGeneratedCommandsMemoryRequirementsEXT vkGetGeneratedCommandsMemoryRequirementsEXT; + PFN_vkUpdateIndirectExecutionSetPipelineEXT vkUpdateIndirectExecutionSetPipelineEXT; + PFN_vkUpdateIndirectExecutionSetShaderEXT vkUpdateIndirectExecutionSetShaderEXT; +#endif /* defined(VK_EXT_device_generated_commands) */ #if defined(VK_EXT_discard_rectangles) PFN_vkCmdSetDiscardRectangleEXT 
vkCmdSetDiscardRectangleEXT; #endif /* defined(VK_EXT_discard_rectangles) */ +#if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 + PFN_vkCmdSetDiscardRectangleEnableEXT vkCmdSetDiscardRectangleEnableEXT; + PFN_vkCmdSetDiscardRectangleModeEXT vkCmdSetDiscardRectangleModeEXT; +#endif /* defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 */ #if defined(VK_EXT_display_control) PFN_vkDisplayPowerControlEXT vkDisplayPowerControlEXT; PFN_vkGetSwapchainCounterEXT vkGetSwapchainCounterEXT; @@ -271,15 +447,89 @@ struct VolkDeviceTable #if defined(VK_EXT_external_memory_host) PFN_vkGetMemoryHostPointerPropertiesEXT vkGetMemoryHostPointerPropertiesEXT; #endif /* defined(VK_EXT_external_memory_host) */ +#if defined(VK_EXT_full_screen_exclusive) + PFN_vkAcquireFullScreenExclusiveModeEXT vkAcquireFullScreenExclusiveModeEXT; + PFN_vkReleaseFullScreenExclusiveModeEXT vkReleaseFullScreenExclusiveModeEXT; +#endif /* defined(VK_EXT_full_screen_exclusive) */ +#if defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) + PFN_vkGetDeviceGroupSurfacePresentModes2EXT vkGetDeviceGroupSurfacePresentModes2EXT; +#endif /* defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) */ #if defined(VK_EXT_hdr_metadata) PFN_vkSetHdrMetadataEXT vkSetHdrMetadataEXT; #endif /* defined(VK_EXT_hdr_metadata) */ +#if defined(VK_EXT_host_image_copy) + PFN_vkCopyImageToImageEXT vkCopyImageToImageEXT; + PFN_vkCopyImageToMemoryEXT vkCopyImageToMemoryEXT; + PFN_vkCopyMemoryToImageEXT vkCopyMemoryToImageEXT; + PFN_vkTransitionImageLayoutEXT vkTransitionImageLayoutEXT; +#endif /* defined(VK_EXT_host_image_copy) */ +#if defined(VK_EXT_host_query_reset) + PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; +#endif /* defined(VK_EXT_host_query_reset) */ #if defined(VK_EXT_image_drm_format_modifier) PFN_vkGetImageDrmFormatModifierPropertiesEXT 
vkGetImageDrmFormatModifierPropertiesEXT; #endif /* defined(VK_EXT_image_drm_format_modifier) */ +#if defined(VK_EXT_line_rasterization) + PFN_vkCmdSetLineStippleEXT vkCmdSetLineStippleEXT; +#endif /* defined(VK_EXT_line_rasterization) */ +#if defined(VK_EXT_mesh_shader) + PFN_vkCmdDrawMeshTasksEXT vkCmdDrawMeshTasksEXT; + PFN_vkCmdDrawMeshTasksIndirectEXT vkCmdDrawMeshTasksIndirectEXT; +#endif /* defined(VK_EXT_mesh_shader) */ +#if defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) + PFN_vkCmdDrawMeshTasksIndirectCountEXT vkCmdDrawMeshTasksIndirectCountEXT; +#endif /* defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_EXT_metal_objects) + PFN_vkExportMetalObjectsEXT vkExportMetalObjectsEXT; +#endif /* defined(VK_EXT_metal_objects) */ +#if defined(VK_EXT_multi_draw) + PFN_vkCmdDrawMultiEXT vkCmdDrawMultiEXT; + PFN_vkCmdDrawMultiIndexedEXT vkCmdDrawMultiIndexedEXT; +#endif /* defined(VK_EXT_multi_draw) */ +#if defined(VK_EXT_opacity_micromap) + PFN_vkBuildMicromapsEXT vkBuildMicromapsEXT; + PFN_vkCmdBuildMicromapsEXT vkCmdBuildMicromapsEXT; + PFN_vkCmdCopyMemoryToMicromapEXT vkCmdCopyMemoryToMicromapEXT; + PFN_vkCmdCopyMicromapEXT vkCmdCopyMicromapEXT; + PFN_vkCmdCopyMicromapToMemoryEXT vkCmdCopyMicromapToMemoryEXT; + PFN_vkCmdWriteMicromapsPropertiesEXT vkCmdWriteMicromapsPropertiesEXT; + PFN_vkCopyMemoryToMicromapEXT vkCopyMemoryToMicromapEXT; + PFN_vkCopyMicromapEXT vkCopyMicromapEXT; + PFN_vkCopyMicromapToMemoryEXT vkCopyMicromapToMemoryEXT; + PFN_vkCreateMicromapEXT vkCreateMicromapEXT; + PFN_vkDestroyMicromapEXT vkDestroyMicromapEXT; + PFN_vkGetDeviceMicromapCompatibilityEXT vkGetDeviceMicromapCompatibilityEXT; + PFN_vkGetMicromapBuildSizesEXT vkGetMicromapBuildSizesEXT; + PFN_vkWriteMicromapsPropertiesEXT vkWriteMicromapsPropertiesEXT; +#endif /* defined(VK_EXT_opacity_micromap) */ +#if defined(VK_EXT_pageable_device_local_memory) + 
PFN_vkSetDeviceMemoryPriorityEXT vkSetDeviceMemoryPriorityEXT; +#endif /* defined(VK_EXT_pageable_device_local_memory) */ +#if defined(VK_EXT_pipeline_properties) + PFN_vkGetPipelinePropertiesEXT vkGetPipelinePropertiesEXT; +#endif /* defined(VK_EXT_pipeline_properties) */ +#if defined(VK_EXT_private_data) + PFN_vkCreatePrivateDataSlotEXT vkCreatePrivateDataSlotEXT; + PFN_vkDestroyPrivateDataSlotEXT vkDestroyPrivateDataSlotEXT; + PFN_vkGetPrivateDataEXT vkGetPrivateDataEXT; + PFN_vkSetPrivateDataEXT vkSetPrivateDataEXT; +#endif /* defined(VK_EXT_private_data) */ #if defined(VK_EXT_sample_locations) PFN_vkCmdSetSampleLocationsEXT vkCmdSetSampleLocationsEXT; #endif /* defined(VK_EXT_sample_locations) */ +#if defined(VK_EXT_shader_module_identifier) + PFN_vkGetShaderModuleCreateInfoIdentifierEXT vkGetShaderModuleCreateInfoIdentifierEXT; + PFN_vkGetShaderModuleIdentifierEXT vkGetShaderModuleIdentifierEXT; +#endif /* defined(VK_EXT_shader_module_identifier) */ +#if defined(VK_EXT_shader_object) + PFN_vkCmdBindShadersEXT vkCmdBindShadersEXT; + PFN_vkCreateShadersEXT vkCreateShadersEXT; + PFN_vkDestroyShaderEXT vkDestroyShaderEXT; + PFN_vkGetShaderBinaryDataEXT vkGetShaderBinaryDataEXT; +#endif /* defined(VK_EXT_shader_object) */ +#if defined(VK_EXT_swapchain_maintenance1) + PFN_vkReleaseSwapchainImagesEXT vkReleaseSwapchainImagesEXT; +#endif /* defined(VK_EXT_swapchain_maintenance1) */ #if defined(VK_EXT_transform_feedback) PFN_vkCmdBeginQueryIndexedEXT vkCmdBeginQueryIndexedEXT; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; @@ -294,20 +544,100 @@ struct VolkDeviceTable PFN_vkGetValidationCacheDataEXT vkGetValidationCacheDataEXT; PFN_vkMergeValidationCachesEXT vkMergeValidationCachesEXT; #endif /* defined(VK_EXT_validation_cache) */ +#if defined(VK_FUCHSIA_buffer_collection) + PFN_vkCreateBufferCollectionFUCHSIA vkCreateBufferCollectionFUCHSIA; + PFN_vkDestroyBufferCollectionFUCHSIA vkDestroyBufferCollectionFUCHSIA; + 
PFN_vkGetBufferCollectionPropertiesFUCHSIA vkGetBufferCollectionPropertiesFUCHSIA; + PFN_vkSetBufferCollectionBufferConstraintsFUCHSIA vkSetBufferCollectionBufferConstraintsFUCHSIA; + PFN_vkSetBufferCollectionImageConstraintsFUCHSIA vkSetBufferCollectionImageConstraintsFUCHSIA; +#endif /* defined(VK_FUCHSIA_buffer_collection) */ +#if defined(VK_FUCHSIA_external_memory) + PFN_vkGetMemoryZirconHandleFUCHSIA vkGetMemoryZirconHandleFUCHSIA; + PFN_vkGetMemoryZirconHandlePropertiesFUCHSIA vkGetMemoryZirconHandlePropertiesFUCHSIA; +#endif /* defined(VK_FUCHSIA_external_memory) */ +#if defined(VK_FUCHSIA_external_semaphore) + PFN_vkGetSemaphoreZirconHandleFUCHSIA vkGetSemaphoreZirconHandleFUCHSIA; + PFN_vkImportSemaphoreZirconHandleFUCHSIA vkImportSemaphoreZirconHandleFUCHSIA; +#endif /* defined(VK_FUCHSIA_external_semaphore) */ #if defined(VK_GOOGLE_display_timing) PFN_vkGetPastPresentationTimingGOOGLE vkGetPastPresentationTimingGOOGLE; PFN_vkGetRefreshCycleDurationGOOGLE vkGetRefreshCycleDurationGOOGLE; #endif /* defined(VK_GOOGLE_display_timing) */ +#if defined(VK_HUAWEI_cluster_culling_shader) + PFN_vkCmdDrawClusterHUAWEI vkCmdDrawClusterHUAWEI; + PFN_vkCmdDrawClusterIndirectHUAWEI vkCmdDrawClusterIndirectHUAWEI; +#endif /* defined(VK_HUAWEI_cluster_culling_shader) */ +#if defined(VK_HUAWEI_invocation_mask) + PFN_vkCmdBindInvocationMaskHUAWEI vkCmdBindInvocationMaskHUAWEI; +#endif /* defined(VK_HUAWEI_invocation_mask) */ +#if defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 + PFN_vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI; +#endif /* defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 */ +#if defined(VK_HUAWEI_subpass_shading) + PFN_vkCmdSubpassShadingHUAWEI vkCmdSubpassShadingHUAWEI; +#endif /* defined(VK_HUAWEI_subpass_shading) */ +#if defined(VK_INTEL_performance_query) + PFN_vkAcquirePerformanceConfigurationINTEL 
vkAcquirePerformanceConfigurationINTEL; + PFN_vkCmdSetPerformanceMarkerINTEL vkCmdSetPerformanceMarkerINTEL; + PFN_vkCmdSetPerformanceOverrideINTEL vkCmdSetPerformanceOverrideINTEL; + PFN_vkCmdSetPerformanceStreamMarkerINTEL vkCmdSetPerformanceStreamMarkerINTEL; + PFN_vkGetPerformanceParameterINTEL vkGetPerformanceParameterINTEL; + PFN_vkInitializePerformanceApiINTEL vkInitializePerformanceApiINTEL; + PFN_vkQueueSetPerformanceConfigurationINTEL vkQueueSetPerformanceConfigurationINTEL; + PFN_vkReleasePerformanceConfigurationINTEL vkReleasePerformanceConfigurationINTEL; + PFN_vkUninitializePerformanceApiINTEL vkUninitializePerformanceApiINTEL; +#endif /* defined(VK_INTEL_performance_query) */ +#if defined(VK_KHR_acceleration_structure) + PFN_vkBuildAccelerationStructuresKHR vkBuildAccelerationStructuresKHR; + PFN_vkCmdBuildAccelerationStructuresIndirectKHR vkCmdBuildAccelerationStructuresIndirectKHR; + PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR; + PFN_vkCmdCopyAccelerationStructureKHR vkCmdCopyAccelerationStructureKHR; + PFN_vkCmdCopyAccelerationStructureToMemoryKHR vkCmdCopyAccelerationStructureToMemoryKHR; + PFN_vkCmdCopyMemoryToAccelerationStructureKHR vkCmdCopyMemoryToAccelerationStructureKHR; + PFN_vkCmdWriteAccelerationStructuresPropertiesKHR vkCmdWriteAccelerationStructuresPropertiesKHR; + PFN_vkCopyAccelerationStructureKHR vkCopyAccelerationStructureKHR; + PFN_vkCopyAccelerationStructureToMemoryKHR vkCopyAccelerationStructureToMemoryKHR; + PFN_vkCopyMemoryToAccelerationStructureKHR vkCopyMemoryToAccelerationStructureKHR; + PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR; + PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR; + PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR; + PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR; + PFN_vkGetDeviceAccelerationStructureCompatibilityKHR 
vkGetDeviceAccelerationStructureCompatibilityKHR; + PFN_vkWriteAccelerationStructuresPropertiesKHR vkWriteAccelerationStructuresPropertiesKHR; +#endif /* defined(VK_KHR_acceleration_structure) */ #if defined(VK_KHR_bind_memory2) PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; #endif /* defined(VK_KHR_bind_memory2) */ +#if defined(VK_KHR_buffer_device_address) + PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; + PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; + PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; +#endif /* defined(VK_KHR_buffer_device_address) */ +#if defined(VK_KHR_calibrated_timestamps) + PFN_vkGetCalibratedTimestampsKHR vkGetCalibratedTimestampsKHR; +#endif /* defined(VK_KHR_calibrated_timestamps) */ +#if defined(VK_KHR_copy_commands2) + PFN_vkCmdBlitImage2KHR vkCmdBlitImage2KHR; + PFN_vkCmdCopyBuffer2KHR vkCmdCopyBuffer2KHR; + PFN_vkCmdCopyBufferToImage2KHR vkCmdCopyBufferToImage2KHR; + PFN_vkCmdCopyImage2KHR vkCmdCopyImage2KHR; + PFN_vkCmdCopyImageToBuffer2KHR vkCmdCopyImageToBuffer2KHR; + PFN_vkCmdResolveImage2KHR vkCmdResolveImage2KHR; +#endif /* defined(VK_KHR_copy_commands2) */ #if defined(VK_KHR_create_renderpass2) PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR; PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR; #endif /* defined(VK_KHR_create_renderpass2) */ +#if defined(VK_KHR_deferred_host_operations) + PFN_vkCreateDeferredOperationKHR vkCreateDeferredOperationKHR; + PFN_vkDeferredOperationJoinKHR vkDeferredOperationJoinKHR; + PFN_vkDestroyDeferredOperationKHR vkDestroyDeferredOperationKHR; + PFN_vkGetDeferredOperationMaxConcurrencyKHR vkGetDeferredOperationMaxConcurrencyKHR; + PFN_vkGetDeferredOperationResultKHR vkGetDeferredOperationResultKHR; +#endif /* defined(VK_KHR_deferred_host_operations) */ #if 
defined(VK_KHR_descriptor_update_template) PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; @@ -325,6 +655,14 @@ struct VolkDeviceTable PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR; PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR; #endif /* defined(VK_KHR_draw_indirect_count) */ +#if defined(VK_KHR_dynamic_rendering) + PFN_vkCmdBeginRenderingKHR vkCmdBeginRenderingKHR; + PFN_vkCmdEndRenderingKHR vkCmdEndRenderingKHR; +#endif /* defined(VK_KHR_dynamic_rendering) */ +#if defined(VK_KHR_dynamic_rendering_local_read) + PFN_vkCmdSetRenderingAttachmentLocationsKHR vkCmdSetRenderingAttachmentLocationsKHR; + PFN_vkCmdSetRenderingInputAttachmentIndicesKHR vkCmdSetRenderingInputAttachmentIndicesKHR; +#endif /* defined(VK_KHR_dynamic_rendering_local_read) */ #if defined(VK_KHR_external_fence_fd) PFN_vkGetFenceFdKHR vkGetFenceFdKHR; PFN_vkImportFenceFdKHR vkImportFenceFdKHR; @@ -349,20 +687,84 @@ struct VolkDeviceTable PFN_vkGetSemaphoreWin32HandleKHR vkGetSemaphoreWin32HandleKHR; PFN_vkImportSemaphoreWin32HandleKHR vkImportSemaphoreWin32HandleKHR; #endif /* defined(VK_KHR_external_semaphore_win32) */ +#if defined(VK_KHR_fragment_shading_rate) + PFN_vkCmdSetFragmentShadingRateKHR vkCmdSetFragmentShadingRateKHR; +#endif /* defined(VK_KHR_fragment_shading_rate) */ #if defined(VK_KHR_get_memory_requirements2) PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; #endif /* defined(VK_KHR_get_memory_requirements2) */ +#if defined(VK_KHR_line_rasterization) + PFN_vkCmdSetLineStippleKHR vkCmdSetLineStippleKHR; +#endif /* defined(VK_KHR_line_rasterization) */ #if defined(VK_KHR_maintenance1) PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; #endif /* defined(VK_KHR_maintenance1) 
*/ #if defined(VK_KHR_maintenance3) PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; #endif /* defined(VK_KHR_maintenance3) */ +#if defined(VK_KHR_maintenance4) + PFN_vkGetDeviceBufferMemoryRequirementsKHR vkGetDeviceBufferMemoryRequirementsKHR; + PFN_vkGetDeviceImageMemoryRequirementsKHR vkGetDeviceImageMemoryRequirementsKHR; + PFN_vkGetDeviceImageSparseMemoryRequirementsKHR vkGetDeviceImageSparseMemoryRequirementsKHR; +#endif /* defined(VK_KHR_maintenance4) */ +#if defined(VK_KHR_maintenance5) + PFN_vkCmdBindIndexBuffer2KHR vkCmdBindIndexBuffer2KHR; + PFN_vkGetDeviceImageSubresourceLayoutKHR vkGetDeviceImageSubresourceLayoutKHR; + PFN_vkGetImageSubresourceLayout2KHR vkGetImageSubresourceLayout2KHR; + PFN_vkGetRenderingAreaGranularityKHR vkGetRenderingAreaGranularityKHR; +#endif /* defined(VK_KHR_maintenance5) */ +#if defined(VK_KHR_maintenance6) + PFN_vkCmdBindDescriptorSets2KHR vkCmdBindDescriptorSets2KHR; + PFN_vkCmdPushConstants2KHR vkCmdPushConstants2KHR; +#endif /* defined(VK_KHR_maintenance6) */ +#if defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) + PFN_vkCmdPushDescriptorSet2KHR vkCmdPushDescriptorSet2KHR; + PFN_vkCmdPushDescriptorSetWithTemplate2KHR vkCmdPushDescriptorSetWithTemplate2KHR; +#endif /* defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) + PFN_vkCmdBindDescriptorBufferEmbeddedSamplers2EXT vkCmdBindDescriptorBufferEmbeddedSamplers2EXT; + PFN_vkCmdSetDescriptorBufferOffsets2EXT vkCmdSetDescriptorBufferOffsets2EXT; +#endif /* defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_KHR_map_memory2) + PFN_vkMapMemory2KHR vkMapMemory2KHR; + PFN_vkUnmapMemory2KHR vkUnmapMemory2KHR; +#endif /* defined(VK_KHR_map_memory2) */ +#if defined(VK_KHR_performance_query) + PFN_vkAcquireProfilingLockKHR vkAcquireProfilingLockKHR; + PFN_vkReleaseProfilingLockKHR vkReleaseProfilingLockKHR; +#endif /* 
defined(VK_KHR_performance_query) */ +#if defined(VK_KHR_pipeline_binary) + PFN_vkCreatePipelineBinariesKHR vkCreatePipelineBinariesKHR; + PFN_vkDestroyPipelineBinaryKHR vkDestroyPipelineBinaryKHR; + PFN_vkGetPipelineBinaryDataKHR vkGetPipelineBinaryDataKHR; + PFN_vkGetPipelineKeyKHR vkGetPipelineKeyKHR; + PFN_vkReleaseCapturedPipelineDataKHR vkReleaseCapturedPipelineDataKHR; +#endif /* defined(VK_KHR_pipeline_binary) */ +#if defined(VK_KHR_pipeline_executable_properties) + PFN_vkGetPipelineExecutableInternalRepresentationsKHR vkGetPipelineExecutableInternalRepresentationsKHR; + PFN_vkGetPipelineExecutablePropertiesKHR vkGetPipelineExecutablePropertiesKHR; + PFN_vkGetPipelineExecutableStatisticsKHR vkGetPipelineExecutableStatisticsKHR; +#endif /* defined(VK_KHR_pipeline_executable_properties) */ +#if defined(VK_KHR_present_wait) + PFN_vkWaitForPresentKHR vkWaitForPresentKHR; +#endif /* defined(VK_KHR_present_wait) */ #if defined(VK_KHR_push_descriptor) PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; #endif /* defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) + PFN_vkCmdTraceRaysIndirect2KHR vkCmdTraceRaysIndirect2KHR; +#endif /* defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) */ +#if defined(VK_KHR_ray_tracing_pipeline) + PFN_vkCmdSetRayTracingPipelineStackSizeKHR vkCmdSetRayTracingPipelineStackSizeKHR; + PFN_vkCmdTraceRaysIndirectKHR vkCmdTraceRaysIndirectKHR; + PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR; + PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR; + PFN_vkGetRayTracingCaptureReplayShaderGroupHandlesKHR vkGetRayTracingCaptureReplayShaderGroupHandlesKHR; + PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR; + PFN_vkGetRayTracingShaderGroupStackSizeKHR vkGetRayTracingShaderGroupStackSizeKHR; +#endif /* defined(VK_KHR_ray_tracing_pipeline) */ #if defined(VK_KHR_sampler_ycbcr_conversion) 
PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; @@ -377,31 +779,122 @@ struct VolkDeviceTable PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; PFN_vkQueuePresentKHR vkQueuePresentKHR; #endif /* defined(VK_KHR_swapchain) */ -#if defined(VK_NVX_device_generated_commands) - PFN_vkCmdProcessCommandsNVX vkCmdProcessCommandsNVX; - PFN_vkCmdReserveSpaceForCommandsNVX vkCmdReserveSpaceForCommandsNVX; - PFN_vkCreateIndirectCommandsLayoutNVX vkCreateIndirectCommandsLayoutNVX; - PFN_vkCreateObjectTableNVX vkCreateObjectTableNVX; - PFN_vkDestroyIndirectCommandsLayoutNVX vkDestroyIndirectCommandsLayoutNVX; - PFN_vkDestroyObjectTableNVX vkDestroyObjectTableNVX; - PFN_vkRegisterObjectsNVX vkRegisterObjectsNVX; - PFN_vkUnregisterObjectsNVX vkUnregisterObjectsNVX; -#endif /* defined(VK_NVX_device_generated_commands) */ +#if defined(VK_KHR_synchronization2) + PFN_vkCmdPipelineBarrier2KHR vkCmdPipelineBarrier2KHR; + PFN_vkCmdResetEvent2KHR vkCmdResetEvent2KHR; + PFN_vkCmdSetEvent2KHR vkCmdSetEvent2KHR; + PFN_vkCmdWaitEvents2KHR vkCmdWaitEvents2KHR; + PFN_vkCmdWriteTimestamp2KHR vkCmdWriteTimestamp2KHR; + PFN_vkQueueSubmit2KHR vkQueueSubmit2KHR; +#endif /* defined(VK_KHR_synchronization2) */ +#if defined(VK_KHR_timeline_semaphore) + PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR; + PFN_vkSignalSemaphoreKHR vkSignalSemaphoreKHR; + PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR; +#endif /* defined(VK_KHR_timeline_semaphore) */ +#if defined(VK_KHR_video_decode_queue) + PFN_vkCmdDecodeVideoKHR vkCmdDecodeVideoKHR; +#endif /* defined(VK_KHR_video_decode_queue) */ +#if defined(VK_KHR_video_encode_queue) + PFN_vkCmdEncodeVideoKHR vkCmdEncodeVideoKHR; + PFN_vkGetEncodedVideoSessionParametersKHR vkGetEncodedVideoSessionParametersKHR; +#endif /* defined(VK_KHR_video_encode_queue) */ +#if defined(VK_KHR_video_queue) + PFN_vkBindVideoSessionMemoryKHR vkBindVideoSessionMemoryKHR; + 
PFN_vkCmdBeginVideoCodingKHR vkCmdBeginVideoCodingKHR; + PFN_vkCmdControlVideoCodingKHR vkCmdControlVideoCodingKHR; + PFN_vkCmdEndVideoCodingKHR vkCmdEndVideoCodingKHR; + PFN_vkCreateVideoSessionKHR vkCreateVideoSessionKHR; + PFN_vkCreateVideoSessionParametersKHR vkCreateVideoSessionParametersKHR; + PFN_vkDestroyVideoSessionKHR vkDestroyVideoSessionKHR; + PFN_vkDestroyVideoSessionParametersKHR vkDestroyVideoSessionParametersKHR; + PFN_vkGetVideoSessionMemoryRequirementsKHR vkGetVideoSessionMemoryRequirementsKHR; + PFN_vkUpdateVideoSessionParametersKHR vkUpdateVideoSessionParametersKHR; +#endif /* defined(VK_KHR_video_queue) */ +#if defined(VK_NVX_binary_import) + PFN_vkCmdCuLaunchKernelNVX vkCmdCuLaunchKernelNVX; + PFN_vkCreateCuFunctionNVX vkCreateCuFunctionNVX; + PFN_vkCreateCuModuleNVX vkCreateCuModuleNVX; + PFN_vkDestroyCuFunctionNVX vkDestroyCuFunctionNVX; + PFN_vkDestroyCuModuleNVX vkDestroyCuModuleNVX; +#endif /* defined(VK_NVX_binary_import) */ +#if defined(VK_NVX_image_view_handle) + PFN_vkGetImageViewHandleNVX vkGetImageViewHandleNVX; +#endif /* defined(VK_NVX_image_view_handle) */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 + PFN_vkGetImageViewHandle64NVX vkGetImageViewHandle64NVX; +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 + PFN_vkGetImageViewAddressNVX vkGetImageViewAddressNVX; +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 */ #if defined(VK_NV_clip_space_w_scaling) PFN_vkCmdSetViewportWScalingNV vkCmdSetViewportWScalingNV; #endif /* defined(VK_NV_clip_space_w_scaling) */ +#if defined(VK_NV_copy_memory_indirect) + PFN_vkCmdCopyMemoryIndirectNV vkCmdCopyMemoryIndirectNV; + PFN_vkCmdCopyMemoryToImageIndirectNV vkCmdCopyMemoryToImageIndirectNV; +#endif /* defined(VK_NV_copy_memory_indirect) */ +#if defined(VK_NV_cuda_kernel_launch) + 
PFN_vkCmdCudaLaunchKernelNV vkCmdCudaLaunchKernelNV; + PFN_vkCreateCudaFunctionNV vkCreateCudaFunctionNV; + PFN_vkCreateCudaModuleNV vkCreateCudaModuleNV; + PFN_vkDestroyCudaFunctionNV vkDestroyCudaFunctionNV; + PFN_vkDestroyCudaModuleNV vkDestroyCudaModuleNV; + PFN_vkGetCudaModuleCacheNV vkGetCudaModuleCacheNV; +#endif /* defined(VK_NV_cuda_kernel_launch) */ #if defined(VK_NV_device_diagnostic_checkpoints) PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV; PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV; #endif /* defined(VK_NV_device_diagnostic_checkpoints) */ +#if defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) + PFN_vkGetQueueCheckpointData2NV vkGetQueueCheckpointData2NV; +#endif /* defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_NV_device_generated_commands) + PFN_vkCmdBindPipelineShaderGroupNV vkCmdBindPipelineShaderGroupNV; + PFN_vkCmdExecuteGeneratedCommandsNV vkCmdExecuteGeneratedCommandsNV; + PFN_vkCmdPreprocessGeneratedCommandsNV vkCmdPreprocessGeneratedCommandsNV; + PFN_vkCreateIndirectCommandsLayoutNV vkCreateIndirectCommandsLayoutNV; + PFN_vkDestroyIndirectCommandsLayoutNV vkDestroyIndirectCommandsLayoutNV; + PFN_vkGetGeneratedCommandsMemoryRequirementsNV vkGetGeneratedCommandsMemoryRequirementsNV; +#endif /* defined(VK_NV_device_generated_commands) */ +#if defined(VK_NV_device_generated_commands_compute) + PFN_vkCmdUpdatePipelineIndirectBufferNV vkCmdUpdatePipelineIndirectBufferNV; + PFN_vkGetPipelineIndirectDeviceAddressNV vkGetPipelineIndirectDeviceAddressNV; + PFN_vkGetPipelineIndirectMemoryRequirementsNV vkGetPipelineIndirectMemoryRequirementsNV; +#endif /* defined(VK_NV_device_generated_commands_compute) */ +#if defined(VK_NV_external_memory_rdma) + PFN_vkGetMemoryRemoteAddressNV vkGetMemoryRemoteAddressNV; +#endif /* defined(VK_NV_external_memory_rdma) */ #if defined(VK_NV_external_memory_win32) 
PFN_vkGetMemoryWin32HandleNV vkGetMemoryWin32HandleNV; #endif /* defined(VK_NV_external_memory_win32) */ +#if defined(VK_NV_fragment_shading_rate_enums) + PFN_vkCmdSetFragmentShadingRateEnumNV vkCmdSetFragmentShadingRateEnumNV; +#endif /* defined(VK_NV_fragment_shading_rate_enums) */ +#if defined(VK_NV_low_latency2) + PFN_vkGetLatencyTimingsNV vkGetLatencyTimingsNV; + PFN_vkLatencySleepNV vkLatencySleepNV; + PFN_vkQueueNotifyOutOfBandNV vkQueueNotifyOutOfBandNV; + PFN_vkSetLatencyMarkerNV vkSetLatencyMarkerNV; + PFN_vkSetLatencySleepModeNV vkSetLatencySleepModeNV; +#endif /* defined(VK_NV_low_latency2) */ +#if defined(VK_NV_memory_decompression) + PFN_vkCmdDecompressMemoryIndirectCountNV vkCmdDecompressMemoryIndirectCountNV; + PFN_vkCmdDecompressMemoryNV vkCmdDecompressMemoryNV; +#endif /* defined(VK_NV_memory_decompression) */ #if defined(VK_NV_mesh_shader) - PFN_vkCmdDrawMeshTasksIndirectCountNV vkCmdDrawMeshTasksIndirectCountNV; PFN_vkCmdDrawMeshTasksIndirectNV vkCmdDrawMeshTasksIndirectNV; PFN_vkCmdDrawMeshTasksNV vkCmdDrawMeshTasksNV; #endif /* defined(VK_NV_mesh_shader) */ +#if defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) + PFN_vkCmdDrawMeshTasksIndirectCountNV vkCmdDrawMeshTasksIndirectCountNV; +#endif /* defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_NV_optical_flow) + PFN_vkBindOpticalFlowSessionImageNV vkBindOpticalFlowSessionImageNV; + PFN_vkCmdOpticalFlowExecuteNV vkCmdOpticalFlowExecuteNV; + PFN_vkCreateOpticalFlowSessionNV vkCreateOpticalFlowSessionNV; + PFN_vkDestroyOpticalFlowSessionNV vkDestroyOpticalFlowSessionNV; +#endif /* defined(VK_NV_optical_flow) */ #if defined(VK_NV_ray_tracing) PFN_vkBindAccelerationStructureMemoryNV vkBindAccelerationStructureMemoryNV; PFN_vkCmdBuildAccelerationStructureNV vkCmdBuildAccelerationStructureNV; @@ -416,6 +909,9 @@ struct VolkDeviceTable PFN_vkGetAccelerationStructureMemoryRequirementsNV 
vkGetAccelerationStructureMemoryRequirementsNV; PFN_vkGetRayTracingShaderGroupHandlesNV vkGetRayTracingShaderGroupHandlesNV; #endif /* defined(VK_NV_ray_tracing) */ +#if defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 + PFN_vkCmdSetExclusiveScissorEnableNV vkCmdSetExclusiveScissorEnableNV; +#endif /* defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 */ #if defined(VK_NV_scissor_exclusive) PFN_vkCmdSetExclusiveScissorNV vkCmdSetExclusiveScissorNV; #endif /* defined(VK_NV_scissor_exclusive) */ @@ -424,9 +920,115 @@ struct VolkDeviceTable PFN_vkCmdSetCoarseSampleOrderNV vkCmdSetCoarseSampleOrderNV; PFN_vkCmdSetViewportShadingRatePaletteNV vkCmdSetViewportShadingRatePaletteNV; #endif /* defined(VK_NV_shading_rate_image) */ -#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) +#if defined(VK_QCOM_tile_properties) + PFN_vkGetDynamicRenderingTilePropertiesQCOM vkGetDynamicRenderingTilePropertiesQCOM; + PFN_vkGetFramebufferTilePropertiesQCOM vkGetFramebufferTilePropertiesQCOM; +#endif /* defined(VK_QCOM_tile_properties) */ +#if defined(VK_QNX_external_memory_screen_buffer) + PFN_vkGetScreenBufferPropertiesQNX vkGetScreenBufferPropertiesQNX; +#endif /* defined(VK_QNX_external_memory_screen_buffer) */ +#if defined(VK_VALVE_descriptor_set_host_mapping) + PFN_vkGetDescriptorSetHostMappingVALVE vkGetDescriptorSetHostMappingVALVE; + PFN_vkGetDescriptorSetLayoutHostMappingInfoVALVE vkGetDescriptorSetLayoutHostMappingInfoVALVE; +#endif /* defined(VK_VALVE_descriptor_set_host_mapping) */ +#if (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) + PFN_vkCmdSetDepthClampRangeEXT vkCmdSetDepthClampRangeEXT; +#endif /* (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) */ +#if (defined(VK_EXT_extended_dynamic_state)) 
|| (defined(VK_EXT_shader_object)) + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; + PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; + PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; + PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; + PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; + PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; + PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; + PFN_vkCmdSetScissorWithCountEXT vkCmdSetScissorWithCountEXT; + PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; + PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; + PFN_vkCmdSetViewportWithCountEXT vkCmdSetViewportWithCountEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) + PFN_vkCmdSetDepthBiasEnableEXT vkCmdSetDepthBiasEnableEXT; + PFN_vkCmdSetLogicOpEXT vkCmdSetLogicOpEXT; + PFN_vkCmdSetPatchControlPointsEXT vkCmdSetPatchControlPointsEXT; + PFN_vkCmdSetPrimitiveRestartEnableEXT vkCmdSetPrimitiveRestartEnableEXT; + PFN_vkCmdSetRasterizerDiscardEnableEXT vkCmdSetRasterizerDiscardEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) + PFN_vkCmdSetAlphaToCoverageEnableEXT vkCmdSetAlphaToCoverageEnableEXT; + PFN_vkCmdSetAlphaToOneEnableEXT vkCmdSetAlphaToOneEnableEXT; + PFN_vkCmdSetColorBlendEnableEXT vkCmdSetColorBlendEnableEXT; + PFN_vkCmdSetColorBlendEquationEXT vkCmdSetColorBlendEquationEXT; + PFN_vkCmdSetColorWriteMaskEXT vkCmdSetColorWriteMaskEXT; + PFN_vkCmdSetDepthClampEnableEXT vkCmdSetDepthClampEnableEXT; + PFN_vkCmdSetLogicOpEnableEXT vkCmdSetLogicOpEnableEXT; + PFN_vkCmdSetPolygonModeEXT vkCmdSetPolygonModeEXT; + PFN_vkCmdSetRasterizationSamplesEXT vkCmdSetRasterizationSamplesEXT; + 
PFN_vkCmdSetSampleMaskEXT vkCmdSetSampleMaskEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) + PFN_vkCmdSetTessellationDomainOriginEXT vkCmdSetTessellationDomainOriginEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) + PFN_vkCmdSetRasterizationStreamEXT vkCmdSetRasterizationStreamEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) + PFN_vkCmdSetConservativeRasterizationModeEXT vkCmdSetConservativeRasterizationModeEXT; + PFN_vkCmdSetExtraPrimitiveOverestimationSizeEXT vkCmdSetExtraPrimitiveOverestimationSizeEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) + PFN_vkCmdSetDepthClipEnableEXT vkCmdSetDepthClipEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) + 
PFN_vkCmdSetSampleLocationsEnableEXT vkCmdSetSampleLocationsEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) + PFN_vkCmdSetColorBlendAdvancedEXT vkCmdSetColorBlendAdvancedEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) + PFN_vkCmdSetProvokingVertexModeEXT vkCmdSetProvokingVertexModeEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) + PFN_vkCmdSetLineRasterizationModeEXT vkCmdSetLineRasterizationModeEXT; + PFN_vkCmdSetLineStippleEnableEXT vkCmdSetLineStippleEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) + PFN_vkCmdSetDepthClipNegativeOneToOneEXT vkCmdSetDepthClipNegativeOneToOneEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || 
(defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) + PFN_vkCmdSetViewportWScalingEnableNV vkCmdSetViewportWScalingEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) + PFN_vkCmdSetViewportSwizzleNV vkCmdSetViewportSwizzleNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) + PFN_vkCmdSetCoverageToColorEnableNV vkCmdSetCoverageToColorEnableNV; + PFN_vkCmdSetCoverageToColorLocationNV vkCmdSetCoverageToColorLocationNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) + PFN_vkCmdSetCoverageModulationModeNV vkCmdSetCoverageModulationModeNV; + PFN_vkCmdSetCoverageModulationTableEnableNV vkCmdSetCoverageModulationTableEnableNV; + PFN_vkCmdSetCoverageModulationTableNV vkCmdSetCoverageModulationTableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) + PFN_vkCmdSetShadingRateImageEnableNV vkCmdSetShadingRateImageEnableNV; 
+#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) + PFN_vkCmdSetRepresentativeFragmentTestEnableNV vkCmdSetRepresentativeFragmentTestEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) + PFN_vkCmdSetCoverageReductionModeNV vkCmdSetCoverageReductionModeNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) */ +#if (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) + PFN_vkGetImageSubresourceLayout2EXT vkGetImageSubresourceLayout2EXT; +#endif /* (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) */ +#if (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) + PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT; +#endif /* (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) */ +#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; -#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) */ +#endif /* 
(defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) */ #if (defined(VK_KHR_device_group) && defined(VK_KHR_surface)) || (defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)) PFN_vkGetDeviceGroupPresentCapabilitiesKHR vkGetDeviceGroupPresentCapabilitiesKHR; PFN_vkGetDeviceGroupSurfacePresentModesKHR vkGetDeviceGroupSurfacePresentModesKHR; @@ -607,9 +1209,102 @@ extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2 vkGetPhysicalDeviceSp extern PFN_vkTrimCommandPool vkTrimCommandPool; extern PFN_vkUpdateDescriptorSetWithTemplate vkUpdateDescriptorSetWithTemplate; #endif /* defined(VK_VERSION_1_1) */ +#if defined(VK_VERSION_1_2) +extern PFN_vkCmdBeginRenderPass2 vkCmdBeginRenderPass2; +extern PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount; +extern PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount; +extern PFN_vkCmdEndRenderPass2 vkCmdEndRenderPass2; +extern PFN_vkCmdNextSubpass2 vkCmdNextSubpass2; +extern PFN_vkCreateRenderPass2 vkCreateRenderPass2; +extern PFN_vkGetBufferDeviceAddress vkGetBufferDeviceAddress; +extern PFN_vkGetBufferOpaqueCaptureAddress vkGetBufferOpaqueCaptureAddress; +extern PFN_vkGetDeviceMemoryOpaqueCaptureAddress vkGetDeviceMemoryOpaqueCaptureAddress; +extern PFN_vkGetSemaphoreCounterValue vkGetSemaphoreCounterValue; +extern PFN_vkResetQueryPool vkResetQueryPool; +extern PFN_vkSignalSemaphore vkSignalSemaphore; +extern PFN_vkWaitSemaphores vkWaitSemaphores; +#endif /* defined(VK_VERSION_1_2) */ +#if defined(VK_VERSION_1_3) +extern PFN_vkCmdBeginRendering vkCmdBeginRendering; +extern PFN_vkCmdBindVertexBuffers2 vkCmdBindVertexBuffers2; +extern PFN_vkCmdBlitImage2 vkCmdBlitImage2; +extern PFN_vkCmdCopyBuffer2 vkCmdCopyBuffer2; +extern PFN_vkCmdCopyBufferToImage2 vkCmdCopyBufferToImage2; +extern PFN_vkCmdCopyImage2 vkCmdCopyImage2; +extern PFN_vkCmdCopyImageToBuffer2 
vkCmdCopyImageToBuffer2; +extern PFN_vkCmdEndRendering vkCmdEndRendering; +extern PFN_vkCmdPipelineBarrier2 vkCmdPipelineBarrier2; +extern PFN_vkCmdResetEvent2 vkCmdResetEvent2; +extern PFN_vkCmdResolveImage2 vkCmdResolveImage2; +extern PFN_vkCmdSetCullMode vkCmdSetCullMode; +extern PFN_vkCmdSetDepthBiasEnable vkCmdSetDepthBiasEnable; +extern PFN_vkCmdSetDepthBoundsTestEnable vkCmdSetDepthBoundsTestEnable; +extern PFN_vkCmdSetDepthCompareOp vkCmdSetDepthCompareOp; +extern PFN_vkCmdSetDepthTestEnable vkCmdSetDepthTestEnable; +extern PFN_vkCmdSetDepthWriteEnable vkCmdSetDepthWriteEnable; +extern PFN_vkCmdSetEvent2 vkCmdSetEvent2; +extern PFN_vkCmdSetFrontFace vkCmdSetFrontFace; +extern PFN_vkCmdSetPrimitiveRestartEnable vkCmdSetPrimitiveRestartEnable; +extern PFN_vkCmdSetPrimitiveTopology vkCmdSetPrimitiveTopology; +extern PFN_vkCmdSetRasterizerDiscardEnable vkCmdSetRasterizerDiscardEnable; +extern PFN_vkCmdSetScissorWithCount vkCmdSetScissorWithCount; +extern PFN_vkCmdSetStencilOp vkCmdSetStencilOp; +extern PFN_vkCmdSetStencilTestEnable vkCmdSetStencilTestEnable; +extern PFN_vkCmdSetViewportWithCount vkCmdSetViewportWithCount; +extern PFN_vkCmdWaitEvents2 vkCmdWaitEvents2; +extern PFN_vkCmdWriteTimestamp2 vkCmdWriteTimestamp2; +extern PFN_vkCreatePrivateDataSlot vkCreatePrivateDataSlot; +extern PFN_vkDestroyPrivateDataSlot vkDestroyPrivateDataSlot; +extern PFN_vkGetDeviceBufferMemoryRequirements vkGetDeviceBufferMemoryRequirements; +extern PFN_vkGetDeviceImageMemoryRequirements vkGetDeviceImageMemoryRequirements; +extern PFN_vkGetDeviceImageSparseMemoryRequirements vkGetDeviceImageSparseMemoryRequirements; +extern PFN_vkGetPhysicalDeviceToolProperties vkGetPhysicalDeviceToolProperties; +extern PFN_vkGetPrivateData vkGetPrivateData; +extern PFN_vkQueueSubmit2 vkQueueSubmit2; +extern PFN_vkSetPrivateData vkSetPrivateData; +#endif /* defined(VK_VERSION_1_3) */ +#if defined(VK_VERSION_1_4) +extern PFN_vkCmdBindDescriptorSets2 vkCmdBindDescriptorSets2; +extern 
PFN_vkCmdBindIndexBuffer2 vkCmdBindIndexBuffer2; +extern PFN_vkCmdPushConstants2 vkCmdPushConstants2; +extern PFN_vkCmdPushDescriptorSet vkCmdPushDescriptorSet; +extern PFN_vkCmdPushDescriptorSet2 vkCmdPushDescriptorSet2; +extern PFN_vkCmdPushDescriptorSetWithTemplate vkCmdPushDescriptorSetWithTemplate; +extern PFN_vkCmdPushDescriptorSetWithTemplate2 vkCmdPushDescriptorSetWithTemplate2; +extern PFN_vkCmdSetLineStipple vkCmdSetLineStipple; +extern PFN_vkCmdSetRenderingAttachmentLocations vkCmdSetRenderingAttachmentLocations; +extern PFN_vkCmdSetRenderingInputAttachmentIndices vkCmdSetRenderingInputAttachmentIndices; +extern PFN_vkCopyImageToImage vkCopyImageToImage; +extern PFN_vkCopyImageToMemory vkCopyImageToMemory; +extern PFN_vkCopyMemoryToImage vkCopyMemoryToImage; +extern PFN_vkGetDeviceImageSubresourceLayout vkGetDeviceImageSubresourceLayout; +extern PFN_vkGetImageSubresourceLayout2 vkGetImageSubresourceLayout2; +extern PFN_vkGetRenderingAreaGranularity vkGetRenderingAreaGranularity; +extern PFN_vkMapMemory2 vkMapMemory2; +extern PFN_vkTransitionImageLayout vkTransitionImageLayout; +extern PFN_vkUnmapMemory2 vkUnmapMemory2; +#endif /* defined(VK_VERSION_1_4) */ +#if defined(VK_AMDX_shader_enqueue) +extern PFN_vkCmdDispatchGraphAMDX vkCmdDispatchGraphAMDX; +extern PFN_vkCmdDispatchGraphIndirectAMDX vkCmdDispatchGraphIndirectAMDX; +extern PFN_vkCmdDispatchGraphIndirectCountAMDX vkCmdDispatchGraphIndirectCountAMDX; +extern PFN_vkCmdInitializeGraphScratchMemoryAMDX vkCmdInitializeGraphScratchMemoryAMDX; +extern PFN_vkCreateExecutionGraphPipelinesAMDX vkCreateExecutionGraphPipelinesAMDX; +extern PFN_vkGetExecutionGraphPipelineNodeIndexAMDX vkGetExecutionGraphPipelineNodeIndexAMDX; +extern PFN_vkGetExecutionGraphPipelineScratchSizeAMDX vkGetExecutionGraphPipelineScratchSizeAMDX; +#endif /* defined(VK_AMDX_shader_enqueue) */ +#if defined(VK_AMD_anti_lag) +extern PFN_vkAntiLagUpdateAMD vkAntiLagUpdateAMD; +#endif /* defined(VK_AMD_anti_lag) */ #if 
defined(VK_AMD_buffer_marker) extern PFN_vkCmdWriteBufferMarkerAMD vkCmdWriteBufferMarkerAMD; #endif /* defined(VK_AMD_buffer_marker) */ +#if defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) +extern PFN_vkCmdWriteBufferMarker2AMD vkCmdWriteBufferMarker2AMD; +#endif /* defined(VK_AMD_buffer_marker) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_AMD_display_native_hdr) +extern PFN_vkSetLocalDimmingAMD vkSetLocalDimmingAMD; +#endif /* defined(VK_AMD_display_native_hdr) */ #if defined(VK_AMD_draw_indirect_count) extern PFN_vkCmdDrawIndexedIndirectCountAMD vkCmdDrawIndexedIndirectCountAMD; extern PFN_vkCmdDrawIndirectCountAMD vkCmdDrawIndirectCountAMD; @@ -621,10 +1316,17 @@ extern PFN_vkGetShaderInfoAMD vkGetShaderInfoAMD; extern PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; extern PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; #endif /* defined(VK_ANDROID_external_memory_android_hardware_buffer) */ +#if defined(VK_EXT_acquire_drm_display) +extern PFN_vkAcquireDrmDisplayEXT vkAcquireDrmDisplayEXT; +extern PFN_vkGetDrmDisplayEXT vkGetDrmDisplayEXT; +#endif /* defined(VK_EXT_acquire_drm_display) */ #if defined(VK_EXT_acquire_xlib_display) extern PFN_vkAcquireXlibDisplayEXT vkAcquireXlibDisplayEXT; extern PFN_vkGetRandROutputDisplayEXT vkGetRandROutputDisplayEXT; #endif /* defined(VK_EXT_acquire_xlib_display) */ +#if defined(VK_EXT_attachment_feedback_loop_dynamic_state) +extern PFN_vkCmdSetAttachmentFeedbackLoopEnableEXT vkCmdSetAttachmentFeedbackLoopEnableEXT; +#endif /* defined(VK_EXT_attachment_feedback_loop_dynamic_state) */ #if defined(VK_EXT_buffer_device_address) extern PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; #endif /* defined(VK_EXT_buffer_device_address) */ @@ -632,6 +1334,9 @@ extern PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; extern 
PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT; extern PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT; #endif /* defined(VK_EXT_calibrated_timestamps) */ +#if defined(VK_EXT_color_write_enable) +extern PFN_vkCmdSetColorWriteEnableEXT vkCmdSetColorWriteEnableEXT; +#endif /* defined(VK_EXT_color_write_enable) */ #if defined(VK_EXT_conditional_rendering) extern PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT; extern PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT; @@ -661,12 +1366,52 @@ extern PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; extern PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; extern PFN_vkSubmitDebugUtilsMessageEXT vkSubmitDebugUtilsMessageEXT; #endif /* defined(VK_EXT_debug_utils) */ +#if defined(VK_EXT_depth_bias_control) +extern PFN_vkCmdSetDepthBias2EXT vkCmdSetDepthBias2EXT; +#endif /* defined(VK_EXT_depth_bias_control) */ +#if defined(VK_EXT_descriptor_buffer) +extern PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT vkCmdBindDescriptorBufferEmbeddedSamplersEXT; +extern PFN_vkCmdBindDescriptorBuffersEXT vkCmdBindDescriptorBuffersEXT; +extern PFN_vkCmdSetDescriptorBufferOffsetsEXT vkCmdSetDescriptorBufferOffsetsEXT; +extern PFN_vkGetBufferOpaqueCaptureDescriptorDataEXT vkGetBufferOpaqueCaptureDescriptorDataEXT; +extern PFN_vkGetDescriptorEXT vkGetDescriptorEXT; +extern PFN_vkGetDescriptorSetLayoutBindingOffsetEXT vkGetDescriptorSetLayoutBindingOffsetEXT; +extern PFN_vkGetDescriptorSetLayoutSizeEXT vkGetDescriptorSetLayoutSizeEXT; +extern PFN_vkGetImageOpaqueCaptureDescriptorDataEXT vkGetImageOpaqueCaptureDescriptorDataEXT; +extern PFN_vkGetImageViewOpaqueCaptureDescriptorDataEXT vkGetImageViewOpaqueCaptureDescriptorDataEXT; +extern PFN_vkGetSamplerOpaqueCaptureDescriptorDataEXT vkGetSamplerOpaqueCaptureDescriptorDataEXT; +#endif /* defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_EXT_descriptor_buffer) && 
(defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) +extern PFN_vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT; +#endif /* defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing)) */ +#if defined(VK_EXT_device_fault) +extern PFN_vkGetDeviceFaultInfoEXT vkGetDeviceFaultInfoEXT; +#endif /* defined(VK_EXT_device_fault) */ +#if defined(VK_EXT_device_generated_commands) +extern PFN_vkCmdExecuteGeneratedCommandsEXT vkCmdExecuteGeneratedCommandsEXT; +extern PFN_vkCmdPreprocessGeneratedCommandsEXT vkCmdPreprocessGeneratedCommandsEXT; +extern PFN_vkCreateIndirectCommandsLayoutEXT vkCreateIndirectCommandsLayoutEXT; +extern PFN_vkCreateIndirectExecutionSetEXT vkCreateIndirectExecutionSetEXT; +extern PFN_vkDestroyIndirectCommandsLayoutEXT vkDestroyIndirectCommandsLayoutEXT; +extern PFN_vkDestroyIndirectExecutionSetEXT vkDestroyIndirectExecutionSetEXT; +extern PFN_vkGetGeneratedCommandsMemoryRequirementsEXT vkGetGeneratedCommandsMemoryRequirementsEXT; +extern PFN_vkUpdateIndirectExecutionSetPipelineEXT vkUpdateIndirectExecutionSetPipelineEXT; +extern PFN_vkUpdateIndirectExecutionSetShaderEXT vkUpdateIndirectExecutionSetShaderEXT; +#endif /* defined(VK_EXT_device_generated_commands) */ #if defined(VK_EXT_direct_mode_display) extern PFN_vkReleaseDisplayEXT vkReleaseDisplayEXT; #endif /* defined(VK_EXT_direct_mode_display) */ +#if defined(VK_EXT_directfb_surface) +extern PFN_vkCreateDirectFBSurfaceEXT vkCreateDirectFBSurfaceEXT; +extern PFN_vkGetPhysicalDeviceDirectFBPresentationSupportEXT vkGetPhysicalDeviceDirectFBPresentationSupportEXT; +#endif /* defined(VK_EXT_directfb_surface) */ #if defined(VK_EXT_discard_rectangles) extern PFN_vkCmdSetDiscardRectangleEXT vkCmdSetDiscardRectangleEXT; #endif /* defined(VK_EXT_discard_rectangles) */ +#if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 +extern 
PFN_vkCmdSetDiscardRectangleEnableEXT vkCmdSetDiscardRectangleEnableEXT; +extern PFN_vkCmdSetDiscardRectangleModeEXT vkCmdSetDiscardRectangleModeEXT; +#endif /* defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 */ #if defined(VK_EXT_display_control) extern PFN_vkDisplayPowerControlEXT vkDisplayPowerControlEXT; extern PFN_vkGetSwapchainCounterEXT vkGetSwapchainCounterEXT; @@ -679,16 +1424,100 @@ extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2EXT vkGetPhysicalDeviceSurface #if defined(VK_EXT_external_memory_host) extern PFN_vkGetMemoryHostPointerPropertiesEXT vkGetMemoryHostPointerPropertiesEXT; #endif /* defined(VK_EXT_external_memory_host) */ +#if defined(VK_EXT_full_screen_exclusive) +extern PFN_vkAcquireFullScreenExclusiveModeEXT vkAcquireFullScreenExclusiveModeEXT; +extern PFN_vkGetPhysicalDeviceSurfacePresentModes2EXT vkGetPhysicalDeviceSurfacePresentModes2EXT; +extern PFN_vkReleaseFullScreenExclusiveModeEXT vkReleaseFullScreenExclusiveModeEXT; +#endif /* defined(VK_EXT_full_screen_exclusive) */ +#if defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) +extern PFN_vkGetDeviceGroupSurfacePresentModes2EXT vkGetDeviceGroupSurfacePresentModes2EXT; +#endif /* defined(VK_EXT_full_screen_exclusive) && (defined(VK_KHR_device_group) || defined(VK_VERSION_1_1)) */ #if defined(VK_EXT_hdr_metadata) extern PFN_vkSetHdrMetadataEXT vkSetHdrMetadataEXT; #endif /* defined(VK_EXT_hdr_metadata) */ +#if defined(VK_EXT_headless_surface) +extern PFN_vkCreateHeadlessSurfaceEXT vkCreateHeadlessSurfaceEXT; +#endif /* defined(VK_EXT_headless_surface) */ +#if defined(VK_EXT_host_image_copy) +extern PFN_vkCopyImageToImageEXT vkCopyImageToImageEXT; +extern PFN_vkCopyImageToMemoryEXT vkCopyImageToMemoryEXT; +extern PFN_vkCopyMemoryToImageEXT vkCopyMemoryToImageEXT; +extern PFN_vkTransitionImageLayoutEXT vkTransitionImageLayoutEXT; +#endif /* defined(VK_EXT_host_image_copy) */ +#if 
defined(VK_EXT_host_query_reset) +extern PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; +#endif /* defined(VK_EXT_host_query_reset) */ #if defined(VK_EXT_image_drm_format_modifier) extern PFN_vkGetImageDrmFormatModifierPropertiesEXT vkGetImageDrmFormatModifierPropertiesEXT; #endif /* defined(VK_EXT_image_drm_format_modifier) */ +#if defined(VK_EXT_line_rasterization) +extern PFN_vkCmdSetLineStippleEXT vkCmdSetLineStippleEXT; +#endif /* defined(VK_EXT_line_rasterization) */ +#if defined(VK_EXT_mesh_shader) +extern PFN_vkCmdDrawMeshTasksEXT vkCmdDrawMeshTasksEXT; +extern PFN_vkCmdDrawMeshTasksIndirectEXT vkCmdDrawMeshTasksIndirectEXT; +#endif /* defined(VK_EXT_mesh_shader) */ +#if defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) +extern PFN_vkCmdDrawMeshTasksIndirectCountEXT vkCmdDrawMeshTasksIndirectCountEXT; +#endif /* defined(VK_EXT_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_EXT_metal_objects) +extern PFN_vkExportMetalObjectsEXT vkExportMetalObjectsEXT; +#endif /* defined(VK_EXT_metal_objects) */ +#if defined(VK_EXT_metal_surface) +extern PFN_vkCreateMetalSurfaceEXT vkCreateMetalSurfaceEXT; +#endif /* defined(VK_EXT_metal_surface) */ +#if defined(VK_EXT_multi_draw) +extern PFN_vkCmdDrawMultiEXT vkCmdDrawMultiEXT; +extern PFN_vkCmdDrawMultiIndexedEXT vkCmdDrawMultiIndexedEXT; +#endif /* defined(VK_EXT_multi_draw) */ +#if defined(VK_EXT_opacity_micromap) +extern PFN_vkBuildMicromapsEXT vkBuildMicromapsEXT; +extern PFN_vkCmdBuildMicromapsEXT vkCmdBuildMicromapsEXT; +extern PFN_vkCmdCopyMemoryToMicromapEXT vkCmdCopyMemoryToMicromapEXT; +extern PFN_vkCmdCopyMicromapEXT vkCmdCopyMicromapEXT; +extern PFN_vkCmdCopyMicromapToMemoryEXT vkCmdCopyMicromapToMemoryEXT; +extern PFN_vkCmdWriteMicromapsPropertiesEXT vkCmdWriteMicromapsPropertiesEXT; +extern PFN_vkCopyMemoryToMicromapEXT vkCopyMemoryToMicromapEXT; +extern PFN_vkCopyMicromapEXT vkCopyMicromapEXT; +extern 
PFN_vkCopyMicromapToMemoryEXT vkCopyMicromapToMemoryEXT; +extern PFN_vkCreateMicromapEXT vkCreateMicromapEXT; +extern PFN_vkDestroyMicromapEXT vkDestroyMicromapEXT; +extern PFN_vkGetDeviceMicromapCompatibilityEXT vkGetDeviceMicromapCompatibilityEXT; +extern PFN_vkGetMicromapBuildSizesEXT vkGetMicromapBuildSizesEXT; +extern PFN_vkWriteMicromapsPropertiesEXT vkWriteMicromapsPropertiesEXT; +#endif /* defined(VK_EXT_opacity_micromap) */ +#if defined(VK_EXT_pageable_device_local_memory) +extern PFN_vkSetDeviceMemoryPriorityEXT vkSetDeviceMemoryPriorityEXT; +#endif /* defined(VK_EXT_pageable_device_local_memory) */ +#if defined(VK_EXT_pipeline_properties) +extern PFN_vkGetPipelinePropertiesEXT vkGetPipelinePropertiesEXT; +#endif /* defined(VK_EXT_pipeline_properties) */ +#if defined(VK_EXT_private_data) +extern PFN_vkCreatePrivateDataSlotEXT vkCreatePrivateDataSlotEXT; +extern PFN_vkDestroyPrivateDataSlotEXT vkDestroyPrivateDataSlotEXT; +extern PFN_vkGetPrivateDataEXT vkGetPrivateDataEXT; +extern PFN_vkSetPrivateDataEXT vkSetPrivateDataEXT; +#endif /* defined(VK_EXT_private_data) */ #if defined(VK_EXT_sample_locations) extern PFN_vkCmdSetSampleLocationsEXT vkCmdSetSampleLocationsEXT; extern PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT vkGetPhysicalDeviceMultisamplePropertiesEXT; #endif /* defined(VK_EXT_sample_locations) */ +#if defined(VK_EXT_shader_module_identifier) +extern PFN_vkGetShaderModuleCreateInfoIdentifierEXT vkGetShaderModuleCreateInfoIdentifierEXT; +extern PFN_vkGetShaderModuleIdentifierEXT vkGetShaderModuleIdentifierEXT; +#endif /* defined(VK_EXT_shader_module_identifier) */ +#if defined(VK_EXT_shader_object) +extern PFN_vkCmdBindShadersEXT vkCmdBindShadersEXT; +extern PFN_vkCreateShadersEXT vkCreateShadersEXT; +extern PFN_vkDestroyShaderEXT vkDestroyShaderEXT; +extern PFN_vkGetShaderBinaryDataEXT vkGetShaderBinaryDataEXT; +#endif /* defined(VK_EXT_shader_object) */ +#if defined(VK_EXT_swapchain_maintenance1) +extern PFN_vkReleaseSwapchainImagesEXT 
vkReleaseSwapchainImagesEXT; +#endif /* defined(VK_EXT_swapchain_maintenance1) */ +#if defined(VK_EXT_tooling_info) +extern PFN_vkGetPhysicalDeviceToolPropertiesEXT vkGetPhysicalDeviceToolPropertiesEXT; +#endif /* defined(VK_EXT_tooling_info) */ #if defined(VK_EXT_transform_feedback) extern PFN_vkCmdBeginQueryIndexedEXT vkCmdBeginQueryIndexedEXT; extern PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; @@ -703,13 +1532,73 @@ extern PFN_vkDestroyValidationCacheEXT vkDestroyValidationCacheEXT; extern PFN_vkGetValidationCacheDataEXT vkGetValidationCacheDataEXT; extern PFN_vkMergeValidationCachesEXT vkMergeValidationCachesEXT; #endif /* defined(VK_EXT_validation_cache) */ +#if defined(VK_FUCHSIA_buffer_collection) +extern PFN_vkCreateBufferCollectionFUCHSIA vkCreateBufferCollectionFUCHSIA; +extern PFN_vkDestroyBufferCollectionFUCHSIA vkDestroyBufferCollectionFUCHSIA; +extern PFN_vkGetBufferCollectionPropertiesFUCHSIA vkGetBufferCollectionPropertiesFUCHSIA; +extern PFN_vkSetBufferCollectionBufferConstraintsFUCHSIA vkSetBufferCollectionBufferConstraintsFUCHSIA; +extern PFN_vkSetBufferCollectionImageConstraintsFUCHSIA vkSetBufferCollectionImageConstraintsFUCHSIA; +#endif /* defined(VK_FUCHSIA_buffer_collection) */ +#if defined(VK_FUCHSIA_external_memory) +extern PFN_vkGetMemoryZirconHandleFUCHSIA vkGetMemoryZirconHandleFUCHSIA; +extern PFN_vkGetMemoryZirconHandlePropertiesFUCHSIA vkGetMemoryZirconHandlePropertiesFUCHSIA; +#endif /* defined(VK_FUCHSIA_external_memory) */ +#if defined(VK_FUCHSIA_external_semaphore) +extern PFN_vkGetSemaphoreZirconHandleFUCHSIA vkGetSemaphoreZirconHandleFUCHSIA; +extern PFN_vkImportSemaphoreZirconHandleFUCHSIA vkImportSemaphoreZirconHandleFUCHSIA; +#endif /* defined(VK_FUCHSIA_external_semaphore) */ #if defined(VK_FUCHSIA_imagepipe_surface) extern PFN_vkCreateImagePipeSurfaceFUCHSIA vkCreateImagePipeSurfaceFUCHSIA; #endif /* defined(VK_FUCHSIA_imagepipe_surface) */ +#if defined(VK_GGP_stream_descriptor_surface) +extern 
PFN_vkCreateStreamDescriptorSurfaceGGP vkCreateStreamDescriptorSurfaceGGP; +#endif /* defined(VK_GGP_stream_descriptor_surface) */ #if defined(VK_GOOGLE_display_timing) extern PFN_vkGetPastPresentationTimingGOOGLE vkGetPastPresentationTimingGOOGLE; extern PFN_vkGetRefreshCycleDurationGOOGLE vkGetRefreshCycleDurationGOOGLE; #endif /* defined(VK_GOOGLE_display_timing) */ +#if defined(VK_HUAWEI_cluster_culling_shader) +extern PFN_vkCmdDrawClusterHUAWEI vkCmdDrawClusterHUAWEI; +extern PFN_vkCmdDrawClusterIndirectHUAWEI vkCmdDrawClusterIndirectHUAWEI; +#endif /* defined(VK_HUAWEI_cluster_culling_shader) */ +#if defined(VK_HUAWEI_invocation_mask) +extern PFN_vkCmdBindInvocationMaskHUAWEI vkCmdBindInvocationMaskHUAWEI; +#endif /* defined(VK_HUAWEI_invocation_mask) */ +#if defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 +extern PFN_vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI; +#endif /* defined(VK_HUAWEI_subpass_shading) && VK_HUAWEI_SUBPASS_SHADING_SPEC_VERSION >= 2 */ +#if defined(VK_HUAWEI_subpass_shading) +extern PFN_vkCmdSubpassShadingHUAWEI vkCmdSubpassShadingHUAWEI; +#endif /* defined(VK_HUAWEI_subpass_shading) */ +#if defined(VK_INTEL_performance_query) +extern PFN_vkAcquirePerformanceConfigurationINTEL vkAcquirePerformanceConfigurationINTEL; +extern PFN_vkCmdSetPerformanceMarkerINTEL vkCmdSetPerformanceMarkerINTEL; +extern PFN_vkCmdSetPerformanceOverrideINTEL vkCmdSetPerformanceOverrideINTEL; +extern PFN_vkCmdSetPerformanceStreamMarkerINTEL vkCmdSetPerformanceStreamMarkerINTEL; +extern PFN_vkGetPerformanceParameterINTEL vkGetPerformanceParameterINTEL; +extern PFN_vkInitializePerformanceApiINTEL vkInitializePerformanceApiINTEL; +extern PFN_vkQueueSetPerformanceConfigurationINTEL vkQueueSetPerformanceConfigurationINTEL; +extern PFN_vkReleasePerformanceConfigurationINTEL vkReleasePerformanceConfigurationINTEL; +extern PFN_vkUninitializePerformanceApiINTEL 
vkUninitializePerformanceApiINTEL; +#endif /* defined(VK_INTEL_performance_query) */ +#if defined(VK_KHR_acceleration_structure) +extern PFN_vkBuildAccelerationStructuresKHR vkBuildAccelerationStructuresKHR; +extern PFN_vkCmdBuildAccelerationStructuresIndirectKHR vkCmdBuildAccelerationStructuresIndirectKHR; +extern PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR; +extern PFN_vkCmdCopyAccelerationStructureKHR vkCmdCopyAccelerationStructureKHR; +extern PFN_vkCmdCopyAccelerationStructureToMemoryKHR vkCmdCopyAccelerationStructureToMemoryKHR; +extern PFN_vkCmdCopyMemoryToAccelerationStructureKHR vkCmdCopyMemoryToAccelerationStructureKHR; +extern PFN_vkCmdWriteAccelerationStructuresPropertiesKHR vkCmdWriteAccelerationStructuresPropertiesKHR; +extern PFN_vkCopyAccelerationStructureKHR vkCopyAccelerationStructureKHR; +extern PFN_vkCopyAccelerationStructureToMemoryKHR vkCopyAccelerationStructureToMemoryKHR; +extern PFN_vkCopyMemoryToAccelerationStructureKHR vkCopyMemoryToAccelerationStructureKHR; +extern PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR; +extern PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR; +extern PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR; +extern PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR; +extern PFN_vkGetDeviceAccelerationStructureCompatibilityKHR vkGetDeviceAccelerationStructureCompatibilityKHR; +extern PFN_vkWriteAccelerationStructuresPropertiesKHR vkWriteAccelerationStructuresPropertiesKHR; +#endif /* defined(VK_KHR_acceleration_structure) */ #if defined(VK_KHR_android_surface) extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; #endif /* defined(VK_KHR_android_surface) */ @@ -717,12 +1606,39 @@ extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; extern PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; extern PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; #endif 
/* defined(VK_KHR_bind_memory2) */ +#if defined(VK_KHR_buffer_device_address) +extern PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; +extern PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; +extern PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; +#endif /* defined(VK_KHR_buffer_device_address) */ +#if defined(VK_KHR_calibrated_timestamps) +extern PFN_vkGetCalibratedTimestampsKHR vkGetCalibratedTimestampsKHR; +extern PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsKHR vkGetPhysicalDeviceCalibrateableTimeDomainsKHR; +#endif /* defined(VK_KHR_calibrated_timestamps) */ +#if defined(VK_KHR_cooperative_matrix) +extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR; +#endif /* defined(VK_KHR_cooperative_matrix) */ +#if defined(VK_KHR_copy_commands2) +extern PFN_vkCmdBlitImage2KHR vkCmdBlitImage2KHR; +extern PFN_vkCmdCopyBuffer2KHR vkCmdCopyBuffer2KHR; +extern PFN_vkCmdCopyBufferToImage2KHR vkCmdCopyBufferToImage2KHR; +extern PFN_vkCmdCopyImage2KHR vkCmdCopyImage2KHR; +extern PFN_vkCmdCopyImageToBuffer2KHR vkCmdCopyImageToBuffer2KHR; +extern PFN_vkCmdResolveImage2KHR vkCmdResolveImage2KHR; +#endif /* defined(VK_KHR_copy_commands2) */ #if defined(VK_KHR_create_renderpass2) extern PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; extern PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; extern PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR; extern PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR; #endif /* defined(VK_KHR_create_renderpass2) */ +#if defined(VK_KHR_deferred_host_operations) +extern PFN_vkCreateDeferredOperationKHR vkCreateDeferredOperationKHR; +extern PFN_vkDeferredOperationJoinKHR vkDeferredOperationJoinKHR; +extern PFN_vkDestroyDeferredOperationKHR vkDestroyDeferredOperationKHR; +extern PFN_vkGetDeferredOperationMaxConcurrencyKHR vkGetDeferredOperationMaxConcurrencyKHR; +extern PFN_vkGetDeferredOperationResultKHR 
vkGetDeferredOperationResultKHR; +#endif /* defined(VK_KHR_deferred_host_operations) */ #if defined(VK_KHR_descriptor_update_template) extern PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; extern PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; @@ -752,6 +1668,14 @@ extern PFN_vkCreateSharedSwapchainsKHR vkCreateSharedSwapchainsKHR; extern PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR; extern PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR; #endif /* defined(VK_KHR_draw_indirect_count) */ +#if defined(VK_KHR_dynamic_rendering) +extern PFN_vkCmdBeginRenderingKHR vkCmdBeginRenderingKHR; +extern PFN_vkCmdEndRenderingKHR vkCmdEndRenderingKHR; +#endif /* defined(VK_KHR_dynamic_rendering) */ +#if defined(VK_KHR_dynamic_rendering_local_read) +extern PFN_vkCmdSetRenderingAttachmentLocationsKHR vkCmdSetRenderingAttachmentLocationsKHR; +extern PFN_vkCmdSetRenderingInputAttachmentIndicesKHR vkCmdSetRenderingInputAttachmentIndicesKHR; +#endif /* defined(VK_KHR_dynamic_rendering_local_read) */ #if defined(VK_KHR_external_fence_capabilities) extern PFN_vkGetPhysicalDeviceExternalFencePropertiesKHR vkGetPhysicalDeviceExternalFencePropertiesKHR; #endif /* defined(VK_KHR_external_fence_capabilities) */ @@ -785,6 +1709,10 @@ extern PFN_vkImportSemaphoreFdKHR vkImportSemaphoreFdKHR; extern PFN_vkGetSemaphoreWin32HandleKHR vkGetSemaphoreWin32HandleKHR; extern PFN_vkImportSemaphoreWin32HandleKHR vkImportSemaphoreWin32HandleKHR; #endif /* defined(VK_KHR_external_semaphore_win32) */ +#if defined(VK_KHR_fragment_shading_rate) +extern PFN_vkCmdSetFragmentShadingRateKHR vkCmdSetFragmentShadingRateKHR; +extern PFN_vkGetPhysicalDeviceFragmentShadingRatesKHR vkGetPhysicalDeviceFragmentShadingRatesKHR; +#endif /* defined(VK_KHR_fragment_shading_rate) */ #if defined(VK_KHR_get_display_properties2) extern PFN_vkGetDisplayModeProperties2KHR vkGetDisplayModeProperties2KHR; extern 
PFN_vkGetDisplayPlaneCapabilities2KHR vkGetDisplayPlaneCapabilities2KHR; @@ -809,15 +1737,78 @@ extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDevic extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR; extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR; #endif /* defined(VK_KHR_get_surface_capabilities2) */ +#if defined(VK_KHR_line_rasterization) +extern PFN_vkCmdSetLineStippleKHR vkCmdSetLineStippleKHR; +#endif /* defined(VK_KHR_line_rasterization) */ #if defined(VK_KHR_maintenance1) extern PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; #endif /* defined(VK_KHR_maintenance1) */ #if defined(VK_KHR_maintenance3) extern PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; #endif /* defined(VK_KHR_maintenance3) */ +#if defined(VK_KHR_maintenance4) +extern PFN_vkGetDeviceBufferMemoryRequirementsKHR vkGetDeviceBufferMemoryRequirementsKHR; +extern PFN_vkGetDeviceImageMemoryRequirementsKHR vkGetDeviceImageMemoryRequirementsKHR; +extern PFN_vkGetDeviceImageSparseMemoryRequirementsKHR vkGetDeviceImageSparseMemoryRequirementsKHR; +#endif /* defined(VK_KHR_maintenance4) */ +#if defined(VK_KHR_maintenance5) +extern PFN_vkCmdBindIndexBuffer2KHR vkCmdBindIndexBuffer2KHR; +extern PFN_vkGetDeviceImageSubresourceLayoutKHR vkGetDeviceImageSubresourceLayoutKHR; +extern PFN_vkGetImageSubresourceLayout2KHR vkGetImageSubresourceLayout2KHR; +extern PFN_vkGetRenderingAreaGranularityKHR vkGetRenderingAreaGranularityKHR; +#endif /* defined(VK_KHR_maintenance5) */ +#if defined(VK_KHR_maintenance6) +extern PFN_vkCmdBindDescriptorSets2KHR vkCmdBindDescriptorSets2KHR; +extern PFN_vkCmdPushConstants2KHR vkCmdPushConstants2KHR; +#endif /* defined(VK_KHR_maintenance6) */ +#if defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) +extern PFN_vkCmdPushDescriptorSet2KHR vkCmdPushDescriptorSet2KHR; +extern PFN_vkCmdPushDescriptorSetWithTemplate2KHR 
vkCmdPushDescriptorSetWithTemplate2KHR; +#endif /* defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) +extern PFN_vkCmdBindDescriptorBufferEmbeddedSamplers2EXT vkCmdBindDescriptorBufferEmbeddedSamplers2EXT; +extern PFN_vkCmdSetDescriptorBufferOffsets2EXT vkCmdSetDescriptorBufferOffsets2EXT; +#endif /* defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer) */ +#if defined(VK_KHR_map_memory2) +extern PFN_vkMapMemory2KHR vkMapMemory2KHR; +extern PFN_vkUnmapMemory2KHR vkUnmapMemory2KHR; +#endif /* defined(VK_KHR_map_memory2) */ +#if defined(VK_KHR_performance_query) +extern PFN_vkAcquireProfilingLockKHR vkAcquireProfilingLockKHR; +extern PFN_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR; +extern PFN_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR; +extern PFN_vkReleaseProfilingLockKHR vkReleaseProfilingLockKHR; +#endif /* defined(VK_KHR_performance_query) */ +#if defined(VK_KHR_pipeline_binary) +extern PFN_vkCreatePipelineBinariesKHR vkCreatePipelineBinariesKHR; +extern PFN_vkDestroyPipelineBinaryKHR vkDestroyPipelineBinaryKHR; +extern PFN_vkGetPipelineBinaryDataKHR vkGetPipelineBinaryDataKHR; +extern PFN_vkGetPipelineKeyKHR vkGetPipelineKeyKHR; +extern PFN_vkReleaseCapturedPipelineDataKHR vkReleaseCapturedPipelineDataKHR; +#endif /* defined(VK_KHR_pipeline_binary) */ +#if defined(VK_KHR_pipeline_executable_properties) +extern PFN_vkGetPipelineExecutableInternalRepresentationsKHR vkGetPipelineExecutableInternalRepresentationsKHR; +extern PFN_vkGetPipelineExecutablePropertiesKHR vkGetPipelineExecutablePropertiesKHR; +extern PFN_vkGetPipelineExecutableStatisticsKHR vkGetPipelineExecutableStatisticsKHR; +#endif /* defined(VK_KHR_pipeline_executable_properties) */ +#if defined(VK_KHR_present_wait) +extern PFN_vkWaitForPresentKHR 
vkWaitForPresentKHR; +#endif /* defined(VK_KHR_present_wait) */ #if defined(VK_KHR_push_descriptor) extern PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; #endif /* defined(VK_KHR_push_descriptor) */ +#if defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) +extern PFN_vkCmdTraceRaysIndirect2KHR vkCmdTraceRaysIndirect2KHR; +#endif /* defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline) */ +#if defined(VK_KHR_ray_tracing_pipeline) +extern PFN_vkCmdSetRayTracingPipelineStackSizeKHR vkCmdSetRayTracingPipelineStackSizeKHR; +extern PFN_vkCmdTraceRaysIndirectKHR vkCmdTraceRaysIndirectKHR; +extern PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR; +extern PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR; +extern PFN_vkGetRayTracingCaptureReplayShaderGroupHandlesKHR vkGetRayTracingCaptureReplayShaderGroupHandlesKHR; +extern PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR; +extern PFN_vkGetRayTracingShaderGroupStackSizeKHR vkGetRayTracingShaderGroupStackSizeKHR; +#endif /* defined(VK_KHR_ray_tracing_pipeline) */ #if defined(VK_KHR_sampler_ycbcr_conversion) extern PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; extern PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; @@ -839,6 +1830,41 @@ extern PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; extern PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; extern PFN_vkQueuePresentKHR vkQueuePresentKHR; #endif /* defined(VK_KHR_swapchain) */ +#if defined(VK_KHR_synchronization2) +extern PFN_vkCmdPipelineBarrier2KHR vkCmdPipelineBarrier2KHR; +extern PFN_vkCmdResetEvent2KHR vkCmdResetEvent2KHR; +extern PFN_vkCmdSetEvent2KHR vkCmdSetEvent2KHR; +extern PFN_vkCmdWaitEvents2KHR vkCmdWaitEvents2KHR; +extern PFN_vkCmdWriteTimestamp2KHR vkCmdWriteTimestamp2KHR; +extern PFN_vkQueueSubmit2KHR vkQueueSubmit2KHR; +#endif /* defined(VK_KHR_synchronization2) */ +#if 
defined(VK_KHR_timeline_semaphore) +extern PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR; +extern PFN_vkSignalSemaphoreKHR vkSignalSemaphoreKHR; +extern PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR; +#endif /* defined(VK_KHR_timeline_semaphore) */ +#if defined(VK_KHR_video_decode_queue) +extern PFN_vkCmdDecodeVideoKHR vkCmdDecodeVideoKHR; +#endif /* defined(VK_KHR_video_decode_queue) */ +#if defined(VK_KHR_video_encode_queue) +extern PFN_vkCmdEncodeVideoKHR vkCmdEncodeVideoKHR; +extern PFN_vkGetEncodedVideoSessionParametersKHR vkGetEncodedVideoSessionParametersKHR; +extern PFN_vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR; +#endif /* defined(VK_KHR_video_encode_queue) */ +#if defined(VK_KHR_video_queue) +extern PFN_vkBindVideoSessionMemoryKHR vkBindVideoSessionMemoryKHR; +extern PFN_vkCmdBeginVideoCodingKHR vkCmdBeginVideoCodingKHR; +extern PFN_vkCmdControlVideoCodingKHR vkCmdControlVideoCodingKHR; +extern PFN_vkCmdEndVideoCodingKHR vkCmdEndVideoCodingKHR; +extern PFN_vkCreateVideoSessionKHR vkCreateVideoSessionKHR; +extern PFN_vkCreateVideoSessionParametersKHR vkCreateVideoSessionParametersKHR; +extern PFN_vkDestroyVideoSessionKHR vkDestroyVideoSessionKHR; +extern PFN_vkDestroyVideoSessionParametersKHR vkDestroyVideoSessionParametersKHR; +extern PFN_vkGetPhysicalDeviceVideoCapabilitiesKHR vkGetPhysicalDeviceVideoCapabilitiesKHR; +extern PFN_vkGetPhysicalDeviceVideoFormatPropertiesKHR vkGetPhysicalDeviceVideoFormatPropertiesKHR; +extern PFN_vkGetVideoSessionMemoryRequirementsKHR vkGetVideoSessionMemoryRequirementsKHR; +extern PFN_vkUpdateVideoSessionParametersKHR vkUpdateVideoSessionParametersKHR; +#endif /* defined(VK_KHR_video_queue) */ #if defined(VK_KHR_wayland_surface) extern PFN_vkCreateWaylandSurfaceKHR vkCreateWaylandSurfaceKHR; extern PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR vkGetPhysicalDeviceWaylandPresentationSupportKHR; @@ -864,35 +1890,107 @@ extern 
PFN_vkCreateMacOSSurfaceMVK vkCreateMacOSSurfaceMVK; #if defined(VK_NN_vi_surface) extern PFN_vkCreateViSurfaceNN vkCreateViSurfaceNN; #endif /* defined(VK_NN_vi_surface) */ -#if defined(VK_NVX_device_generated_commands) -extern PFN_vkCmdProcessCommandsNVX vkCmdProcessCommandsNVX; -extern PFN_vkCmdReserveSpaceForCommandsNVX vkCmdReserveSpaceForCommandsNVX; -extern PFN_vkCreateIndirectCommandsLayoutNVX vkCreateIndirectCommandsLayoutNVX; -extern PFN_vkCreateObjectTableNVX vkCreateObjectTableNVX; -extern PFN_vkDestroyIndirectCommandsLayoutNVX vkDestroyIndirectCommandsLayoutNVX; -extern PFN_vkDestroyObjectTableNVX vkDestroyObjectTableNVX; -extern PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX; -extern PFN_vkRegisterObjectsNVX vkRegisterObjectsNVX; -extern PFN_vkUnregisterObjectsNVX vkUnregisterObjectsNVX; -#endif /* defined(VK_NVX_device_generated_commands) */ +#if defined(VK_NVX_binary_import) +extern PFN_vkCmdCuLaunchKernelNVX vkCmdCuLaunchKernelNVX; +extern PFN_vkCreateCuFunctionNVX vkCreateCuFunctionNVX; +extern PFN_vkCreateCuModuleNVX vkCreateCuModuleNVX; +extern PFN_vkDestroyCuFunctionNVX vkDestroyCuFunctionNVX; +extern PFN_vkDestroyCuModuleNVX vkDestroyCuModuleNVX; +#endif /* defined(VK_NVX_binary_import) */ +#if defined(VK_NVX_image_view_handle) +extern PFN_vkGetImageViewHandleNVX vkGetImageViewHandleNVX; +#endif /* defined(VK_NVX_image_view_handle) */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 +extern PFN_vkGetImageViewHandle64NVX vkGetImageViewHandle64NVX; +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 3 */ +#if defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 +extern PFN_vkGetImageViewAddressNVX vkGetImageViewAddressNVX; +#endif /* defined(VK_NVX_image_view_handle) && VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION >= 2 */ +#if defined(VK_NV_acquire_winrt_display) +extern PFN_vkAcquireWinrtDisplayNV 
vkAcquireWinrtDisplayNV; +extern PFN_vkGetWinrtDisplayNV vkGetWinrtDisplayNV; +#endif /* defined(VK_NV_acquire_winrt_display) */ #if defined(VK_NV_clip_space_w_scaling) extern PFN_vkCmdSetViewportWScalingNV vkCmdSetViewportWScalingNV; #endif /* defined(VK_NV_clip_space_w_scaling) */ +#if defined(VK_NV_cooperative_matrix) +extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV; +#endif /* defined(VK_NV_cooperative_matrix) */ +#if defined(VK_NV_cooperative_matrix2) +extern PFN_vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV; +#endif /* defined(VK_NV_cooperative_matrix2) */ +#if defined(VK_NV_copy_memory_indirect) +extern PFN_vkCmdCopyMemoryIndirectNV vkCmdCopyMemoryIndirectNV; +extern PFN_vkCmdCopyMemoryToImageIndirectNV vkCmdCopyMemoryToImageIndirectNV; +#endif /* defined(VK_NV_copy_memory_indirect) */ +#if defined(VK_NV_coverage_reduction_mode) +extern PFN_vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV; +#endif /* defined(VK_NV_coverage_reduction_mode) */ +#if defined(VK_NV_cuda_kernel_launch) +extern PFN_vkCmdCudaLaunchKernelNV vkCmdCudaLaunchKernelNV; +extern PFN_vkCreateCudaFunctionNV vkCreateCudaFunctionNV; +extern PFN_vkCreateCudaModuleNV vkCreateCudaModuleNV; +extern PFN_vkDestroyCudaFunctionNV vkDestroyCudaFunctionNV; +extern PFN_vkDestroyCudaModuleNV vkDestroyCudaModuleNV; +extern PFN_vkGetCudaModuleCacheNV vkGetCudaModuleCacheNV; +#endif /* defined(VK_NV_cuda_kernel_launch) */ #if defined(VK_NV_device_diagnostic_checkpoints) extern PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV; extern PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV; #endif /* defined(VK_NV_device_diagnostic_checkpoints) */ +#if defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) +extern 
PFN_vkGetQueueCheckpointData2NV vkGetQueueCheckpointData2NV; +#endif /* defined(VK_NV_device_diagnostic_checkpoints) && (defined(VK_VERSION_1_3) || defined(VK_KHR_synchronization2)) */ +#if defined(VK_NV_device_generated_commands) +extern PFN_vkCmdBindPipelineShaderGroupNV vkCmdBindPipelineShaderGroupNV; +extern PFN_vkCmdExecuteGeneratedCommandsNV vkCmdExecuteGeneratedCommandsNV; +extern PFN_vkCmdPreprocessGeneratedCommandsNV vkCmdPreprocessGeneratedCommandsNV; +extern PFN_vkCreateIndirectCommandsLayoutNV vkCreateIndirectCommandsLayoutNV; +extern PFN_vkDestroyIndirectCommandsLayoutNV vkDestroyIndirectCommandsLayoutNV; +extern PFN_vkGetGeneratedCommandsMemoryRequirementsNV vkGetGeneratedCommandsMemoryRequirementsNV; +#endif /* defined(VK_NV_device_generated_commands) */ +#if defined(VK_NV_device_generated_commands_compute) +extern PFN_vkCmdUpdatePipelineIndirectBufferNV vkCmdUpdatePipelineIndirectBufferNV; +extern PFN_vkGetPipelineIndirectDeviceAddressNV vkGetPipelineIndirectDeviceAddressNV; +extern PFN_vkGetPipelineIndirectMemoryRequirementsNV vkGetPipelineIndirectMemoryRequirementsNV; +#endif /* defined(VK_NV_device_generated_commands_compute) */ #if defined(VK_NV_external_memory_capabilities) extern PFN_vkGetPhysicalDeviceExternalImageFormatPropertiesNV vkGetPhysicalDeviceExternalImageFormatPropertiesNV; #endif /* defined(VK_NV_external_memory_capabilities) */ +#if defined(VK_NV_external_memory_rdma) +extern PFN_vkGetMemoryRemoteAddressNV vkGetMemoryRemoteAddressNV; +#endif /* defined(VK_NV_external_memory_rdma) */ #if defined(VK_NV_external_memory_win32) extern PFN_vkGetMemoryWin32HandleNV vkGetMemoryWin32HandleNV; #endif /* defined(VK_NV_external_memory_win32) */ +#if defined(VK_NV_fragment_shading_rate_enums) +extern PFN_vkCmdSetFragmentShadingRateEnumNV vkCmdSetFragmentShadingRateEnumNV; +#endif /* defined(VK_NV_fragment_shading_rate_enums) */ +#if defined(VK_NV_low_latency2) +extern PFN_vkGetLatencyTimingsNV vkGetLatencyTimingsNV; +extern 
PFN_vkLatencySleepNV vkLatencySleepNV; +extern PFN_vkQueueNotifyOutOfBandNV vkQueueNotifyOutOfBandNV; +extern PFN_vkSetLatencyMarkerNV vkSetLatencyMarkerNV; +extern PFN_vkSetLatencySleepModeNV vkSetLatencySleepModeNV; +#endif /* defined(VK_NV_low_latency2) */ +#if defined(VK_NV_memory_decompression) +extern PFN_vkCmdDecompressMemoryIndirectCountNV vkCmdDecompressMemoryIndirectCountNV; +extern PFN_vkCmdDecompressMemoryNV vkCmdDecompressMemoryNV; +#endif /* defined(VK_NV_memory_decompression) */ #if defined(VK_NV_mesh_shader) -extern PFN_vkCmdDrawMeshTasksIndirectCountNV vkCmdDrawMeshTasksIndirectCountNV; extern PFN_vkCmdDrawMeshTasksIndirectNV vkCmdDrawMeshTasksIndirectNV; extern PFN_vkCmdDrawMeshTasksNV vkCmdDrawMeshTasksNV; #endif /* defined(VK_NV_mesh_shader) */ +#if defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) +extern PFN_vkCmdDrawMeshTasksIndirectCountNV vkCmdDrawMeshTasksIndirectCountNV; +#endif /* defined(VK_NV_mesh_shader) && (defined(VK_KHR_draw_indirect_count) || defined(VK_VERSION_1_2)) */ +#if defined(VK_NV_optical_flow) +extern PFN_vkBindOpticalFlowSessionImageNV vkBindOpticalFlowSessionImageNV; +extern PFN_vkCmdOpticalFlowExecuteNV vkCmdOpticalFlowExecuteNV; +extern PFN_vkCreateOpticalFlowSessionNV vkCreateOpticalFlowSessionNV; +extern PFN_vkDestroyOpticalFlowSessionNV vkDestroyOpticalFlowSessionNV; +extern PFN_vkGetPhysicalDeviceOpticalFlowImageFormatsNV vkGetPhysicalDeviceOpticalFlowImageFormatsNV; +#endif /* defined(VK_NV_optical_flow) */ #if defined(VK_NV_ray_tracing) extern PFN_vkBindAccelerationStructureMemoryNV vkBindAccelerationStructureMemoryNV; extern PFN_vkCmdBuildAccelerationStructureNV vkCmdBuildAccelerationStructureNV; @@ -907,6 +2005,9 @@ extern PFN_vkGetAccelerationStructureHandleNV vkGetAccelerationStructureHandleNV extern PFN_vkGetAccelerationStructureMemoryRequirementsNV vkGetAccelerationStructureMemoryRequirementsNV; extern PFN_vkGetRayTracingShaderGroupHandlesNV 
vkGetRayTracingShaderGroupHandlesNV; #endif /* defined(VK_NV_ray_tracing) */ +#if defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 +extern PFN_vkCmdSetExclusiveScissorEnableNV vkCmdSetExclusiveScissorEnableNV; +#endif /* defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 */ #if defined(VK_NV_scissor_exclusive) extern PFN_vkCmdSetExclusiveScissorNV vkCmdSetExclusiveScissorNV; #endif /* defined(VK_NV_scissor_exclusive) */ @@ -915,9 +2016,119 @@ extern PFN_vkCmdBindShadingRateImageNV vkCmdBindShadingRateImageNV; extern PFN_vkCmdSetCoarseSampleOrderNV vkCmdSetCoarseSampleOrderNV; extern PFN_vkCmdSetViewportShadingRatePaletteNV vkCmdSetViewportShadingRatePaletteNV; #endif /* defined(VK_NV_shading_rate_image) */ -#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) +#if defined(VK_QCOM_tile_properties) +extern PFN_vkGetDynamicRenderingTilePropertiesQCOM vkGetDynamicRenderingTilePropertiesQCOM; +extern PFN_vkGetFramebufferTilePropertiesQCOM vkGetFramebufferTilePropertiesQCOM; +#endif /* defined(VK_QCOM_tile_properties) */ +#if defined(VK_QNX_external_memory_screen_buffer) +extern PFN_vkGetScreenBufferPropertiesQNX vkGetScreenBufferPropertiesQNX; +#endif /* defined(VK_QNX_external_memory_screen_buffer) */ +#if defined(VK_QNX_screen_surface) +extern PFN_vkCreateScreenSurfaceQNX vkCreateScreenSurfaceQNX; +extern PFN_vkGetPhysicalDeviceScreenPresentationSupportQNX vkGetPhysicalDeviceScreenPresentationSupportQNX; +#endif /* defined(VK_QNX_screen_surface) */ +#if defined(VK_VALVE_descriptor_set_host_mapping) +extern PFN_vkGetDescriptorSetHostMappingVALVE vkGetDescriptorSetHostMappingVALVE; +extern PFN_vkGetDescriptorSetLayoutHostMappingInfoVALVE vkGetDescriptorSetLayoutHostMappingInfoVALVE; +#endif /* defined(VK_VALVE_descriptor_set_host_mapping) */ +#if (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && 
defined(VK_EXT_depth_clamp_control)) +extern PFN_vkCmdSetDepthClampRangeEXT vkCmdSetDepthClampRangeEXT; +#endif /* (defined(VK_EXT_depth_clamp_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clamp_control)) */ +#if (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) +extern PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; +extern PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; +extern PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; +extern PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; +extern PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; +extern PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; +extern PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; +extern PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; +extern PFN_vkCmdSetScissorWithCountEXT vkCmdSetScissorWithCountEXT; +extern PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; +extern PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; +extern PFN_vkCmdSetViewportWithCountEXT vkCmdSetViewportWithCountEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) +extern PFN_vkCmdSetDepthBiasEnableEXT vkCmdSetDepthBiasEnableEXT; +extern PFN_vkCmdSetLogicOpEXT vkCmdSetLogicOpEXT; +extern PFN_vkCmdSetPatchControlPointsEXT vkCmdSetPatchControlPointsEXT; +extern PFN_vkCmdSetPrimitiveRestartEnableEXT vkCmdSetPrimitiveRestartEnableEXT; +extern PFN_vkCmdSetRasterizerDiscardEnableEXT vkCmdSetRasterizerDiscardEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state2)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) +extern PFN_vkCmdSetAlphaToCoverageEnableEXT vkCmdSetAlphaToCoverageEnableEXT; +extern PFN_vkCmdSetAlphaToOneEnableEXT vkCmdSetAlphaToOneEnableEXT; +extern PFN_vkCmdSetColorBlendEnableEXT 
vkCmdSetColorBlendEnableEXT; +extern PFN_vkCmdSetColorBlendEquationEXT vkCmdSetColorBlendEquationEXT; +extern PFN_vkCmdSetColorWriteMaskEXT vkCmdSetColorWriteMaskEXT; +extern PFN_vkCmdSetDepthClampEnableEXT vkCmdSetDepthClampEnableEXT; +extern PFN_vkCmdSetLogicOpEnableEXT vkCmdSetLogicOpEnableEXT; +extern PFN_vkCmdSetPolygonModeEXT vkCmdSetPolygonModeEXT; +extern PFN_vkCmdSetRasterizationSamplesEXT vkCmdSetRasterizationSamplesEXT; +extern PFN_vkCmdSetSampleMaskEXT vkCmdSetSampleMaskEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3)) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) +extern PFN_vkCmdSetTessellationDomainOriginEXT vkCmdSetTessellationDomainOriginEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && (defined(VK_KHR_maintenance2) || defined(VK_VERSION_1_1))) || (defined(VK_EXT_shader_object)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) +extern PFN_vkCmdSetRasterizationStreamEXT vkCmdSetRasterizationStreamEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_transform_feedback)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_transform_feedback)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) +extern PFN_vkCmdSetConservativeRasterizationModeEXT vkCmdSetConservativeRasterizationModeEXT; +extern PFN_vkCmdSetExtraPrimitiveOverestimationSizeEXT vkCmdSetExtraPrimitiveOverestimationSizeEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_conservative_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_conservative_rasterization)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) 
|| (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) +extern PFN_vkCmdSetDepthClipEnableEXT vkCmdSetDepthClipEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_enable)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_enable)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) +extern PFN_vkCmdSetSampleLocationsEnableEXT vkCmdSetSampleLocationsEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_sample_locations)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_sample_locations)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) +extern PFN_vkCmdSetColorBlendAdvancedEXT vkCmdSetColorBlendAdvancedEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_blend_operation_advanced)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_blend_operation_advanced)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) +extern PFN_vkCmdSetProvokingVertexModeEXT vkCmdSetProvokingVertexModeEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_provoking_vertex)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_provoking_vertex)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) +extern PFN_vkCmdSetLineRasterizationModeEXT vkCmdSetLineRasterizationModeEXT; +extern PFN_vkCmdSetLineStippleEnableEXT vkCmdSetLineStippleEnableEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_line_rasterization)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_line_rasterization)) */ +#if 
(defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) +extern PFN_vkCmdSetDepthClipNegativeOneToOneEXT vkCmdSetDepthClipNegativeOneToOneEXT; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_EXT_depth_clip_control)) || (defined(VK_EXT_shader_object) && defined(VK_EXT_depth_clip_control)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) +extern PFN_vkCmdSetViewportWScalingEnableNV vkCmdSetViewportWScalingEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)) || (defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) +extern PFN_vkCmdSetViewportSwizzleNV vkCmdSetViewportSwizzleNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)) || (defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) +extern PFN_vkCmdSetCoverageToColorEnableNV vkCmdSetCoverageToColorEnableNV; +extern PFN_vkCmdSetCoverageToColorLocationNV vkCmdSetCoverageToColorLocationNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)) || (defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) +extern PFN_vkCmdSetCoverageModulationModeNV vkCmdSetCoverageModulationModeNV; +extern 
PFN_vkCmdSetCoverageModulationTableEnableNV vkCmdSetCoverageModulationTableEnableNV; +extern PFN_vkCmdSetCoverageModulationTableNV vkCmdSetCoverageModulationTableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)) || (defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) +extern PFN_vkCmdSetShadingRateImageEnableNV vkCmdSetShadingRateImageEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)) || (defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) +extern PFN_vkCmdSetRepresentativeFragmentTestEnableNV vkCmdSetRepresentativeFragmentTestEnableNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)) || (defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)) */ +#if (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) +extern PFN_vkCmdSetCoverageReductionModeNV vkCmdSetCoverageReductionModeNV; +#endif /* (defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)) || (defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)) */ +#if (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) +extern PFN_vkGetImageSubresourceLayout2EXT vkGetImageSubresourceLayout2EXT; +#endif /* (defined(VK_EXT_host_image_copy)) || (defined(VK_EXT_image_compression_control)) */ +#if (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) +extern 
PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT; +#endif /* (defined(VK_EXT_shader_object)) || (defined(VK_EXT_vertex_input_dynamic_state)) */ +#if (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) extern PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; -#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)) */ +#endif /* (defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)) || (defined(VK_KHR_push_descriptor) && (defined(VK_VERSION_1_1) || defined(VK_KHR_descriptor_update_template))) */ #if (defined(VK_KHR_device_group) && defined(VK_KHR_surface)) || (defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)) extern PFN_vkGetDeviceGroupPresentCapabilitiesKHR vkGetDeviceGroupPresentCapabilitiesKHR; extern PFN_vkGetDeviceGroupSurfacePresentModesKHR vkGetDeviceGroupSurfacePresentModesKHR; @@ -934,8 +2145,15 @@ extern PFN_vkAcquireNextImage2KHR vkAcquireNextImage2KHR; #endif +#ifdef VOLK_IMPLEMENTATION +#undef VOLK_IMPLEMENTATION +/* Prevent tools like dependency checkers from detecting a cyclic dependency */ +#define VOLK_SOURCE "volk.c" +#include VOLK_SOURCE +#endif + /** - * Copyright (c) 2018-2019 Arseny Kapoulkine + * Copyright (c) 2018-2024 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -955,3 +2173,4 @@ extern PFN_vkAcquireNextImage2KHR vkAcquireNextImage2KHR; * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +/* clang-format on */ From 5dc4ebade17e700a9edb50361224b73690fed24f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 23:25:27 +0200 Subject: [PATCH 128/211] Fix Vulkan perf tracing with Tracy on Switch --- Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp | 7 ++----- Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 7 ++++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 99b3712ae..1a8739acc 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -111,7 +111,7 @@ GPUContextVulkan::GPUContextVulkan(GPUDeviceVulkan* device, QueueVulkan* queue) #endif #if GPU_ENABLE_TRACY -#if VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset +#if VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset && !PLATFORM_SWITCH // Use calibrated timestamps extension if (vkResetQueryPoolEXT && vkGetCalibratedTimestampsEXT) { @@ -120,7 +120,7 @@ GPUContextVulkan::GPUContextVulkan(GPUDeviceVulkan* device, QueueVulkan* queue) else #endif { - // Use immediate command buffer for Tracy initialization + // Use immediate command buffer for timestamps calibration VkCommandBufferAllocateInfo cmdInfo; RenderToolsVulkan::ZeroStruct(cmdInfo, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO); cmdInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; @@ -1021,9 +1021,7 @@ void GPUContextVulkan::ResetCB() void GPUContextVulkan::BindCB(int32 slot, GPUConstantBuffer* cb) { ASSERT(slot >= 0 && slot < GPU_MAX_CB_BINDED); - const auto cbVulkan = static_cast(cb); - if (_cbHandles[slot] != cbVulkan) { _cbDirtyFlag = true; @@ -1110,7 +1108,6 @@ void GPUContextVulkan::UpdateCB(GPUConstantBuffer* cb, const void* data) const uint32 size = cbVulkan->GetSize(); if (size == 0) return; - const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); // Allocate bytes for the buffer 
const auto allocation = _device->UniformBufferUploader->Allocate(size, 0, this); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 083748aaa..fb7510a2d 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -52,6 +52,9 @@ bool SupportsDebugUtilsExt = false; #if VK_EXT_debug_utils VkDebugUtilsMessengerEXT Messenger = VK_NULL_HANDLE; #endif +#if PLATFORM_SWITCH +VkInstance SwitchVkInstance = VK_NULL_HANDLE; +#endif bool SupportsDebugCallbackExt = false; VkDebugReportCallbackEXT MsgCallback = VK_NULL_HANDLE; @@ -1241,7 +1244,9 @@ GPUDevice* GPUDeviceVulkan::Create() return nullptr; } -#if !PLATFORM_SWITCH +#if PLATFORM_SWITCH + SwitchVkInstance = Instance; +#else // Setup bindings volkLoadInstance(Instance); #endif From b6229350a305d25df3dc5ebbd21ecaaf712d8b7f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 23:45:53 +0200 Subject: [PATCH 129/211] Fix crash on invalid GPUBuffer vertex layout in empty buffer --- Source/Engine/Graphics/GPUBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/GPUBuffer.cpp b/Source/Engine/Graphics/GPUBuffer.cpp index f8372572e..ae86151f0 100644 --- a/Source/Engine/Graphics/GPUBuffer.cpp +++ b/Source/Engine/Graphics/GPUBuffer.cpp @@ -174,7 +174,7 @@ GPUBuffer::GPUBuffer() : GPUResource(SpawnParams(Guid::New(), TypeInitializer)) { // Buffer with size 0 is considered to be invalid - _desc.Size = 0; + _desc.Clear(); } bool GPUBuffer::IsStaging() const From 6307ad79790bc8499529c1c6f85b2174e8674ff9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 23:46:17 +0200 Subject: [PATCH 130/211] Fix shadow bias artifacts on Low shadows quality --- Source/Engine/Renderer/ShadowsPass.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Engine/Renderer/ShadowsPass.cpp 
b/Source/Engine/Renderer/ShadowsPass.cpp index 5cf90876a..968b271dd 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -625,6 +625,10 @@ void ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& render atlasLight.Distance = Math::Min(renderContext.View.Far, light.ShadowsDistance); atlasLight.Bounds.Center = light.Position + renderContext.View.Origin; // Keep bounds in world-space to properly handle DirtyStaticBounds atlasLight.Bounds.Radius = 0.0f; + + // Adjust bias to account for lower shadow quality + if (shadows.MaxShadowsQuality == 0) + atlasLight.Bias *= 1.5f; } bool ShadowsPass::SetupLight(ShadowsCustomBuffer& shadows, RenderContext& renderContext, RenderContextBatch& renderContextBatch, RenderLocalLightData& light, ShadowAtlasLight& atlasLight) From bb8f569c410ea7d8384f1e0e3bc3d8e72768edde Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 30 Jul 2025 23:46:42 +0200 Subject: [PATCH 131/211] Fix Motion Vectors rendering skipping when not needed by SSR --- Source/Engine/Renderer/Renderer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index fd02b133f..bca4641b5 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -370,7 +370,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont setup.UseMotionVectors = (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::MotionBlur) && motionBlurSettings.Enabled && motionBlurSettings.Scale > ZeroTolerance) || renderContext.View.Mode == ViewMode::MotionVectors || - (ssrSettings.TemporalEffect && EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::SSR)) || + (ssrSettings.Intensity > ZeroTolerance && ssrSettings.TemporalEffect && EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::SSR)) || renderContext.List->Settings.AntiAliasing.Mode == AntialiasingMode::TemporalAntialiasing; } 
setup.UseTemporalAAJitter = aaMode == AntialiasingMode::TemporalAntialiasing; From a53a438c3c30c67c50b5c69c4cad9159c5d587e9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 12:10:42 +0200 Subject: [PATCH 132/211] Fix to use static VkInstance --- Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index fb7510a2d..2820d5777 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -43,7 +43,7 @@ #endif GPUDeviceVulkan::OptionalVulkanDeviceExtensions GPUDeviceVulkan::OptionalDeviceExtensions; -VkInstance GPUDeviceVulkan::Instance; +VkInstance GPUDeviceVulkan::Instance = VK_NULL_HANDLE; Array GPUDeviceVulkan::InstanceExtensions; Array GPUDeviceVulkan::InstanceLayers; @@ -52,9 +52,6 @@ bool SupportsDebugUtilsExt = false; #if VK_EXT_debug_utils VkDebugUtilsMessengerEXT Messenger = VK_NULL_HANDLE; #endif -#if PLATFORM_SWITCH -VkInstance SwitchVkInstance = VK_NULL_HANDLE; -#endif bool SupportsDebugCallbackExt = false; VkDebugReportCallbackEXT MsgCallback = VK_NULL_HANDLE; @@ -1244,9 +1241,7 @@ GPUDevice* GPUDeviceVulkan::Create() return nullptr; } -#if PLATFORM_SWITCH - SwitchVkInstance = Instance; -#else +#if !PLATFORM_SWITCH // Setup bindings volkLoadInstance(Instance); #endif From 08154d8fe5919c01be0f4494d531c67ab9260083 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 15:07:57 +0200 Subject: [PATCH 133/211] Fix support for nesting classes inside other scripting classes --- Source/Tools/Flax.Build/Bindings/BindingsGenerator.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.cs b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.cs index b1c388e29..9ecb5ecd3 100644 --- 
a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.cs +++ b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.cs @@ -270,8 +270,8 @@ namespace Flax.Build.Bindings { if (string.Equals(token.Value, ApiTokens.Class, StringComparison.Ordinal)) { - if (!(context.ScopeInfo is FileInfo)) - throw new NotImplementedException("TODO: add support for nested classes in scripting API"); + if (!(context.ScopeInfo is FileInfo) && !(context.ScopeInfo is ClassInfo)) + throw new Exception($"Not supported nested class at line {tokenizer.CurrentLine}. Classes can be nested only in other classes (or in global scope)."); var classInfo = ParseClass(ref context); scopeType = classInfo; From 846a0b5685b75877a93b59073a6b7f397af0bcde Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 15:08:38 +0200 Subject: [PATCH 134/211] Fix PSO init to release old data on recreation --- .../GraphicsDevice/DirectX/DX11/GPUPipelineStateDX11.cpp | 3 ++- .../GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp | 3 ++- Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUPipelineStateDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUPipelineStateDX11.cpp index 5b5bbdb4c..99cadde7e 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUPipelineStateDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUPipelineStateDX11.cpp @@ -31,7 +31,8 @@ bool GPUPipelineStateDX11::IsValid() const bool GPUPipelineStateDX11::Init(const Description& desc) { - ASSERT(!IsValid()); + if (IsValid()) + OnReleaseGPU(); // Cache shaders VS = (GPUShaderProgramVSDX11*)desc.VS; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp index 7695260cd..2abfb3e48 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp +++ 
b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp @@ -135,7 +135,8 @@ void GPUPipelineStateDX12::OnReleaseGPU() bool GPUPipelineStateDX12::Init(const Description& desc) { - ASSERT(!IsValid()); + if (IsValid()) + OnReleaseGPU(); // Create description D3D12_GRAPHICS_PIPELINE_STATE_DESC psDesc; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp index b8357e29f..927e4fbca 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp @@ -355,7 +355,8 @@ bool GPUPipelineStateVulkan::IsValid() const bool GPUPipelineStateVulkan::Init(const Description& desc) { - ASSERT(!IsValid()); + if (IsValid()) + OnReleaseGPU(); // Reset description RenderToolsVulkan::ZeroStruct(_desc, VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); From 9c5060584d43a9c28857c08d03af6b4668dcbc7f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 17:32:41 +0200 Subject: [PATCH 135/211] Fix output log console to remove executed command from history no matter the placement --- Source/Editor/Windows/OutputLogWindow.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Editor/Windows/OutputLogWindow.cs b/Source/Editor/Windows/OutputLogWindow.cs index be6e6ff4d..4a91680f1 100644 --- a/Source/Editor/Windows/OutputLogWindow.cs +++ b/Source/Editor/Windows/OutputLogWindow.cs @@ -344,8 +344,8 @@ namespace FlaxEditor.Windows // Update history buffer if (_window._commandHistory == null) _window._commandHistory = new List(); - else if (_window._commandHistory.Count != 0 && _window._commandHistory.Last() == command) - _window._commandHistory.RemoveAt(_window._commandHistory.Count - 1); + else if (_window._commandHistory.Count != 0 && _window._commandHistory.Contains(command)) + _window._commandHistory.Remove(command); _window._commandHistory.Add(command); if 
(_window._commandHistory.Count > CommandHistoryLimit) _window._commandHistory.RemoveAt(0); From a8b9211c32f447b588b8a79e64d5eb592ce70909 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 17:34:22 +0200 Subject: [PATCH 136/211] Fix output log console to resize width to contain whole item text --- Source/Editor/Windows/OutputLogWindow.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Source/Editor/Windows/OutputLogWindow.cs b/Source/Editor/Windows/OutputLogWindow.cs index 4a91680f1..d74c52623 100644 --- a/Source/Editor/Windows/OutputLogWindow.cs +++ b/Source/Editor/Windows/OutputLogWindow.cs @@ -235,6 +235,8 @@ namespace FlaxEditor.Windows // Add items ItemsListContextMenu.Item lastItem = null; + var itemFont = Style.Current.FontSmall; + var maxWidth = 0.0f; foreach (var command in commands) { cm.AddItem(lastItem = new Item @@ -252,6 +254,7 @@ namespace FlaxEditor.Windows // Set command Set(item.Name); }; + maxWidth = Mathf.Max(maxWidth, itemFont.MeasureText(command).X); } cm.ItemClicked += item => { @@ -265,6 +268,9 @@ namespace FlaxEditor.Windows cm.Height = 220; if (cm.Height > totalHeight) cm.Height = totalHeight; // Limit popup height if list is small + maxWidth += 8.0f + ScrollBar.DefaultSize; // Margin + if (cm.Width < maxWidth) + cm.Width = maxWidth; if (searchText != null) { cm.SortItems(); From 4aa26760844e86d8de9979b7c04d5299ff7e18e0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 20:02:42 +0200 Subject: [PATCH 137/211] Fix `MClass::GetNamespace` typo that returned name instead --- Source/Engine/Scripting/ManagedCLR/MClass.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Scripting/ManagedCLR/MClass.h b/Source/Engine/Scripting/ManagedCLR/MClass.h index a89d75205..9484a2c44 100644 --- a/Source/Engine/Scripting/ManagedCLR/MClass.h +++ b/Source/Engine/Scripting/ManagedCLR/MClass.h @@ -91,7 +91,7 @@ public: /// FORCE_INLINE StringAnsiView GetNamespace() const { - return _name; + 
return _namespace; } #if USE_MONO From 01617ae6842c1fd92d4bdd63cceeb674ddb22292 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 20:03:26 +0200 Subject: [PATCH 138/211] Optimize Debug Commands name building and support nested classes with name hierarchy --- Source/Engine/Debug/DebugCommands.cpp | 37 ++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Debug/DebugCommands.cpp b/Source/Engine/Debug/DebugCommands.cpp index 6927a1da9..45bd202c3 100644 --- a/Source/Engine/Debug/DebugCommands.cpp +++ b/Source/Engine/Debug/DebugCommands.cpp @@ -186,6 +186,34 @@ namespace Task* AsyncTask = nullptr; Array Commands; + void BuildName(CommandData& cmd, const MClass* mclass, const StringAnsiView& itemName) + { + StringAnsiView mclassName = mclass->GetName(); + StringAnsiView mclassFullname = mclass->GetFullName(); + StringAnsiView mclassNamespace = mclass->GetNamespace(); + + Array> buffer; // Stack-based but with option to alloc in case of very long commands + buffer.Resize(mclassFullname.Length() - mclassNamespace.Length() + itemName.Length() + 1); + Char* bufferPtr = buffer.Get(); + + // Check if class is nested, then include outer class name for hierarchy + if (mclassNamespace.Length() + mclassName.Length() + 1 != mclassFullname.Length()) + { + StringAnsiView outerName(mclassFullname.Get() + mclassNamespace.Length() + 1, mclassFullname.Length() - mclassNamespace.Length() - 2 - mclassName.Length()); + StringUtils::Copy(bufferPtr, outerName.Get(), outerName.Length()); + bufferPtr += outerName.Length(); + *bufferPtr++ = '.'; + } + StringUtils::Copy(bufferPtr, mclassName.Get(), mclassName.Length()); + bufferPtr += mclassName.Length(); + *bufferPtr++ = '.'; + StringUtils::Copy(bufferPtr, itemName.Get(), itemName.Length()); + bufferPtr += itemName.Length(); + *bufferPtr++ = 0; + + cmd.Name.Set(buffer.Get(), (int32)(bufferPtr - buffer.Get())); + } + void FindDebugCommands(BinaryModule* module) { if (module == 
GetBinaryModuleCorlib()) @@ -206,8 +234,6 @@ namespace mclass->IsEnum()) continue; const bool useClass = mclass->HasAttribute(attribute); - // TODO: optimize this via stack-based format buffer and then convert Ansi to UTF16 -#define BUILD_NAME(commandData, itemName) commandData.Name = String(mclass->GetName()) + TEXT(".") + String(itemName) // Process methods const auto& methods = mclass->GetMethods(); @@ -231,7 +257,7 @@ namespace continue; auto& commandData = Commands.AddOne(); - BUILD_NAME(commandData, method->GetName()); + BuildName(commandData, mclass, method->GetName()); commandData.Module = module; commandData.Method = method; } @@ -248,7 +274,7 @@ namespace continue; auto& commandData = Commands.AddOne(); - BUILD_NAME(commandData, field->GetName()); + BuildName(commandData, mclass, field->GetName()); commandData.Module = module; commandData.Field = field; } @@ -265,13 +291,12 @@ namespace continue; auto& commandData = Commands.AddOne(); - BUILD_NAME(commandData, property->GetName()); + BuildName(commandData, mclass, property->GetName()); commandData.Module = module; commandData.MethodGet = property->GetGetMethod(); commandData.MethodSet = property->GetSetMethod(); } } -#undef BUILD_NAME } else #endif From 7603109dce8ee07bee5ef5f8b33a8120718b3961 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 20:04:03 +0200 Subject: [PATCH 139/211] Add `StringUtils::Copy` for `char` to `Char` --- Source/Engine/Platform/Base/StringUtilsBase.cpp | 12 ++++++++++++ Source/Engine/Platform/StringUtils.h | 3 +++ 2 files changed, 15 insertions(+) diff --git a/Source/Engine/Platform/Base/StringUtilsBase.cpp b/Source/Engine/Platform/Base/StringUtilsBase.cpp index adda1b5a7..e86968edf 100644 --- a/Source/Engine/Platform/Base/StringUtilsBase.cpp +++ b/Source/Engine/Platform/Base/StringUtilsBase.cpp @@ -28,6 +28,18 @@ int32 StringUtils::Copy(char* dst, const Char* src, int32 count) return i; } +int32 StringUtils::Copy(Char* dst, const char* src, int32 count) +{ + 
int32 i = 0; + while (i < count && src[i]) + { + dst[i] = (Char)src[i]; + i++; + } + dst[i] = 0; + return i; +} + const Char* StringUtils::FindIgnoreCase(const Char* str, const Char* toFind) { if (toFind == nullptr || str == nullptr) diff --git a/Source/Engine/Platform/StringUtils.h b/Source/Engine/Platform/StringUtils.h index 3e1dc5660..faa8127f8 100644 --- a/Source/Engine/Platform/StringUtils.h +++ b/Source/Engine/Platform/StringUtils.h @@ -128,6 +128,9 @@ public: // Copies the string (count is maximum amount of characters to copy). Returns amount of copied elements (excluding null terminator character). static int32 Copy(char* dst, const Char* src, int32 count); + // Copies the string (count is maximum amount of characters to copy). Returns amount of copied elements (excluding null terminator character). + static int32 Copy(Char* dst, const char* src, int32 count); + // Finds specific sub-string in the input string. Returns the first found position in the input string or nulll if failed. 
static const Char* Find(const Char* str, const Char* toFind); From 0f81c6496436144ffc489c8881b58188b5d54528 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 31 Jul 2025 20:05:08 +0200 Subject: [PATCH 140/211] Refactor Color Grading LUT rendering to have config for 2D/3D mode --- Source/Engine/Graphics/Graphics.cpp | 1 + Source/Engine/Graphics/Graphics.h | 10 ++ Source/Engine/Renderer/ColorGradingPass.cpp | 108 ++++++++------------ Source/Engine/Renderer/ColorGradingPass.h | 19 +--- 4 files changed, 57 insertions(+), 81 deletions(-) diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index bf17970ea..294bc4bd3 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -26,6 +26,7 @@ Quality Graphics::GIQuality = Quality::High; bool Graphics::GICascadesBlending = false; PostProcessSettings Graphics::PostProcessSettings; bool Graphics::SpreadWorkload = true; +bool Graphics::PostProcessing::ColorGradingVolumeLUT = true; #if GRAPHICS_API_NULL extern GPUDevice* CreateGPUDeviceNull(); diff --git a/Source/Engine/Graphics/Graphics.h b/Source/Engine/Graphics/Graphics.h index d6a137b9c..d328431b0 100644 --- a/Source/Engine/Graphics/Graphics.h +++ b/Source/Engine/Graphics/Graphics.h @@ -84,6 +84,16 @@ public: /// API_FIELD() static bool SpreadWorkload; +public: + // Post Processing effects rendering configuration. + API_CLASS(Static, Attributes = "DebugCommand") class FLAXENGINE_API PostProcessing + { + DECLARE_SCRIPTING_TYPE_MINIMAL(PostProcessing); + + // Toggles between 2D and 3D LUT texture for Color Grading. + API_FIELD() static bool ColorGradingVolumeLUT; + }; + public: /// /// Disposes the device. 
diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp b/Source/Engine/Renderer/ColorGradingPass.cpp index 322e7d591..ea85d69bd 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -5,6 +5,7 @@ #include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPULimits.h" +#include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderTask.h" @@ -36,12 +37,6 @@ GPU_CB_STRUCT(Data { float LutWeight; }); -ColorGradingPass::ColorGradingPass() - : _useVolumeTexture(false) - , _lutFormat() -{ -} - String ColorGradingPass::ToString() const { return TEXT("ColorGradingPass"); @@ -49,103 +44,87 @@ String ColorGradingPass::ToString() const bool ColorGradingPass::Init() { - // Detect if can use volume texture (3d) for a LUT (faster, requires geometry shader) - const auto device = GPUDevice::Instance; -#if GPU_ALLOW_GEOMETRY_SHADERS - _useVolumeTexture = device->Limits.HasGeometryShaders && device->Limits.HasVolumeTextureRendering; -#endif - - // Pick a proper LUT pixels format - _lutFormat = PixelFormat::R10G10B10A2_UNorm; - const auto formatSupport = device->GetFormatFeatures(_lutFormat).Support; - FormatSupport formatSupportFlags = FormatSupport::ShaderSample | FormatSupport::RenderTarget; - if (_useVolumeTexture) - formatSupportFlags |= FormatSupport::Texture3D; - else - formatSupportFlags |= FormatSupport::Texture2D; - if (EnumHasNoneFlags(formatSupport, formatSupportFlags)) - { - // Fallback to format that is supported on every washing machine - _lutFormat = PixelFormat::R8G8B8A8_UNorm; - } - - // Create pipeline state _psLut.CreatePipelineStates(); - - // Load shader _shader = Content::LoadAsyncInternal(TEXT("Shaders/ColorGrading")); if (_shader == nullptr) return true; #if COMPILE_WITH_DEV_ENV _shader.Get()->OnReloading.Bind(this); #endif - return false; } bool ColorGradingPass::setupResources() { - // Wait for shader if 
(!_shader || !_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); CHECK_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data); - // Create pipeline stages - GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; - if (!_psLut.IsValid()) - { - StringAnsiView psName; + // Create pipeline stage + auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + StringAnsiView psName; #if GPU_ALLOW_GEOMETRY_SHADERS - if (_useVolumeTexture) - { - psDesc.VS = shader->GetVS("VS_WriteToSlice"); - psDesc.GS = shader->GetGS("GS_WriteToSlice"); - psName = "PS_Lut3D"; - } - else -#endif - { - psName = "PS_Lut2D"; - } - if (_psLut.Create(psDesc, shader, psName)) - return true; + if (_use3D == 1) + { + psDesc.VS = shader->GetVS("VS_WriteToSlice"); + psDesc.GS = shader->GetGS("GS_WriteToSlice"); + psName = "PS_Lut3D"; } + else +#endif + { + psName = "PS_Lut2D"; + } + if (_psLut.Create(psDesc, shader, psName)) + return true; return false; } void ColorGradingPass::Dispose() { - // Base RendererPass::Dispose(); - // Cleanup _psLut.Delete(); _shader = nullptr; } GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) { + // Check if can use volume texture (3D) for a LUT (faster on modern platforms, requires geometry shader) + const auto device = GPUDevice::Instance; + bool use3D = GPU_ALLOW_GEOMETRY_SHADERS && Graphics::PostProcessing::ColorGradingVolumeLUT; + use3D &= device->Limits.HasGeometryShaders && device->Limits.HasVolumeTextureRendering; + use3D &= !PLATFORM_SWITCH; // TODO: move this in future to platform-specific configs + + // Rebuild PSO on change + if (_use3D != (int32)use3D) + { + invalidateResources(); + _use3D = use3D; + } + // Ensure to have valid data if (checkIfSkipPass()) return nullptr; - PROFILE_GPU_CPU("Color Grading LUT"); - // For a 3D texture, the viewport is 16x16 (per slice), for a 2D texture, it's unwrapped to 256x16 - const int32 LutSize = 32; // this must match value in 
shader (see ColorGrading.shader and PostProcessing.shader) + // Pick a proper LUT pixels format + auto lutFormat = PixelFormat::R10G10B10A2_UNorm; + const auto formatSupport = device->GetFormatFeatures(lutFormat).Support; + FormatSupport formatSupportFlags = FormatSupport::ShaderSample | FormatSupport::RenderTarget; + formatSupportFlags |= use3D ? FormatSupport::Texture3D : FormatSupport::Texture2D; + if (EnumHasNoneFlags(formatSupport, formatSupportFlags)) + lutFormat = PixelFormat::R8G8B8A8_UNorm; + + // For a 3D texture, the viewport is 32x32 (per slice), for a 2D texture, it's unwrapped to 1024x32 + constexpr int32 lutSize = 32; // this must match value in shader (see ColorGrading.shader and PostProcessing.shader) GPUTextureDescription lutDesc; -#if GPU_ALLOW_GEOMETRY_SHADERS - if (_useVolumeTexture) - { - lutDesc = GPUTextureDescription::New3D(LutSize, LutSize, LutSize, 1, _lutFormat); - } + if (use3D) + lutDesc = GPUTextureDescription::New3D(lutSize, lutSize, lutSize, 1, lutFormat); else -#endif - { - lutDesc = GPUTextureDescription::New2D(LutSize * LutSize, LutSize, 1, _lutFormat); - } + lutDesc = GPUTextureDescription::New2D(lutSize * lutSize, lutSize, 1, lutFormat); const auto lut = RenderTargetPool::Get(lutDesc); RENDER_TARGET_POOL_SET_NAME(lut, "ColorGrading.LUT"); @@ -181,7 +160,6 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) data.LutWeight = useLut ? 
colorGrading.LutWeight : 0.0f; // Prepare - auto device = GPUDevice::Instance; auto context = device->GetMainContext(); const auto cb = _shader->GetShader()->GetCB(0); context->UpdateCB(cb, &data); @@ -192,7 +170,7 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) // Draw #if GPU_ALLOW_GEOMETRY_SHADERS - if (_useVolumeTexture) + if (use3D) { context->SetRenderTarget(lut->ViewVolume()); diff --git a/Source/Engine/Renderer/ColorGradingPass.h b/Source/Engine/Renderer/ColorGradingPass.h index 3194cbe98..2288f9703 100644 --- a/Source/Engine/Renderer/ColorGradingPass.h +++ b/Source/Engine/Renderer/ColorGradingPass.h @@ -11,30 +11,19 @@ class ColorGradingPass : public RendererPass { private: - - bool _useVolumeTexture; - PixelFormat _lutFormat; + int32 _use3D = -1; AssetReference _shader; GPUPipelineStatePermutationsPs<4> _psLut; public: - /// - /// Init - /// - ColorGradingPass(); - -public: - - /// - /// Performs Look Up Table rendering for the input task. + /// Renders Look Up table with color grading parameters mixed in. /// /// The rendering context. - /// Allocated temp render target with a rendered LUT. Can be 2d or 3d based on current graphics hardware caps. Release after usage. + /// Allocated temp render target with a rendered LUT. Can be 2d or 3d based on current graphics hardware caps. Release after usage (via RenderTargetPool::Release). 
GPUTexture* RenderLUT(RenderContext& renderContext); private: - #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) { @@ -44,14 +33,12 @@ private: #endif public: - // [RendererPass] String ToString() const override; bool Init() override; void Dispose() override; protected: - // [RendererPass] bool setupResources() override; }; From 5c5341e346f13d214b2835ce2566bf868ca8e962 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 1 Aug 2025 23:16:57 +0200 Subject: [PATCH 141/211] Optimize Color Grading LUT to be cached if unchanged from the previous frame --- Source/Engine/Renderer/ColorGradingPass.cpp | 65 +++++++++++++++---- Source/Engine/Renderer/ColorGradingPass.h | 2 +- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 1 - Source/Engine/Renderer/Renderer.cpp | 2 - 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp b/Source/Engine/Renderer/ColorGradingPass.cpp index ea85d69bd..caa5ee44c 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -3,11 +3,13 @@ #include "ColorGradingPass.h" #include "RenderList.h" #include "Engine/Content/Content.h" +#include "Engine/Engine/Engine.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPULimits.h" #include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/RenderBuffers.h" GPU_CB_STRUCT(Data { Float4 ColorSaturationShadows; @@ -37,6 +39,21 @@ GPU_CB_STRUCT(Data { float LutWeight; }); +// Custom render buffer for caching Color Grading LUT. 
+class ColorGradingCustomBuffer : public RenderBuffers::CustomBuffer +{ +public: + GPUTexture* LUT = nullptr; + Data CachedData; + ToneMappingMode Mode = ToneMappingMode::None; + Texture* LutTexture = nullptr; + + ~ColorGradingCustomBuffer() + { + RenderTargetPool::Release(LUT); + } +}; + String ColorGradingPass::ToString() const { return TEXT("ColorGradingPass"); @@ -92,6 +109,8 @@ void ColorGradingPass::Dispose() GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) { + PROFILE_CPU(); + // Check if can use volume texture (3D) for a LUT (faster on modern platforms, requires geometry shader) const auto device = GPUDevice::Instance; bool use3D = GPU_ALLOW_GEOMETRY_SHADERS && Graphics::PostProcessing::ColorGradingVolumeLUT; @@ -108,7 +127,6 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) // Ensure to have valid data if (checkIfSkipPass()) return nullptr; - PROFILE_GPU_CPU("Color Grading LUT"); // Pick a proper LUT pixels format auto lutFormat = PixelFormat::R10G10B10A2_UNorm; @@ -125,11 +143,24 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) lutDesc = GPUTextureDescription::New3D(lutSize, lutSize, lutSize, 1, lutFormat); else lutDesc = GPUTextureDescription::New2D(lutSize * lutSize, lutSize, 1, lutFormat); - const auto lut = RenderTargetPool::Get(lutDesc); - RENDER_TARGET_POOL_SET_NAME(lut, "ColorGrading.LUT"); + + // Use existing texture or allocate a new one + auto& colorGradingBuffer = *renderContext.Buffers->GetCustomBuffer(TEXT("ColorGrading")); + colorGradingBuffer.LastFrameUsed = Engine::FrameCount; + if (colorGradingBuffer.LUT && colorGradingBuffer.LUT->Width() != lutDesc.Width) + { + RenderTargetPool::Release(colorGradingBuffer.LUT); + colorGradingBuffer.LUT = nullptr; + } + if (!colorGradingBuffer.LUT) + { + colorGradingBuffer.LUT = RenderTargetPool::Get(lutDesc); + RENDER_TARGET_POOL_SET_NAME(colorGradingBuffer.LUT, "ColorGrading.LUT"); + } // Prepare the parameters Data data; + data.Dummy = 
Float2::Zero; auto& toneMapping = renderContext.List->Settings.ToneMapping; auto& colorGrading = renderContext.List->Settings.ColorGrading; // White Balance @@ -156,23 +187,35 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) data.ColorOffsetHighlights = colorGrading.ColorOffsetHighlights + colorGrading.ColorOffset; data.ColorCorrectionHighlightsMin = colorGrading.HighlightsMin; // - const bool useLut = colorGrading.LutTexture && colorGrading.LutTexture->IsLoaded() && colorGrading.LutTexture->GetResidentMipLevels() > 0 && colorGrading.LutWeight > ZeroTolerance; + Texture* lutTexture = colorGrading.LutTexture.Get(); + const bool useLut = lutTexture && lutTexture->IsLoaded() && lutTexture->GetResidentMipLevels() > 0 && colorGrading.LutWeight > ZeroTolerance; data.LutWeight = useLut ? colorGrading.LutWeight : 0.0f; - // Prepare + // Check if LUT parameter hasn't been changed since the last time + if (Platform::MemoryCompare(&colorGradingBuffer.CachedData , &data, sizeof(Data)) == 0 && + colorGradingBuffer.Mode == toneMapping.Mode && + colorGradingBuffer.LutTexture == lutTexture) + { + // Resue existing texture + return colorGradingBuffer.LUT; + } + colorGradingBuffer.CachedData = data; + colorGradingBuffer.Mode = toneMapping.Mode; + colorGradingBuffer.LutTexture = lutTexture; + + // Render LUT + PROFILE_GPU("Color Grading LUT"); auto context = device->GetMainContext(); const auto cb = _shader->GetShader()->GetCB(0); context->UpdateCB(cb, &data); context->BindCB(0, cb); context->SetViewportAndScissors((float)lutDesc.Width, (float)lutDesc.Height); context->SetState(_psLut.Get((int32)toneMapping.Mode)); - context->BindSR(0, useLut ? colorGrading.LutTexture->GetTexture() : nullptr); - - // Draw + context->BindSR(0, useLut ? 
lutTexture->GetTexture() : nullptr); #if GPU_ALLOW_GEOMETRY_SHADERS if (use3D) { - context->SetRenderTarget(lut->ViewVolume()); + context->SetRenderTarget(colorGradingBuffer.LUT->ViewVolume()); // Render one fullscreen-triangle per slice intersecting the bounds const int32 numInstances = lutDesc.Depth; @@ -181,10 +224,10 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) else #endif { - context->SetRenderTarget(lut->View()); + context->SetRenderTarget(colorGradingBuffer.LUT->View()); context->DrawFullscreenTriangle(); } context->UnBindSR(0); - return lut; + return colorGradingBuffer.LUT; } diff --git a/Source/Engine/Renderer/ColorGradingPass.h b/Source/Engine/Renderer/ColorGradingPass.h index 2288f9703..612940e76 100644 --- a/Source/Engine/Renderer/ColorGradingPass.h +++ b/Source/Engine/Renderer/ColorGradingPass.h @@ -20,7 +20,7 @@ public: /// Renders Look Up table with color grading parameters mixed in. /// /// The rendering context. - /// Allocated temp render target with a rendered LUT. Can be 2d or 3d based on current graphics hardware caps. Release after usage (via RenderTargetPool::Release). + /// Allocated temp render target with a rendered LUT - cached within Render Buffers, released automatically. 
GPUTexture* RenderLUT(RenderContext& renderContext); private: diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index dad9a23e6..bdb542208 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -1447,7 +1447,6 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex auto colorGradingLUT = ColorGradingPass::Instance()->RenderLUT(renderContext); EyeAdaptationPass::Instance()->Render(renderContext, tempBuffer); PostProcessingPass::Instance()->Render(renderContext, tempBuffer, output, colorGradingLUT); - RenderTargetPool::Release(colorGradingLUT); RenderTargetPool::Release(tempBuffer); context->ResetRenderTarget(); diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index bca4641b5..9f11323e8 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -624,7 +624,6 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont RENDER_TARGET_POOL_SET_NAME(tempBuffer, "TempBuffer"); EyeAdaptationPass::Instance()->Render(renderContext, lightBuffer); PostProcessingPass::Instance()->Render(renderContext, lightBuffer, tempBuffer, colorGradingLUT); - RenderTargetPool::Release(colorGradingLUT); context->ResetRenderTarget(); if (aaMode == AntialiasingMode::TemporalAntialiasing) { @@ -745,7 +744,6 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Post-processing EyeAdaptationPass::Instance()->Render(renderContext, frameBuffer); PostProcessingPass::Instance()->Render(renderContext, frameBuffer, tempBuffer, colorGradingLUT); - RenderTargetPool::Release(colorGradingLUT); Swap(frameBuffer, tempBuffer); // Cleanup From b26d6ea108c65f03798edcbf432b12cd4addd3c2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 08:31:24 +0200 Subject: [PATCH 142/211] Fix various issues --- 
Source/Engine/Audio/AudioSource.cpp | 1 + Source/Engine/Graphics/Graphics.cpp | 10 ++++------ Source/Engine/Platform/Apple/ApplePlatform.cpp | 6 +++++- Source/Engine/Renderer/PostProcessingPass.cpp | 3 +++ Source/Engine/Renderer/VolumetricFogPass.cpp | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Source/Engine/Audio/AudioSource.cpp b/Source/Engine/Audio/AudioSource.cpp index 1a7ef4768..98ee0f9c0 100644 --- a/Source/Engine/Audio/AudioSource.cpp +++ b/Source/Engine/Audio/AudioSource.cpp @@ -8,6 +8,7 @@ #include "Engine/Engine/Time.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "AudioBackend.h" #include "Audio.h" diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index 294bc4bd3..ec9cd5067 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -10,7 +10,9 @@ #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerGPU.h" #include "Engine/Profiler/ProfilerMemory.h" +#if !USE_EDITOR #include "Engine/Render2D/Font.h" +#endif bool Graphics::UseVSync = false; Quality Graphics::AAQuality = Quality::Medium; @@ -40,12 +42,6 @@ extern GPUDevice* CreateGPUDeviceDX11(); #if GRAPHICS_API_DIRECTX12 extern GPUDevice* CreateGPUDeviceDX12(); #endif -#if GRAPHICS_API_PS4 -extern GPUDevice* CreateGPUDevicePS4(); -#endif -#if GRAPHICS_API_PS5 -extern GPUDevice* CreateGPUDevicePS5(); -#endif class GraphicsService : public EngineService { @@ -166,10 +162,12 @@ bool GraphicsService::Init() device = CreateGPUDeviceVulkan(); #endif #if GRAPHICS_API_PS4 + extern GPUDevice* CreateGPUDevicePS4(); if (!device) device = CreateGPUDevicePS4(); #endif #if GRAPHICS_API_PS5 + extern GPUDevice* CreateGPUDevicePS5(); if (!device) device = CreateGPUDevicePS5(); #endif diff --git a/Source/Engine/Platform/Apple/ApplePlatform.cpp b/Source/Engine/Platform/Apple/ApplePlatform.cpp index 8f8a0ba61..6827e7ac6 
100644 --- a/Source/Engine/Platform/Apple/ApplePlatform.cpp +++ b/Source/Engine/Platform/Apple/ApplePlatform.cpp @@ -295,7 +295,11 @@ void ApplePlatform::Sleep(int32 milliseconds) void ApplePlatform::Yield() { - pthread_yield(); +#if PLATFORM_ARCH_ARM64 + __builtin_arm_yield(); +#else + _mm_pause(); +#endif } double ApplePlatform::GetTimeSeconds() diff --git a/Source/Engine/Renderer/PostProcessingPass.cpp b/Source/Engine/Renderer/PostProcessingPass.cpp index bc7a1b820..0945e74de 100644 --- a/Source/Engine/Renderer/PostProcessingPass.cpp +++ b/Source/Engine/Renderer/PostProcessingPass.cpp @@ -380,6 +380,7 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, if (useBloom) { + PROFILE_GPU("Bloom"); context->SetRenderTarget(bloomBuffer1->View(0, 0)); context->SetViewportAndScissors((float)w2, (float)h2); context->BindSR(0, input->View()); @@ -440,6 +441,8 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, // Check if use lens flares if (useLensFlares) { + PROFILE_GPU("Lens Flares"); + // Prepare lens flares helper textures context->BindSR(5, GetCustomOrDefault(settings.LensFlares.LensStar, _defaultLensStar, TEXT("Engine/Textures/DefaultLensStarburst"))); context->BindSR(6, GetCustomOrDefault(settings.LensFlares.LensColor, _defaultLensColor, TEXT("Engine/Textures/DefaultLensColor"))); diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index 6029b399d..b1e2c9ab7 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -152,7 +152,7 @@ bool VolumetricFogPass::Init(RenderContext& renderContext, GPUContext* context, break; case Quality::Ultra: _cache.GridPixelSize = 8; - _cache.GridSizeZ = 256; + _cache.GridSizeZ = 128; _cache.FogJitter = true; _cache.MissedHistorySamplesCount = 8; break; From 744c94b3cced6b511a244d2425e505c551ffe34c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 
17:05:13 +0200 Subject: [PATCH 143/211] Compilation errors fixing --- Source/Engine/Content/Asset.cpp | 2 +- Source/Engine/Content/BinaryAsset.cpp | 2 +- Source/Engine/Platform/Linux/LinuxPlatform.cpp | 2 +- Source/ThirdParty/tracy/tracy.Build.cs | 4 ++++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Content/Asset.cpp b/Source/Engine/Content/Asset.cpp index 559a673da..1ba0039bb 100644 --- a/Source/Engine/Content/Asset.cpp +++ b/Source/Engine/Content/Asset.cpp @@ -279,7 +279,7 @@ void Asset::OnDeleteObject() const bool wasMarkedToDelete = _deleteFileOnUnload != 0; #if USE_EDITOR - const String path = wasMarkedToDelete ? GetPath() : String::Empty; + const String path = wasMarkedToDelete ? GetPath() : StringView::Empty; #endif const Guid id = GetID(); diff --git a/Source/Engine/Content/BinaryAsset.cpp b/Source/Engine/Content/BinaryAsset.cpp index cb7b951f6..455b6ae3d 100644 --- a/Source/Engine/Content/BinaryAsset.cpp +++ b/Source/Engine/Content/BinaryAsset.cpp @@ -467,7 +467,7 @@ void BinaryAsset::OnDeleteObject() StringView BinaryAsset::GetPath() const { #if USE_EDITOR - return Storage ? Storage->GetPath() : StringView::Empty; + return Storage ? 
StringView(Storage->GetPath()) : StringView::Empty; #else // In build all assets are packed into packages so use ID for original path lookup return Content::GetRegistry()->GetEditorAssetPath(_id); diff --git a/Source/Engine/Platform/Linux/LinuxPlatform.cpp b/Source/Engine/Platform/Linux/LinuxPlatform.cpp index ce05bf361..98e0cbbfc 100644 --- a/Source/Engine/Platform/Linux/LinuxPlatform.cpp +++ b/Source/Engine/Platform/Linux/LinuxPlatform.cpp @@ -1838,7 +1838,7 @@ void LinuxPlatform::Sleep(int32 milliseconds) void LinuxPlatform::Yield() { - pthread_yield(); + sched_yield(); } double LinuxPlatform::GetTimeSeconds() diff --git a/Source/ThirdParty/tracy/tracy.Build.cs b/Source/ThirdParty/tracy/tracy.Build.cs index 6a26d21ff..bfa33ad26 100644 --- a/Source/ThirdParty/tracy/tracy.Build.cs +++ b/Source/ThirdParty/tracy/tracy.Build.cs @@ -78,7 +78,11 @@ public class tracy : ThirdPartyModule if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceDX12")) options.PrivateDefinitions.Add("TRACY_GPU_D3D12"); if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceVulkan")) + { options.PrivateDefinitions.Add("TRACY_GPU_VULKAN"); + if (VulkanSdk.Instance.TryGetIncludePath(options.Platform.Target, out var includesFolderPath)) + options.PrivateIncludePaths.Add(includesFolderPath); + } } } From 5de5d8f6835bc9cf894ee59dc060f64a5f721234 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 17:12:53 +0200 Subject: [PATCH 144/211] Add more events for GPU profiling --- Content/Shaders/PostProcessing.flax | 4 ++-- Source/Engine/Level/Actors/ExponentialHeightFog.cpp | 1 + Source/Engine/Level/Actors/Sky.cpp | 1 + Source/Engine/Renderer/ReflectionsPass.cpp | 1 + Source/Shaders/PostProcessing.shader | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Content/Shaders/PostProcessing.flax b/Content/Shaders/PostProcessing.flax index 7d7352a11..931c0b436 100644 --- a/Content/Shaders/PostProcessing.flax +++ b/Content/Shaders/PostProcessing.flax @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb7baebfb28eb44a18c8947ccc5eb4eed8467c3c669af63b7e1be5fde2514948 -size 22677 +oid sha256:e477eae82ac3a988f70c59913e5eeac39438edb8191b81fe6ca970b87754cb93 +size 22689 diff --git a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp index efb5351e7..0ee719fa9 100644 --- a/Source/Engine/Level/Actors/ExponentialHeightFog.cpp +++ b/Source/Engine/Level/Actors/ExponentialHeightFog.cpp @@ -186,6 +186,7 @@ GPU_CB_STRUCT(Data { void ExponentialHeightFog::DrawFog(GPUContext* context, RenderContext& renderContext, GPUTextureView* output) { + PROFILE_GPU_CPU("Exponential Height Fog"); auto integratedLightScattering = renderContext.Buffers->VolumetricFog; bool useVolumetricFog = integratedLightScattering != nullptr; diff --git a/Source/Engine/Level/Actors/Sky.cpp b/Source/Engine/Level/Actors/Sky.cpp index 0aa3bf070..2ed6a9221 100644 --- a/Source/Engine/Level/Actors/Sky.cpp +++ b/Source/Engine/Level/Actors/Sky.cpp @@ -176,6 +176,7 @@ void Sky::DrawFog(GPUContext* context, RenderContext& renderContext, GPUTextureV AtmosphereCache cache; if (!AtmospherePreCompute::GetCache(&cache)) return; + PROFILE_GPU_CPU("Sky Fog"); context->BindSR(4, cache.Transmittance); context->BindSR(5, cache.Irradiance); context->BindSR(6, cache.Inscatter->ViewVolume()); diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index 5aa8404ab..d9e8df3fe 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -368,6 +368,7 @@ void ReflectionsPass::Render(RenderContext& renderContext, GPUTextureView* light // Check if no need to render reflection environment if (!useReflections || !(renderProbes || useSSR)) return; + PROFILE_GPU_CPU("Reflections"); // Setup data Data data; diff --git a/Source/Shaders/PostProcessing.shader b/Source/Shaders/PostProcessing.shader index 369a9b8cd..851d31e38 100644 --- 
a/Source/Shaders/PostProcessing.shader +++ b/Source/Shaders/PostProcessing.shader @@ -619,6 +619,7 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target float4 color; // Chromatic Abberation + BRANCH if (ChromaticDistortion > 0) { const float MAX_DIST_PX = 24.0; From 34ba45cd5a4d282af22ddc20b753a3b36106bdff Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 17:36:48 +0200 Subject: [PATCH 145/211] Add Stack to Sanitizers options --- .../Flax.Build/Build/NativeCpp/CompileEnvironment.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs b/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs index 7da495b46..6ebf6a78e 100644 --- a/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs +++ b/Source/Tools/Flax.Build/Build/NativeCpp/CompileEnvironment.cs @@ -56,6 +56,16 @@ namespace Flax.Build.NativeCpp /// Undefined behavior (UB) detector. /// Undefined = 8, + + /// + /// Thread stack memory checks. + /// + Stack = 16, + + /// + /// Enables all available sanitizers for full compiler security. 
+ /// + All = Address | Thread | Memory | Undefined | Stack, } /// From 31764d6d4ebb2f0a9025b9793eaa2647eb15d298 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 17:56:26 +0200 Subject: [PATCH 146/211] Fix crash on memory access in Vulkan descriptor set data --- Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.h index d630bf61b..c2375da5b 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/DescriptorSetVulkan.h @@ -65,7 +65,7 @@ public: uint32 Hash = 0; uint32 SetLayoutsHash = 0; - uint32 LayoutTypes[VULKAN_DESCRIPTOR_TYPE_END]; + uint32 LayoutTypes[VULKAN_DESCRIPTOR_TYPE_END + 1]; Array SetLayouts; public: From 2730d632574acbcb6c2d1ca18bfdbbfd23132bd6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 21:50:09 +0200 Subject: [PATCH 147/211] More build fixes --- Source/Engine/Graphics/Graphics.Build.cs | 2 +- .../GraphicsDevice/Vulkan/QueueVulkan.cpp | 2 ++ Source/Engine/Profiler/Profiler.Build.cs | 10 +--------- Source/ThirdParty/tracy/tracy.Build.cs | 20 +++++++++++++++++++ 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Graphics/Graphics.Build.cs b/Source/Engine/Graphics/Graphics.Build.cs index e1c77d844..1c59349ec 100644 --- a/Source/Engine/Graphics/Graphics.Build.cs +++ b/Source/Engine/Graphics/Graphics.Build.cs @@ -21,7 +21,7 @@ public abstract class GraphicsDeviceBaseModule : EngineModule options.PublicDefinitions.Add("GPU_ENABLE_DIAGNOSTICS"); } - if (Profiler.Use(options) && tracy.GPU && true) + if (Profiler.Use(options) && tracy.Use(options) && tracy.GPU && true) { // Enables GPU profiling with Tracy options.PrivateDefinitions.Add("GPU_ENABLE_TRACY"); diff --git a/Source/Engine/GraphicsDevice/Vulkan/QueueVulkan.cpp 
b/Source/Engine/GraphicsDevice/Vulkan/QueueVulkan.cpp index 1f18a0ee1..f2d2521d0 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/QueueVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/QueueVulkan.cpp @@ -6,6 +6,7 @@ #include "GPUDeviceVulkan.h" #include "CmdBufferVulkan.h" #include "RenderToolsVulkan.h" +#include "Engine/Profiler/ProfilerCPU.h" QueueVulkan::QueueVulkan(GPUDeviceVulkan* device, uint32 familyIndex) : _queue(VK_NULL_HANDLE) @@ -20,6 +21,7 @@ QueueVulkan::QueueVulkan(GPUDeviceVulkan* device, uint32 familyIndex) void QueueVulkan::Submit(CmdBufferVulkan* cmdBuffer, uint32 signalSemaphoresCount, const VkSemaphore* signalSemaphores) { + PROFILE_CPU_NAMED("vkQueueSubmit"); ASSERT(cmdBuffer->HasEnded()); auto fence = cmdBuffer->GetFence(); ASSERT(!fence->IsSignaled); diff --git a/Source/Engine/Profiler/Profiler.Build.cs b/Source/Engine/Profiler/Profiler.Build.cs index f47cbcf87..013336181 100644 --- a/Source/Engine/Profiler/Profiler.Build.cs +++ b/Source/Engine/Profiler/Profiler.Build.cs @@ -29,15 +29,7 @@ public class Profiler : EngineModule options.PublicDefinitions.Add("COMPILE_WITH_PROFILER"); // Tracy profiling tools - switch (options.Platform.Target) - { - case TargetPlatform.Android: - case TargetPlatform.Linux: - case TargetPlatform.Windows: - case TargetPlatform.Switch: - case TargetPlatform.Mac: + if (tracy.Use(options)) options.PublicDependencies.Add("tracy"); - break; - } } } diff --git a/Source/ThirdParty/tracy/tracy.Build.cs b/Source/ThirdParty/tracy/tracy.Build.cs index bfa33ad26..de0bd8665 100644 --- a/Source/ThirdParty/tracy/tracy.Build.cs +++ b/Source/ThirdParty/tracy/tracy.Build.cs @@ -20,6 +20,26 @@ public class tracy : ThirdPartyModule /// public static bool GPU = true; + /// + /// Determinates whenever performance Tracy supports a given platform. + /// + /// The options. + /// True if use profiler, otherwise false. 
+ public static bool Use(BuildOptions options) + { + switch (options.Platform.Target) + { + case TargetPlatform.Android: + case TargetPlatform.Linux: + case TargetPlatform.Windows: + case TargetPlatform.Switch: + case TargetPlatform.Mac: + return true; + default: + return false; + } + } + /// public override void Init() { From db660721cefc5c70a6d68f21f26817030032b126 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 22:01:25 +0200 Subject: [PATCH 148/211] Fix debug command length bug --- Source/Engine/Debug/DebugCommands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Debug/DebugCommands.cpp b/Source/Engine/Debug/DebugCommands.cpp index 45bd202c3..d36c8b489 100644 --- a/Source/Engine/Debug/DebugCommands.cpp +++ b/Source/Engine/Debug/DebugCommands.cpp @@ -209,7 +209,7 @@ namespace *bufferPtr++ = '.'; StringUtils::Copy(bufferPtr, itemName.Get(), itemName.Length()); bufferPtr += itemName.Length(); - *bufferPtr++ = 0; + *bufferPtr = 0; cmd.Name.Set(buffer.Get(), (int32)(bufferPtr - buffer.Get())); } From c9e0637b0fa27947909036e60d6da6dfeb80648c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 2 Aug 2025 22:03:19 +0200 Subject: [PATCH 149/211] Fix crash when unloading texture with leftover streaming task that was idle --- .../Graphics/Textures/StreamingTexture.cpp | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/Source/Engine/Graphics/Textures/StreamingTexture.cpp b/Source/Engine/Graphics/Textures/StreamingTexture.cpp index 87378ef9d..735bc162e 100644 --- a/Source/Engine/Graphics/Textures/StreamingTexture.cpp +++ b/Source/Engine/Graphics/Textures/StreamingTexture.cpp @@ -197,9 +197,22 @@ public: ~StreamTextureResizeTask() { + OnResourceReleased2(); SAFE_DELETE_GPU_RESOURCE(_newTexture); } +private: + void OnResourceReleased2() + { + // Unlink texture + if (_streamingTexture) + { + ScopeLock lock(_streamingTexture->GetOwner()->GetOwnerLocker()); + 
_streamingTexture->_streamingTasks.Remove(this); + _streamingTexture = nullptr; + } + } + protected: // [GPUTask] Result run(GPUTasksContext* context) override @@ -225,11 +238,7 @@ protected: void OnEnd() override { - if (_streamingTexture) - { - ScopeLock lock(_streamingTexture->GetOwner()->GetOwnerLocker()); - _streamingTexture->_streamingTasks.Remove(this); - } + OnResourceReleased2(); // Base GPUTask::OnEnd(); @@ -336,6 +345,11 @@ public: _texture.Released.Bind(this); } + ~StreamTextureMipTask() + { + OnResourceReleased2(); + } + private: void OnResourceReleased2() { @@ -392,12 +406,7 @@ protected: void OnEnd() override { _dataLock.Release(); - if (_streamingTexture) - { - ScopeLock lock(_streamingTexture->GetOwner()->GetOwnerLocker()); - _streamingTexture->_streamingTasks.Remove(_rootTask); - _streamingTexture = nullptr; - } + OnResourceReleased2(); // Base GPUUploadTextureMipTask::OnEnd(); From abe496fe12ff2bf7ac66acc25face576ab3a27cf Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 4 Aug 2025 10:31:52 +0200 Subject: [PATCH 150/211] Fix crash on particles sorting memory access --- Source/Engine/Core/Collections/Sorting.h | 11 ++++----- Source/Engine/Particles/Particles.cpp | 30 +++++++++++++++--------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/Source/Engine/Core/Collections/Sorting.h b/Source/Engine/Core/Collections/Sorting.h index 0a83448ba..ddbee8b50 100644 --- a/Source/Engine/Core/Collections/Sorting.h +++ b/Source/Engine/Core/Collections/Sorting.h @@ -362,9 +362,10 @@ public: uint32 histogram[RADIXSORT_HISTOGRAM_SIZE]; uint16 shift = 0; int32 pass = 0; - for (; pass < 6; pass++) + constexpr int32 passCount = sizeof(T) >= sizeof(uint64) ? 
6 : 3; + for (; pass < passCount; pass++) { - Platform::MemoryClear(histogram, sizeof(uint32) * RADIXSORT_HISTOGRAM_SIZE); + Platform::MemoryClear(histogram, sizeof(histogram)); bool sorted = true; T key = keys[0]; @@ -372,16 +373,14 @@ public: for (int32 i = 0; i < count; i++) { key = keys[i]; - const uint16 index = (key >> shift) & RADIXSORT_BIT_MASK; + const uint16 index = (key >> (T)shift) & RADIXSORT_BIT_MASK; ++histogram[index]; sorted &= prevKey <= key; prevKey = key; } if (sorted) - { goto end; - } uint32 offset = 0; for (int32 i = 0; i < RADIXSORT_HISTOGRAM_SIZE; ++i) @@ -394,7 +393,7 @@ public: for (int32 i = 0; i < count; i++) { const T k = keys[i]; - const uint16 index = (k >> shift) & RADIXSORT_BIT_MASK; + const uint16 index = (k >> (T)shift) & RADIXSORT_BIT_MASK; const uint32 dest = histogram[index]++; tempKeys[dest] = k; tempValues[dest] = values[i]; diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index b8dce728a..53069cb02 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -4,7 +4,6 @@ #include "ParticleEffect.h" #include "Engine/Content/Assets/Model.h" #include "Engine/Core/Collections/Sorting.h" -#include "Engine/Core/Collections/HashSet.h" #include "Engine/Engine/EngineService.h" #include "Engine/Engine/Time.h" #include "Engine/Engine/Engine.h" @@ -216,14 +215,17 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa { case ParticleSortMode::ViewDepth: { + const int32 positionOffset = emitter->Graph.GetPositionAttributeOffset(); + if (positionOffset == -1) + break; const Matrix viewProjection = renderContext.View.ViewProjection(); - byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset(); + const byte* positionPtr = buffer->CPU.Buffer.Get() + positionOffset; if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { for (int32 i = 0; i < buffer->CPU.Count; i++) { // TODO: use SIMD - 
sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, *(Float3*)positionPtr)).W) ^ sortKeyXor; + sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, *(const Float3*)positionPtr)).W) ^ sortKeyXor; positionPtr += stride; } } @@ -231,7 +233,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa { for (int32 i = 0; i < buffer->CPU.Count; i++) { - sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, *(Float3*)positionPtr).W) ^ sortKeyXor; + sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, *(const Float3*)positionPtr).W) ^ sortKeyXor; positionPtr += stride; } } @@ -239,14 +241,17 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa } case ParticleSortMode::ViewDistance: { + const int32 positionOffset = emitter->Graph.GetPositionAttributeOffset(); + if (positionOffset == -1) + break; const Float3 viewPosition = renderContext.View.Position; - byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset(); + const byte* positionPtr = buffer->CPU.Buffer.Get() + positionOffset; if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { for (int32 i = 0; i < buffer->CPU.Count; i++) { // TODO: use SIMD - sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - Float3::Transform(*(Float3*)positionPtr, drawCall.World)).LengthSquared()) ^ sortKeyXor; + sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - Float3::Transform(*(const Float3*)positionPtr, drawCall.World)).LengthSquared()) ^ sortKeyXor; positionPtr += stride; } } @@ -255,7 +260,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa for (int32 i = 0; i < buffer->CPU.Count; i++) { // TODO: use SIMD - sortedKeys[i] = 
RenderTools::ComputeDistanceSortKey((viewPosition - *(Float3*)positionPtr).LengthSquared()) ^ sortKeyXor; + sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - *(const Float3*)positionPtr).LengthSquared()) ^ sortKeyXor; positionPtr += stride; } } @@ -264,13 +269,16 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa case ParticleSortMode::CustomAscending: case ParticleSortMode::CustomDescending: { - int32 attributeIdx = module->Attributes[0]; + const int32 attributeIdx = module->Attributes[0]; if (attributeIdx == -1) break; - byte* attributePtr = buffer->CPU.Buffer.Get() + emitter->Graph.Layout.Attributes[attributeIdx].Offset; + const int32 attributeOffset = emitter->Graph.Layout.Attributes[attributeIdx].Offset; + if (attributeOffset == -1) + break; + const byte* attributePtr = buffer->CPU.Buffer.Get() + attributeOffset; for (int32 i = 0; i < buffer->CPU.Count; i++) { - sortedKeys[i] = RenderTools::ComputeDistanceSortKey(*(float*)attributePtr) ^ sortKeyXor; + sortedKeys[i] = RenderTools::ComputeDistanceSortKey(*(const float*)attributePtr) ^ sortKeyXor; attributePtr += stride; } break; @@ -286,7 +294,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa { ParticlesDrawCPU::SortedIndices.Resize(listSize); sortedIndices = ParticlesDrawCPU::SortedIndices.Get(); - for (int i = 0; i < listSize; i++) + for (int32 i = 0; i < listSize; i++) sortedIndices[i] = i; } From 1a88fefd76daae6cddd261e02b4a3d1b396c8b05 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 09:03:47 +0200 Subject: [PATCH 151/211] Optimize CPU particles drawing to support async --- Source/Engine/Core/Memory/ArenaAllocation.h | 7 ++ Source/Engine/Level/Actors/PointLight.cpp | 1 + Source/Engine/Level/Actors/SpotLight.cpp | 1 + Source/Engine/Particles/Particles.cpp | 70 ++++++++++++-------- Source/Engine/Renderer/RenderList.cpp | 2 - Source/Engine/Renderer/RenderList.h | 6 +- 
Source/Engine/Renderer/Utils/BitonicSort.cpp | 9 +-- Source/Engine/Renderer/VolumetricFogPass.cpp | 4 +- 8 files changed, 59 insertions(+), 41 deletions(-) diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index bfb5dbfe6..c519ffab3 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -125,6 +125,13 @@ public: // Frees all memory allocations within allocator. void Free(); + // Allocates a chunk of unitialized memory. + template + inline T* Allocate(uint64 count) + { + return (T*)Allocate(count * sizeof(T), alignof(T)); + } + // Creates a new object within the arena allocator. template inline T* New(Args&&...args) diff --git a/Source/Engine/Level/Actors/PointLight.cpp b/Source/Engine/Level/Actors/PointLight.cpp index 607bf1bc4..d28ee3f16 100644 --- a/Source/Engine/Level/Actors/PointLight.cpp +++ b/Source/Engine/Level/Actors/PointLight.cpp @@ -19,6 +19,7 @@ PointLight::PointLight(const SpawnParams& params) _direction = Float3::Forward; _sphere = BoundingSphere(Vector3::Zero, _radius); BoundingBox::FromSphere(_sphere, _box); + _drawCategory = SceneRendering::SceneDrawAsync; } float PointLight::ComputeBrightness() const diff --git a/Source/Engine/Level/Actors/SpotLight.cpp b/Source/Engine/Level/Actors/SpotLight.cpp index 85b77647a..af783ce84 100644 --- a/Source/Engine/Level/Actors/SpotLight.cpp +++ b/Source/Engine/Level/Actors/SpotLight.cpp @@ -27,6 +27,7 @@ SpotLight::SpotLight(const SpawnParams& params) const float boundsRadius = Math::Sqrt(1.25f * _radius * _radius - _radius * _radius * _cosOuterCone); _sphere = BoundingSphere(GetPosition() + 0.5f * GetDirection() * _radius, boundsRadius); BoundingBox::FromSphere(_sphere, _box); + _drawCategory = SceneRendering::SceneDrawAsync; } float SpotLight::ComputeBrightness() const diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 53069cb02..79457399e 100644 --- 
a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -40,6 +40,7 @@ PACK_STRUCT(struct SpriteParticleVertex class SpriteParticleRenderer { public: + volatile int64 Ready = 0; GPUBuffer* VB = nullptr; GPUBuffer* IB = nullptr; const static int32 VertexCount = 4; @@ -48,7 +49,10 @@ public: public: bool Init() { - if (VB) + if (Platform::AtomicRead(&Ready)) + return false; + ScopeLock lock(RenderContext::GPULocker); + if (Platform::AtomicRead(&Ready)) return false; VB = GPUDevice::Instance->CreateBuffer(TEXT("SpriteParticleRenderer.VB")); IB = GPUDevice::Instance->CreateBuffer(TEXT("SpriteParticleRenderer.IB")); @@ -64,8 +68,10 @@ public: { VertexElement::Types::Position, 0, 0, 0, PixelFormat::R32G32_Float }, { VertexElement::Types::TexCoord, 0, 0, 0, PixelFormat::R32G32_Float }, }); - return VB->Init(GPUBufferDescription::Vertex(layout, sizeof(SpriteParticleVertex), VertexCount, vertexBuffer)) || - IB->Init(GPUBufferDescription::Index(sizeof(uint16), IndexCount, indexBuffer)); + bool result = VB->Init(GPUBufferDescription::Vertex(layout, sizeof(SpriteParticleVertex), VertexCount, vertexBuffer)) || + IB->Init(GPUBufferDescription::Index(sizeof(uint16), IndexCount, indexBuffer)); + Platform::AtomicStore(&Ready, 1); + return result; } void Dispose() @@ -133,13 +139,6 @@ float Particles::ParticleBufferRecycleTimeout = 10.0f; SpriteParticleRenderer SpriteRenderer; -namespace ParticlesDrawCPU -{ - Array SortingKeys[2]; - Array SortingIndices; - Array SortedIndices; -} - class ParticleManagerService : public EngineService { public: @@ -190,7 +189,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa auto emitter = buffer->Emitter; // Check if need to perform any particles sorting - if (emitter->Graph.SortModules.HasItems() && renderContext.View.Pass != DrawPass::Depth) + if (emitter->Graph.SortModules.HasItems() && renderContext.View.Pass != DrawPass::Depth && (buffer->CPU.Count != 0 || 
buffer->GPU.SortedIndices)) { // Prepare sorting data if (!buffer->GPU.SortedIndices) @@ -204,12 +203,31 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa const auto sortMode = static_cast(module->Values[2].AsInt); const int32 stride = buffer->Stride; const int32 listSize = buffer->CPU.Count; -#define PREPARE_CACHE(list) (ParticlesDrawCPU::list).Clear(); (ParticlesDrawCPU::list).Resize(listSize) - PREPARE_CACHE(SortingKeys[0]); - PREPARE_CACHE(SortingKeys[1]); - PREPARE_CACHE(SortingIndices); -#undef PREPARE_CACHE - uint32* sortedKeys = ParticlesDrawCPU::SortingKeys[0].Get(); + Array sortingKeysList[4]; + Array sortingIndicesList[2]; + uint32* sortingKeys[2]; + int32* sortingIndices[2]; + if (listSize < 500) + { + // Use fast stack allocator from RenderList + sortingKeys[0] = renderContext.List->Memory.Allocate(listSize); + sortingKeys[1] = renderContext.List->Memory.Allocate(listSize); + sortingIndices[0] = renderContext.List->Memory.Allocate(listSize); + sortingIndices[1] = renderContext.List->Memory.Allocate(listSize); + } + else + { + // Use shared pooled memory from RendererAllocation + sortingKeysList[0].Resize(listSize); + sortingKeysList[1].Resize(listSize); + sortingIndicesList[0].Resize(listSize); + sortingIndicesList[1].Resize(listSize); + sortingKeys[0] = sortingKeysList[0].Get(); + sortingKeys[1] = sortingKeysList[1].Get(); + sortingIndices[0] = sortingIndicesList[0].Get(); + sortingIndices[1] = sortingIndicesList[1].Get(); + } + uint32* sortedKeys = sortingKeys[0]; const uint32 sortKeyXor = sortMode != ParticleSortMode::CustomAscending ? 
MAX_uint32 : 0; switch (sortMode) { @@ -290,29 +308,31 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa } // Generate sorting indices - int32* sortedIndices; + int32* sortedIndices = sortingIndices[0]; { - ParticlesDrawCPU::SortedIndices.Resize(listSize); - sortedIndices = ParticlesDrawCPU::SortedIndices.Get(); for (int32 i = 0; i < listSize; i++) sortedIndices[i] = i; } // Sort keys with indices { - Sorting::RadixSort(sortedKeys, sortedIndices, ParticlesDrawCPU::SortingKeys[1].Get(), ParticlesDrawCPU::SortingIndices.Get(), listSize); + Sorting::RadixSort(sortedKeys, sortedIndices, sortingKeys[1], sortingIndices[1], listSize); } // Upload CPU particles indices { - context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, listSize * sizeof(int32), sortedIndicesOffset); + RenderContext::GPULocker.Lock(); + context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, listSize * sizeof(uint32), sortedIndicesOffset); + RenderContext::GPULocker.Unlock(); } } } // Upload CPU particles data to GPU { + RenderContext::GPULocker.Lock(); context->UpdateBuffer(buffer->GPU.Buffer, buffer->CPU.Buffer.Get(), buffer->CPU.Count * buffer->Stride); + RenderContext::GPULocker.Unlock(); } // Check if need to setup ribbon modules @@ -443,8 +463,10 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa if (ribbonModuleIndex != 0) { // Upload data to the GPU buffer + RenderContext::GPULocker.Lock(); buffer->GPU.RibbonIndexBufferDynamic->Flush(context); buffer->GPU.RibbonVertexBufferDynamic->Flush(context); + RenderContext::GPULocker.Unlock(); } } @@ -1266,10 +1288,6 @@ void ParticleManagerService::Dispose() } CleanupGPUParticlesSorting(); #endif - ParticlesDrawCPU::SortingKeys[0].SetCapacity(0); - ParticlesDrawCPU::SortingKeys[1].SetCapacity(0); - ParticlesDrawCPU::SortingIndices.SetCapacity(0); - ParticlesDrawCPU::SortedIndices.SetCapacity(0); PoolLocker.Lock(); for (auto i = Pool.Begin(); i.IsNotEnd(); ++i) diff 
--git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 0dedfda38..dbb91680a 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -449,8 +449,6 @@ RenderList::RenderList(const SpawnParams& params) : ScriptingObject(params) , Memory(4 * 1024 * 1024, RendererAllocation::Allocate, RendererAllocation::Free) // 4MB pages, use page pooling via RendererAllocation , DirectionalLights(4) - , PointLights(32) - , SpotLights(32) , SkyLights(4) , EnvironmentProbes(32) , Decals(64) diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index f17e1b045..98f980e98 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -341,12 +341,12 @@ public: /// /// Light pass members - point lights /// - Array PointLights; + RenderListBuffer PointLights; /// /// Light pass members - spot lights /// - Array SpotLights; + RenderListBuffer SpotLights; /// /// Light pass members - sky lights @@ -366,7 +366,7 @@ public: /// /// Local volumetric fog particles registered for the rendering. 
/// - Array VolumetricFogParticles; + RenderListBuffer VolumetricFogParticles; /// /// Sky/skybox renderer proxy to use (only one per frame) diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index babc058e2..be5f38be4 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -91,16 +91,9 @@ void BitonicSort::Dispose() void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer) { ASSERT(context && sortingKeysBuffer && countBuffer); - - PROFILE_GPU_CPU("Bitonic Sort"); - - // Check if has missing resources if (checkIfSkipPass()) - { return; - } - - // Prepare + PROFILE_GPU_CPU("Bitonic Sort"); const uint32 elementSizeBytes = sizeof(uint64); const uint32 maxNumElements = sortingKeysBuffer->GetSize() / elementSizeBytes; const uint32 alignedMaxNumElements = Math::RoundUpToPowerOf2(maxNumElements); diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index b1e2c9ab7..290c803d2 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -384,7 +384,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) } // Render local fog particles - if (renderContext.List->VolumetricFogParticles.HasItems()) + if (renderContext.List->VolumetricFogParticles.Count() != 0) { PROFILE_GPU_CPU_NAMED("Local Fog"); @@ -404,7 +404,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) customData.VolumetricFogMaxDistance = cache.Data.VolumetricFogMaxDistance; bindParams.CustomData = &customData; bindParams.BindViewData(); - bindParams.DrawCall = &renderContext.List->VolumetricFogParticles.First(); + bindParams.DrawCall = renderContext.List->VolumetricFogParticles.begin(); bindParams.BindDrawData(); for (auto& drawCall : 
renderContext.List->VolumetricFogParticles) From 8e043e533ea0c831f02ee5477682eb214cf30ff3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 11:02:03 +0200 Subject: [PATCH 152/211] Optimize `SpriteRenderer` to lazy-init --- Source/Engine/Particles/Particles.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 79457399e..19848c779 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -486,7 +486,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa const auto material = (MaterialBase*)module->Assets[0].Get(); const auto moduleDrawModes = module->Values.Count() > 3 ? (DrawPass)module->Values[3].AsInt : DrawPass::Default; auto dp = drawModes & moduleDrawModes & material->GetDrawModes(); - if (dp == DrawPass::None) + if (dp == DrawPass::None || SpriteRenderer.Init()) break; drawCall.Material = material; @@ -895,18 +895,19 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa { const auto material = (MaterialBase*)module->Assets[0].Get(); const auto moduleDrawModes = module->Values.Count() > 3 ? 
(DrawPass)module->Values[3].AsInt : DrawPass::Default; + drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); + indirectDrawCallIndex++; auto dp = drawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None || SpriteRenderer.Init()) + break; drawCall.Material = material; // Submit draw call SpriteRenderer.SetupDrawCall(drawCall); drawCall.InstanceCount = 0; drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; - drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); if (dp != DrawPass::None) renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); - indirectDrawCallIndex++; - break; } // Model Rendering @@ -937,7 +938,6 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); indirectDrawCallIndex++; } - break; } // Ribbon Rendering @@ -963,7 +963,7 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe // Setup auto& view = renderContext.View; const DrawPass drawModes = view.Pass & effect->DrawModes; - if (drawModes == DrawPass::None || SpriteRenderer.Init()) + if (drawModes == DrawPass::None) return; PROFILE_MEM(Particles); ConcurrentSystemLocker::ReadScope systemScope(SystemLocker); From 761ea094d61c8a2b17bbe4e1e40f2fde253eec02 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 11:50:26 +0200 Subject: [PATCH 153/211] Optimize render module indices data 4 times --- Source/Engine/Particles/Particles.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 19848c779..f98be5ec8 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -178,7 +178,7 @@ void Particles::OnEffectDestroy(ParticleEffect* effect) 
#endif } -typedef Array> RenderModulesIndices; +typedef Array> RenderModulesIndices; void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) { @@ -1013,7 +1013,7 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe // Check if need to render any module RenderModulesIndices renderModulesIndices; - for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count() && renderModulesIndices.Count() < PARTICLE_EMITTER_MAX_MODULES; moduleIndex++) { auto module = emitter->Graph.RenderModules[moduleIndex]; From a1a6d4738ffdbb94fce0d778ce8098b59d479138 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 12:28:29 +0200 Subject: [PATCH 154/211] Fix async draw wait labels type --- Source/Engine/Foliage/Foliage.cpp | 2 +- Source/Engine/Graphics/RenderTask.h | 2 +- Source/Engine/Level/Scene/SceneRendering.cpp | 2 +- Source/Engine/Renderer/Renderer.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp index f51f4ece0..8a0e75a6e 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -1233,7 +1233,7 @@ void Foliage::Draw(RenderContextBatch& renderContextBatch) _renderContextBatch = &renderContextBatch; Function func; func.Bind(this); - const uint64 waitLabel = JobSystem::Dispatch(func, FoliageTypes.Count()); + const int64 waitLabel = JobSystem::Dispatch(func, FoliageTypes.Count()); renderContextBatch.WaitLabels.Add(waitLabel); return; } diff --git a/Source/Engine/Graphics/RenderTask.h b/Source/Engine/Graphics/RenderTask.h index 18a34fe80..07926fd64 100644 --- a/Source/Engine/Graphics/RenderTask.h +++ 
b/Source/Engine/Graphics/RenderTask.h @@ -513,7 +513,7 @@ API_STRUCT(NoDefault) struct RenderContextBatch /// /// The Job System labels to wait on, after draw calls collecting. /// - API_FIELD() Array> WaitLabels; + API_FIELD() Array> WaitLabels; /// /// Enables using async tasks via Job System when performing drawing. diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index d7225036c..18f631833 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -91,7 +91,7 @@ void SceneRendering::Draw(RenderContextBatch& renderContextBatch, DrawCategory c // Run in async via Job System Function func; func.Bind(this); - const uint64 waitLabel = JobSystem::Dispatch(func, JobSystem::GetThreadsCount()); + const int64 waitLabel = JobSystem::Dispatch(func, JobSystem::GetThreadsCount()); renderContextBatch.WaitLabels.Add(waitLabel); } else diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 9f11323e8..7c7a53a09 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -454,7 +454,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Wait for async jobs to finish JobSystem::SetJobStartingOnDispatch(true); - for (const uint64 label : renderContextBatch.WaitLabels) + for (const int64 label : renderContextBatch.WaitLabels) JobSystem::Wait(label); renderContextBatch.WaitLabels.Clear(); From baf0cfce8e13e91aedfebe8e0ebfd3432718097d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 22:13:21 +0200 Subject: [PATCH 155/211] Add support for using custom memory allocator in lambda bind to `Function` --- Source/Engine/Core/Delegate.h | 47 ++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Core/Delegate.h b/Source/Engine/Core/Delegate.h index 00614a9ff..f71fcb94b 100644 --- 
a/Source/Engine/Core/Delegate.h +++ b/Source/Engine/Core/Delegate.h @@ -62,7 +62,7 @@ private: struct Lambda { int64 Refs; - void (*Dtor)(void*); + void (*Dtor)(void* callee, Lambda* lambda); }; void* _callee; @@ -78,8 +78,7 @@ private: { if (Platform::InterlockedDecrement(&_lambda->Refs) == 0) { - ((Lambda*)_lambda)->Dtor(_callee); - Allocator::Free(_lambda); + _lambda->Dtor(_callee, _lambda); } } @@ -189,9 +188,10 @@ public: LambdaDtor(); _lambda = (Lambda*)Allocator::Allocate(sizeof(Lambda) + sizeof(T)); _lambda->Refs = 1; - _lambda->Dtor = [](void* callee) -> void + _lambda->Dtor = [](void* callee, Lambda* lambda) -> void { static_cast(callee)->~T(); + Allocator::Free(lambda); }; _function = [](void* callee, Params... params) -> ReturnType { @@ -201,6 +201,45 @@ public: new(_callee) T(lambda); } + /// + /// Binds a lambda with a custom memory allocator. + /// + /// The custom allocation tag. + /// The lambda. + template + void Bind(typename AllocationType::Tag tag, const T& lambda) + { + if (_lambda) + LambdaDtor(); + using AllocationData = typename AllocationType::template Data; + static_assert(AllocationType::HasSwap, "Function lambda binding supports only custom allocators with swap operation."); + + // Allocate lambda (using temp data) + AllocationData tempAlloc(tag); + tempAlloc.Allocate(sizeof(Lambda) + sizeof(AllocationData) + sizeof(T)); + + // Move temp data into the one allocated + AllocationData* dataAlloc = (AllocationData*)(tempAlloc.Get() + sizeof(Lambda)); + new(dataAlloc) AllocationData(); + dataAlloc->Swap(tempAlloc); + + // Initialize lambda + _lambda = (Lambda*)dataAlloc->Get(); + _lambda->Refs = 1; + _lambda->Dtor = [](void* callee, Lambda* lambda) -> void + { + static_cast(callee)->~T(); + AllocationData* dataAlloc = (AllocationData*)((byte*)lambda + sizeof(Lambda)); + dataAlloc->Free(); + }; + _function = [](void* callee, Params... 
params) -> ReturnType + { + return (*static_cast(callee))(Forward(params)...); + }; + _callee = (byte*)_lambda + sizeof(AllocationData) + sizeof(Lambda); + new(_callee) T(lambda); + } + /// /// Unbinds the function. /// From b1710c4d018d89cc8b310fa92234c69d8e74dcc9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 22:53:09 +0200 Subject: [PATCH 156/211] Add async particles drawing (GPU emitters are sync) --- Source/Engine/Particles/ParticleEffect.cpp | 1 + Source/Engine/Particles/Particles.cpp | 18 ++++++++++++++++-- Source/Engine/Renderer/RenderList.cpp | 16 ++++++++++++++++ Source/Engine/Renderer/RenderList.h | 16 ++++++++++++++++ Source/Engine/Renderer/Renderer.cpp | 4 ++++ 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index 3f1ac7055..4df492fe2 100644 --- a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -20,6 +20,7 @@ ParticleEffect::ParticleEffect(const SpawnParams& params) { _box = BoundingBox(_transform.Translation); BoundingSphere::FromBox(_box, _sphere); + _drawCategory = SceneRendering::SceneDrawAsync; } void ParticleEffectParameter::Init(ParticleEffect* effect, int32 emitterIndex, int32 paramIndex) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index f98be5ec8..7378b9f7b 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -649,8 +649,22 @@ void CleanupGPUParticlesSorting() GPUParticlesSorting = nullptr; } -void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) +void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const 
RenderModulesIndices& renderModulesIndices, int8 sortOrder) { + if (!IsInMainThread()) + { + // Clone draw call data the hard way + byte drawCallCopy[sizeof(DrawCall)]; + Platform::MemoryCopy(&drawCallCopy, &drawCall, sizeof(DrawCall)); + + // When rendering in async, delay GPU particles drawing to be in sync by moving drawing into delayed callback post scene drawing to use GPUContext safely + // Move drawing into delayed callback post scene drawing to use GPUContext safely + renderContext.List->AddDelayedDraw([buffer, drawCallCopy, drawModes, staticFlags, renderModulesIndices, sortOrder](RenderContext& renderContext) + { + DrawEmitterGPU(renderContext, buffer, *(DrawCall*)drawCallCopy, drawModes, staticFlags, renderModulesIndices, sortOrder); + }); + return; + } const auto context = GPUDevice::Instance->GetMainContext(); auto emitter = buffer->Emitter; @@ -1092,7 +1106,7 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe break; #if COMPILE_WITH_GPU_PARTICLES case ParticlesSimulationMode::GPU: - DrawEmitterGPU(renderContext, buffer, drawCall, drawModes, staticFlags, emitterData, renderModulesIndices, sortOrder); + DrawEmitterGPU(renderContext, buffer, drawCall, drawModes, staticFlags, renderModulesIndices, sortOrder); break; #endif } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index dbb91680a..05f72f83f 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -255,6 +255,20 @@ void RenderList::AddSettingsBlend(IPostFxSettingsProvider* provider, float weigh Blendable.Add(blend); } +void RenderList::AddDelayedDraw(DelayedDraw&& func) +{ + MemPoolLocker.Lock(); // TODO: convert _delayedDraws into RenderListBuffer with usage of arena Memory for fast alloc + _delayedDraws.Add(MoveTemp(func)); + MemPoolLocker.Unlock(); +} + +void RenderList::DrainDelayedDraws(RenderContext& renderContext) +{ + for (DelayedDraw& e : _delayedDraws) + e(renderContext); + 
_delayedDraws.SetCapacity(0); +} + void RenderList::BlendSettings() { PROFILE_CPU(); @@ -459,6 +473,7 @@ RenderList::RenderList(const SpawnParams& params) , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } })) + , _delayedDraws(&Memory) { } @@ -490,6 +505,7 @@ void RenderList::Clear() PostFx.Clear(); Settings = PostProcessSettings(); Blendable.Clear(); + _delayedDraws.Clear(); _instanceBuffer.Clear(); ObjectBuffer.Clear(); TempObjectBuffer.Clear(); diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 98f980e98..202afadc9 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -435,8 +435,24 @@ public: /// DynamicTypedBuffer TempObjectBuffer; + typedef Function DelayedDraw; + void AddDelayedDraw(DelayedDraw&& func); + void DrainDelayedDraws(RenderContext& renderContext); + + /// + /// Adds custom callback (eg. lambda) to invoke after scene draw calls are collected on a main thread (some async draw tasks might be active). Allows for safe usage of GPUContext for draw preparations or to perform GPU-driven drawing. 
+ /// + template + FORCE_INLINE void AddDelayedDraw(const T& lambda) + { + DelayedDraw func; + func.Bind(&Memory, lambda); + AddDelayedDraw(MoveTemp(func)); + } + private: DynamicVertexBuffer _instanceBuffer; + Array _delayedDraws; public: /// diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 7c7a53a09..77bfa5305 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -458,6 +458,10 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont JobSystem::Wait(label); renderContextBatch.WaitLabels.Clear(); + // Perform custom post-scene drawing (eg. GPU dispatches used by VFX) + for (RenderContext& e : renderContextBatch.Contexts) + e.List->DrainDelayedDraws(e); + #if USE_EDITOR GBufferPass::Instance()->OverrideDrawCalls(renderContext); #endif From a5838f739d936bd813812f1a46751eb674f8061c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 23:01:07 +0200 Subject: [PATCH 157/211] Optimize GPU particles indirect args building code --- Source/Engine/Particles/Particles.cpp | 93 ++++++--------------------- 1 file changed, 21 insertions(+), 72 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 7378b9f7b..60f0e6978 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -783,8 +783,8 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa int32 drawCalls = 0; for (int32 index = 0; index < renderModulesIndices.Count(); index++) { - int32 moduleIndex = renderModulesIndices[index]; - auto module = emitter->Graph.RenderModules[moduleIndex]; + int32 moduleIndex = renderModulesIndices.Get()[index]; + auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; switch (module->TypeID) { // Sprite Rendering @@ -832,76 +832,15 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Ensure to have enough 
space for indirect draw arguments const uint32 minSize = drawCalls * sizeof(GPUDrawIndexedIndirectArgs); if (buffer->GPU.IndirectDrawArgsBuffer->GetSize() < minSize) - { buffer->GPU.IndirectDrawArgsBuffer->Init(GPUBufferDescription::Argument(minSize)); - } - // Initialize indirect draw arguments contents (do it before drawing to reduce memory barriers amount when updating arguments buffer) + // Execute all rendering modules using indirect draw arguments int32 indirectDrawCallIndex = 0; for (int32 index = 0; index < renderModulesIndices.Count(); index++) { - int32 moduleIndex = renderModulesIndices[index]; - auto module = emitter->Graph.RenderModules[moduleIndex]; - switch (module->TypeID) - { - // Sprite Rendering - case 400: - { - GPUDrawIndexedIndirectArgs indirectArgsBufferInitData{ SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; - const uint32 offset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &indirectArgsBufferInitData, sizeof(indirectArgsBufferInitData), offset); - const uint32 counterOffset = buffer->GPU.ParticleCounterOffset; - context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, offset + 4, counterOffset); - indirectDrawCallIndex++; - break; - } - // Model Rendering - case 403: - { - const auto model = (Model*)module->Assets[0].Get(); - - // TODO: model LOD picking for particles? 
- int32 lodIndex = 0; - ModelLOD& lod = model->LODs[lodIndex]; - for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) - { - Mesh& mesh = lod.Meshes[meshIndex]; - if (!mesh.IsInitialized()) - continue; - - GPUDrawIndexedIndirectArgs indirectArgsBufferInitData = { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; - const uint32 offset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &indirectArgsBufferInitData, sizeof(indirectArgsBufferInitData), offset); - const uint32 counterOffset = buffer->GPU.ParticleCounterOffset; - context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, offset + 4, counterOffset); - indirectDrawCallIndex++; - } - - break; - } - // Ribbon Rendering - case 404: - { - // Not supported - break; - } - // Volumetric Fog Rendering - case 405: - { - // Not supported - break; - } - } - } - - // Execute all rendering modules - indirectDrawCallIndex = 0; - for (int32 index = 0; index < renderModulesIndices.Count(); index++) - { - int32 moduleIndex = renderModulesIndices[index]; - auto module = emitter->Graph.RenderModules[moduleIndex]; + int32 moduleIndex = renderModulesIndices.Get()[index]; + auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; drawCall.Particle.Module = module; - switch (module->TypeID) { // Sprite Rendering @@ -909,19 +848,24 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa { const auto material = (MaterialBase*)module->Assets[0].Get(); const auto moduleDrawModes = module->Values.Count() > 3 ? 
(DrawPass)module->Values[3].AsInt : DrawPass::Default; - drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - indirectDrawCallIndex++; auto dp = drawModes & moduleDrawModes & material->GetDrawModes(); if (dp == DrawPass::None || SpriteRenderer.Init()) break; drawCall.Material = material; + // Initialize indirect draw arguments + GPUDrawIndexedIndirectArgs args { SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; + const uint32 argsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); + context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &args, sizeof(args), argsOffset); + context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, argsOffset + 4, buffer->GPU.ParticleCounterOffset); + // Submit draw call SpriteRenderer.SetupDrawCall(drawCall); drawCall.InstanceCount = 0; drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; - if (dp != DrawPass::None) - renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); + drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); + renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); + indirectDrawCallIndex++; break; } // Model Rendering @@ -943,13 +887,18 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa continue; // TODO: include mesh entry transformation, visibility and shadows mode? 
+ // Initialize indirect draw arguments + GPUDrawIndexedIndirectArgs args = { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; + const uint32 argsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); + context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &args, sizeof(args), argsOffset); + context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, argsOffset + 4, buffer->GPU.ParticleCounterOffset); + // Execute draw call mesh.GetDrawCallGeometry(drawCall); drawCall.InstanceCount = 0; drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - if (dp != DrawPass::None) - renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); + renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); indirectDrawCallIndex++; } break; From cf9c20385560f82275ff468d34edb5babbfc35cd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 6 Aug 2025 18:48:18 +0200 Subject: [PATCH 158/211] Various optimizations --- Content/Shaders/BitonicSort.flax | 4 ++-- Source/Engine/Core/Types/Variant.cpp | 2 ++ Source/Engine/Graphics/GPUBufferDescription.h | 5 +++-- Source/Engine/Graphics/GPUContext.cpp | 4 +++- .../GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp | 3 ++- .../Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp | 2 +- Source/Engine/Particles/Graph/GPU/GPUParticles.cpp | 3 +++ Source/Engine/Particles/Particles.cpp | 1 - Source/Engine/Renderer/RenderList.cpp | 1 + Source/Engine/Renderer/Utils/BitonicSort.cpp | 10 ++++------ Source/Shaders/BitonicSort.shader | 1 + Source/Shaders/GPUParticlesSorting.shader | 2 -- 12 files changed, 22 insertions(+), 16 deletions(-) diff --git a/Content/Shaders/BitonicSort.flax b/Content/Shaders/BitonicSort.flax index ee7db3c74..1c01ad7bc 100644 --- a/Content/Shaders/BitonicSort.flax +++ b/Content/Shaders/BitonicSort.flax @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:924884da1dfef7a802b7190fd148eebbeece50d6fa4d69295c38238dd96331e6 -size 6538 +oid sha256:4a7cb98a1cbfe00c7d8d9dabe713f213537eaf13d4061243c32ca29ba06f3403 +size 6546 diff --git a/Source/Engine/Core/Types/Variant.cpp b/Source/Engine/Core/Types/Variant.cpp index 0ce2d8387..4ab8552d3 100644 --- a/Source/Engine/Core/Types/Variant.cpp +++ b/Source/Engine/Core/Types/Variant.cpp @@ -632,6 +632,7 @@ Variant::Variant(ScriptingObject* v) AsObject = v; if (v) { + // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types) Type.SetTypeName(v->GetType().Fullname); v->Deleted.Bind(this); } @@ -643,6 +644,7 @@ Variant::Variant(Asset* v) AsAsset = v; if (v) { + // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types) Type.SetTypeName(v->GetType().Fullname); v->AddReference(); v->OnUnloaded.Bind(this); diff --git a/Source/Engine/Graphics/GPUBufferDescription.h b/Source/Engine/Graphics/GPUBufferDescription.h index f0f192954..6303ae089 100644 --- a/Source/Engine/Graphics/GPUBufferDescription.h +++ b/Source/Engine/Graphics/GPUBufferDescription.h @@ -334,11 +334,12 @@ public: /// Creates argument buffer description. /// /// The size (in bytes). + /// The additional bindings (for example, to use as UAV, pass ). /// The usage. /// The buffer description. 
- static GPUBufferDescription Argument(int32 size, GPUResourceUsage usage = GPUResourceUsage::Default) + static GPUBufferDescription Argument(int32 size, GPUResourceUsage usage = GPUResourceUsage::Default, GPUBufferFlags additionalFlags = GPUBufferFlags::None) { - return Buffer(size, GPUBufferFlags::Argument, PixelFormat::Unknown, nullptr, 0, usage); + return Buffer(size, GPUBufferFlags::Argument | additionalFlags, PixelFormat::R32_UInt, nullptr, sizeof(uint32), usage); } /// diff --git a/Source/Engine/Graphics/GPUContext.cpp b/Source/Engine/Graphics/GPUContext.cpp index fdeca122b..55f87ba3b 100644 --- a/Source/Engine/Graphics/GPUContext.cpp +++ b/Source/Engine/Graphics/GPUContext.cpp @@ -20,6 +20,7 @@ void GPUContext::LogInvalidResourceUsage(int32 slot, const GPUResourceView* view GPUResource* resource = view ? view->GetParent() : nullptr; const Char* resourceType = TEXT("resource"); const Char* flagType = TEXT("flags"); + StringView resourceName; if (resource) { switch (resource->GetResourceType()) @@ -36,6 +37,7 @@ void GPUContext::LogInvalidResourceUsage(int32 slot, const GPUResourceView* view flagType = TEXT("GPUBufferFlags"); break; } + resourceName = resource->GetName(); } const Char* usage = TEXT("-"); switch (bindPoint) @@ -53,7 +55,7 @@ void GPUContext::LogInvalidResourceUsage(int32 slot, const GPUResourceView* view usage = TEXT("render target"); break; } - LOG(Error, "Incorrect {} bind at slot {} as {} (ensure to setup correct {} when creating that resource)", resourceType, slot, usage, flagType); + LOG(Error, "Incorrect {} '{}' bind at slot {} as {} (ensure to setup correct {} when creating that resource)", resourceType, resourceName, slot, usage, flagType); } #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index 40e081175..20a26f2f5 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ 
b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -359,7 +359,8 @@ bool GPUDeviceDX12::Init() // Debug Layer #if GPU_ENABLE_DIAGNOSTICS ComPtr infoQueue; - VALIDATE_DIRECTX_CALL(_device->QueryInterface(IID_PPV_ARGS(&infoQueue))); + HRESULT result = _device->QueryInterface(IID_PPV_ARGS(&infoQueue)); + LOG_DIRECTX_RESULT(result); if (infoQueue) { D3D12_INFO_QUEUE_FILTER filter; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 1a8739acc..c36d1acee 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -1358,7 +1358,7 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 // Use direct update for small buffers const uint32 alignedSize = Math::AlignUp(size, 4); - if (size <= 16 * 1024 && alignedSize <= buffer->GetSize()) + if (size <= 4 * 1024 && alignedSize <= buffer->GetSize()) { //AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT); //FlushBarriers(); diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp index 2c570a741..94886136f 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp @@ -12,6 +12,7 @@ #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Graphics/Shaders/GPUConstantBuffer.h" +#include "Engine/Profiler/Profiler.h" GPU_CB_STRUCT(GPUParticlesData { Matrix ViewProjectionMatrix; @@ -131,6 +132,8 @@ void GPUParticles::CopyParticlesCount(GPUContext* context, ParticleEmitter* emit void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) { + PROFILE_CPU_ASSET(emitter); + PROFILE_GPU("GPUParticles"); ASSERT(emitter->Graph.Version == data.Version); ASSERT(emitter->Graph.Version 
== data.Buffer->Version); uint32 counterDefaultValue = 0; diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 60f0e6978..71b0f2c30 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -770,7 +770,6 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa context->BindCB(0, GPUParticlesSortingCB); context->BindSR(0, buffer->GPU.Buffer->View()); context->BindUA(0, buffer->GPU.SortingKeysBuffer->View()); - // TODO: optimize it by using DispatchIndirect with shared invoke args generated after particles update const int32 threadGroupSize = 1024; context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 05f72f83f..fa0eb8d61 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -264,6 +264,7 @@ void RenderList::AddDelayedDraw(DelayedDraw&& func) void RenderList::DrainDelayedDraws(RenderContext& renderContext) { + PROFILE_GPU_CPU_NAMED("DelayedDraws"); for (DelayedDraw& e : _delayedDraws) e(renderContext); _delayedDraws.SetCapacity(0); diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index be5f38be4..0834588ba 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -6,8 +6,6 @@ #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPULimits.h" -#define INDIRECT_ARGS_STRIDE 12 - // The sorting keys buffer item structure template. Matches the shader type. 
struct Item { @@ -39,7 +37,7 @@ bool BitonicSort::Init() // Create indirect dispatch arguments buffer _dispatchArgsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("BitonicSortDispatchArgs")); - if (_dispatchArgsBuffer->Init(GPUBufferDescription::Raw(22 * 23 / 2 * INDIRECT_ARGS_STRIDE, GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess))) + if (_dispatchArgsBuffer->Init(GPUBufferDescription::Raw(22 * 23 / 2 * sizeof(GPUDispatchIndirectArgs), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess))) return true; // Load asset @@ -122,7 +120,7 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf // We have already pre-sorted up through k = 2048 when first writing our list, so we continue sorting with k = 4096 // For really large values of k, these indirect dispatches will be skipped over with thread counts of 0 - uint32 indirectArgsOffset = INDIRECT_ARGS_STRIDE; + uint32 indirectArgsOffset = sizeof(GPUDispatchIndirectArgs); for (uint32 k = 4096; k <= alignedMaxNumElements; k *= 2) { for (uint32 j = k / 2; j >= 2048; j /= 2) @@ -133,11 +131,11 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf context->BindCB(0, _cb); context->DispatchIndirect(_outerSortCS, _dispatchArgsBuffer, indirectArgsOffset); - indirectArgsOffset += INDIRECT_ARGS_STRIDE; + indirectArgsOffset += sizeof(GPUDispatchIndirectArgs); } context->DispatchIndirect(_innerSortCS, _dispatchArgsBuffer, indirectArgsOffset); - indirectArgsOffset += INDIRECT_ARGS_STRIDE; + indirectArgsOffset += sizeof(GPUDispatchIndirectArgs); } context->ResetUA(); diff --git a/Source/Shaders/BitonicSort.shader b/Source/Shaders/BitonicSort.shader index c4a275862..a2f7d215b 100644 --- a/Source/Shaders/BitonicSort.shader +++ b/Source/Shaders/BitonicSort.shader @@ -68,6 +68,7 @@ void CS_IndirectArgs(uint groupIndex : SV_GroupIndex) uint offset = 12 * prevDispatches; // Generate outer sort dispatch arguments + UNROLL for (uint j = k / 2; j > 1024; j /= 2) { 
// All of the groups of size 2j that are full diff --git a/Source/Shaders/GPUParticlesSorting.shader b/Source/Shaders/GPUParticlesSorting.shader index b0e0063ec..395172327 100644 --- a/Source/Shaders/GPUParticlesSorting.shader +++ b/Source/Shaders/GPUParticlesSorting.shader @@ -51,8 +51,6 @@ void CS_Sort(uint3 dispatchThreadId : SV_DispatchThreadID) if (index >= particlesCount) return; - // TODO: maybe process more than 1 particle at once and pre-sort them? - #if SORT_MODE == 0 // Sort particles by depth to the view's near plane From 959371a9951f62169a5b3dbfcb7e3b83da4904e3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 6 Aug 2025 23:39:46 +0200 Subject: [PATCH 159/211] Fix particles regression on DirectX --- Content/Shaders/BitonicSort.flax | 4 ++-- Content/Shaders/GPUParticlesSorting.flax | 4 ++-- Source/Engine/Renderer/RenderList.cpp | 2 ++ Source/Shaders/BitonicSort.shader | 1 - 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Content/Shaders/BitonicSort.flax b/Content/Shaders/BitonicSort.flax index 1c01ad7bc..ee7db3c74 100644 --- a/Content/Shaders/BitonicSort.flax +++ b/Content/Shaders/BitonicSort.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a7cb98a1cbfe00c7d8d9dabe713f213537eaf13d4061243c32ca29ba06f3403 -size 6546 +oid sha256:924884da1dfef7a802b7190fd148eebbeece50d6fa4d69295c38238dd96331e6 +size 6538 diff --git a/Content/Shaders/GPUParticlesSorting.flax b/Content/Shaders/GPUParticlesSorting.flax index a9806913e..2045fd649 100644 --- a/Content/Shaders/GPUParticlesSorting.flax +++ b/Content/Shaders/GPUParticlesSorting.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70db76daf1adae225c5354926b84ab738b0dfba4a66233e291223e81685900c8 -size 2629 +oid sha256:a16a973f4be075f8531a1b1551e33423b014da1e8b348f2672464ee21692e57a +size 2556 diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index fa0eb8d61..99bb1f0d8 100644 --- 
a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -264,6 +264,8 @@ void RenderList::AddDelayedDraw(DelayedDraw&& func) void RenderList::DrainDelayedDraws(RenderContext& renderContext) { + if (_delayedDraws.IsEmpty()) + return; PROFILE_GPU_CPU_NAMED("DelayedDraws"); for (DelayedDraw& e : _delayedDraws) e(renderContext); diff --git a/Source/Shaders/BitonicSort.shader b/Source/Shaders/BitonicSort.shader index a2f7d215b..c4a275862 100644 --- a/Source/Shaders/BitonicSort.shader +++ b/Source/Shaders/BitonicSort.shader @@ -68,7 +68,6 @@ void CS_IndirectArgs(uint groupIndex : SV_GroupIndex) uint offset = 12 * prevDispatches; // Generate outer sort dispatch arguments - UNROLL for (uint j = k / 2; j > 1024; j /= 2) { // All of the groups of size 2j that are full From 3ffb067e55a3296a66f22f96854f014acc268338 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 7 Aug 2025 09:27:28 +0200 Subject: [PATCH 160/211] Optimize Particles drawing to use a whole `RenderContextBatch` --- .../Particles/Graph/GPU/GPUParticles.cpp | 1 - Source/Engine/Particles/ParticleEffect.cpp | 17 +- Source/Engine/Particles/ParticleEffect.h | 1 + Source/Engine/Particles/Particles.cpp | 155 ++++++++++-------- Source/Engine/Particles/Particles.h | 6 +- 5 files changed, 110 insertions(+), 70 deletions(-) diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp index 94886136f..e2dc78d59 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp @@ -133,7 +133,6 @@ void GPUParticles::CopyParticlesCount(GPUContext* context, ParticleEmitter* emit void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) { PROFILE_CPU_ASSET(emitter); - PROFILE_GPU("GPUParticles"); ASSERT(emitter->Graph.Version == data.Version); ASSERT(emitter->Graph.Version == 
data.Buffer->Version); uint32 counterDefaultValue = 0; diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index 4df492fe2..183438ba2 100644 --- a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -576,8 +576,21 @@ void ParticleEffect::Draw(RenderContext& renderContext) { if (renderContext.View.Pass == DrawPass::GlobalSDF || renderContext.View.Pass == DrawPass::GlobalSurfaceAtlas) return; - _lastMinDstSqr = Math::Min(_lastMinDstSqr, Vector3::DistanceSquared(GetPosition(), renderContext.View.Position)); - Particles::DrawParticles(renderContext, this); + _lastMinDstSqr = Math::Min(_lastMinDstSqr, Vector3::DistanceSquared(GetPosition(), renderContext.View.WorldPosition)); + RenderContextBatch renderContextBatch(renderContext); + Particles::DrawParticles(renderContextBatch, this); +} + +void ParticleEffect::Draw(RenderContextBatch& renderContextBatch) +{ + Particles::DrawParticles(renderContextBatch, this); + + // Cull again against the main context (if using multiple ones) to skip caching draw distance from shadow projections + const RenderView& mainView = renderContextBatch.GetMainContext().View; + const BoundingSphere bounds(_sphere.Center - mainView.Origin, _sphere.Radius); + if (renderContextBatch.Contexts.Count() > 1 && !mainView.CullingFrustum.Intersects(bounds)) + return; + _lastMinDstSqr = Math::Min(_lastMinDstSqr, Vector3::DistanceSquared(bounds.Center, mainView.Position)); } #if USE_EDITOR diff --git a/Source/Engine/Particles/ParticleEffect.h b/Source/Engine/Particles/ParticleEffect.h index 2964012f6..4ec06e82b 100644 --- a/Source/Engine/Particles/ParticleEffect.h +++ b/Source/Engine/Particles/ParticleEffect.h @@ -404,6 +404,7 @@ public: // [Actor] bool HasContentLoaded() const override; void Draw(RenderContext& renderContext) override; + void Draw(RenderContextBatch& renderContextBatch) override; #if USE_EDITOR void OnDebugDrawSelected() override; void 
OnDebugDraw() override; diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 71b0f2c30..d20f3cb55 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -178,9 +178,7 @@ void Particles::OnEffectDestroy(ParticleEffect* effect) #endif } -typedef Array> RenderModulesIndices; - -void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) +void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const BoundingSphere& bounds, uint32 renderModulesIndices, int8 sortOrder) { // Skip if CPU buffer is empty if (buffer->CPU.Count == 0) @@ -189,7 +187,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa auto emitter = buffer->Emitter; // Check if need to perform any particles sorting - if (emitter->Graph.SortModules.HasItems() && renderContext.View.Pass != DrawPass::Depth && (buffer->CPU.Count != 0 || buffer->GPU.SortedIndices)) + if (emitter->Graph.SortModules.HasItems() && EnumHasAnyFlags(drawModes, DrawPass::Forward) && (buffer->CPU.Count != 0 || buffer->GPU.SortedIndices)) { // Prepare sorting data if (!buffer->GPU.SortedIndices) @@ -210,10 +208,11 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa if (listSize < 500) { // Use fast stack allocator from RenderList - sortingKeys[0] = renderContext.List->Memory.Allocate(listSize); - sortingKeys[1] = renderContext.List->Memory.Allocate(listSize); - sortingIndices[0] = renderContext.List->Memory.Allocate(listSize); - sortingIndices[1] = renderContext.List->Memory.Allocate(listSize); + auto& memory = renderContextBatch.GetMainContext().List->Memory; + sortingKeys[0] = memory.Allocate(listSize); + 
sortingKeys[1] = memory.Allocate(listSize); + sortingIndices[0] = memory.Allocate(listSize); + sortingIndices[1] = memory.Allocate(listSize); } else { @@ -236,7 +235,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa const int32 positionOffset = emitter->Graph.GetPositionAttributeOffset(); if (positionOffset == -1) break; - const Matrix viewProjection = renderContext.View.ViewProjection(); + const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection(); const byte* positionPtr = buffer->CPU.Buffer.Get() + positionOffset; if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { @@ -262,7 +261,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa const int32 positionOffset = emitter->Graph.GetPositionAttributeOffset(); if (positionOffset == -1) break; - const Float3 viewPosition = renderContext.View.Position; + const Float3 viewPosition = renderContextBatch.GetMainContext().View.Position; const byte* positionPtr = buffer->CPU.Buffer.Get() + positionOffset; if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { @@ -356,9 +355,10 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa auto& vertexBuffer = buffer->GPU.RibbonVertexBufferDynamic->Data; // Setup all ribbon modules - for (int32 index = 0; index < renderModulesIndices.Count(); index++) + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) { - const int32 moduleIndex = renderModulesIndices[index]; + if ((renderModulesIndices & (1u << moduleIndex)) == 0) + continue; auto module = emitter->Graph.RenderModules[moduleIndex]; if (module->TypeID != 404 || ribbonModuleIndex >= PARTICLE_EMITTER_MAX_RIBBONS) continue; @@ -454,7 +454,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Setup ribbon data ribbonModulesSegmentCount[ribbonModuleIndex] = segmentCount; - ribbonModulesDrawIndicesCount[index] 
= indices; + ribbonModulesDrawIndicesCount[ribbonModuleIndex] = indices; ribbonModulesDrawIndicesPos += indices; ribbonModuleIndex++; @@ -472,9 +472,10 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Execute all rendering modules ribbonModuleIndex = 0; - for (int32 index = 0; index < renderModulesIndices.Count(); index++) + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) { - const int32 moduleIndex = renderModulesIndices[index]; + if ((renderModulesIndices & (1u << moduleIndex)) == 0) + continue; auto module = emitter->Graph.RenderModules[moduleIndex]; drawCall.Particle.Module = module; @@ -493,7 +494,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Submit draw call SpriteRenderer.SetupDrawCall(drawCall); drawCall.InstanceCount = buffer->CPU.Count; - renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); + renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); break; } @@ -521,7 +522,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Submit draw call mesh.GetDrawCallGeometry(drawCall); drawCall.InstanceCount = buffer->CPU.Count; - renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); + renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); } break; @@ -580,7 +581,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa drawCall.Draw.StartIndex = ribbonModulesDrawIndicesStart[ribbonModuleIndex]; drawCall.Draw.IndicesCount = ribbonModulesDrawIndicesCount[ribbonModuleIndex]; drawCall.InstanceCount = 1; - renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); 
+ renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); ribbonModuleIndex++; @@ -610,7 +611,7 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa Float3::Transform(drawCall.Particle.VolumetricFog.Position, drawCall.World, drawCall.Particle.VolumetricFog.Position); drawCall.Particle.VolumetricFog.Radius = hasRadius ? radiusData[i] : 100.0f; drawCall.Particle.VolumetricFog.ParticleIndex = i; - renderContext.List->VolumetricFogParticles.Add(drawCall); + renderContextBatch.GetMainContext().List->VolumetricFogParticles.Add(drawCall); } break; } @@ -649,7 +650,7 @@ void CleanupGPUParticlesSorting() GPUParticlesSorting = nullptr; } -void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) +void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const BoundingSphere& bounds, uint32 renderModulesIndices, int8 sortOrder) { if (!IsInMainThread()) { @@ -659,9 +660,9 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // When rendering in async, delay GPU particles drawing to be in sync by moving drawing into delayed callback post scene drawing to use GPUContext safely // Move drawing into delayed callback post scene drawing to use GPUContext safely - renderContext.List->AddDelayedDraw([buffer, drawCallCopy, drawModes, staticFlags, renderModulesIndices, sortOrder](RenderContext& renderContext) + renderContextBatch.GetMainContext().List->AddDelayedDraw([&renderContextBatch, buffer, drawCallCopy, drawModes, staticFlags, bounds, renderModulesIndices, sortOrder](RenderContext& renderContext) { - DrawEmitterGPU(renderContext, buffer, *(DrawCall*)drawCallCopy, drawModes, 
staticFlags, renderModulesIndices, sortOrder); + DrawEmitterGPU(renderContextBatch, buffer, *(DrawCall*)drawCallCopy, drawModes, staticFlags, bounds, renderModulesIndices, sortOrder); }); return; } @@ -669,7 +670,7 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa auto emitter = buffer->Emitter; // Check if need to perform any particles sorting - if (emitter->Graph.SortModules.HasItems() && renderContext.View.Pass != DrawPass::Depth && buffer->GPU.ParticlesCountMax != 0) + if (emitter->Graph.SortModules.HasItems() && renderContextBatch.GetMainContext().View.Pass != DrawPass::Depth && buffer->GPU.ParticlesCountMax != 0) { PROFILE_GPU_CPU_NAMED("Sort Particles"); @@ -720,7 +721,7 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa permutationIndex = 0; sortAscending = false; data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); - const Matrix viewProjection = renderContext.View.ViewProjection(); + const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection(); if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { Matrix matrix; @@ -738,7 +739,7 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa permutationIndex = 1; sortAscending = false; data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); - data.ViewPosition = renderContext.View.Position; + data.ViewPosition = renderContextBatch.GetMainContext().View.Position; if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { Matrix::Transpose(drawCall.World, data.PositionTransform); @@ -780,9 +781,10 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Count draw calls to perform during this emitter rendering int32 drawCalls = 0; - for (int32 index = 0; index < renderModulesIndices.Count(); index++) + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) { - int32 moduleIndex = 
renderModulesIndices.Get()[index]; + if ((renderModulesIndices & (1u << moduleIndex)) == 0) + continue; auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; switch (module->TypeID) { @@ -835,9 +837,10 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Execute all rendering modules using indirect draw arguments int32 indirectDrawCallIndex = 0; - for (int32 index = 0; index < renderModulesIndices.Count(); index++) + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) { - int32 moduleIndex = renderModulesIndices.Get()[index]; + if ((renderModulesIndices & (1u << moduleIndex)) == 0) + continue; auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; drawCall.Particle.Module = module; switch (module->TypeID) @@ -863,7 +866,7 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa drawCall.InstanceCount = 0; drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); + renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); indirectDrawCallIndex++; break; } @@ -897,7 +900,7 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa drawCall.InstanceCount = 0; drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - renderContext.List->AddDrawCall(renderContext, dp, staticFlags, drawCall, false, sortOrder); + renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); indirectDrawCallIndex++; } break; @@ -920,43 
+923,65 @@ void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa #endif -void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effect) +void Particles::DrawParticles(RenderContextBatch& renderContextBatch, ParticleEffect* effect) { - // Setup - auto& view = renderContext.View; - const DrawPass drawModes = view.Pass & effect->DrawModes; - if (drawModes == DrawPass::None) - return; + PROFILE_CPU(); PROFILE_MEM(Particles); + + // Drawing assumes that all views within a batch have the same Origin + const Vector3& viewOrigin = renderContextBatch.GetMainContext().View.Origin; + BoundingSphere bounds = effect->GetSphere(); + bounds.Center -= viewOrigin; + + // Cull particles against all views + uint64 viewsMask = 0; + ASSERT_LOW_LAYER(renderContextBatch.Contexts.Count() <= 64); + DrawPass viewsDrawModes = DrawPass::None; + for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) + { + const RenderView& view = renderContextBatch.Contexts.Get()[i].View; + const bool visible = (view.Pass & effect->DrawModes) != DrawPass::None && (view.IsCullingDisabled || view.CullingFrustum.Intersects(bounds)); + if (visible) + viewsMask |= 1ull << (uint64)i; + viewsDrawModes |= view.Pass; + } + if (viewsMask == 0) + return; + viewsDrawModes &= effect->DrawModes; + + // Setup ConcurrentSystemLocker::ReadScope systemScope(SystemLocker); Matrix worlds[2]; - Matrix::Translation(-renderContext.View.Origin, worlds[0]); // World - renderContext.View.GetWorldMatrix(effect->GetTransform(), worlds[1]); // Local + Matrix::Translation(-viewOrigin, worlds[0]); // World + renderContextBatch.GetMainContext().View.GetWorldMatrix(effect->GetTransform(), worlds[1]); // Local float worldDeterminantSigns[2]; worldDeterminantSigns[0] = Math::FloatSelect(worlds[0].RotDeterminant(), 1, -1); worldDeterminantSigns[1] = Math::FloatSelect(worlds[1].RotDeterminant(), 1, -1); const StaticFlags staticFlags = effect->GetStaticFlags(); const int8 sortOrder = 
effect->SortOrder; - // Draw lights - for (int32 emitterIndex = 0; emitterIndex < effect->Instance.Emitters.Count(); emitterIndex++) + // Draw lights (only to into the main view) + if ((viewsMask & 1) == 1 && renderContextBatch.GetMainContext().View.Pass != DrawPass::Depth) { - auto& emitterData = effect->Instance.Emitters[emitterIndex]; - const auto buffer = emitterData.Buffer; - if (!buffer || (buffer->Mode == ParticlesSimulationMode::CPU && buffer->CPU.Count == 0)) - continue; - auto emitter = buffer->Emitter; - if (!emitter || !emitter->IsLoaded()) - continue; + for (int32 emitterIndex = 0; emitterIndex < effect->Instance.Emitters.Count(); emitterIndex++) + { + auto& emitterData = effect->Instance.Emitters[emitterIndex]; + const auto buffer = emitterData.Buffer; + if (!buffer || (buffer->Mode == ParticlesSimulationMode::CPU && buffer->CPU.Count == 0)) + continue; + auto emitter = buffer->Emitter; + if (!emitter || !emitter->IsLoaded()) + continue; - buffer->Emitter->GraphExecutorCPU.Draw(buffer->Emitter, effect, emitterData, renderContext, worlds[(int32)emitter->SimulationSpace]); + buffer->Emitter->GraphExecutorCPU.Draw(buffer->Emitter, effect, emitterData, renderContextBatch.GetMainContext(), worlds[(int32)emitter->SimulationSpace]); + } } // Setup a draw call common data DrawCall drawCall; drawCall.PerInstanceRandom = effect->GetPerInstanceRandom(); - drawCall.ObjectPosition = effect->GetSphere().Center - view.Origin; - drawCall.ObjectRadius = (float)effect->GetSphere().Radius; + drawCall.ObjectPosition = bounds.Center; + drawCall.ObjectRadius = (float)bounds.Radius; // Draw all emitters for (int32 emitterIndex = 0; emitterIndex < effect->Instance.Emitters.Count(); emitterIndex++) @@ -974,8 +999,8 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe drawCall.Particle.Particles = buffer; // Check if need to render any module - RenderModulesIndices renderModulesIndices; - for (int32 moduleIndex = 0; moduleIndex < 
emitter->Graph.RenderModules.Count() && renderModulesIndices.Count() < PARTICLE_EMITTER_MAX_MODULES; moduleIndex++) + uint32 renderModulesIndices = 0; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count() && moduleIndex < 32; moduleIndex++) { auto module = emitter->Graph.RenderModules[moduleIndex]; @@ -989,10 +1014,10 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe if (!material || !material->IsReady() || !material->IsParticle() || - (view.Pass & material->GetDrawModes() & moduleDrawModes) == DrawPass::None + (viewsDrawModes & material->GetDrawModes() & moduleDrawModes) == DrawPass::None ) break; - renderModulesIndices.Add(moduleIndex); + renderModulesIndices |= 1u << moduleIndex; break; } // Model Rendering @@ -1008,10 +1033,10 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe if (!material || !material->IsReady() || !material->IsParticle() || - (view.Pass & material->GetDrawModes() & moduleDrawModes) == DrawPass::None + (viewsDrawModes & material->GetDrawModes() & moduleDrawModes) == DrawPass::None ) break; - renderModulesIndices.Add(moduleIndex); + renderModulesIndices |= 1u << moduleIndex; break; } // Ribbon Rendering @@ -1022,10 +1047,10 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe if (!material || !material->IsReady() || !material->IsParticle() || - (view.Pass & material->GetDrawModes() & moduleDrawModes) == DrawPass::None + (viewsDrawModes & material->GetDrawModes() & moduleDrawModes) == DrawPass::None ) break; - renderModulesIndices.Add(moduleIndex); + renderModulesIndices |= 1u << moduleIndex; break; } // Volumetric Fog Rendering @@ -1035,26 +1060,27 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe if (!material || !material->IsReady() || material->GetInfo().Domain != MaterialDomain::VolumeParticle || - (view.Flags & ViewFlags::Fog) == ViewFlags::None + 
(renderContextBatch.GetMainContext().View.Flags & ViewFlags::Fog) == ViewFlags::None || + (viewsMask & 1) == 0 ) break; - renderModulesIndices.Add(moduleIndex); + renderModulesIndices |= 1u << moduleIndex; break; } } } - if (renderModulesIndices.IsEmpty()) + if (renderModulesIndices == 0) continue; // Draw switch (buffer->Mode) { case ParticlesSimulationMode::CPU: - DrawEmitterCPU(renderContext, buffer, drawCall, drawModes, staticFlags, emitterData, renderModulesIndices, sortOrder); + DrawEmitterCPU(renderContextBatch, buffer, drawCall, viewsDrawModes, staticFlags, bounds, renderModulesIndices, sortOrder); break; #if COMPILE_WITH_GPU_PARTICLES case ParticlesSimulationMode::GPU: - DrawEmitterGPU(renderContext, buffer, drawCall, drawModes, staticFlags, renderModulesIndices, sortOrder); + DrawEmitterGPU(renderContextBatch, buffer, drawCall, viewsDrawModes, staticFlags, bounds, renderModulesIndices, sortOrder); break; #endif } @@ -1090,6 +1116,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context) ScopeLock lock(GpuUpdateListLocker); if (GpuUpdateList.IsEmpty()) return; + PROFILE_CPU_NAMED("GPUParticles"); PROFILE_GPU("GPU Particles"); PROFILE_MEM(Particles); diff --git a/Source/Engine/Particles/Particles.h b/Source/Engine/Particles/Particles.h index 77f651cfa..38ec39d2d 100644 --- a/Source/Engine/Particles/Particles.h +++ b/Source/Engine/Particles/Particles.h @@ -6,7 +6,7 @@ #include "Engine/Threading/ConcurrentSystemLocker.h" class TaskGraphSystem; -struct RenderContext; +struct RenderContextBatch; struct RenderView; class ParticleEmitter; class ParticleSystemInstance; @@ -48,9 +48,9 @@ public: /// /// Draws the particles. /// - /// The rendering context. + /// The rendering context. /// The owning actor. 
- static void DrawParticles(RenderContext& renderContext, ParticleEffect* effect); + static void DrawParticles(RenderContextBatch& renderContextBatch, ParticleEffect* effect); #if USE_EDITOR /// From d4355e31d82cbc04979d615652a4a73329e9386a Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 7 Aug 2025 18:41:58 +0200 Subject: [PATCH 161/211] Optimize GPU particles drawing with batched indirect args setup before sorting --- Source/Engine/Particles/Particles.cpp | 613 ++++++++++++---------- Source/Engine/Particles/ParticlesData.cpp | 2 - Source/Engine/Particles/ParticlesData.h | 5 - Source/Engine/Renderer/RenderList.cpp | 1 - 4 files changed, 350 insertions(+), 271 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index d20f3cb55..da2e612d9 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -635,6 +635,22 @@ AssetReference GPUParticlesSorting; GPUConstantBuffer* GPUParticlesSortingCB; GPUShaderProgramCS* GPUParticlesSortingCS[3]; +// GPU emitters drawing is batched for efficiency +struct GPUEmitterDraw +{ + ParticleBuffer* Buffer; + DrawCall DrawCall; + DrawPass DrawModes; + StaticFlags StaticFlags; + BoundingSphere Bounds; + uint32 RenderModulesIndices; + uint32 IndirectArgsSize; + int8 SortOrder; + bool Sorting; +}; +Array GPUEmitterDraws; +GPUBuffer* GPUIndirectArgsBuffer = nullptr; + #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) @@ -648,277 +664,347 @@ void OnShaderReloading(Asset* obj) void CleanupGPUParticlesSorting() { GPUParticlesSorting = nullptr; + GPUEmitterDraws.Resize(0); + SAFE_DELETE_GPU_RESOURCE(GPUIndirectArgsBuffer); +} + +void DrawEmittersGPU(RenderContextBatch& renderContextBatch) +{ + PROFILE_GPU_CPU_NAMED("DrawEmittersGPU"); + ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker); + GPUContext* context = GPUDevice::Instance->GetMainContext(); + + // Count draws and sorting passes needed for resources allocation + 
uint32 indirectArgsSize = 0; + bool sorting = false; + for (const GPUEmitterDraw& draw : GPUEmitterDraws) + { + indirectArgsSize += draw.IndirectArgsSize; + sorting |= draw.Sorting; + } + + // Prepare pipeline + if (sorting && GPUParticlesSorting == nullptr) + { + // TODO: preload shader if platform supports GPU particles (eg. inside ParticleEmitter::load if it's GPU sim with any sort module) + GPUParticlesSorting = Content::LoadAsyncInternal(TEXT("Shaders/GPUParticlesSorting")); +#if COMPILE_WITH_DEV_ENV + if (GPUParticlesSorting) + GPUParticlesSorting.Get()->OnReloading.Bind(); +#endif + } + if (GPUParticlesSorting == nullptr || !GPUParticlesSorting->IsLoaded()) + { + // Skip sorting until shader is ready + sorting = false; + } + else if (!GPUParticlesSortingCB) + { + const auto shader = GPUParticlesSorting->GetShader(); + const StringAnsiView CS_Sort("CS_Sort"); + GPUParticlesSortingCS[0] = shader->GetCS(CS_Sort, 0); + GPUParticlesSortingCS[1] = shader->GetCS(CS_Sort, 1); + GPUParticlesSortingCS[2] = shader->GetCS(CS_Sort, 2); + GPUParticlesSortingCB = shader->GetCB(0); + ASSERT_LOW_LAYER(GPUParticlesSortingCB); + } + const uint32 indirectArgsCapacity = Math::RoundUpToPowerOf2(indirectArgsSize); + if (GPUIndirectArgsBuffer == nullptr) + GPUIndirectArgsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("ParticleIndirectDrawArgsBuffer")); + if (GPUIndirectArgsBuffer->GetSize() < indirectArgsCapacity) + GPUIndirectArgsBuffer->Init(GPUBufferDescription::Argument(indirectArgsCapacity)); + + // Build indirect arguments + uint32 indirectArgsOffset = 0; + for (GPUEmitterDraw& draw : GPUEmitterDraws) + { + ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) + { + if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0) + continue; + auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; + draw.DrawCall.Particle.Module = module; + switch (module->TypeID) + { + // Sprite 
Rendering + case 400: + { + const auto material = (MaterialBase*)module->Assets[0].Get(); + const auto moduleDrawModes = module->Values.Count() > 3 ? (DrawPass)module->Values[3].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None || SpriteRenderer.Init()) + break; + + // Draw sprite for each particle + GPUDrawIndexedIndirectArgs args { SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; + context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset); + context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset); + indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); + break; + } + // Model Rendering + case 403: + { + const auto model = (Model*)module->Assets[0].Get(); + const auto material = (MaterialBase*)module->Assets[1].Get(); + const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None) + break; + // TODO: model LOD picking for particles? 
+ int32 lodIndex = 0; + ModelLOD& lod = model->LODs[lodIndex]; + for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) + { + Mesh& mesh = lod.Meshes[meshIndex]; + if (!mesh.IsInitialized()) + continue; + + // Draw mesh for each particle + GPUDrawIndexedIndirectArgs args { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; + context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset); + context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset); + indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); + } + break; + } + // Ribbon Rendering + case 404: + { + // Not supported + break; + } + // Volumetric Fog Rendering + case 405: + { + // Not supported + break; + } + } + } + } + indirectArgsOffset = 0; + + // Sort particles + if (sorting) + { + PROFILE_GPU_CPU_NAMED("Sort Particles"); + for (const GPUEmitterDraw& draw : GPUEmitterDraws) + { + if (!draw.Sorting) + continue; + ASSERT(draw.Buffer->GPU.SortingKeysBuffer); + + // Execute all sorting modules + ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) + { + auto module = emitter->Graph.SortModules[moduleIndex]; + const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; + + // Generate sorting keys based on sorting mode + GPUParticlesSortingData data; + data.ParticleCounterOffset = draw.Buffer->GPU.ParticleCounterOffset; + data.ParticleStride = draw.Buffer->Stride; + data.ParticleCapacity = draw.Buffer->Capacity; + int32 permutationIndex; + bool sortAscending; + switch (sortMode) + { + case ParticleSortMode::ViewDepth: + { + permutationIndex = 0; + sortAscending = false; + data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); + const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection(); + if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) + { 
+ Matrix matrix; + Matrix::Multiply(draw.DrawCall.World, viewProjection, matrix); + Matrix::Transpose(matrix, data.PositionTransform); + } + else + { + Matrix::Transpose(viewProjection, data.PositionTransform); + } + break; + } + case ParticleSortMode::ViewDistance: + { + permutationIndex = 1; + sortAscending = false; + data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); + data.ViewPosition = renderContextBatch.GetMainContext().View.Position; + if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) + { + Matrix::Transpose(draw.DrawCall.World, data.PositionTransform); + } + else + { + Matrix::Transpose(Matrix::Identity, data.PositionTransform); + } + break; + } + case ParticleSortMode::CustomAscending: + case ParticleSortMode::CustomDescending: + { + permutationIndex = 2; + sortAscending = sortMode == ParticleSortMode::CustomAscending; + int32 attributeIdx = module->Attributes[0]; + if (attributeIdx == -1) + break; + data.CustomOffset = emitter->Graph.Layout.Attributes[attributeIdx].Offset; + break; + } +#if !BUILD_RELEASE + default: + CRASH; + return; +#endif + } + context->UpdateCB(GPUParticlesSortingCB, &data); + context->BindCB(0, GPUParticlesSortingCB); + context->BindSR(0, draw.Buffer->GPU.Buffer->View()); + context->BindUA(0, draw.Buffer->GPU.SortingKeysBuffer->View()); + const int32 threadGroupSize = 1024; + context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(draw.Buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1); + + // Perform sorting + BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, data.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices); + } + } + } + + // Submit draw calls + for (GPUEmitterDraw& draw : GPUEmitterDraws) + { + // Execute all rendering modules using indirect draw arguments + ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); 
moduleIndex++) + { + if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0) + continue; + auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; + draw.DrawCall.Particle.Module = module; + switch (module->TypeID) + { + // Sprite Rendering + case 400: + { + const auto material = (MaterialBase*)module->Assets[0].Get(); + const auto moduleDrawModes = module->Values.Count() > 3 ? (DrawPass)module->Values[3].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None || SpriteRenderer.Init()) + break; + draw.DrawCall.Material = material; + + // Submit draw call + SpriteRenderer.SetupDrawCall(draw.DrawCall); + draw.DrawCall.InstanceCount = 0; + draw.DrawCall.Draw.IndirectArgsBuffer = GPUIndirectArgsBuffer; + draw.DrawCall.Draw.IndirectArgsOffset = indirectArgsOffset; + renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, draw.StaticFlags, ShadowsCastingMode::DynamicOnly, draw.Bounds, draw.DrawCall, false, draw.SortOrder); + indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); + break; + } + // Model Rendering + case 403: + { + const auto model = (Model*)module->Assets[0].Get(); + const auto material = (MaterialBase*)module->Assets[1].Get(); + const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None) + break; + draw.DrawCall.Material = material; + + // TODO: model LOD picking for particles? + int32 lodIndex = 0; + ModelLOD& lod = model->LODs[lodIndex]; + for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) + { + Mesh& mesh = lod.Meshes[meshIndex]; + if (!mesh.IsInitialized()) + continue; + // TODO: include mesh entry transformation, visibility and shadows mode? 
+ + // Execute draw call + mesh.GetDrawCallGeometry(draw.DrawCall); + draw.DrawCall.InstanceCount = 0; + draw.DrawCall.Draw.IndirectArgsBuffer = GPUIndirectArgsBuffer; + draw.DrawCall.Draw.IndirectArgsOffset = indirectArgsOffset; + renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, draw.StaticFlags, ShadowsCastingMode::DynamicOnly, draw.Bounds, draw.DrawCall, false, draw.SortOrder); + indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); + } + break; + } + // Ribbon Rendering + case 404: + { + // Not supported + break; + } + // Volumetric Fog Rendering + case 405: + { + // Not supported + break; + } + } + } + } + + GPUEmitterDraws.Clear(); } void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const BoundingSphere& bounds, uint32 renderModulesIndices, int8 sortOrder) { - if (!IsInMainThread()) + // Setup drawing data + uint32 indirectArgsSize = 0; + ParticleEmitter* emitter = buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) { - // Clone draw call data the hard way - byte drawCallCopy[sizeof(DrawCall)]; - Platform::MemoryCopy(&drawCallCopy, &drawCall, sizeof(DrawCall)); - - // When rendering in async, delay GPU particles drawing to be in sync by moving drawing into delayed callback post scene drawing to use GPUContext safely - // Move drawing into delayed callback post scene drawing to use GPUContext safely - renderContextBatch.GetMainContext().List->AddDelayedDraw([&renderContextBatch, buffer, drawCallCopy, drawModes, staticFlags, bounds, renderModulesIndices, sortOrder](RenderContext& renderContext) + if ((renderModulesIndices & (1u << moduleIndex)) == 0) + continue; + auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; + switch (module->TypeID) { - DrawEmitterGPU(renderContextBatch, buffer, *(DrawCall*)drawCallCopy, drawModes, staticFlags, bounds, 
renderModulesIndices, sortOrder); + // Sprite Rendering + case 400: + indirectArgsSize += sizeof(GPUDrawIndexedIndirectArgs); + break; + // Model Rendering + case 403: + { + const auto model = (Model*)module->Assets[0].Get(); + // TODO: model LOD picking for particles? + int32 lodIndex = 0; + ModelLOD& lod = model->LODs[lodIndex]; + indirectArgsSize += sizeof(GPUDrawIndexedIndirectArgs) * lod.Meshes.Count(); + break; + } + } + } + if (indirectArgsSize == 0) + return; + bool sorting = buffer->Emitter->Graph.SortModules.HasItems() && renderContextBatch.GetMainContext().View.Pass != DrawPass::Depth && buffer->GPU.ParticlesCountMax != 0; + if (sorting && !buffer->GPU.SortedIndices) + buffer->AllocateSortBuffer(); + + // When rendering in async, delay GPU particles drawing to be in sync by moving drawing into delayed callback post scene drawing to use GPUContext safely + // Also, batch rendering all GPU emitters together for more efficient usage of GPU memory barriers and indirect arguments buffers allocation + RenderContext::GPULocker.Lock(); + if (GPUEmitterDraws.Count() == 0) + { + // The first emitter schedules the drawing of all batched draws + renderContextBatch.GetMainContext().List->AddDelayedDraw([&renderContextBatch](RenderContext& renderContext) + { + DrawEmittersGPU(renderContextBatch); }); - return; - } - const auto context = GPUDevice::Instance->GetMainContext(); - auto emitter = buffer->Emitter; - - // Check if need to perform any particles sorting - if (emitter->Graph.SortModules.HasItems() && renderContextBatch.GetMainContext().View.Pass != DrawPass::Depth && buffer->GPU.ParticlesCountMax != 0) - { - PROFILE_GPU_CPU_NAMED("Sort Particles"); - - // Prepare pipeline - if (GPUParticlesSorting == nullptr) - { - // TODO: preload shader if platform supports GPU particles - GPUParticlesSorting = Content::LoadAsyncInternal(TEXT("Shaders/GPUParticlesSorting")); - if (GPUParticlesSorting == nullptr || GPUParticlesSorting->WaitForLoaded()) - return; -#if 
COMPILE_WITH_DEV_ENV - GPUParticlesSorting.Get()->OnReloading.Bind(); -#endif - } - if (!GPUParticlesSortingCB) - { - const auto shader = GPUParticlesSorting->GetShader(); - const StringAnsiView CS_Sort("CS_Sort"); - GPUParticlesSortingCS[0] = shader->GetCS(CS_Sort, 0); - GPUParticlesSortingCS[1] = shader->GetCS(CS_Sort, 1); - GPUParticlesSortingCS[2] = shader->GetCS(CS_Sort, 2); - GPUParticlesSortingCB = shader->GetCB(0); - ASSERT(GPUParticlesSortingCB); - } - - // Prepare sorting data - if (!buffer->GPU.SortedIndices) - buffer->AllocateSortBuffer(); - ASSERT(buffer->GPU.SortingKeysBuffer); - - // Execute all sorting modules - for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) - { - auto module = emitter->Graph.SortModules[moduleIndex]; - const auto sortMode = static_cast(module->Values[2].AsInt); - - // Generate sorting keys based on sorting mode - GPUParticlesSortingData data; - data.ParticleCounterOffset = buffer->GPU.ParticleCounterOffset; - data.ParticleStride = buffer->Stride; - data.ParticleCapacity = buffer->Capacity; - int32 permutationIndex; - bool sortAscending; - switch (sortMode) - { - case ParticleSortMode::ViewDepth: - { - permutationIndex = 0; - sortAscending = false; - data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); - const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection(); - if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) - { - Matrix matrix; - Matrix::Multiply(drawCall.World, viewProjection, matrix); - Matrix::Transpose(matrix, data.PositionTransform); - } - else - { - Matrix::Transpose(viewProjection, data.PositionTransform); - } - break; - } - case ParticleSortMode::ViewDistance: - { - permutationIndex = 1; - sortAscending = false; - data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); - data.ViewPosition = renderContextBatch.GetMainContext().View.Position; - if (emitter->SimulationSpace == 
ParticlesSimulationSpace::Local) - { - Matrix::Transpose(drawCall.World, data.PositionTransform); - } - else - { - Matrix::Transpose(Matrix::Identity, data.PositionTransform); - } - break; - } - case ParticleSortMode::CustomAscending: - case ParticleSortMode::CustomDescending: - { - permutationIndex = 2; - sortAscending = sortMode == ParticleSortMode::CustomAscending; - int32 attributeIdx = module->Attributes[0]; - if (attributeIdx == -1) - break; - data.CustomOffset = emitter->Graph.Layout.Attributes[attributeIdx].Offset; - break; - } -#if !BUILD_RELEASE - default: - CRASH; - return; -#endif - } - context->UpdateCB(GPUParticlesSortingCB, &data); - context->BindCB(0, GPUParticlesSortingCB); - context->BindSR(0, buffer->GPU.Buffer->View()); - context->BindUA(0, buffer->GPU.SortingKeysBuffer->View()); - const int32 threadGroupSize = 1024; - context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1); - - // Perform sorting - BitonicSort::Instance()->Sort(context, buffer->GPU.SortingKeysBuffer, buffer->GPU.Buffer, data.ParticleCounterOffset, sortAscending, buffer->GPU.SortedIndices); - } - } - - // Count draw calls to perform during this emitter rendering - int32 drawCalls = 0; - for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) - { - if ((renderModulesIndices & (1u << moduleIndex)) == 0) - continue; - auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; - switch (module->TypeID) - { - // Sprite Rendering - case 400: - { - drawCalls++; - break; - } - // Model Rendering - case 403: - { - const auto model = (Model*)module->Assets[0].Get(); - - // TODO: model LOD picking for particles? 
- int32 lodIndex = 0; - ModelLOD& lod = model->LODs[lodIndex]; - for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) - { - Mesh& mesh = lod.Meshes[meshIndex]; - if (!mesh.IsInitialized()) - continue; - - drawCalls++; - } - - break; - } - // Ribbon Rendering - case 404: - { - // Not supported - break; - } - // Volumetric Fog Rendering - case 405: - { - // Not supported - break; - } - } - } - if (drawCalls == 0) - return; - - // Ensure to have enough space for indirect draw arguments - const uint32 minSize = drawCalls * sizeof(GPUDrawIndexedIndirectArgs); - if (buffer->GPU.IndirectDrawArgsBuffer->GetSize() < minSize) - buffer->GPU.IndirectDrawArgsBuffer->Init(GPUBufferDescription::Argument(minSize)); - - // Execute all rendering modules using indirect draw arguments - int32 indirectDrawCallIndex = 0; - for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) - { - if ((renderModulesIndices & (1u << moduleIndex)) == 0) - continue; - auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; - drawCall.Particle.Module = module; - switch (module->TypeID) - { - // Sprite Rendering - case 400: - { - const auto material = (MaterialBase*)module->Assets[0].Get(); - const auto moduleDrawModes = module->Values.Count() > 3 ? 
(DrawPass)module->Values[3].AsInt : DrawPass::Default; - auto dp = drawModes & moduleDrawModes & material->GetDrawModes(); - if (dp == DrawPass::None || SpriteRenderer.Init()) - break; - drawCall.Material = material; - - // Initialize indirect draw arguments - GPUDrawIndexedIndirectArgs args { SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; - const uint32 argsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &args, sizeof(args), argsOffset); - context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, argsOffset + 4, buffer->GPU.ParticleCounterOffset); - - // Submit draw call - SpriteRenderer.SetupDrawCall(drawCall); - drawCall.InstanceCount = 0; - drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; - drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); - indirectDrawCallIndex++; - break; - } - // Model Rendering - case 403: - { - const auto model = (Model*)module->Assets[0].Get(); - const auto material = (MaterialBase*)module->Assets[1].Get(); - const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default; - auto dp = drawModes & moduleDrawModes & material->GetDrawModes(); - drawCall.Material = material; - - // TODO: model LOD picking for particles? - int32 lodIndex = 0; - ModelLOD& lod = model->LODs[lodIndex]; - for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) - { - Mesh& mesh = lod.Meshes[meshIndex]; - if (!mesh.IsInitialized()) - continue; - // TODO: include mesh entry transformation, visibility and shadows mode? 
- - // Initialize indirect draw arguments - GPUDrawIndexedIndirectArgs args = { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; - const uint32 argsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &args, sizeof(args), argsOffset); - context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, argsOffset + 4, buffer->GPU.ParticleCounterOffset); - - // Execute draw call - mesh.GetDrawCallGeometry(drawCall); - drawCall.InstanceCount = 0; - drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; - drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); - renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); - indirectDrawCallIndex++; - } - break; - } - // Ribbon Rendering - case 404: - { - // Not supported - break; - } - // Volumetric Fog Rendering - case 405: - { - // Not supported - break; - } - } } + GPUEmitterDraws.Add({ buffer, drawCall, drawModes, staticFlags, bounds, renderModulesIndices, indirectArgsSize, sortOrder, sorting }); + RenderContext::GPULocker.Unlock(); } #endif @@ -1119,6 +1205,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context) PROFILE_CPU_NAMED("GPUParticles"); PROFILE_GPU("GPU Particles"); PROFILE_MEM(Particles); + ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker); for (ParticleEffect* effect : GpuUpdateList) { diff --git a/Source/Engine/Particles/ParticlesData.cpp b/Source/Engine/Particles/ParticlesData.cpp index 9b5a26246..074cc73d6 100644 --- a/Source/Engine/Particles/ParticlesData.cpp +++ b/Source/Engine/Particles/ParticlesData.cpp @@ -98,7 +98,6 @@ ParticleBuffer::~ParticleBuffer() { SAFE_DELETE_GPU_RESOURCE(GPU.Buffer); SAFE_DELETE_GPU_RESOURCE(GPU.BufferSecondary); - SAFE_DELETE_GPU_RESOURCE(GPU.IndirectDrawArgsBuffer); 
SAFE_DELETE_GPU_RESOURCE(GPU.SortingKeysBuffer); SAFE_DELETE_GPU_RESOURCE(GPU.SortedIndices); SAFE_DELETE(GPU.RibbonIndexBufferDynamic); @@ -146,7 +145,6 @@ bool ParticleBuffer::Init(ParticleEmitter* emitter) GPU.BufferSecondary = GPUDevice::Instance->CreateBuffer(TEXT("ParticleBuffer B")); if (GPU.BufferSecondary->Init(GPU.Buffer->GetDescription())) return true; - GPU.IndirectDrawArgsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("ParticleIndirectDrawArgsBuffer")); GPU.PendingClear = true; GPU.HasValidCount = false; GPU.ParticleCounterOffset = size; diff --git a/Source/Engine/Particles/ParticlesData.h b/Source/Engine/Particles/ParticlesData.h index 53f63826f..5a5ebcba4 100644 --- a/Source/Engine/Particles/ParticlesData.h +++ b/Source/Engine/Particles/ParticlesData.h @@ -203,11 +203,6 @@ public: /// GPUBuffer* BufferSecondary = nullptr; - /// - /// The indirect draw command arguments buffer used by the GPU particles to invoke drawing on a GPU based on the particles amount (instances count). - /// - GPUBuffer* IndirectDrawArgsBuffer = nullptr; - /// /// The GPU particles sorting buffer. Contains structure of particle index and the sorting key for every particle. Used to sort particles. 
/// diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 99bb1f0d8..136e9cf92 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -266,7 +266,6 @@ void RenderList::DrainDelayedDraws(RenderContext& renderContext) { if (_delayedDraws.IsEmpty()) return; - PROFILE_GPU_CPU_NAMED("DelayedDraws"); for (DelayedDraw& e : _delayedDraws) e(renderContext); _delayedDraws.SetCapacity(0); From 545df6ce3531d9f485f56cfdcebc52bb370eb7e0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 7 Aug 2025 18:57:39 +0200 Subject: [PATCH 162/211] Add profiler event to GPU particles indirect args setup --- Source/Engine/Particles/Particles.cpp | 115 +++++++++++++------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index da2e612d9..3ae9e1aea 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -716,71 +716,74 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) // Build indirect arguments uint32 indirectArgsOffset = 0; - for (GPUEmitterDraw& draw : GPUEmitterDraws) { - ParticleEmitter* emitter = draw.Buffer->Emitter; - for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) + PROFILE_GPU_CPU_NAMED("Init Indirect Args"); + for (GPUEmitterDraw& draw : GPUEmitterDraws) { - if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0) - continue; - auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; - draw.DrawCall.Particle.Module = module; - switch (module->TypeID) + ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) { - // Sprite Rendering - case 400: - { - const auto material = (MaterialBase*)module->Assets[0].Get(); - const auto moduleDrawModes = module->Values.Count() > 3 ? 
(DrawPass)module->Values[3].AsInt : DrawPass::Default; - auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); - if (dp == DrawPass::None || SpriteRenderer.Init()) - break; - - // Draw sprite for each particle - GPUDrawIndexedIndirectArgs args { SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; - context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset); - context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset); - indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); - break; - } - // Model Rendering - case 403: - { - const auto model = (Model*)module->Assets[0].Get(); - const auto material = (MaterialBase*)module->Assets[1].Get(); - const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default; - auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); - if (dp == DrawPass::None) - break; - // TODO: model LOD picking for particles? - int32 lodIndex = 0; - ModelLOD& lod = model->LODs[lodIndex]; - for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) + if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0) + continue; + auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; + draw.DrawCall.Particle.Module = module; + switch (module->TypeID) { - Mesh& mesh = lod.Meshes[meshIndex]; - if (!mesh.IsInitialized()) - continue; + // Sprite Rendering + case 400: + { + const auto material = (MaterialBase*)module->Assets[0].Get(); + const auto moduleDrawModes = module->Values.Count() > 3 ? 
(DrawPass)module->Values[3].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None || SpriteRenderer.Init()) + break; - // Draw mesh for each particle - GPUDrawIndexedIndirectArgs args { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; + // Draw sprite for each particle + GPUDrawIndexedIndirectArgs args{ SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset); context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset); indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); + break; + } + // Model Rendering + case 403: + { + const auto model = (Model*)module->Assets[0].Get(); + const auto material = (MaterialBase*)module->Assets[1].Get(); + const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None) + break; + // TODO: model LOD picking for particles? 
+ int32 lodIndex = 0; + ModelLOD& lod = model->LODs[lodIndex]; + for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) + { + Mesh& mesh = lod.Meshes[meshIndex]; + if (!mesh.IsInitialized()) + continue; + + // Draw mesh for each particle + GPUDrawIndexedIndirectArgs args{ (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; + context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset); + context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset); + indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); + } + break; + } + // Ribbon Rendering + case 404: + { + // Not supported + break; + } + // Volumetric Fog Rendering + case 405: + { + // Not supported + break; + } } - break; - } - // Ribbon Rendering - case 404: - { - // Not supported - break; - } - // Volumetric Fog Rendering - case 405: - { - // Not supported - break; - } } } } From 3d8438017531f5edd5faba64838aa2ad0ca9e133 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 7 Aug 2025 19:09:58 +0200 Subject: [PATCH 163/211] Optimize particles sorting by splitting key generation and sorting to reduce CS switches --- Source/Engine/Particles/Particles.cpp | 29 ++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 3ae9e1aea..c33b82edf 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -793,13 +793,15 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) if (sorting) { PROFILE_GPU_CPU_NAMED("Sort Particles"); + + // Generate sort keys for each particle for (const GPUEmitterDraw& draw : GPUEmitterDraws) { if (!draw.Sorting) continue; ASSERT(draw.Buffer->GPU.SortingKeysBuffer); - // Execute all sorting modules + // Generate sort keys for particles ParticleEmitter* emitter = draw.Buffer->Emitter; for (int32 moduleIndex = 0; 
moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) { @@ -812,13 +814,11 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) data.ParticleStride = draw.Buffer->Stride; data.ParticleCapacity = draw.Buffer->Capacity; int32 permutationIndex; - bool sortAscending; switch (sortMode) { case ParticleSortMode::ViewDepth: { permutationIndex = 0; - sortAscending = false; data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection(); if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) @@ -836,7 +836,6 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) case ParticleSortMode::ViewDistance: { permutationIndex = 1; - sortAscending = false; data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); data.ViewPosition = renderContextBatch.GetMainContext().View.Position; if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) @@ -853,7 +852,6 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) case ParticleSortMode::CustomDescending: { permutationIndex = 2; - sortAscending = sortMode == ParticleSortMode::CustomAscending; int32 attributeIdx = module->Attributes[0]; if (attributeIdx == -1) break; @@ -872,9 +870,26 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) context->BindUA(0, draw.Buffer->GPU.SortingKeysBuffer->View()); const int32 threadGroupSize = 1024; context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(draw.Buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1); + } + } - // Perform sorting - BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, data.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices); + // Run sorting + for (const GPUEmitterDraw& draw : GPUEmitterDraws) + { + if (!draw.Sorting) + continue; + ASSERT(draw.Buffer->GPU.SortingKeysBuffer); + + // Execute all sorting modules + 
ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) + { + auto module = emitter->Graph.SortModules[moduleIndex]; + const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; + bool sortAscending = sortMode == ParticleSortMode::CustomAscending; + BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices); + // TODO: split sorted keys copy with another loop to give time for UAV transition + // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier } } } From 2a9260ddd5397d99db2429eb7f307ef7c72ecff9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 7 Aug 2025 23:49:21 +0200 Subject: [PATCH 164/211] Optimize emitters to cull shadow passes sorting and drawing --- Source/Engine/Particles/Particles.cpp | 17 +++++++++++++---- Source/Engine/Renderer/RenderList.cpp | 4 ++-- Source/Engine/Renderer/RenderList.h | 4 ++-- Source/Engine/Renderer/Renderer.cpp | 4 ++-- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index c33b82edf..8f0fe29ca 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -178,6 +178,13 @@ void Particles::OnEffectDestroy(ParticleEffect* effect) #endif } +bool EmitterUseSorting(RenderContextBatch& renderContextBatch, ParticleBuffer* buffer, DrawPass drawModes, const BoundingSphere& bounds) +{ + const RenderView& mainView = renderContextBatch.GetMainContext().View; + drawModes &= mainView.Pass; + return buffer->Emitter->Graph.SortModules.HasItems() && EnumHasAnyFlags(drawModes, DrawPass::Forward) && (mainView.IsCullingDisabled || mainView.CullingFrustum.Intersects(bounds)); +} + void DrawEmitterCPU(RenderContextBatch& renderContextBatch, 
ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const BoundingSphere& bounds, uint32 renderModulesIndices, int8 sortOrder) { // Skip if CPU buffer is empty @@ -187,7 +194,7 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff auto emitter = buffer->Emitter; // Check if need to perform any particles sorting - if (emitter->Graph.SortModules.HasItems() && EnumHasAnyFlags(drawModes, DrawPass::Forward) && (buffer->CPU.Count != 0 || buffer->GPU.SortedIndices)) + if (EmitterUseSorting(renderContextBatch, buffer, drawModes, bounds) && (buffer->CPU.Count != 0 || buffer->GPU.SortedIndices)) { // Prepare sorting data if (!buffer->GPU.SortedIndices) @@ -1006,7 +1013,7 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff } if (indirectArgsSize == 0) return; - bool sorting = buffer->Emitter->Graph.SortModules.HasItems() && renderContextBatch.GetMainContext().View.Pass != DrawPass::Depth && buffer->GPU.ParticlesCountMax != 0; + bool sorting = EmitterUseSorting(renderContextBatch, buffer, drawModes, bounds) && (buffer->GPU.ParticlesCountMax != 0 || buffer->GPU.SortedIndices); if (sorting && !buffer->GPU.SortedIndices) buffer->AllocateSortBuffer(); @@ -1016,7 +1023,7 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff if (GPUEmitterDraws.Count() == 0) { // The first emitter schedules the drawing of all batched draws - renderContextBatch.GetMainContext().List->AddDelayedDraw([&renderContextBatch](RenderContext& renderContext) + renderContextBatch.GetMainContext().List->AddDelayedDraw([](RenderContextBatch& renderContextBatch, int32 contextIndex) { DrawEmittersGPU(renderContextBatch); }); @@ -1046,8 +1053,10 @@ void Particles::DrawParticles(RenderContextBatch& renderContextBatch, ParticleEf const RenderView& view = renderContextBatch.Contexts.Get()[i].View; const bool visible = (view.Pass & effect->DrawModes) != DrawPass::None && 
(view.IsCullingDisabled || view.CullingFrustum.Intersects(bounds)); if (visible) + { viewsMask |= 1ull << (uint64)i; - viewsDrawModes |= view.Pass; + viewsDrawModes |= view.Pass; + } } if (viewsMask == 0) return; diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 136e9cf92..c3989c253 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -262,12 +262,12 @@ void RenderList::AddDelayedDraw(DelayedDraw&& func) MemPoolLocker.Unlock(); } -void RenderList::DrainDelayedDraws(RenderContext& renderContext) +void RenderList::DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex) { if (_delayedDraws.IsEmpty()) return; for (DelayedDraw& e : _delayedDraws) - e(renderContext); + e(renderContextBatch, contextIndex); _delayedDraws.SetCapacity(0); } diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 202afadc9..ae02b36a9 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -435,9 +435,9 @@ public: /// DynamicTypedBuffer TempObjectBuffer; - typedef Function DelayedDraw; + typedef Function DelayedDraw; void AddDelayedDraw(DelayedDraw&& func); - void DrainDelayedDraws(RenderContext& renderContext); + void DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex); /// /// Adds custom callback (eg. lambda) to invoke after scene draw calls are collected on a main thread (some async draw tasks might be active). Allows for safe usage of GPUContext for draw preparations or to perform GPU-driven drawing. 
diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 77bfa5305..c775b50e1 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -459,8 +459,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont renderContextBatch.WaitLabels.Clear(); // Perform custom post-scene drawing (eg. GPU dispatches used by VFX) - for (RenderContext& e : renderContextBatch.Contexts) - e.List->DrainDelayedDraws(e); + for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) + renderContextBatch.Contexts[i].List->DrainDelayedDraws(renderContextBatch, i); #if USE_EDITOR GBufferPass::Instance()->OverrideDrawCalls(renderContext); From 9fabc1028addf09814d4291c01214cebaa468afe Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 7 Aug 2025 23:51:02 +0200 Subject: [PATCH 165/211] Optimize GPU particles indirect args building with a single batched upload of default data --- Source/Engine/Particles/Particles.cpp | 79 +++++++++++++++++++++------ 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 8f0fe29ca..47f172636 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -725,6 +725,68 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) uint32 indirectArgsOffset = 0; { PROFILE_GPU_CPU_NAMED("Init Indirect Args"); + + // Init default arguments + byte* indirectArgsMemory = (byte*)renderContextBatch.GetMainContext().List->Memory.Allocate(indirectArgsSize, GPU_SHADER_DATA_ALIGNMENT); + for (GPUEmitterDraw& draw : GPUEmitterDraws) + { + ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) + { + if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0) + continue; + auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; + switch 
(module->TypeID) + { + // Sprite Rendering + case 400: + { + const auto material = (MaterialBase*)module->Assets[0].Get(); + const auto moduleDrawModes = module->Values.Count() > 3 ? (DrawPass)module->Values[3].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None || SpriteRenderer.Init()) + break; + + // Draw sprite for each particle + GPUDrawIndexedIndirectArgs args = { SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; + Platform::MemoryCopy(indirectArgsMemory + indirectArgsOffset, &args, sizeof(args)); + indirectArgsOffset += sizeof(args); + break; + } + // Model Rendering + case 403: + { + const auto model = (Model*)module->Assets[0].Get(); + const auto material = (MaterialBase*)module->Assets[1].Get(); + const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default; + auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes(); + if (dp == DrawPass::None) + break; + // TODO: model LOD picking for particles? 
+ int32 lodIndex = 0; + ModelLOD& lod = model->LODs[lodIndex]; + for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++) + { + Mesh& mesh = lod.Meshes[meshIndex]; + if (!mesh.IsInitialized()) + continue; + + // Draw mesh for each particle + GPUDrawIndexedIndirectArgs args = { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; + Platform::MemoryCopy(indirectArgsMemory + indirectArgsOffset, &args, sizeof(args)); + indirectArgsOffset += sizeof(args); + } + break; + } + } + } + } + + // Upload default arguments + context->UpdateBuffer(GPUIndirectArgsBuffer, indirectArgsMemory, indirectArgsOffset); + + // Copy particle counts into draw commands + indirectArgsOffset = 0; for (GPUEmitterDraw& draw : GPUEmitterDraws) { ParticleEmitter* emitter = draw.Buffer->Emitter; @@ -733,7 +795,6 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0) continue; auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; - draw.DrawCall.Particle.Module = module; switch (module->TypeID) { // Sprite Rendering @@ -746,8 +807,6 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) break; // Draw sprite for each particle - GPUDrawIndexedIndirectArgs args{ SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 }; - context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset); context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset); indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); break; @@ -771,25 +830,11 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) continue; // Draw mesh for each particle - GPUDrawIndexedIndirectArgs args{ (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 }; - context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset); context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, 
draw.Buffer->GPU.ParticleCounterOffset); indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs); } break; } - // Ribbon Rendering - case 404: - { - // Not supported - break; - } - // Volumetric Fog Rendering - case 405: - { - // Not supported - break; - } } } } From 0369d9b2cbe9491b7e75615dfb21ac92786f5431 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 8 Aug 2025 11:03:03 +0200 Subject: [PATCH 166/211] Optimize `CSSetShader` on D3D11 when remains unchanged --- .../DirectX/DX11/GPUContextDX11.cpp | 15 +++++++++++++-- .../GraphicsDevice/DirectX/DX11/GPUContextDX11.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index b68f5e595..62f9afd3a 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -95,6 +95,7 @@ void GPUContextDX11::FrameBegin() _srMaskDirtyCompute = 0; _rtCount = 0; _vertexLayout = nullptr; + _currentCompute = nullptr; _currentState = nullptr; _rtDepth = nullptr; Platform::MemoryClear(_rtHandles, sizeof(_rtHandles)); @@ -497,7 +498,12 @@ void GPUContextDX11::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun flushOM(); // Dispatch - _context->CSSetShader((ID3D11ComputeShader*)shader->GetBufferHandle(), nullptr, 0); + auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle(); + if (_currentCompute != compute) + { + _currentCompute = compute; + _context->CSSetShader(compute, nullptr, 0); + } _context->Dispatch(threadGroupCountX, threadGroupCountY, threadGroupCountZ); RENDER_STAT_DISPATCH_CALL(); @@ -518,7 +524,12 @@ void GPUContextDX11::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf flushOM(); // Dispatch - _context->CSSetShader((ID3D11ComputeShader*)shader->GetBufferHandle(), nullptr, 0); + auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle(); + if (_currentCompute != compute) + 
{ + _currentCompute = compute; + _context->CSSetShader(compute, nullptr, 0); + } _context->DispatchIndirect(bufferForArgsDX11->GetBuffer(), offsetForArgs); RENDER_STAT_DISPATCH_CALL(); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index 48de69b3f..6d1877534 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -59,6 +59,7 @@ private: bool _iaInputLayoutDirtyFlag; // Pipeline State + ID3D11ComputeShader* _currentCompute; GPUPipelineStateDX11* _currentState; ID3D11BlendState* CurrentBlendState; ID3D11RasterizerState* CurrentRasterizerState; From 90d1e63b58afe32a5515b3506eb218d0f2fa5edc Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 8 Aug 2025 13:11:05 +0200 Subject: [PATCH 167/211] Add minor optimizations to particles drawing --- Content/Shaders/BitonicSort.flax | 4 +- Source/Engine/Particles/Particles.cpp | 4 +- Source/Engine/Renderer/Utils/BitonicSort.cpp | 19 +++------ Source/Engine/Renderer/Utils/BitonicSort.h | 3 +- Source/Shaders/BitonicSort.shader | 41 ++++++++++---------- 5 files changed, 33 insertions(+), 38 deletions(-) diff --git a/Content/Shaders/BitonicSort.flax b/Content/Shaders/BitonicSort.flax index ee7db3c74..fa9adc1ef 100644 --- a/Content/Shaders/BitonicSort.flax +++ b/Content/Shaders/BitonicSort.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:924884da1dfef7a802b7190fd148eebbeece50d6fa4d69295c38238dd96331e6 -size 6538 +oid sha256:db9ca2435baf7cba079e22af86feca8397723688107fd4abd4f11466a445791e +size 6669 diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 47f172636..5ea195a00 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -845,6 +845,7 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) if (sorting) { PROFILE_GPU_CPU_NAMED("Sort 
Particles"); + context->BindCB(0, GPUParticlesSortingCB); // Generate sort keys for each particle for (const GPUEmitterDraw& draw : GPUEmitterDraws) @@ -917,7 +918,6 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) #endif } context->UpdateCB(GPUParticlesSortingCB, &data); - context->BindCB(0, GPUParticlesSortingCB); context->BindSR(0, draw.Buffer->GPU.Buffer->View()); context->BindUA(0, draw.Buffer->GPU.SortingKeysBuffer->View()); const int32 threadGroupSize = 1024; @@ -939,7 +939,7 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) auto module = emitter->Graph.SortModules[moduleIndex]; const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; bool sortAscending = sortMode == ParticleSortMode::CustomAscending; - BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices); + BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.ParticlesCountMax); // TODO: split sorted keys copy with another loop to give time for UAV transition // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier } diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index 0834588ba..93f8dc97d 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -6,15 +6,9 @@ #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPULimits.h" -// The sorting keys buffer item structure template. Matches the shader type. 
-struct Item -{ - float Key; - uint32 Value; -}; - GPU_CB_STRUCT(Data { - Item NullItem; + float NullItemKey; + uint32 NullItemValue; uint32 CounterOffset; uint32 MaxIterations; uint32 LoopK; @@ -86,22 +80,22 @@ void BitonicSort::Dispose() _shader = nullptr; } -void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer) +void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer, uint32 maxElements) { ASSERT(context && sortingKeysBuffer && countBuffer); if (checkIfSkipPass()) return; PROFILE_GPU_CPU("Bitonic Sort"); const uint32 elementSizeBytes = sizeof(uint64); - const uint32 maxNumElements = sortingKeysBuffer->GetSize() / elementSizeBytes; + const uint32 maxNumElements = maxElements != 0 ? maxElements : sortingKeysBuffer->GetSize() / elementSizeBytes; const uint32 alignedMaxNumElements = Math::RoundUpToPowerOf2(maxNumElements); const uint32 maxIterations = (uint32)Math::Log2((float)Math::Max(2048u, alignedMaxNumElements)) - 10; // Setup constants buffer Data data; data.CounterOffset = counterOffset; - data.NullItem.Key = sortAscending ? MAX_float : -MAX_float; - data.NullItem.Value = 0; + data.NullItemKey = sortAscending ? MAX_float : -MAX_float; + data.NullItemValue = 0; data.KeySign = sortAscending ? 
-1.0f : 1.0f; data.MaxIterations = maxIterations; data.LoopK = 0; @@ -128,7 +122,6 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf data.LoopK = k; data.LoopJ = j; context->UpdateCB(_cb, &data); - context->BindCB(0, _cb); context->DispatchIndirect(_outerSortCS, _dispatchArgsBuffer, indirectArgsOffset); indirectArgsOffset += sizeof(GPUDispatchIndirectArgs); diff --git a/Source/Engine/Renderer/Utils/BitonicSort.h b/Source/Engine/Renderer/Utils/BitonicSort.h index 289905b09..99069e182 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.h +++ b/Source/Engine/Renderer/Utils/BitonicSort.h @@ -34,7 +34,8 @@ public: /// The offset into counter buffer to find count for this list. Must be a multiple of 4 bytes. /// True to sort in ascending order (smallest to largest), otherwise false to sort in descending order. /// The output buffer for sorted values extracted from the sorted sortingKeysBuffer after algorithm run. Valid for uint value types - used as RWBuffer. - void Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer); + /// Optional upper limit of elements to sort. Cna be used to optimize indirect dispatches allocation. If non-zero, then it gets calculated based on the input item buffer size. + void Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer, uint32 maxElements = 0); public: diff --git a/Source/Shaders/BitonicSort.shader b/Source/Shaders/BitonicSort.shader index c4a275862..9e49a8185 100644 --- a/Source/Shaders/BitonicSort.shader +++ b/Source/Shaders/BitonicSort.shader @@ -36,14 +36,14 @@ uint InsertOneBit(uint value, uint oneBitMask) // (effectively a negation) or leave the value alone. When the KeySign is // 1, we are sorting descending, so when A < B, they should swap. For an // ascending sort, -A < -B should swap. 
-bool ShouldSwap(Item a, Item b) +bool ShouldSwap(Item a, Item b, float keySign) { //return (a ^ NullItem) < (b ^ NullItem); //return (a.Key) < (b.Key); - return (a.Key * KeySign) < (b.Key * KeySign); + return (a.Key * keySign) < (b.Key * keySign); //return asfloat(a) < asfloat(b); - //return (asfloat(a) * KeySign) < (asfloat(b) * KeySign); + //return (asfloat(a) * keySign) < (asfloat(b) * keySign); } #ifdef _CS_IndirectArgs @@ -136,6 +136,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) GroupMemoryBarrierWithGroupSync(); + float keySign = KeySign; UNROLL for (uint k = 2; k <= 2048; k <<= 1) { @@ -144,14 +145,14 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) uint index2 = InsertOneBit(groupIndex, j); uint index1 = index2 ^ (k == 2 * j ? k - 1 : j); - Item A = SortData[index1]; - Item B = SortData[index2]; + Item a = SortData[index1]; + Item b = SortData[index2]; - if (ShouldSwap(A, B)) + if (ShouldSwap(a, b, keySign)) { // Swap the items - SortData[index1] = B; - SortData[index2] = A; + SortData[index1] = b; + SortData[index2] = a; } GroupMemoryBarrierWithGroupSync(); @@ -182,20 +183,21 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) GroupMemoryBarrierWithGroupSync(); + float keySign = KeySign; UNROLL for (uint j = 1024; j > 0; j /= 2) { uint index2 = InsertOneBit(groupIndex, j); uint index1 = index2 ^ j; - Item A = SortData[index1]; - Item B = SortData[index2]; + Item a = SortData[index1]; + Item b = SortData[index2]; - if (ShouldSwap(A, B)) + if (ShouldSwap(a, b, keySign)) { // Swap the items - SortData[index1] = B; - SortData[index2] = A; + SortData[index1] = b; + SortData[index2] = a; } GroupMemoryBarrierWithGroupSync(); @@ -224,14 +226,15 @@ void CS_OuterSort(uint3 dispatchThreadId : SV_DispatchThreadID) if (index2 >= count) return; - Item A = SortBuffer[index1]; - Item B = SortBuffer[index2]; + Item a = SortBuffer[index1]; + Item b = SortBuffer[index2]; - if 
(ShouldSwap(A, B)) + float keySign = KeySign; + if (ShouldSwap(a, b, keySign)) { // Swap the items - SortBuffer[index1] = B; - SortBuffer[index2] = A; + SortBuffer[index1] = b; + SortBuffer[index2] = a; } } @@ -248,12 +251,10 @@ void CS_CopyIndices(uint3 dispatchThreadId : SV_DispatchThreadID) { const uint count = CounterBuffer.Load(CounterOffset); uint index = dispatchThreadId.x; - if (index >= count) return; Item element = SortBuffer[index]; - SortedIndices[index] = element.Value; } From 0ea555b041f1df5bc8e1a9d0565ec6700e8af0c8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 8 Aug 2025 16:25:09 +0200 Subject: [PATCH 168/211] Optimize GPU Particles simulation to perform memory buffer copies/updates before dispatch --- .../Particles/Graph/GPU/GPUParticles.cpp | 36 ++++++++++----- .../Engine/Particles/Graph/GPU/GPUParticles.h | 13 ++---- Source/Engine/Particles/Particles.cpp | 44 ++++++++++++++++--- 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp index e2dc78d59..54fbc254f 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp @@ -130,14 +130,20 @@ void GPUParticles::CopyParticlesCount(GPUContext* context, ParticleEmitter* emit } } -void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) +bool GPUParticles::CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const +{ + const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount; + return data.GPU.DeltaTime > 0.0f && + emitter->Graph.Version == data.Version && + emitter->Graph.Version == data.Buffer->Version && + threads != 0 && + _mainCS; +} + +void GPUParticles::PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) { - 
PROFILE_CPU_ASSET(emitter); - ASSERT(emitter->Graph.Version == data.Version); - ASSERT(emitter->Graph.Version == data.Buffer->Version); uint32 counterDefaultValue = 0; const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset; - const bool hasCB = _cbData.HasItems(); // Clear buffers if need to if (data.Buffer->GPU.PendingClear) @@ -156,14 +162,17 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic } } - // Skip if can - SceneRenderTask* viewTask = effect->GetRenderTask(); - const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount; - if (data.GPU.DeltaTime <= 0.0f || threads == 0 || !_mainCS) - return; - // Clear destination buffer counter context->UpdateBuffer(data.Buffer->GPU.BufferSecondary, &counterDefaultValue, sizeof(counterDefaultValue), counterOffset); +} + +void GPUParticles::Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) +{ + PROFILE_CPU_ASSET(emitter); + const bool hasCB = _cbData.HasItems(); + const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount; + const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset; + SceneRenderTask* viewTask = effect->GetRenderTask(); // Setup parameters MaterialParameter::BindMeta bindMeta; @@ -265,6 +274,11 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic // Invoke Compute shader const int32 threadGroupSize = 1024; context->Dispatch(_mainCS, Math::Min(Math::DivideAndRoundUp(threads, threadGroupSize), GPU_MAX_CS_DISPATCH_THREAD_GROUPS), 1, 1); +} + +void GPUParticles::PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) +{ + const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset; // Copy custom data for (int32 i = 0; i < CustomDataSize; i += 4) diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.h 
b/Source/Engine/Particles/Graph/GPU/GPUParticles.h index fe7f26233..8d6008f53 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.h +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.h @@ -79,15 +79,10 @@ public: /// The destination buffer offset from start (in bytes) to copy the counter (uint32). void CopyParticlesCount(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, GPUBuffer* dstBuffer, uint32 dstOffset); - /// - /// Performs the GPU particles simulation update using the graphics device. - /// - /// The GPU context that supports Compute. - /// The owning emitter. - /// The instance effect. - /// The index of the emitter in the particle system. - /// The instance data. - void Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); + bool CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const; + void PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); + void Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); + void PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); }; #endif diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 5ea195a00..9236efd9d 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -1279,6 +1279,16 @@ void UpdateGPU(RenderTask* task, GPUContext* context) PROFILE_MEM(Particles); ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker); + // Collect valid emitter tracks to update + struct GPUSim + { + ParticleEffect* Effect; + ParticleEmitter* Emitter; + int32 EmitterIndex; + ParticleEmitterInstance& Data; + }; + Array sims; + 
sims.EnsureCapacity(Math::AlignUp(GpuUpdateList.Count(), 64)); // Preallocate with some slack for (ParticleEffect* effect : GpuUpdateList) { auto& instance = effect->Instance; @@ -1286,7 +1296,6 @@ void UpdateGPU(RenderTask* task, GPUContext* context) if (!particleSystem || !particleSystem->IsLoaded()) continue; - // Update all emitter tracks for (int32 j = 0; j < particleSystem->Tracks.Count(); j++) { const auto& track = particleSystem->Tracks[j]; @@ -1297,16 +1306,41 @@ void UpdateGPU(RenderTask* task, GPUContext* context) if (!emitter || !emitter->IsLoaded() || emitter->SimulationMode != ParticlesSimulationMode::GPU || instance.Emitters.Count() <= emitterIndex) continue; ParticleEmitterInstance& data = instance.Emitters[emitterIndex]; - if (!data.Buffer) + if (!data.Buffer || !emitter->GPU.CanSim(emitter, data)) continue; ASSERT(emitter->Capacity != 0 && emitter->Graph.Layout.Size != 0); - - // TODO: use async context for particles to update them on compute during GBuffer rendering - emitter->GPU.Execute(context, emitter, effect, emitterIndex, data); + sims.Add({ effect, emitter, emitterIndex, data }); } } GpuUpdateList.Clear(); + // Pre-pass with buffers setup + { + PROFILE_CPU_NAMED("PreSim"); + for (GPUSim& sim : sims) + { + sim.Emitter->GPU.PreSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + } + } + + // Pre-pass with buffers setup + { + PROFILE_GPU_CPU("Sim"); + for (GPUSim& sim : sims) + { + sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + } + } + + // Post-pass with buffers setup + { + PROFILE_CPU_NAMED("PostSim"); + for (GPUSim& sim : sims) + { + sim.Emitter->GPU.PostSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + } + } + context->ResetSR(); context->ResetUA(); context->FlushState(); From 519a9c0a1454cf63a48eabeb6857e71ba043368c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 8 Aug 2025 17:03:39 +0200 Subject: [PATCH 169/211] Add direct dispatch for Bitonic Sort when 
using small input buffer --- Content/Shaders/BitonicSort.flax | 4 +- Source/Engine/Particles/Particles.cpp | 3 +- Source/Engine/Renderer/Utils/BitonicSort.cpp | 62 ++++++++++++-------- Source/Engine/Renderer/Utils/BitonicSort.h | 4 +- Source/Shaders/BitonicSort.shader | 45 +++++++------- 5 files changed, 66 insertions(+), 52 deletions(-) diff --git a/Content/Shaders/BitonicSort.flax b/Content/Shaders/BitonicSort.flax index fa9adc1ef..69d773379 100644 --- a/Content/Shaders/BitonicSort.flax +++ b/Content/Shaders/BitonicSort.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db9ca2435baf7cba079e22af86feca8397723688107fd4abd4f11466a445791e -size 6669 +oid sha256:07d45b7f2085a28938e3bef090e259c0698a1987f9cd69df952168524ce07193 +size 6877 diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 9236efd9d..2423feed2 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -940,8 +940,7 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; bool sortAscending = sortMode == ParticleSortMode::CustomAscending; BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.ParticlesCountMax); - // TODO: split sorted keys copy with another loop to give time for UAV transition - // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier + // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier (run all sorting in parallel) } } } diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index 93f8dc97d..ed7ece05b 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ 
b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -56,7 +56,7 @@ bool BitonicSort::setupResources() // Cache compute shaders _indirectArgsCS = shader->GetCS("CS_IndirectArgs"); - _preSortCS = shader->GetCS("CS_PreSort"); + _preSortCS.Get(shader, "CS_PreSort"); _innerSortCS = shader->GetCS("CS_InnerSort"); _outerSortCS = shader->GetCS("CS_OuterSort"); _copyIndicesCS = shader->GetCS("CS_CopyIndices"); @@ -73,7 +73,7 @@ void BitonicSort::Dispose() SAFE_DELETE_GPU_RESOURCE(_dispatchArgsBuffer); _cb = nullptr; _indirectArgsCS = nullptr; - _preSortCS = nullptr; + _preSortCS.Clear(); _innerSortCS = nullptr; _outerSortCS = nullptr; _copyIndicesCS = nullptr; @@ -86,8 +86,9 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf if (checkIfSkipPass()) return; PROFILE_GPU_CPU("Bitonic Sort"); - const uint32 elementSizeBytes = sizeof(uint64); - const uint32 maxNumElements = maxElements != 0 ? maxElements : sortingKeysBuffer->GetSize() / elementSizeBytes; + uint32 maxNumElements = sortingKeysBuffer->GetSize() / sizeof(uint64); + if (maxElements > 0 && maxElements < maxNumElements) + maxNumElements = maxElements; const uint32 alignedMaxNumElements = Math::RoundUpToPowerOf2(maxNumElements); const uint32 maxIterations = (uint32)Math::Log2((float)Math::Max(2048u, alignedMaxNumElements)) - 10; @@ -102,33 +103,44 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf data.LoopJ = 0; context->UpdateCB(_cb, &data); context->BindCB(0, _cb); - - // Generate execute indirect arguments context->BindSR(0, countBuffer->View()); - context->BindUA(0, _dispatchArgsBuffer->View()); - context->Dispatch(_indirectArgsCS, 1, 1, 1); - // Pre-Sort the buffer up to k = 2048 (this also pads the list with invalid indices that will drift to the end of the sorted list) - context->BindUA(0, sortingKeysBuffer->View()); - context->DispatchIndirect(_preSortCS, _dispatchArgsBuffer, 0); - - // We have already pre-sorted up through k = 2048 when first 
writing our list, so we continue sorting with k = 4096 - // For really large values of k, these indirect dispatches will be skipped over with thread counts of 0 - uint32 indirectArgsOffset = sizeof(GPUDispatchIndirectArgs); - for (uint32 k = 4096; k <= alignedMaxNumElements; k *= 2) + // If item count is small we can do only presorting within a single dispatch thread group + if (maxNumElements <= 2048) { - for (uint32 j = k / 2; j >= 2048; j /= 2) - { - data.LoopK = k; - data.LoopJ = j; - context->UpdateCB(_cb, &data); + // Use pre-sort with smaller thread group size (eg. for small particle emitters sorting) + const int32 permutation = maxNumElements < 128 ? 1 : 0; + context->BindUA(0, sortingKeysBuffer->View()); + context->Dispatch(_preSortCS.Get(permutation), 1, 1, 1); + } + else + { + // Generate execute indirect arguments + context->BindUA(0, _dispatchArgsBuffer->View()); + context->Dispatch(_indirectArgsCS, 1, 1, 1); - context->DispatchIndirect(_outerSortCS, _dispatchArgsBuffer, indirectArgsOffset); + // Pre-Sort the buffer up to k = 2048 (this also pads the list with invalid indices that will drift to the end of the sorted list) + context->BindUA(0, sortingKeysBuffer->View()); + context->DispatchIndirect(_preSortCS.Get(0), _dispatchArgsBuffer, 0); + + // We have already pre-sorted up through k = 2048 when first writing our list, so we continue sorting with k = 4096 + // For really large values of k, these indirect dispatches will be skipped over with thread counts of 0 + uint32 indirectArgsOffset = sizeof(GPUDispatchIndirectArgs); + for (uint32 k = 4096; k <= alignedMaxNumElements; k *= 2) + { + for (uint32 j = k / 2; j >= 2048; j /= 2) + { + data.LoopK = k; + data.LoopJ = j; + context->UpdateCB(_cb, &data); + + context->DispatchIndirect(_outerSortCS, _dispatchArgsBuffer, indirectArgsOffset); + indirectArgsOffset += sizeof(GPUDispatchIndirectArgs); + } + + context->DispatchIndirect(_innerSortCS, _dispatchArgsBuffer, indirectArgsOffset); indirectArgsOffset += 
sizeof(GPUDispatchIndirectArgs); } - - context->DispatchIndirect(_innerSortCS, _dispatchArgsBuffer, indirectArgsOffset); - indirectArgsOffset += sizeof(GPUDispatchIndirectArgs); } context->ResetUA(); diff --git a/Source/Engine/Renderer/Utils/BitonicSort.h b/Source/Engine/Renderer/Utils/BitonicSort.h index 99069e182..4280d5965 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.h +++ b/Source/Engine/Renderer/Utils/BitonicSort.h @@ -18,7 +18,7 @@ private: GPUBuffer* _dispatchArgsBuffer = nullptr; GPUConstantBuffer* _cb; GPUShaderProgramCS* _indirectArgsCS; - GPUShaderProgramCS* _preSortCS; + ComputeShaderPermutation<2> _preSortCS; GPUShaderProgramCS* _innerSortCS; GPUShaderProgramCS* _outerSortCS; GPUShaderProgramCS* _copyIndicesCS; @@ -46,7 +46,7 @@ public: #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) { - _preSortCS = nullptr; + _preSortCS.Clear(); _innerSortCS = nullptr; _outerSortCS = nullptr; invalidateResources(); diff --git a/Source/Shaders/BitonicSort.shader b/Source/Shaders/BitonicSort.shader index 9e49a8185..0f9d5e656 100644 --- a/Source/Shaders/BitonicSort.shader +++ b/Source/Shaders/BitonicSort.shader @@ -3,6 +3,10 @@ #include "./Flax/Common.hlsl" #include "./Flax/Math.hlsl" +#ifndef THREAD_GROUP_SIZE +#define THREAD_GROUP_SIZE 1024 +#endif + struct Item { float Key; @@ -36,14 +40,14 @@ uint InsertOneBit(uint value, uint oneBitMask) // (effectively a negation) or leave the value alone. When the KeySign is // 1, we are sorting descending, so when A < B, they should swap. For an // ascending sort, -A < -B should swap. 
-bool ShouldSwap(Item a, Item b, float keySign) +bool ShouldSwap(Item a, Item b) { //return (a ^ NullItem) < (b ^ NullItem); //return (a.Key) < (b.Key); - return (a.Key * keySign) < (b.Key * keySign); + return (a.Key * KeySign) < (b.Key * KeySign); //return asfloat(a) < asfloat(b); - //return (asfloat(a) * keySign) < (asfloat(b) * keySign); + //return (asfloat(a) * KeySign) < (asfloat(b) * KeySign); } #ifdef _CS_IndirectArgs @@ -91,7 +95,7 @@ void CS_IndirectArgs(uint groupIndex : SV_GroupIndex) RWStructuredBuffer SortBuffer : register(u0); -groupshared Item SortData[2048]; +groupshared Item SortData[THREAD_GROUP_SIZE * 2]; void LoadItem(uint element, uint count) { @@ -106,7 +110,7 @@ void LoadItem(uint element, uint count) item.Key = NullItemKey; item.Value = NullItemValue; } - SortData[element & 2047] = item; + SortData[element & (THREAD_GROUP_SIZE * 2 - 1)] = item; } void StoreItem(uint element, uint count) @@ -122,23 +126,24 @@ void StoreItem(uint element, uint count) #ifdef _CS_PreSort META_CS(true, FEATURE_LEVEL_SM5) -[numthreads(1024, 1, 1)] +META_PERMUTATION_1(THREAD_GROUP_SIZE=1024) +META_PERMUTATION_1(THREAD_GROUP_SIZE=64) +[numthreads(THREAD_GROUP_SIZE, 1, 1)] void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) { // Item index of the start of this group - const uint groupStart = groupID.x * 2048; + const uint groupStart = groupID.x * (THREAD_GROUP_SIZE * 2); // Actual number of items that need sorting const uint count = CounterBuffer.Load(CounterOffset); LoadItem(groupStart + groupIndex, count); - LoadItem(groupStart + groupIndex + 1024, count); + LoadItem(groupStart + groupIndex + THREAD_GROUP_SIZE, count); GroupMemoryBarrierWithGroupSync(); - float keySign = KeySign; UNROLL - for (uint k = 2; k <= 2048; k <<= 1) + for (uint k = 2; k <= THREAD_GROUP_SIZE * 2; k <<= 1) { for (uint j = k / 2; j > 0; j /= 2) { @@ -148,7 +153,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) Item a = SortData[index1]; 
Item b = SortData[index2]; - if (ShouldSwap(a, b, keySign)) + if (ShouldSwap(a, b)) { // Swap the items SortData[index1] = b; @@ -161,7 +166,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) // Write sorted results to memory StoreItem(groupStart + groupIndex, count); - StoreItem(groupStart + groupIndex + 1024, count); + StoreItem(groupStart + groupIndex + THREAD_GROUP_SIZE, count); } #endif @@ -169,23 +174,22 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) #ifdef _CS_InnerSort META_CS(true, FEATURE_LEVEL_SM5) -[numthreads(1024, 1, 1)] +[numthreads(THREAD_GROUP_SIZE, 1, 1)] void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) { const uint count = CounterBuffer.Load(CounterOffset); // Item index of the start of this group - const uint groupStart = groupID.x * 2048; + const uint groupStart = groupID.x * (THREAD_GROUP_SIZE * 2); // Load from memory into LDS to prepare sort LoadItem(groupStart + groupIndex, count); - LoadItem(groupStart + groupIndex + 1024, count); + LoadItem(groupStart + groupIndex + THREAD_GROUP_SIZE, count); GroupMemoryBarrierWithGroupSync(); - float keySign = KeySign; UNROLL - for (uint j = 1024; j > 0; j /= 2) + for (uint j = THREAD_GROUP_SIZE; j > 0; j /= 2) { uint index2 = InsertOneBit(groupIndex, j); uint index1 = index2 ^ j; @@ -193,7 +197,7 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) Item a = SortData[index1]; Item b = SortData[index2]; - if (ShouldSwap(a, b, keySign)) + if (ShouldSwap(a, b)) { // Swap the items SortData[index1] = b; @@ -204,7 +208,7 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) } StoreItem(groupStart + groupIndex, count); - StoreItem(groupStart + groupIndex + 1024, count); + StoreItem(groupStart + groupIndex + THREAD_GROUP_SIZE, count); } #endif @@ -229,8 +233,7 @@ void CS_OuterSort(uint3 dispatchThreadId : SV_DispatchThreadID) Item a = SortBuffer[index1]; Item b = 
SortBuffer[index2]; - float keySign = KeySign; - if (ShouldSwap(a, b, keySign)) + if (ShouldSwap(a, b)) { // Swap the items SortBuffer[index1] = b; From 854f3acd4c80ce5c6cb0f92bb661ea62bb1de899 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 8 Aug 2025 18:24:44 +0200 Subject: [PATCH 170/211] Optimize GPU particles Bitonic sort to use separate buffers for indices and keys to avoid additional buffer copy --- Content/Shaders/BitonicSort.flax | 4 +- Content/Shaders/GPUParticlesSorting.flax | 4 +- Source/Engine/Particles/Particles.cpp | 128 +++++++++---------- Source/Engine/Particles/ParticlesData.cpp | 16 +-- Source/Engine/Particles/ParticlesData.h | 2 +- Source/Engine/Renderer/Utils/BitonicSort.cpp | 43 ++----- Source/Engine/Renderer/Utils/BitonicSort.h | 8 +- Source/Shaders/BitonicSort.shader | 63 ++++----- Source/Shaders/GPUParticlesSorting.shader | 16 +-- 9 files changed, 115 insertions(+), 169 deletions(-) diff --git a/Content/Shaders/BitonicSort.flax b/Content/Shaders/BitonicSort.flax index 69d773379..4d388b3fc 100644 --- a/Content/Shaders/BitonicSort.flax +++ b/Content/Shaders/BitonicSort.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07d45b7f2085a28938e3bef090e259c0698a1987f9cd69df952168524ce07193 -size 6877 +oid sha256:190867e40ef793168988f358edddeb92819cc4f972f4cf9ac34cc764a06eb6e3 +size 6824 diff --git a/Content/Shaders/GPUParticlesSorting.flax b/Content/Shaders/GPUParticlesSorting.flax index 2045fd649..35cebf7b6 100644 --- a/Content/Shaders/GPUParticlesSorting.flax +++ b/Content/Shaders/GPUParticlesSorting.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a16a973f4be075f8531a1b1551e33423b014da1e8b348f2672464ee21692e57a -size 2556 +oid sha256:80ed5f51cd982ea521e3588708db54d79b905ee41e88cfd41eff976b9b50514a +size 2518 diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 2423feed2..dc2d9054d 100644 --- a/Source/Engine/Particles/Particles.cpp +++ 
b/Source/Engine/Particles/Particles.cpp @@ -848,81 +848,68 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) context->BindCB(0, GPUParticlesSortingCB); // Generate sort keys for each particle - for (const GPUEmitterDraw& draw : GPUEmitterDraws) { - if (!draw.Sorting) - continue; - ASSERT(draw.Buffer->GPU.SortingKeysBuffer); - - // Generate sort keys for particles - ParticleEmitter* emitter = draw.Buffer->Emitter; - for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) + PROFILE_GPU("Gen Sort Keys"); + for (const GPUEmitterDraw& draw : GPUEmitterDraws) { - auto module = emitter->Graph.SortModules[moduleIndex]; - const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; - - // Generate sorting keys based on sorting mode - GPUParticlesSortingData data; - data.ParticleCounterOffset = draw.Buffer->GPU.ParticleCounterOffset; - data.ParticleStride = draw.Buffer->Stride; - data.ParticleCapacity = draw.Buffer->Capacity; - int32 permutationIndex; - switch (sortMode) + if (!draw.Sorting) + continue; + ASSERT(draw.Buffer->GPU.SortingKeys); + ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) { - case ParticleSortMode::ViewDepth: - { - permutationIndex = 0; - data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); - const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection(); - if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) + auto module = emitter->Graph.SortModules[moduleIndex]; + // TODO: add support for module->SortedIndicesOffset (multiple sort modules) + const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; + GPUParticlesSortingData data; + data.ParticleCounterOffset = draw.Buffer->GPU.ParticleCounterOffset; + data.ParticleStride = draw.Buffer->Stride; + data.ParticleCapacity = draw.Buffer->Capacity; + int32 permutationIndex; + switch (sortMode) { - Matrix 
matrix; - Matrix::Multiply(draw.DrawCall.World, viewProjection, matrix); - Matrix::Transpose(matrix, data.PositionTransform); - } - else + case ParticleSortMode::ViewDepth: { - Matrix::Transpose(viewProjection, data.PositionTransform); - } - break; - } - case ParticleSortMode::ViewDistance: - { - permutationIndex = 1; - data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); - data.ViewPosition = renderContextBatch.GetMainContext().View.Position; - if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) - { - Matrix::Transpose(draw.DrawCall.World, data.PositionTransform); - } - else - { - Matrix::Transpose(Matrix::Identity, data.PositionTransform); - } - break; - } - case ParticleSortMode::CustomAscending: - case ParticleSortMode::CustomDescending: - { - permutationIndex = 2; - int32 attributeIdx = module->Attributes[0]; - if (attributeIdx == -1) + permutationIndex = 0; + data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); + const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection(); + if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) + Matrix::Transpose(draw.DrawCall.World * viewProjection, data.PositionTransform); + else + Matrix::Transpose(viewProjection, data.PositionTransform); break; - data.CustomOffset = emitter->Graph.Layout.Attributes[attributeIdx].Offset; - break; + } + case ParticleSortMode::ViewDistance: + { + permutationIndex = 1; + data.PositionOffset = emitter->Graph.GetPositionAttributeOffset(); + data.ViewPosition = renderContextBatch.GetMainContext().View.Position; + if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) + Matrix::Transpose(draw.DrawCall.World, data.PositionTransform); + else + Matrix::Transpose(Matrix::Identity, data.PositionTransform); + break; + } + case ParticleSortMode::CustomAscending: + case ParticleSortMode::CustomDescending: + { + permutationIndex = 2; + int32 attributeIdx = module->Attributes[0]; + if (attributeIdx == -1) + break; + 
data.CustomOffset = emitter->Graph.Layout.Attributes[attributeIdx].Offset; + break; + } + } + context->UpdateCB(GPUParticlesSortingCB, &data); + context->BindSR(0, draw.Buffer->GPU.Buffer->View()); + context->BindUA(0, draw.Buffer->GPU.SortedIndices->View()); + context->BindUA(1, draw.Buffer->GPU.SortingKeys->View()); + const int32 threadGroupSize = 1024; + context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(draw.Buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1); } -#if !BUILD_RELEASE - default: - CRASH; - return; -#endif - } - context->UpdateCB(GPUParticlesSortingCB, &data); - context->BindSR(0, draw.Buffer->GPU.Buffer->View()); - context->BindUA(0, draw.Buffer->GPU.SortingKeysBuffer->View()); - const int32 threadGroupSize = 1024; - context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(draw.Buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1); } + context->ResetUA(); } // Run sorting @@ -930,17 +917,18 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) { if (!draw.Sorting) continue; - ASSERT(draw.Buffer->GPU.SortingKeysBuffer); // Execute all sorting modules ParticleEmitter* emitter = draw.Buffer->Emitter; for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) { auto module = emitter->Graph.SortModules[moduleIndex]; + // TODO: add support for module->SortedIndicesOffset (multiple sort modules) const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; bool sortAscending = sortMode == ParticleSortMode::CustomAscending; - BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.ParticlesCountMax); - // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier (run all sorting in parallel) + BitonicSort::Instance()->Sort(context, 
draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.SortingKeys, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.ParticlesCountMax); + // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier (all sorting in parallel) + // TODO: run small emitters sorting (less than 2k particles) sorting in separate loop as pass without UAV barriers (all sorting in parallel) } } } diff --git a/Source/Engine/Particles/ParticlesData.cpp b/Source/Engine/Particles/ParticlesData.cpp index 074cc73d6..226d06d3d 100644 --- a/Source/Engine/Particles/ParticlesData.cpp +++ b/Source/Engine/Particles/ParticlesData.cpp @@ -98,7 +98,7 @@ ParticleBuffer::~ParticleBuffer() { SAFE_DELETE_GPU_RESOURCE(GPU.Buffer); SAFE_DELETE_GPU_RESOURCE(GPU.BufferSecondary); - SAFE_DELETE_GPU_RESOURCE(GPU.SortingKeysBuffer); + SAFE_DELETE_GPU_RESOURCE(GPU.SortingKeys); SAFE_DELETE_GPU_RESOURCE(GPU.SortedIndices); SAFE_DELETE(GPU.RibbonIndexBufferDynamic); SAFE_DELETE(GPU.RibbonVertexBufferDynamic); @@ -161,7 +161,7 @@ bool ParticleBuffer::Init(ParticleEmitter* emitter) bool ParticleBuffer::AllocateSortBuffer() { - ASSERT(Emitter && GPU.SortedIndices == nullptr && GPU.SortingKeysBuffer == nullptr); + ASSERT(Emitter && GPU.SortedIndices == nullptr && GPU.SortingKeys == nullptr); if (Emitter->Graph.SortModules.IsEmpty()) return false; @@ -170,7 +170,7 @@ bool ParticleBuffer::AllocateSortBuffer() case ParticlesSimulationMode::CPU: { const int32 sortedIndicesSize = Capacity * sizeof(uint32) * Emitter->Graph.SortModules.Count(); - GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("SortedIndices")); + GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices")); if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesSize, GPUBufferFlags::ShaderResource, PixelFormat::R32_UInt, nullptr, sizeof(uint32), GPUResourceUsage::Dynamic))) return true; break; @@ -178,12 +178,12 @@ 
bool ParticleBuffer::AllocateSortBuffer() #if COMPILE_WITH_GPU_PARTICLES case ParticlesSimulationMode::GPU: { - const int32 sortedIndicesSize = Capacity * sizeof(uint32) * Emitter->Graph.SortModules.Count(); - GPU.SortingKeysBuffer = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortingKeysBuffer")); - if (GPU.SortingKeysBuffer->Init(GPUBufferDescription::Structured(Capacity, sizeof(float) + sizeof(uint32), true))) + const int32 sortedIndicesCount = Capacity * Emitter->Graph.SortModules.Count(); + GPU.SortingKeys = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortingKeys")); + if (GPU.SortingKeys->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(float), GPUBufferFlags::UnorderedAccess, PixelFormat::R32_Float, nullptr, sizeof(float)))) return true; - GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("SortedIndices")); - if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesSize, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)))) + GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices")); + if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)))) return true; break; } diff --git a/Source/Engine/Particles/ParticlesData.h b/Source/Engine/Particles/ParticlesData.h index 5a5ebcba4..521138f37 100644 --- a/Source/Engine/Particles/ParticlesData.h +++ b/Source/Engine/Particles/ParticlesData.h @@ -206,7 +206,7 @@ public: /// /// The GPU particles sorting buffer. Contains structure of particle index and the sorting key for every particle. Used to sort particles. /// - GPUBuffer* SortingKeysBuffer = nullptr; + GPUBuffer* SortingKeys = nullptr; /// /// The particles indices buffer (GPU side). 
diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index ed7ece05b..cd0f627f5 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -8,7 +8,7 @@ GPU_CB_STRUCT(Data { float NullItemKey; - uint32 NullItemValue; + uint32 NullItemIndex; uint32 CounterOffset; uint32 MaxIterations; uint32 LoopK; @@ -47,7 +47,6 @@ bool BitonicSort::Init() bool BitonicSort::setupResources() { - // Check if shader has not been loaded if (!_shader->IsLoaded()) return true; const auto shader = _shader->GetShader(); @@ -59,14 +58,12 @@ bool BitonicSort::setupResources() _preSortCS.Get(shader, "CS_PreSort"); _innerSortCS = shader->GetCS("CS_InnerSort"); _outerSortCS = shader->GetCS("CS_OuterSort"); - _copyIndicesCS = shader->GetCS("CS_CopyIndices"); return false; } void BitonicSort::Dispose() { - // Base RendererPass::Dispose(); // Cleanup @@ -76,17 +73,16 @@ void BitonicSort::Dispose() _preSortCS.Clear(); _innerSortCS = nullptr; _outerSortCS = nullptr; - _copyIndicesCS = nullptr; _shader = nullptr; } -void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer, uint32 maxElements) +void BitonicSort::Sort(GPUContext* context, GPUBuffer* indicesBuffer, GPUBuffer* keysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, int32 maxElements) { - ASSERT(context && sortingKeysBuffer && countBuffer); + ASSERT(context && indicesBuffer && keysBuffer && countBuffer); if (checkIfSkipPass()) return; PROFILE_GPU_CPU("Bitonic Sort"); - uint32 maxNumElements = sortingKeysBuffer->GetSize() / sizeof(uint64); + uint32 maxNumElements = indicesBuffer->GetElementsCount(); if (maxElements > 0 && maxElements < maxNumElements) maxNumElements = maxElements; const uint32 alignedMaxNumElements = Math::RoundUpToPowerOf2(maxNumElements); @@ -96,7 +92,7 @@ void 
BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf Data data; data.CounterOffset = counterOffset; data.NullItemKey = sortAscending ? MAX_float : -MAX_float; - data.NullItemValue = 0; + data.NullItemIndex = 0; data.KeySign = sortAscending ? -1.0f : 1.0f; data.MaxIterations = maxIterations; data.LoopK = 0; @@ -110,7 +106,8 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf { // Use pre-sort with smaller thread group size (eg. for small particle emitters sorting) const int32 permutation = maxNumElements < 128 ? 1 : 0; - context->BindUA(0, sortingKeysBuffer->View()); + context->BindUA(0, indicesBuffer->View()); + context->BindUA(1, keysBuffer->View()); context->Dispatch(_preSortCS.Get(permutation), 1, 1, 1); } else @@ -120,7 +117,8 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf context->Dispatch(_indirectArgsCS, 1, 1, 1); // Pre-Sort the buffer up to k = 2048 (this also pads the list with invalid indices that will drift to the end of the sorted list) - context->BindUA(0, sortingKeysBuffer->View()); + context->BindUA(0, indicesBuffer->View()); + context->BindUA(1, keysBuffer->View()); context->DispatchIndirect(_preSortCS.Get(0), _dispatchArgsBuffer, 0); // We have already pre-sorted up through k = 2048 when first writing our list, so we continue sorting with k = 4096 @@ -144,27 +142,4 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf } context->ResetUA(); - - if (sortedIndicesBuffer) - { - // Copy indices to another buffer -#if !BUILD_RELEASE - switch (sortedIndicesBuffer->GetDescription().Format) - { - case PixelFormat::R32_UInt: - case PixelFormat::R16_UInt: - case PixelFormat::R8_UInt: - break; - default: - LOG(Warning, "Invalid format {0} of sortedIndicesBuffer for BitonicSort. 
It needs to be UInt type.", (int32)sortedIndicesBuffer->GetDescription().Format); - } -#endif - context->BindSR(1, sortingKeysBuffer->View()); - context->BindUA(0, sortedIndicesBuffer->View()); - // TODO: use indirect dispatch to match the items count for copy - context->Dispatch(_copyIndicesCS, (alignedMaxNumElements + 1023) / 1024, 1, 1); - } - - context->ResetUA(); - context->ResetSR(); } diff --git a/Source/Engine/Renderer/Utils/BitonicSort.h b/Source/Engine/Renderer/Utils/BitonicSort.h index 4280d5965..1fd5d50cd 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.h +++ b/Source/Engine/Renderer/Utils/BitonicSort.h @@ -26,16 +26,16 @@ private: public: /// - /// Sorts the specified buffer of index-key pairs. + /// Sorts the specified buffers of index-key pairs. /// /// The GPU context. - /// The sorting keys buffer. Used as a structured buffer of type Item (see above). + /// The sorting indices buffer with an index for each item (sequence of: 0, 1, 2, 3...). After sorting represents actual items order based on their keys. Valid for uint value types - used as RWBuffer. + /// The sorting keys buffer with a sort value for each item (must match order of items in indicesBuffer). Valid for float value types - used as RWBuffer. /// The buffer that contains a items counter value. /// The offset into counter buffer to find count for this list. Must be a multiple of 4 bytes. /// True to sort in ascending order (smallest to largest), otherwise false to sort in descending order. - /// The output buffer for sorted values extracted from the sorted sortingKeysBuffer after algorithm run. Valid for uint value types - used as RWBuffer. /// Optional upper limit of elements to sort. Cna be used to optimize indirect dispatches allocation. If non-zero, then it gets calculated based on the input item buffer size. 
- void Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer, uint32 maxElements = 0); + void Sort(GPUContext* context, GPUBuffer* indicesBuffer, GPUBuffer* keysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, int32 maxElements = 0); public: diff --git a/Source/Shaders/BitonicSort.shader b/Source/Shaders/BitonicSort.shader index 0f9d5e656..6538ff7ff 100644 --- a/Source/Shaders/BitonicSort.shader +++ b/Source/Shaders/BitonicSort.shader @@ -10,12 +10,12 @@ struct Item { float Key; - uint Value; + uint Index; }; META_CB_BEGIN(0, Data) float NullItemKey; -uint NullItemValue; +uint NullItemIndex; uint CounterOffset; uint MaxIterations; uint LoopK; @@ -40,12 +40,12 @@ uint InsertOneBit(uint value, uint oneBitMask) // (effectively a negation) or leave the value alone. When the KeySign is // 1, we are sorting descending, so when A < B, they should swap. For an // ascending sort, -A < -B should swap. 
-bool ShouldSwap(Item a, Item b) +bool ShouldSwap(float a, float b) { //return (a ^ NullItem) < (b ^ NullItem); - //return (a.Key) < (b.Key); - return (a.Key * KeySign) < (b.Key * KeySign); + //return (a) < (b); + return (a * KeySign) < (b * KeySign); //return asfloat(a) < asfloat(b); //return (asfloat(a) * KeySign) < (asfloat(b) * KeySign); } @@ -93,7 +93,8 @@ void CS_IndirectArgs(uint groupIndex : SV_GroupIndex) #if defined(_CS_PreSort) || defined(_CS_InnerSort) -RWStructuredBuffer SortBuffer : register(u0); +RWBuffer SortedIndices : register(u0); +RWBuffer SortingKeys : register(u1); groupshared Item SortData[THREAD_GROUP_SIZE * 2]; @@ -103,12 +104,13 @@ void LoadItem(uint element, uint count) Item item; if (element < count) { - item = SortBuffer[element]; + item.Key = SortingKeys[element]; + item.Index = SortedIndices[element]; } else { item.Key = NullItemKey; - item.Value = NullItemValue; + item.Index = NullItemIndex; } SortData[element & (THREAD_GROUP_SIZE * 2 - 1)] = item; } @@ -117,7 +119,9 @@ void StoreItem(uint element, uint count) { if (element < count) { - SortBuffer[element] = SortData[element & 2047]; + Item item = SortData[element & ((THREAD_GROUP_SIZE * 2 - 1))]; + SortingKeys[element] = item.Key; + SortedIndices[element] = item.Index; } } @@ -153,7 +157,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) Item a = SortData[index1]; Item b = SortData[index2]; - if (ShouldSwap(a, b)) + if (ShouldSwap(a.Key, b.Key)) { // Swap the items SortData[index1] = b; @@ -197,7 +201,7 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) Item a = SortData[index1]; Item b = SortData[index2]; - if (ShouldSwap(a, b)) + if (ShouldSwap(a.Key, b.Key)) { // Swap the items SortData[index1] = b; @@ -215,7 +219,8 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) #ifdef _CS_OuterSort -RWStructuredBuffer SortBuffer : register(u0); +RWBuffer SortedIndices : register(u0); +RWBuffer 
SortingKeys : register(u1); META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1024, 1, 1)] @@ -230,35 +235,19 @@ void CS_OuterSort(uint3 dispatchThreadId : SV_DispatchThreadID) if (index2 >= count) return; - Item a = SortBuffer[index1]; - Item b = SortBuffer[index2]; + float aKey = SortingKeys[index1]; + float bKey = SortingKeys[index2]; - if (ShouldSwap(a, b)) + if (ShouldSwap(aKey, bKey)) { // Swap the items - SortBuffer[index1] = b; - SortBuffer[index2] = a; + SortingKeys[index1] = bKey; + SortingKeys[index2] = aKey; + uint aIndex = SortedIndices[index1]; + uint bIndex = SortedIndices[index2]; + SortedIndices[index1] = bIndex; + SortedIndices[index2] = aIndex; } } #endif - -#ifdef _CS_CopyIndices - -StructuredBuffer SortBuffer : register(t1); -RWBuffer SortedIndices : register(u0); - -META_CS(true, FEATURE_LEVEL_SM5) -[numthreads(1024, 1, 1)] -void CS_CopyIndices(uint3 dispatchThreadId : SV_DispatchThreadID) -{ - const uint count = CounterBuffer.Load(CounterOffset); - uint index = dispatchThreadId.x; - if (index >= count) - return; - - Item element = SortBuffer[index]; - SortedIndices[index] = element.Value; -} - -#endif diff --git a/Source/Shaders/GPUParticlesSorting.shader b/Source/Shaders/GPUParticlesSorting.shader index 395172327..113096421 100644 --- a/Source/Shaders/GPUParticlesSorting.shader +++ b/Source/Shaders/GPUParticlesSorting.shader @@ -20,13 +20,9 @@ META_CB_END // Particles data buffer ByteAddressBuffer ParticlesData : register(t0); -// Output sorting keys buffer (index + key) -struct Item -{ - float Key; - uint Value; -}; -RWStructuredBuffer SortingKeys : register(u0); +// Sorting data (per-particle) +RWBuffer SortedIndices : register(u0); +RWBuffer SortingKeys : register(u1); float GetParticleFloat(uint particleIndex, int offset) { @@ -78,8 +74,6 @@ void CS_Sort(uint3 dispatchThreadId : SV_DispatchThreadID) #endif // Write sorting index-key pair - Item item; - item.Key = sortKey; - item.Value = index; - SortingKeys[index] = item; + 
SortedIndices[index] = index; + SortingKeys[index] = sortKey; } From 3907bc49579dfa3cb293103b81cd465fdf4bde7b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 8 Aug 2025 18:46:10 +0200 Subject: [PATCH 171/211] Optimize sorted particle indices format to 16-bit for small emitters --- Source/Engine/Particles/Particles.cpp | 44 +++++++++++++++++------ Source/Engine/Particles/ParticlesData.cpp | 17 ++++++--- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index dc2d9054d..0ab1e0792 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -208,26 +208,27 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff const auto sortMode = static_cast(module->Values[2].AsInt); const int32 stride = buffer->Stride; const int32 listSize = buffer->CPU.Count; + const int32 indicesByteSize = listSize * buffer->GPU.SortedIndices->GetStride(); Array sortingKeysList[4]; - Array sortingIndicesList[2]; + Array sortingIndicesList[2]; uint32* sortingKeys[2]; - int32* sortingIndices[2]; + void* sortingIndices[2]; if (listSize < 500) { // Use fast stack allocator from RenderList auto& memory = renderContextBatch.GetMainContext().List->Memory; sortingKeys[0] = memory.Allocate(listSize); sortingKeys[1] = memory.Allocate(listSize); - sortingIndices[0] = memory.Allocate(listSize); - sortingIndices[1] = memory.Allocate(listSize); + sortingIndices[0] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); + sortingIndices[1] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); } else { // Use shared pooled memory from RendererAllocation sortingKeysList[0].Resize(listSize); sortingKeysList[1].Resize(listSize); - sortingIndicesList[0].Resize(listSize); - sortingIndicesList[1].Resize(listSize); + sortingIndicesList[0].Resize(indicesByteSize); + sortingIndicesList[1].Resize(indicesByteSize); sortingKeys[0] = 
sortingKeysList[0].Get(); sortingKeys[1] = sortingKeysList[1].Get(); sortingIndices[0] = sortingIndicesList[0].Get(); @@ -314,21 +315,42 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff } // Generate sorting indices - int32* sortedIndices = sortingIndices[0]; + void* sortedIndices = sortingIndices[0]; + switch (buffer->GPU.SortedIndices->GetFormat()) { + case PixelFormat::R16_UInt: for (int32 i = 0; i < listSize; i++) - sortedIndices[i] = i; + ((uint16*)sortedIndices)[i] = i; + break; + case PixelFormat::R32_UInt: + for (int32 i = 0; i < listSize; i++) + ((uint32*)sortedIndices)[i] = i; + break; } // Sort keys with indices + switch (buffer->GPU.SortedIndices->GetFormat()) { - Sorting::RadixSort(sortedKeys, sortedIndices, sortingKeys[1], sortingIndices[1], listSize); + case PixelFormat::R16_UInt: + { + uint16* sortedIndicesTyped = (uint16*)sortedIndices; + Sorting::RadixSort(sortedKeys, sortedIndicesTyped, sortingKeys[1], (uint16*)sortingIndices[1], listSize); + sortedIndices = sortedIndicesTyped; + break; + } + case PixelFormat::R32_UInt: + { + uint32* sortedIndicesTyped = (uint32*)sortedIndices; + Sorting::RadixSort(sortedKeys, sortedIndicesTyped, sortingKeys[1], (uint32*)sortingIndices[1], listSize); + sortedIndices = sortedIndicesTyped; + break; + } } // Upload CPU particles indices { RenderContext::GPULocker.Lock(); - context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, listSize * sizeof(uint32), sortedIndicesOffset); + context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, indicesByteSize, sortedIndicesOffset); RenderContext::GPULocker.Unlock(); } } @@ -1312,7 +1334,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context) // Pre-pass with buffers setup { - PROFILE_GPU_CPU("Sim"); + PROFILE_GPU_CPU_NAMED("Sim"); for (GPUSim& sim : sims) { sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); diff --git a/Source/Engine/Particles/ParticlesData.cpp 
b/Source/Engine/Particles/ParticlesData.cpp index 226d06d3d..287a97b0b 100644 --- a/Source/Engine/Particles/ParticlesData.cpp +++ b/Source/Engine/Particles/ParticlesData.cpp @@ -164,26 +164,33 @@ bool ParticleBuffer::AllocateSortBuffer() ASSERT(Emitter && GPU.SortedIndices == nullptr && GPU.SortingKeys == nullptr); if (Emitter->Graph.SortModules.IsEmpty()) return false; + const int32 sortedIndicesCount = Capacity * Emitter->Graph.SortModules.Count(); + uint32 indexSize = sizeof(uint32); + PixelFormat indexFormat = PixelFormat::R32_UInt; + if (Capacity <= MAX_uint16) + { + // 16-bit indices + indexSize = sizeof(uint16); + indexFormat = PixelFormat::R16_UInt; + } switch (Mode) { case ParticlesSimulationMode::CPU: { - const int32 sortedIndicesSize = Capacity * sizeof(uint32) * Emitter->Graph.SortModules.Count(); GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices")); - if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesSize, GPUBufferFlags::ShaderResource, PixelFormat::R32_UInt, nullptr, sizeof(uint32), GPUResourceUsage::Dynamic))) + if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * indexSize, GPUBufferFlags::ShaderResource, indexFormat, nullptr, indexSize, GPUResourceUsage::Dynamic))) return true; break; } #if COMPILE_WITH_GPU_PARTICLES case ParticlesSimulationMode::GPU: { - const int32 sortedIndicesCount = Capacity * Emitter->Graph.SortModules.Count(); GPU.SortingKeys = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortingKeys")); - if (GPU.SortingKeys->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(float), GPUBufferFlags::UnorderedAccess, PixelFormat::R32_Float, nullptr, sizeof(float)))) + if (GPU.SortingKeys->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(float), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_Float, nullptr, sizeof(float)))) return true; GPU.SortedIndices = 
GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices")); - if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)))) + if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * indexSize, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, indexFormat, nullptr, indexSize))) return true; break; } From b5a431d2f51ed06f0d6aa7976f628545e8369f7f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 9 Aug 2025 23:57:43 +0200 Subject: [PATCH 172/211] Add explicit GPU resource transitions, memory and compute barriers --- Source/Engine/Graphics/GPUContext.h | 27 +++++ Source/Engine/Graphics/GPUPass.h | 68 +++++++++++ Source/Engine/Graphics/GPUResourceAccess.h | 29 +++++ .../DirectX/DX11/GPUContextDX11.cpp | 4 +- .../DirectX/DX12/GPUContextDX12.cpp | 59 +++++++++- .../DirectX/DX12/GPUContextDX12.h | 4 +- .../GraphicsDevice/Vulkan/GPUBufferVulkan.cpp | 4 +- .../Vulkan/GPUContextVulkan.cpp | 109 ++++++++++++------ .../GraphicsDevice/Vulkan/GPUContextVulkan.h | 12 +- .../Vulkan/RenderToolsVulkan.cpp | 75 ++++++++++++ .../GraphicsDevice/Vulkan/RenderToolsVulkan.h | 8 ++ Source/Engine/Renderer/Utils/BitonicSort.cpp | 2 +- 12 files changed, 353 insertions(+), 48 deletions(-) create mode 100644 Source/Engine/Graphics/GPUPass.h create mode 100644 Source/Engine/Graphics/GPUResourceAccess.h diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index a042e3f83..4f1306567 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -9,6 +9,11 @@ #include "PixelFormat.h" #include "Config.h" +#if PLATFORM_WIN32 +// Fix nasty Win32 define garbage +#undef MemoryBarrier +#endif + class GPUConstantBuffer; class GPUShaderProgramCS; class GPUBuffer; @@ -21,6 +26,8 @@ class GPUResourceView; class GPUTextureView; class GPUBufferView; class 
GPUVertexLayout; +struct GPUPass; +enum class GPUResourceAccess; // Gets the GPU texture view. Checks if pointer is not null and texture has one or more mip levels loaded. #define GET_TEXTURE_VIEW_SAFE(t) (t && t->ResidentMipLevels() > 0 ? t->View() : nullptr) @@ -632,4 +639,24 @@ public: /// Forces graphics backend to rebind descriptors after command list was used by external graphics library. /// virtual void ForceRebindDescriptors(); + +protected: + friend GPUPass; + int32 _pass = 0; + +public: + // Performs resource state transition into a specific access (mask). + virtual void Transition(GPUResource* resource, GPUResourceAccess access) + { + } + + // Inserts a global memory barrier on data copies between resources. + virtual void MemoryBarrier() + { + } + + // Begins or ends unordered access resource overlap region that allows running different compute shader dispatches simultaneously. + virtual void OverlapUA(bool end) + { + } }; diff --git a/Source/Engine/Graphics/GPUPass.h b/Source/Engine/Graphics/GPUPass.h new file mode 100644 index 000000000..59f8608e2 --- /dev/null +++ b/Source/Engine/Graphics/GPUPass.h @@ -0,0 +1,68 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#include "GPUContext.h" +#include "Engine/Graphics/GPUResourceAccess.h" + +/// +/// Base for GPU rendering passes that control low-level memory access and GPU resources states with usage to optimize rendering. +/// +struct FLAXENGINE_API GPUPass +{ + NON_COPYABLE(GPUPass); + + GPUContext* Context; + + GPUPass(GPUContext* context) + : Context(context) + { + Context->_pass++; + } + + ~GPUPass() + { + Context->_pass--; + } + + // Performs resource state transition into a specific access (mask). Can be done preemptively in the prologue of the pass to execute more efficient barriers. 
+ void Transition(GPUResource* resource, GPUResourceAccess access) + { + Context->Transition(resource, access); + } +}; + +/// +/// GPU pass that manually controls memory barriers and cache flushes when performing batched copy/upload operations with GPU context. Can be used to optimize GPU buffers usage by running different copy operations simultaneously. +/// +struct FLAXENGINE_API GPUMemoryPass : GPUPass +{ + GPUMemoryPass(GPUContext* context) + : GPUPass(context) + { + } + + ~GPUMemoryPass() + { + Context->MemoryBarrier(); + } +}; + +/// +/// GPU pass that controls memory barriers when performing batched Compute shader dispatches with GPU context. Can be used to optimize GPU utilization by running different dispatches simultaneously (by overlapping work). +/// +struct FLAXENGINE_API GPUComputePass : GPUPass +{ + GPUComputePass(GPUContext* context) + : GPUPass(context) + { + Context->OverlapUA(false); + } + + ~GPUComputePass() + { + Context->OverlapUA(true); + } +}; + +// TODO: add GPUDrawPass for render targets and depth/stencil setup with optimized clear for faster drawing on tiled-GPUs (mobile) diff --git a/Source/Engine/Graphics/GPUResourceAccess.h b/Source/Engine/Graphics/GPUResourceAccess.h new file mode 100644 index 000000000..360da2dbb --- /dev/null +++ b/Source/Engine/Graphics/GPUResourceAccess.h @@ -0,0 +1,29 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Core/Types/BaseTypes.h" + +// GPU resource access flags. Used to describe how resource can be accessed which allows GPU to optimize data layout and memory access. 
+enum class GPUResourceAccess +{ + None = 0, + CopyRead = 1 << 0, + CopyWrite = 1 << 1, + CpuRead = 1 << 2, + CpuWrite = 1 << 3, + DepthRead = 1 << 4, + DepthWrite = 1 << 5, + DepthBuffer = DepthRead | DepthWrite, + RenderTarget = 1 << 6, + UnorderedAccess = 1 << 7, + IndirectArgs = 1 << 8, + ShaderReadCompute = 1 << 9, + ShaderReadPixel = 1 << 10, + ShaderReadNonPixel = 1 << 11, + ShaderReadGraphics = ShaderReadPixel | ShaderReadNonPixel, + Last, + All = (Last << 1) - 1, +}; + +DECLARE_ENUM_OPERATORS(GPUResourceAccess); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 62f9afd3a..3d94cdd96 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -297,7 +297,7 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, const Span(rts[i]); + auto rtDX11 = reinterpret_cast(rts.Get()[i]); rtvs[i] = rtDX11 ? rtDX11->RTV() : nullptr; } int32 rtvsSize = sizeof(ID3D11RenderTargetView*) * rts.Length(); @@ -431,7 +431,7 @@ void GPUContextDX11::BindVB(const Span& vertexBuffers, const uint32* bool vbEdited = false; for (int32 i = 0; i < vertexBuffers.Length(); i++) { - const auto vbDX11 = static_cast(vertexBuffers[i]); + const auto vbDX11 = static_cast(vertexBuffers.Get()[i]); const auto vb = vbDX11 ? 
vbDX11->GetBuffer() : nullptr; vbEdited |= vb != _vbHandles[i]; _vbHandles[i] = vb; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 6d06231ee..88afc5cfb 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -35,6 +35,7 @@ #include "GPUShaderProgramDX12.h" #include "CommandSignatureDX12.h" #include "Engine/Profiler/RenderStats.h" +#include "Engine/Graphics/GPUResourceAccess.h" #include "Engine/Graphics/Shaders/GPUShader.h" #include "Engine/Threading/Threading.h" @@ -51,6 +52,47 @@ inline bool operator!=(const D3D12_INDEX_BUFFER_VIEW& l, const D3D12_INDEX_BUFFE return l.SizeInBytes != r.SizeInBytes || l.Format != r.Format || l.BufferLocation != r.BufferLocation; } +FORCE_INLINE D3D12_RESOURCE_STATES GetResourceState(GPUResourceAccess access) +{ + switch (access) + { + case GPUResourceAccess::None: + return D3D12_RESOURCE_STATE_COMMON; + case GPUResourceAccess::CopyRead: + return D3D12_RESOURCE_STATE_COPY_SOURCE; + case GPUResourceAccess::CopyWrite: + return D3D12_RESOURCE_STATE_COPY_DEST; + case GPUResourceAccess::CpuRead: + return D3D12_RESOURCE_STATE_GENERIC_READ; + case GPUResourceAccess::CpuWrite: + return D3D12_RESOURCE_STATE_COMMON; + case GPUResourceAccess::DepthRead: + return D3D12_RESOURCE_STATE_DEPTH_READ; + case GPUResourceAccess::DepthWrite: + return D3D12_RESOURCE_STATE_DEPTH_WRITE; + case GPUResourceAccess::DepthBuffer: + return D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_DEPTH_WRITE; + case GPUResourceAccess::RenderTarget: + return D3D12_RESOURCE_STATE_RENDER_TARGET; + case GPUResourceAccess::UnorderedAccess: + return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + case GPUResourceAccess::IndirectArgs: + return D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + case GPUResourceAccess::ShaderReadPixel: + //return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // TODO: optimize 
SRV states in flushSRVs to be based on current binding usage slots + case GPUResourceAccess::ShaderReadCompute: + case GPUResourceAccess::ShaderReadNonPixel: + //return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots + case GPUResourceAccess::ShaderReadGraphics: + return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; +#if !BUILD_RELEASE + default: + LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access); +#endif + } + return D3D12_RESOURCE_STATE_COMMON; +} + // Ensure to match the indirect commands arguments layout static_assert(sizeof(GPUDispatchIndirectArgs) == sizeof(D3D12_DISPATCH_ARGUMENTS), "Wrong size of GPUDrawIndirectArgs."); static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountX) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountX), "Wrong offset for GPUDrawIndirectArgs::ThreadGroupCountX"); @@ -1124,7 +1166,8 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun _psDirtyFlag = true; // Insert UAV barrier to ensure proper memory access for multiple sequential dispatches - AddUAVBarrier(); + if (_pass == 0) + AddUAVBarrier(); } void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs) @@ -1158,7 +1201,8 @@ void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf _psDirtyFlag = true; // Insert UAV barrier to ensure proper memory access for multiple sequential dispatches - AddUAVBarrier(); + if (_pass == 0) + AddUAVBarrier(); } void GPUContextDX12::ResolveMultisample(GPUTexture* sourceMultisampleTexture, GPUTexture* destTexture, int32 sourceSubResource, int32 destSubResource, PixelFormat format) @@ -1549,4 +1593,15 @@ void GPUContextDX12::ForceRebindDescriptors() _commandList->SetDescriptorHeaps(ARRAY_COUNT(ppHeaps), ppHeaps); } +void GPUContextDX12::Transition(GPUResource* resource, 
GPUResourceAccess access) +{ + SetResourceState(dynamic_cast(resource), GetResourceState(access)); +} + +void GPUContextDX12::OverlapUA(bool end) +{ + if (end) + AddUAVBarrier(); +} + #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h index 917b68165..4bd1b54a1 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h @@ -21,7 +21,7 @@ class GPUVertexLayoutDX12; /// /// Size of the resource barriers buffer size (will be flushed on overflow) /// -#define DX12_RB_BUFFER_SIZE 16 +#define DX12_RB_BUFFER_SIZE 64 /// /// GPU Commands Context implementation for DirectX 12 @@ -214,6 +214,8 @@ public: void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override; void SetResourceState(GPUResource* resource, uint64 state, int32 subresource) override; void ForceRebindDescriptors() override; + void Transition(GPUResource* resource, GPUResourceAccess access) override; + void OverlapUA(bool end) override; }; #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUBufferVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUBufferVulkan.cpp index e20b2f89e..a1c3d71fb 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUBufferVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUBufferVulkan.cpp @@ -19,7 +19,7 @@ void GPUBufferViewVulkan::Init(GPUDeviceVulkan* device, GPUBufferVulkan* owner, Buffer = buffer; Size = size; - if ((owner->IsShaderResource() && !(owner->GetDescription().Flags & GPUBufferFlags::Structured)) || (usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) == VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) + if ((EnumHasAnyFlags(owner->GetDescription().Flags, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess) && !(owner->GetDescription().Flags & GPUBufferFlags::Structured)) || (usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) 
== VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { VkBufferViewCreateInfo viewInfo; RenderToolsVulkan::ZeroStruct(viewInfo, VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); @@ -103,7 +103,7 @@ bool GPUBufferVulkan::OnInit() bufferInfo.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; if (useUAV || EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::Structured)) bufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - if (useUAV && useSRV) + if (useUAV) bufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; if (EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::Argument)) bufferInfo.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index c36d1acee..430ce5b70 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -78,13 +78,14 @@ const Char* ToString(VkImageLayout layout) void PipelineBarrierVulkan::Execute(const CmdBufferVulkan* cmdBuffer) { ASSERT(cmdBuffer->IsOutsideRenderPass()); - vkCmdPipelineBarrier(cmdBuffer->GetHandle(), SourceStage, DestStage, 0, 0, nullptr, BufferBarriers.Count(), BufferBarriers.Get(), ImageBarriers.Count(), ImageBarriers.Get()); + vkCmdPipelineBarrier(cmdBuffer->GetHandle(), SourceStage, DestStage, 0, MemoryBarriers.Count(), MemoryBarriers.Get(), BufferBarriers.Count(), BufferBarriers.Get(), ImageBarriers.Count(), ImageBarriers.Get()); // Reset SourceStage = 0; DestStage = 0; ImageBarriers.Clear(); BufferBarriers.Clear(); + MemoryBarriers.Clear(); #if VK_ENABLE_BARRIERS_DEBUG ImageBarriersDebug.Clear(); #endif @@ -153,12 +154,7 @@ void GPUContextVulkan::AddImageBarrier(VkImage image, VkImageLayout srcLayout, V #if VK_ENABLE_BARRIERS_BATCHING // Auto-flush on overflow if (_barriers.IsFull()) - { - const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); - if (cmdBuffer->IsInsideRenderPass()) - EndRenderPass(); - 
_barriers.Execute(cmdBuffer); - } + FlushBarriers(); #endif // Insert barrier @@ -190,10 +186,7 @@ void GPUContextVulkan::AddImageBarrier(VkImage image, VkImageLayout srcLayout, V #if !VK_ENABLE_BARRIERS_BATCHING // Auto-flush without batching - const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); - if (cmdBuffer->IsInsideRenderPass()) - EndRenderPass(); - _barriers.Execute(cmdBuffer); + FlushBarriers(); #endif } @@ -315,12 +308,7 @@ void GPUContextVulkan::AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags d #if VK_ENABLE_BARRIERS_BATCHING // Auto-flush on overflow if (_barriers.IsFull()) - { - const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); - if (cmdBuffer->IsInsideRenderPass()) - EndRenderPass(); - _barriers.Execute(cmdBuffer); - } + FlushBarriers(); #endif // Insert barrier @@ -339,13 +327,38 @@ void GPUContextVulkan::AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags d #if !VK_ENABLE_BARRIERS_BATCHING // Auto-flush without batching - const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); - if (cmdBuffer->IsInsideRenderPass()) - EndRenderPass(); - _barriers.Execute(cmdBuffer); + FlushBarriers(); #endif } +void GPUContextVulkan::AddMemoryBarrier() +{ +#if VK_ENABLE_BARRIERS_BATCHING + // Auto-flush on overflow + if (_barriers.IsFull()) + FlushBarriers(); +#endif + + // Insert barrier + VkMemoryBarrier& memoryBarrier = _barriers.MemoryBarriers.AddOne(); + RenderToolsVulkan::ZeroStruct(memoryBarrier, VK_STRUCTURE_TYPE_MEMORY_BARRIER); + memoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + memoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + _barriers.SourceStage |= VK_PIPELINE_STAGE_TRANSFER_BIT; + _barriers.DestStage |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + +#if !VK_ENABLE_BARRIERS_BATCHING + // Auto-flush without batching + FlushBarriers(); +#endif +} + +void GPUContextVulkan::AddUABarrier() +{ + _barriers.SourceStage |= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + _barriers.DestStage |= 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; +} + void GPUContextVulkan::FlushBarriers() { #if VK_ENABLE_BARRIERS_BATCHING @@ -475,7 +488,7 @@ void GPUContextVulkan::EndRenderPass() cmdBuffer->EndRenderPass(); _renderPass = nullptr; - // Place a barrier between RenderPasses, so that color / depth outputs can be read in subsequent passes + // Place a barrier between RenderPasses, so that color/depth outputs can be read in subsequent passes // TODO: remove it in future and use proper barriers without whole pipeline stalls vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr); } @@ -1155,8 +1168,8 @@ void GPUContextVulkan::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCo RENDER_STAT_DISPATCH_CALL(); // Place a barrier between dispatches, so that UAVs can be read+write in subsequent passes - // TODO: optimize it by moving inputs/outputs into higher-layer so eg. Global SDF can manually optimize it - vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr); + if (_pass == 0) + AddUABarrier(); #if VK_ENABLE_BARRIERS_DEBUG LOG(Warning, "Dispatch"); @@ -1191,8 +1204,8 @@ void GPUContextVulkan::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b RENDER_STAT_DISPATCH_CALL(); // Place a barrier between dispatches, so that UAVs can be read+write in subsequent passes - // TODO: optimize it by moving inputs/outputs into higher-layer so eg. 
Global SDF can manually optimize it - vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr); + if (_pass == 0) + AddUABarrier(); #if VK_ENABLE_BARRIERS_DEBUG LOG(Warning, "DispatchIndirect"); @@ -1351,18 +1364,14 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 const auto bufferVulkan = static_cast(buffer); - // Memory transfer barrier - // TODO: batch pipeline barriers - const VkMemoryBarrier barrierBefore = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT }; - vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrierBefore, 0, nullptr, 0, nullptr); + // Transition resource + AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT); + FlushBarriers(); // Use direct update for small buffers const uint32 alignedSize = Math::AlignUp(size, 4); if (size <= 4 * 1024 && alignedSize <= buffer->GetSize()) { - //AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT); - //FlushBarriers(); - vkCmdUpdateBuffer(cmdBuffer->GetHandle(), bufferVulkan->GetHandle(), offset, alignedSize, data); } else @@ -1379,10 +1388,9 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 _device->StagingManager.ReleaseBuffer(cmdBuffer, staging); } - // Memory transfer barrier - // TODO: batch pipeline barriers - const VkMemoryBarrier barrierAfter = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT }; - vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &barrierAfter, 0, nullptr, 0, nullptr); + // Memory transfer barrier to ensure buffer is ready to read (eg. 
by Draw or Dispatch) + if (_pass == 0) + AddMemoryBarrier(); } void GPUContextVulkan::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset) @@ -1407,6 +1415,10 @@ void GPUContextVulkan::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, ui bufferCopy.dstOffset = dstOffset; bufferCopy.size = size; vkCmdCopyBuffer(cmdBuffer->GetHandle(), srcBufferVulkan->GetHandle(), dstBufferVulkan->GetHandle(), 1, &bufferCopy); + + // Memory transfer barrier to ensure buffer is ready to read (eg. by Draw or Dispatch) + if (_pass == 0) + AddMemoryBarrier(); } void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int32 mipIndex, const void* data, uint32 rowPitch, uint32 slicePitch) @@ -1816,4 +1828,27 @@ void GPUContextVulkan::CopySubresource(GPUResource* dstResource, uint32 dstSubre } } +void GPUContextVulkan::Transition(GPUResource* resource, GPUResourceAccess access) +{ + if (auto buffer = dynamic_cast(resource)) + { + AddBufferBarrier(buffer, RenderToolsVulkan::GetAccess(access)); + } + else if (auto texture = dynamic_cast(resource)) + { + AddImageBarrier(texture, RenderToolsVulkan::GetImageLayout(access)); + } +} + +void GPUContextVulkan::MemoryBarrier() +{ + AddMemoryBarrier(); +} + +void GPUContextVulkan::OverlapUA(bool end) +{ + if (end) + AddUABarrier(); +} + #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h index 73aa5a52f..d3dd1c528 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h @@ -34,7 +34,7 @@ class DescriptorSetLayoutVulkan; /// /// Size of the pipeline barriers buffer size (will be auto-flushed on overflow). /// -#define VK_BARRIER_BUFFER_SIZE 16 +#define VK_BARRIER_BUFFER_SIZE 64 /// /// The Vulkan pipeline resources layout barrier batching structure. 
@@ -45,18 +45,19 @@ struct PipelineBarrierVulkan VkPipelineStageFlags DestStage = 0; Array> ImageBarriers; Array> BufferBarriers; + Array> MemoryBarriers; #if VK_ENABLE_BARRIERS_DEBUG Array> ImageBarriersDebug; #endif FORCE_INLINE bool IsFull() const { - return ImageBarriers.Count() == VK_BARRIER_BUFFER_SIZE || BufferBarriers.Count() == VK_BARRIER_BUFFER_SIZE; + return ImageBarriers.Count() == VK_BARRIER_BUFFER_SIZE || BufferBarriers.Count() == VK_BARRIER_BUFFER_SIZE || MemoryBarriers.Count() == 4; } FORCE_INLINE bool HasBarrier() const { - return ImageBarriers.Count() + BufferBarriers.Count() != 0; + return ImageBarriers.Count() + BufferBarriers.Count() + MemoryBarriers.Count() != 0; } void Execute(const CmdBufferVulkan* cmdBuffer); @@ -130,6 +131,8 @@ public: void AddImageBarrier(GPUTextureVulkan* texture, int32 mipSlice, int32 arraySlice, VkImageLayout dstLayout); void AddImageBarrier(GPUTextureVulkan* texture, VkImageLayout dstLayout); void AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags dstAccess); + void AddMemoryBarrier(); + void AddUABarrier(); void FlushBarriers(); @@ -199,6 +202,9 @@ public: void CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBuffer* srcBuffer) override; void CopyResource(GPUResource* dstResource, GPUResource* srcResource) override; void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override; + void Transition(GPUResource* resource, GPUResourceAccess access) override; + void MemoryBarrier() override; + void OverlapUA(bool end) override; }; #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp index 604b8a612..4a8d138ed 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.cpp @@ -5,6 +5,7 @@ #include "RenderToolsVulkan.h" #include "Engine/Core/Types/StringBuilder.h" #include "Engine/Core/Log.h" 
+#include "Engine/Graphics/GPUResourceAccess.h" // @formatter:off @@ -258,6 +259,80 @@ void RenderToolsVulkan::LogVkResult(VkResult result, const char* file, uint32 li #endif } +VkAccessFlags RenderToolsVulkan::GetAccess(GPUResourceAccess access) +{ + switch (access) + { + case GPUResourceAccess::None: + return VK_ACCESS_NONE; + case GPUResourceAccess::CopyRead: + return VK_ACCESS_TRANSFER_READ_BIT; + case GPUResourceAccess::CopyWrite: + return VK_ACCESS_TRANSFER_WRITE_BIT; + case GPUResourceAccess::CpuRead: + return VK_ACCESS_HOST_READ_BIT; + case GPUResourceAccess::CpuWrite: + return VK_ACCESS_HOST_WRITE_BIT; + case GPUResourceAccess::DepthRead: + return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + case GPUResourceAccess::DepthWrite: + return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + case GPUResourceAccess::DepthBuffer: + return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + case GPUResourceAccess::RenderTarget: + return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + case GPUResourceAccess::UnorderedAccess: + return VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + case GPUResourceAccess::IndirectArgs: + return VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + case GPUResourceAccess::ShaderReadCompute: + case GPUResourceAccess::ShaderReadPixel: + case GPUResourceAccess::ShaderReadNonPixel: + case GPUResourceAccess::ShaderReadGraphics: + return VK_ACCESS_SHADER_READ_BIT; +#if !BUILD_RELEASE + default: + LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access); +#endif + } + return VK_ACCESS_NONE; +} + +VkImageLayout RenderToolsVulkan::GetImageLayout(GPUResourceAccess access) +{ + switch (access) + { + case GPUResourceAccess::None: + return VK_IMAGE_LAYOUT_UNDEFINED; + case GPUResourceAccess::CopyRead: + return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + case GPUResourceAccess::CopyWrite: + return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + case GPUResourceAccess::CpuRead: + case 
GPUResourceAccess::CpuWrite: + return VK_IMAGE_LAYOUT_GENERAL; + case GPUResourceAccess::DepthRead: + return VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + case GPUResourceAccess::DepthWrite: + case GPUResourceAccess::DepthBuffer: + return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + case GPUResourceAccess::RenderTarget: + return VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL; + case GPUResourceAccess::UnorderedAccess: + case GPUResourceAccess::ShaderReadCompute: + case GPUResourceAccess::ShaderReadPixel: + case GPUResourceAccess::ShaderReadNonPixel: + case GPUResourceAccess::ShaderReadGraphics: + return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; +#if !BUILD_RELEASE + default: + LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access); +#endif + } + return VK_IMAGE_LAYOUT_UNDEFINED; +} + bool RenderToolsVulkan::HasExtension(const Array& extensions, const char* name) { for (int32 i = 0; i < extensions.Count(); i++) diff --git a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.h index d2d1bca79..82167fd6c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/RenderToolsVulkan.h @@ -20,6 +20,8 @@ #define VK_SET_DEBUG_NAME(device, handle, type, name) #endif +enum class GPUResourceAccess; + /// /// Set of utilities for rendering on Vulkan platform. 
/// @@ -40,6 +42,9 @@ public: static String GetVkErrorString(VkResult result); static void LogVkResult(VkResult result, const char* file = nullptr, uint32 line = 0, bool fatal = false); + static VkAccessFlags GetAccess(GPUResourceAccess access); + static VkImageLayout GetImageLayout(GPUResourceAccess access); + static inline VkPipelineStageFlags GetBufferBarrierFlags(VkAccessFlags accessFlags) { VkPipelineStageFlags stageFlags = (VkPipelineStageFlags)0; @@ -67,6 +72,9 @@ public: case VK_ACCESS_SHADER_WRITE_BIT: stageFlags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; break; + case VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT: + stageFlags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: stageFlags = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp index cd0f627f5..a031b0e9d 100644 --- a/Source/Engine/Renderer/Utils/BitonicSort.cpp +++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp @@ -82,7 +82,7 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* indicesBuffer, GPUBuffer* if (checkIfSkipPass()) return; PROFILE_GPU_CPU("Bitonic Sort"); - uint32 maxNumElements = indicesBuffer->GetElementsCount(); + int32 maxNumElements = (int32)indicesBuffer->GetElementsCount(); if (maxElements > 0 && maxElements < maxNumElements) maxNumElements = maxElements; const uint32 alignedMaxNumElements = Math::RoundUpToPowerOf2(maxNumElements); From 1915e1e7f4d2795876ad174b152aa3f62d6fe2b1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 9 Aug 2025 23:58:15 +0200 Subject: [PATCH 173/211] Optimize compute shader pipeline binding on D3D12 and Vulkan when unchanged --- .../DirectX/DX12/GPUContextDX12.cpp | 20 ++++++++++++------- 
.../Vulkan/GPUContextVulkan.cpp | 18 +++++++++++++---- .../GraphicsDevice/Vulkan/GPUContextVulkan.h | 2 ++ 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 88afc5cfb..c132c7343 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -384,7 +384,7 @@ void GPUContextDX12::flushSRVs() ASSERT(srCount <= GPU_MAX_SR_BINDED); // Fill table with source descriptors - DxShaderHeader& header = _currentCompute ? ((GPUShaderProgramCSDX12*)_currentCompute)->Header : _currentState->Header; + DxShaderHeader& header = _isCompute ? ((GPUShaderProgramCSDX12*)_currentCompute)->Header : _currentState->Header; D3D12_CPU_DESCRIPTOR_HANDLE srcDescriptorRangeStarts[GPU_MAX_SR_BINDED]; for (uint32 i = 0; i < srCount; i++) { @@ -1141,6 +1141,7 @@ void GPUContextDX12::UpdateCB(GPUConstantBuffer* cb, const void* data) void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ) { + bool bindPipelineState = _currentCompute != shader; _isCompute = 1; _currentCompute = shader; @@ -1153,14 +1154,16 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun auto shaderDX12 = (GPUShaderProgramCSDX12*)shader; auto computeState = shaderDX12->GetOrCreateState(); - _commandList->SetPipelineState(computeState); - RENDER_STAT_PS_STATE_CHANGE(); + if (bindPipelineState) + { + _commandList->SetPipelineState(computeState); + RENDER_STAT_PS_STATE_CHANGE(); + } _commandList->Dispatch(threadGroupCountX, threadGroupCountY, threadGroupCountZ); RENDER_STAT_DISPATCH_CALL(); _isCompute = 0; - _currentCompute = nullptr; // Restore previous state on next draw call _psDirtyFlag = true; @@ -1172,6 +1175,7 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun void 
GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs) { + bool bindPipelineState = _currentCompute != shader; _isCompute = 1; _currentCompute = shader; @@ -1187,15 +1191,17 @@ void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf auto shaderDX12 = (GPUShaderProgramCSDX12*)shader; auto computeState = shaderDX12->GetOrCreateState(); - _commandList->SetPipelineState(computeState); - RENDER_STAT_PS_STATE_CHANGE(); + if (bindPipelineState) + { + _commandList->SetPipelineState(computeState); + RENDER_STAT_PS_STATE_CHANGE(); + } auto signature = _device->DispatchIndirectCommandSignature->GetSignature(); _commandList->ExecuteIndirect(signature, 1, bufferForArgsDX12->GetResource(), (UINT64)offsetForArgs, nullptr, 0); RENDER_STAT_DISPATCH_CALL(); _isCompute = 0; - _currentCompute = nullptr; // Restore previous state on next draw call _psDirtyFlag = true; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 430ce5b70..34ad63a66 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -746,6 +746,7 @@ void GPUContextVulkan::FrameBegin() _stencilRef = 0; _renderPass = nullptr; _currentState = nullptr; + _currentCompute = nullptr; _vertexLayout = nullptr; _rtDepth = nullptr; Platform::MemoryClear(_rtHandles, sizeof(_rtHandles)); @@ -1157,8 +1158,12 @@ void GPUContextVulkan::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCo FlushBarriers(); // Bind pipeline - vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle()); - RENDER_STAT_PS_STATE_CHANGE(); + if (_currentCompute != shaderVulkan) + { + _currentCompute = shaderVulkan; + vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle()); + RENDER_STAT_PS_STATE_CHANGE(); + } // Bind descriptors sets 
to the compute pipeline pipelineState->Bind(cmdBuffer); @@ -1193,8 +1198,12 @@ void GPUContextVulkan::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b FlushBarriers(); // Bind pipeline - vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle()); - RENDER_STAT_PS_STATE_CHANGE(); + if (_currentCompute != shaderVulkan) + { + _currentCompute = shaderVulkan; + vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle()); + RENDER_STAT_PS_STATE_CHANGE(); + } // Bind descriptors sets to the compute pipeline pipelineState->Bind(cmdBuffer); @@ -1346,6 +1355,7 @@ void GPUContextVulkan::Flush() // Flush remaining and buffered commands FlushState(); _currentState = nullptr; + _currentCompute = nullptr; // Execute commands _cmdBufferManager->SubmitActiveCmdBuffer(); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h index d3dd1c528..bba79c498 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h @@ -16,6 +16,7 @@ class GPUTextureViewVulkan; class GPUBufferVulkan; class GPUVertexLayoutVulkan; class GPUPipelineStateVulkan; +class GPUShaderProgramCSVulkan; class ComputePipelineStateVulkan; class GPUConstantBufferVulkan; class DescriptorPoolVulkan; @@ -84,6 +85,7 @@ private: RenderPassVulkan* _renderPass; GPUPipelineStateVulkan* _currentState; + GPUShaderProgramCSVulkan* _currentCompute; GPUVertexLayoutVulkan* _vertexLayout; GPUTextureViewVulkan* _rtDepth; GPUTextureViewVulkan* _rtHandles[GPU_MAX_RT_BINDED]; From 82231981dc5aa0b6bb8a97db95da29b5c0cdaf38 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 9 Aug 2025 23:58:47 +0200 Subject: [PATCH 174/211] Add debug tool detection for Nsight Graphics --- Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index 20a26f2f5..e176d199c 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -335,10 +335,10 @@ bool GPUDeviceDX12::Init() IsDebugToolAttached = true; unknown->Release(); } - if (!IsDebugToolAttached && GetModuleHandleA("renderdoc.dll") != nullptr) - { + if (!IsDebugToolAttached && GetModuleHandleA("renderdoc.dll")) + IsDebugToolAttached = true; + if (!IsDebugToolAttached && (GetModuleHandleA("Nvda.Graphics.Interception.dll") || GetModuleHandleA("WarpViz.Injection.dll") || GetModuleHandleA("nvperf_grfx_target.dll"))) IsDebugToolAttached = true; - } #endif // Check if can use screen tearing on a swapchain From ff3d78548362cbeac57de955a5c26a3a0c70bb62 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 9 Aug 2025 23:59:52 +0200 Subject: [PATCH 175/211] Optimize GPU particles simulation, sorting and drawing with better resource transition barriers --- Source/Engine/Particles/Particles.cpp | 72 +++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 0ab1e0792..98fc230c6 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -12,6 +12,7 @@ #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/DynamicBuffer.h" #include "Engine/Graphics/GPUContext.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/RenderTools.h" #include "Engine/Graphics/Shaders/GPUVertexLayout.h" #include "Engine/Profiler/ProfilerCPU.h" @@ -748,6 +749,11 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) { PROFILE_GPU_CPU_NAMED("Init Indirect Args"); + GPUMemoryPass pass(context); + pass.Transition(GPUIndirectArgsBuffer, GPUResourceAccess::CopyWrite); + for (GPUEmitterDraw& draw : 
GPUEmitterDraws) + pass.Transition(draw.Buffer->GPU.Buffer, GPUResourceAccess::CopyRead); + // Init default arguments byte* indirectArgsMemory = (byte*)renderContextBatch.GetMainContext().List->Memory.Allocate(indirectArgsSize, GPU_SHADER_DATA_ALIGNMENT); for (GPUEmitterDraw& draw : GPUEmitterDraws) @@ -872,6 +878,18 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) // Generate sort keys for each particle { PROFILE_GPU("Gen Sort Keys"); + + GPUComputePass pass(context); + for (const GPUEmitterDraw& draw : GPUEmitterDraws) + { + if (draw.Sorting) + { + pass.Transition(draw.Buffer->GPU.Buffer, GPUResourceAccess::ShaderReadCompute); + pass.Transition(draw.Buffer->GPU.SortedIndices, GPUResourceAccess::UnorderedAccess); + pass.Transition(draw.Buffer->GPU.SortingKeys, GPUResourceAccess::UnorderedAccess); + } + } + for (const GPUEmitterDraw& draw : GPUEmitterDraws) { if (!draw.Sorting) @@ -935,12 +953,29 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) } // Run sorting + constexpr int32 inplaceSortSizeLimit = 2048; + { + // Small emitters can be sorted in-place with a single independent dispatch (simultaneously) + GPUComputePass pass(context); + for (const GPUEmitterDraw& draw : GPUEmitterDraws) + { + if (!draw.Sorting || draw.Buffer->GPU.ParticlesCountMax > inplaceSortSizeLimit) + continue; + ParticleEmitter* emitter = draw.Buffer->Emitter; + for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) + { + auto module = emitter->Graph.SortModules[moduleIndex]; + // TODO: add support for module->SortedIndicesOffset (multiple sort modules) + const auto sortMode = (ParticleSortMode)module->Values[2].AsInt; + bool sortAscending = sortMode == ParticleSortMode::CustomAscending; + BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.SortingKeys, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.ParticlesCountMax); + } + } + } for 
(const GPUEmitterDraw& draw : GPUEmitterDraws) { - if (!draw.Sorting) + if (!draw.Sorting || draw.Buffer->GPU.ParticlesCountMax <= inplaceSortSizeLimit) continue; - - // Execute all sorting modules ParticleEmitter* emitter = draw.Buffer->Emitter; for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) { @@ -950,11 +985,12 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) bool sortAscending = sortMode == ParticleSortMode::CustomAscending; BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.SortingKeys, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.ParticlesCountMax); // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier (all sorting in parallel) - // TODO: run small emitters sorting (less than 2k particles) sorting in separate loop as pass without UAV barriers (all sorting in parallel) } } } + // TODO: transition here SortedIndices into ShaderReadNonPixel and Buffer into ShaderReadGraphics to reduce barriers during particles rendering + // Submit draw calls for (GPUEmitterDraw& draw : GPUEmitterDraws) { @@ -1326,6 +1362,15 @@ void UpdateGPU(RenderTask* task, GPUContext* context) // Pre-pass with buffers setup { PROFILE_CPU_NAMED("PreSim"); + + GPUMemoryPass pass(context); + for (GPUSim& sim : sims) + { + if (sim.Data.Buffer->GPU.PendingClear) + pass.Transition(sim.Data.Buffer->GPU.Buffer, GPUResourceAccess::CopyWrite); + pass.Transition(sim.Data.Buffer->GPU.BufferSecondary, GPUResourceAccess::CopyWrite); + } + for (GPUSim& sim : sims) { sim.Emitter->GPU.PreSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); @@ -1335,6 +1380,14 @@ void UpdateGPU(RenderTask* task, GPUContext* context) // Pre-pass with buffers setup { PROFILE_GPU_CPU_NAMED("Sim"); + + GPUComputePass pass(context); + for (GPUSim& sim : sims) + { + pass.Transition(sim.Data.Buffer->GPU.Buffer, 
GPUResourceAccess::ShaderReadCompute); + pass.Transition(sim.Data.Buffer->GPU.BufferSecondary, GPUResourceAccess::UnorderedAccess); + } + for (GPUSim& sim : sims) { sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); @@ -1344,6 +1397,17 @@ void UpdateGPU(RenderTask* task, GPUContext* context) // Post-pass with buffers setup { PROFILE_CPU_NAMED("PostSim"); + + GPUMemoryPass pass(context); + for (GPUSim& sim : sims) + { + if (sim.Data.CustomData.HasItems()) + { + pass.Transition(sim.Data.Buffer->GPU.BufferSecondary, GPUResourceAccess::CopyRead); + pass.Transition(sim.Data.Buffer->GPU.Buffer, GPUResourceAccess::CopyWrite); + } + } + for (GPUSim& sim : sims) { sim.Emitter->GPU.PostSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); From a2e9d8d77b74b2edddf79ae30a688c27d7e498bf Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 10 Aug 2025 16:02:34 +0200 Subject: [PATCH 176/211] Add `nvapi` third party module R580-Developer SDK --- .../Binaries/ThirdParty/x64/nvapi64.lib | 3 + Source/ThirdParty/nvapi/LICENSE.txt | 22 + Source/ThirdParty/nvapi/nvHLSLExtns.h | 2578 ++ Source/ThirdParty/nvapi/nvHLSLExtnsInternal.h | 955 + Source/ThirdParty/nvapi/nvapi.Build.cs | 50 + Source/ThirdParty/nvapi/nvapi.h | 25533 ++++++++++++++++ Source/ThirdParty/nvapi/nvapi_interface.h | 547 + Source/ThirdParty/nvapi/nvapi_lite_common.h | 672 + Source/ThirdParty/nvapi/nvapi_lite_d3dext.h | 184 + Source/ThirdParty/nvapi/nvapi_lite_salend.h | 809 + Source/ThirdParty/nvapi/nvapi_lite_salstart.h | 813 + Source/ThirdParty/nvapi/nvapi_lite_sli.h | 247 + Source/ThirdParty/nvapi/nvapi_lite_stereo.h | 592 + Source/ThirdParty/nvapi/nvapi_lite_surround.h | 95 + .../Flax.Build/Deps/Dependencies/nvapi.cs | 57 + 15 files changed, 33157 insertions(+) create mode 100644 Source/Platforms/Windows/Binaries/ThirdParty/x64/nvapi64.lib create mode 100644 Source/ThirdParty/nvapi/LICENSE.txt create mode 100644 Source/ThirdParty/nvapi/nvHLSLExtns.h create mode 100644 
Source/ThirdParty/nvapi/nvHLSLExtnsInternal.h create mode 100644 Source/ThirdParty/nvapi/nvapi.Build.cs create mode 100644 Source/ThirdParty/nvapi/nvapi.h create mode 100644 Source/ThirdParty/nvapi/nvapi_interface.h create mode 100644 Source/ThirdParty/nvapi/nvapi_lite_common.h create mode 100644 Source/ThirdParty/nvapi/nvapi_lite_d3dext.h create mode 100644 Source/ThirdParty/nvapi/nvapi_lite_salend.h create mode 100644 Source/ThirdParty/nvapi/nvapi_lite_salstart.h create mode 100644 Source/ThirdParty/nvapi/nvapi_lite_sli.h create mode 100644 Source/ThirdParty/nvapi/nvapi_lite_stereo.h create mode 100644 Source/ThirdParty/nvapi/nvapi_lite_surround.h create mode 100644 Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/nvapi64.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/nvapi64.lib new file mode 100644 index 000000000..f6a371ea2 --- /dev/null +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/nvapi64.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e018124c39b9319474d0f5087315d4a1c0b8c2ce2b8d23ae94765e73920c6e +size 980014 diff --git a/Source/ThirdParty/nvapi/LICENSE.txt b/Source/ThirdParty/nvapi/LICENSE.txt new file mode 100644 index 000000000..1523a830b --- /dev/null +++ b/Source/ThirdParty/nvapi/LICENSE.txt @@ -0,0 +1,22 @@ +nvapi.lib and nvapi64.lib are licensed under the following terms: + +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+SPDX-License-Identifier: MIT + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/Source/ThirdParty/nvapi/nvHLSLExtns.h b/Source/ThirdParty/nvapi/nvHLSLExtns.h new file mode 100644 index 000000000..a8866f9a8 --- /dev/null +++ b/Source/ThirdParty/nvapi/nvHLSLExtns.h @@ -0,0 +1,2578 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ + +////////////////////////// NVIDIA SHADER EXTENSIONS ///////////////// + +// this file is to be #included in the app HLSL shader code to make +// use of nvidia shader extensions + + +#include "nvHLSLExtnsInternal.h" + +//----------------------------------------------------------------------------// +//------------------------- Warp Shuffle Functions ---------------------------// +//----------------------------------------------------------------------------// + +// all functions have variants with width parameter which permits sub-division +// of the warp into segments - for example to exchange data between 4 groups of +// 8 lanes in a SIMD manner. 
If width is less than warpSize then each subsection +// of the warp behaves as a separate entity with a starting logical lane ID of 0. +// A thread may only exchange data with others in its own subsection. Width must +// have a value which is a power of 2 so that the warp can be subdivided equally; +// results are undefined if width is not a power of 2, or is a number greater +// than warpSize. + +// +// simple variant of SHFL instruction +// returns val from the specified lane +// optional width parameter must be a power of two and width <= 32 +// +int NvShfl(int val, uint srcLane, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = srcLane; // source lane + g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width); + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL; + + // result is returned as the return value of IncrementCounter on fake UAV slot + return g_NvidiaExt.IncrementCounter(); +} + +int2 NvShfl(int2 val, uint srcLane, int width = NV_WARP_SIZE) +{ + int x = NvShfl(val.x, srcLane, width); + int y = NvShfl(val.y, srcLane, width); + return int2(x, y); +} + +int4 NvShfl(int4 val, uint srcLane, int width = NV_WARP_SIZE) +{ + int x = NvShfl(val.x, srcLane, width); + int y = NvShfl(val.y, srcLane, width); + int z = NvShfl(val.z, srcLane, width); + int w = NvShfl(val.w, srcLane, width); + return int4(x, y, z, w); +} + +// +// Copy from a lane with lower ID relative to caller +// +int NvShflUp(int val, uint delta, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = delta; // relative lane offset + g_NvidiaExt[index].src0u.z = (NV_WARP_SIZE - width) << 8; // minIndex = maxIndex for shfl_up (src2[4:0] is expected to be 0) + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_UP; + return g_NvidiaExt.IncrementCounter(); +} + +// +// Copy from 
a lane with higher ID relative to caller +// +int NvShflDown(int val, uint delta, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = delta; // relative lane offset + g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width); + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_DOWN; + return g_NvidiaExt.IncrementCounter(); +} + +// +// Copy from a lane based on bitwise XOR of own lane ID +// +int NvShflXor(int val, uint laneMask, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = laneMask; // laneMask to be XOR'ed with current laneId to get the source lane id + g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width); + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_XOR; + return g_NvidiaExt.IncrementCounter(); +} + + +//----------------------------------------------------------------------------// +//----------------------------- Warp Vote Functions---------------------------// +//----------------------------------------------------------------------------// + +// returns 0xFFFFFFFF if the predicate is true for any thread in the warp, returns 0 otherwise +uint NvAny(int predicate) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = predicate; + g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_ANY; + return g_NvidiaExt.IncrementCounter(); +} + +// returns 0xFFFFFFFF if the predicate is true for ALL threads in the warp, returns 0 otherwise +uint NvAll(int predicate) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = predicate; + g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_ALL; + return g_NvidiaExt.IncrementCounter(); +} + +// returns a mask of all threads in the warp with bits set for threads that have predicate true +uint NvBallot(int predicate) +{ + uint index = 
g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = predicate; + g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_BALLOT; + return g_NvidiaExt.IncrementCounter(); +} + + +//----------------------------------------------------------------------------// +//----------------------------- Utility Functions ----------------------------// +//----------------------------------------------------------------------------// + +// returns the lane index of the current thread (thread index in warp) +int NvGetLaneId() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_LANE_ID; + return g_NvidiaExt.IncrementCounter(); +} + +// returns value of special register - specify subopcode from any of NV_SPECIALOP_* specified in nvShaderExtnEnums.h - other opcodes undefined behavior +uint NvGetSpecial(uint subOpCode) +{ + return __NvGetSpecial(subOpCode); +} + +//----------------------------------------------------------------------------// +//----------------------------- FP16 Atmoic Functions-------------------------// +//----------------------------------------------------------------------------// + +// The functions below performs atomic operations on two consecutive fp16 +// values in the given raw UAV. 
+// The uint paramater 'fp16x2Val' is treated as two fp16 values byteAddress must be multiple of 4 +// The returned value are the two fp16 values packed into a single uint + +uint NvInterlockedAddFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_ADD); +} + +uint NvInterlockedMinFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_MIN); +} + +uint NvInterlockedMaxFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_MAX); +} + + +// versions of the above functions taking two fp32 values (internally converted to fp16 values) +uint NvInterlockedAddFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val) +{ + return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD); +} + +uint NvInterlockedMinFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val) +{ + return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN); +} + +uint NvInterlockedMaxFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val) +{ + return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX); +} + + +//----------------------------------------------------------------------------// + +// The functions below perform atomic operation on a R16G16_FLOAT UAV at the given address +// the uint paramater 'fp16x2Val' is treated as two fp16 values +// the returned value are the two fp16 values (.x and .y components) packed into a single uint +// Warning: Behaviour of these set of functions is undefined if the UAV is not +// of R16G16_FLOAT format (might result in app crash or TDR) + +uint NvInterlockedAddFp16x2(RWTexture1D uav, uint address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD); +} + +uint 
NvInterlockedMinFp16x2(RWTexture1D uav, uint address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN); +} + +uint NvInterlockedMaxFp16x2(RWTexture1D uav, uint address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX); +} + +uint NvInterlockedAddFp16x2(RWTexture2D uav, uint2 address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD); +} + +uint NvInterlockedMinFp16x2(RWTexture2D uav, uint2 address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN); +} + +uint NvInterlockedMaxFp16x2(RWTexture2D uav, uint2 address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX); +} + +uint NvInterlockedAddFp16x2(RWTexture3D uav, uint3 address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD); +} + +uint NvInterlockedMinFp16x2(RWTexture3D uav, uint3 address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN); +} + +uint NvInterlockedMaxFp16x2(RWTexture3D uav, uint3 address, uint fp16x2Val) +{ + return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX); +} + + +// versions taking two fp32 values (internally converted to fp16) +uint NvInterlockedAddFp16x2(RWTexture1D uav, uint address, float2 val) +{ + return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD); +} + +uint NvInterlockedMinFp16x2(RWTexture1D uav, uint address, float2 val) +{ + return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN); +} + +uint NvInterlockedMaxFp16x2(RWTexture1D uav, uint address, float2 val) +{ + return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX); +} + +uint NvInterlockedAddFp16x2(RWTexture2D uav, uint2 address, float2 val) +{ + return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD); +} + +uint 
NvInterlockedMinFp16x2(RWTexture2D uav, uint2 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture2D uav, uint2 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
+}
+
+uint NvInterlockedAddFp16x2(RWTexture3D uav, uint3 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWTexture3D uav, uint3 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture3D uav, uint3 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
+}
+
+
+//----------------------------------------------------------------------------//
+
+// The functions below perform an atomic operation on a R16G16B16A16_FLOAT UAV at the given address
+// the uint2 parameter 'fp16x2Val' is treated as four fp16 values
+// i.e, fp16x2Val.x = uav.xy and fp16x2Val.y = uav.zw
+// The returned value is the four fp16 values (.xyzw components) packed into uint2
+// Warning: Behaviour of this set of functions is undefined if the UAV is not
+// of R16G16B16A16_FLOAT format (might result in app crash or TDR)
+
+uint2 NvInterlockedAddFp16x4(RWTexture1D uav, uint address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture1D uav, uint address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture1D uav, uint address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture2D uav, uint2 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture2D uav, uint2 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture2D uav, uint2 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture3D uav, uint3 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture3D uav, uint3 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture3D uav, uint3 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+// versions taking four fp32 values (internally converted to fp16)
+uint2 NvInterlockedAddFp16x4(RWTexture1D uav, uint address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture1D uav, uint address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture1D uav, uint address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture2D uav, uint2 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture2D uav, uint2 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture2D uav, uint2 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture3D uav, uint3 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture3D uav, uint3 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture3D uav, uint3 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
+}
+
+
+//----------------------------------------------------------------------------//
+//----------------------------- FP32 Atomic Functions-------------------------//
+//----------------------------------------------------------------------------//
+
+// The functions below perform atomic add on the given UAV treating the value as float
+// byteAddress must be multiple of 4
+// The returned value is the value present in memory location before the atomic add
+
+float NvInterlockedAddFp32(RWByteAddressBuffer uav, uint byteAddress, float val)
+{
+    return __NvAtomicAddFP32(uav, byteAddress, val);
+}
+
+//----------------------------------------------------------------------------//
+
+// The functions below perform atomic add on a R32_FLOAT UAV at the given address
+// the returned value is the value before performing the atomic add
+// Warning: Behaviour of this set of functions is undefined if the UAV is not
+// of R32_FLOAT format (might result in app crash or TDR)
+
+float NvInterlockedAddFp32(RWTexture1D uav, uint address, float val)
+{
+    return __NvAtomicAddFP32(uav, address, val);
+}
+
+float NvInterlockedAddFp32(RWTexture2D uav, uint2 address, float val)
+{
+    return __NvAtomicAddFP32(uav, address, val);
+}
+
+float NvInterlockedAddFp32(RWTexture3D uav, uint3 address, float val)
+{
+    return __NvAtomicAddFP32(uav, address, val);
+}
+
+
+//----------------------------------------------------------------------------//
+//--------------------------- UINT64 Atomic Functions-------------------------//
+//----------------------------------------------------------------------------//
+
+// The functions below perform an atomic operation on the given UAV treating the value as uint64
+// byteAddress must be multiple of 8
+// The returned value is the value present in memory location before the atomic operation
+// uint2 vector type is used to represent a single uint64 value with the x component containing the low 32 bits and y component the high 32 bits.
+
+uint2 NvInterlockedAddUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, byteAddress, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_SWAP);
+}
+
+//----------------------------------------------------------------------------//
+
+// The functions below perform an atomic operation on a R32G32_UINT UAV at the given address treating the value as uint64
+// the returned value is the value before performing the atomic operation
+// uint2 vector type is used to represent a single uint64 value with the x component containing the low 32 bits and y component the high 32 bits.
+// Warning: Behaviour of this set of functions is undefined if the UAV is not of R32G32_UINT format (might result in app crash or TDR)
+
+uint2 NvInterlockedAddUint64(RWTexture1D uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWTexture1D uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWTexture1D uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWTexture1D uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWTexture1D uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWTexture1D uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWTexture1D uav, uint address, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWTexture1D uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
+}
+
+uint2 NvInterlockedAddUint64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWTexture2D uav, uint2 address, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
+}
+
+uint2 NvInterlockedAddUint64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
+}
+
+//----------------------------------------------------------------------------//
+//--------------------------- VPRS functions ---------------------------------//
+//----------------------------------------------------------------------------//
+
+// NOTE(review): the wrappers below encode NVAPI pseudo-intrinsics through the magic
+// g_NvidiaExt UAV; the exact count and order of g_NvidiaExt.IncrementCounter() calls
+// (numOutputsForIncCounter outputs after the opcode write) IS the instruction
+// encoding, so the statement order must not be changed.
+
+// Returns the shading rate and the number of per-pixel shading passes for current VPRS pixel
+uint3 NvGetShadingRate()
+{
+    uint3 shadingRate = (uint3)0;
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_SHADING_RATE;
+    g_NvidiaExt[index].numOutputsForIncCounter = 3;
+    shadingRate.x = g_NvidiaExt.IncrementCounter();
+    shadingRate.y = g_NvidiaExt.IncrementCounter();
+    shadingRate.z = g_NvidiaExt.IncrementCounter();
+    return shadingRate;
+}
+
+float NvEvaluateAttributeAtSampleForVPRS(float attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float value = (float)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float2 NvEvaluateAttributeAtSampleForVPRS(float2 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float2 value = (float2)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float3 NvEvaluateAttributeAtSampleForVPRS(float3 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float3 value = (float3)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    value.z = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float4 NvEvaluateAttributeAtSampleForVPRS(float4 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float4 value = (float4)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    value.z = asfloat(g_NvidiaExt.IncrementCounter());
+    value.w = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int NvEvaluateAttributeAtSampleForVPRS(int attrib, uint sampleIndex, int2 pixelOffset)
+{
+    int value = (int)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int2 NvEvaluateAttributeAtSampleForVPRS(int2 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    int2 value = (int2)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    value.y = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int3 NvEvaluateAttributeAtSampleForVPRS(int3 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    int3 value = (int3)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    value.y = asint(g_NvidiaExt.IncrementCounter());
+    value.z = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int4 NvEvaluateAttributeAtSampleForVPRS(int4 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    int4 value = (int4)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    value.y = asint(g_NvidiaExt.IncrementCounter());
+    value.z = asint(g_NvidiaExt.IncrementCounter());
+    value.w = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint NvEvaluateAttributeAtSampleForVPRS(uint attrib, uint sampleIndex, int2 pixelOffset)
+{
+    uint value = (uint)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint2 NvEvaluateAttributeAtSampleForVPRS(uint2 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    uint2 value = (uint2)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    value.y = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint3 NvEvaluateAttributeAtSampleForVPRS(uint3 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    uint3 value = (uint3)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    value.y = asuint(g_NvidiaExt.IncrementCounter());
+    value.z = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint4 NvEvaluateAttributeAtSampleForVPRS(uint4 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    uint4 value = (uint4)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    value.y = asuint(g_NvidiaExt.IncrementCounter());
+    value.z = asuint(g_NvidiaExt.IncrementCounter());
+    value.w = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+
+float NvEvaluateAttributeSnappedForVPRS(float attrib, uint2 offset)
+{
+    float value = (float)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float2 NvEvaluateAttributeSnappedForVPRS(float2 attrib, uint2 offset)
+{
+    float2 value = (float2)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float3 NvEvaluateAttributeSnappedForVPRS(float3 attrib, uint2 offset)
+{
+    float3 value = (float3)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    value.z = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float4 NvEvaluateAttributeSnappedForVPRS(float4 attrib, uint2 offset)
+{
+    float4 value = (float4)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    value.z = asfloat(g_NvidiaExt.IncrementCounter());
+    value.w = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int NvEvaluateAttributeSnappedForVPRS(int attrib, uint2 offset)
+{
+    int value = (int)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int2 NvEvaluateAttributeSnappedForVPRS(int2 attrib, uint2 offset)
+{
+    int2 value = (int2)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    value.y = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int3 NvEvaluateAttributeSnappedForVPRS(int3 attrib, uint2 offset)
+{
+    int3 value = (int3)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    value.y = asint(g_NvidiaExt.IncrementCounter());
+    value.z = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int4 NvEvaluateAttributeSnappedForVPRS(int4 attrib, uint2 offset)
+{
+    int4 value = (int4)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
+    value.x = asint(g_NvidiaExt.IncrementCounter());
+    value.y = asint(g_NvidiaExt.IncrementCounter());
+    value.z = asint(g_NvidiaExt.IncrementCounter());
+    value.w = asint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint NvEvaluateAttributeSnappedForVPRS(uint attrib, uint2 offset)
+{
+    uint value = (uint)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint2 NvEvaluateAttributeSnappedForVPRS(uint2 attrib, uint2 offset)
+{
+    uint2 value = (uint2)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    value.y = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint3 NvEvaluateAttributeSnappedForVPRS(uint3 attrib, uint2 offset)
+{
+    uint3 value = (uint3)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    value.y = asuint(g_NvidiaExt.IncrementCounter());
+    value.z = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+uint4 NvEvaluateAttributeSnappedForVPRS(uint4 attrib, uint2 offset)
+{
+    uint4 value = (uint4)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
+    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
+    g_NvidiaExt[ext].src1u.xy = offset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
+    value.x = asuint(g_NvidiaExt.IncrementCounter());
+    value.y = asuint(g_NvidiaExt.IncrementCounter());
+    value.z = asuint(g_NvidiaExt.IncrementCounter());
+    value.w = asuint(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+// MATCH instruction variants
+uint NvWaveMatch(uint value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = value;
+    g_NvidiaExt[index].src1u.x = 1;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(uint2 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = value.xy;
+    g_NvidiaExt[index].src1u.x = 2;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(uint4 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u = value;
+    g_NvidiaExt[index].src1u.x = 4;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(float value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = asuint(value);
+    g_NvidiaExt[index].src1u.x = 1;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(float2 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = asuint(value);
+    g_NvidiaExt[index].src1u.x = 2;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(float4 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u = asuint(value);
+    g_NvidiaExt[index].src1u.x = 4;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+
+//----------------------------------------------------------------------------//
+//------------------------------ Footprint functions -------------------------//
+//----------------------------------------------------------------------------//
+// texSpace and smpSpace must be immediates, texIndex and smpIndex can be variable
+// offset must be immediate
+// the required components of location and offset fields can be filled depending on the dimension/type of the texture
+// texType should be one of 2D or 3D as defined in nvShaderExtnEnums.h and and should be an immediate literal
+// if the above restrictions are not met, the behaviour of this instruction is undefined
+
+uint4 NvFootprintFine(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, offset);
+}
+
+uint4 NvFootprintCoarse(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, offset);
+}
+
+
+
+uint4 NvFootprintFineBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, bias, offset);
+}
+
+uint4 NvFootprintCoarseBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, bias, offset);
+}
+
+
+
+uint4 NvFootprintFineLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, lodLevel, offset);
+}
+
+uint4 NvFootprintCoarseLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, lodLevel, offset);
+}
+
+
+
+uint4 NvFootprintFineGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, ddx, ddy, offset);
+}
+
+uint4 NvFootprintCoarseGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, ddx, ddy, offset);
+}
+
+// NOTE(review): the helpers below decode bit-fields of the opaque footprint blob
+// returned by the NvFootprint* wrappers; mask/shift values come from the driver ABI.
+uint NvFootprintExtractLOD(uint4 blob)
+{
+    return ((blob.w & 0xF000) >> 12);
+}
+
+uint NvFootprintExtractReturnGran(uint4 blob)
+{
+    return ((blob.z & 0xF000000) >> 24);
+}
+
+uint2 NvFootprintExtractAnchorTileLoc2D(uint4 blob)
+{
+    uint2 loc;
+    loc.x = (blob.w & 0xFFF);
+    loc.y = (blob.z & 0xFFF);
+    return loc;
+}
+
+uint3 NvFootprintExtractAnchorTileLoc3D(uint4 blob)
+{
+    uint3 loc;
+    loc.x = (blob.w & 0xFFF);
+    loc.y = ((blob.w & 0xFFF0000) >> 16);
+    loc.z = (blob.z & 0x1FFF);
+    return loc;
+}
+
+uint2 NvFootprintExtractOffset2D(uint4 blob)
+{
+    uint2 loc;
+    loc.x = ((blob.z & 0x070000) >> 16);
+    loc.y = ((blob.z & 0x380000) >> 19);
+    return loc;
+}
+
+uint3 NvFootprintExtractOffset3D(uint4 blob)
+{
+    uint3 loc;
+    loc.x = ((blob.z & 0x030000) >> 16);
+    loc.y = ((blob.z & 0x0C0000) >> 18);
+    loc.z = ((blob.z & 0x300000) >> 20);
+    return loc;
+}
+
+uint2 NvFootprintExtractBitmask(uint4 blob)
+{
+    return blob.xy;
+}
+
+
+// Variant of Footprint extensions which returns isSingleLod (out parameter)
+// isSingleLod = true -> This footprint request touched the texels from only single LOD.
+uint4 NvFootprintFine(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+uint4 NvFootprintCoarse(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+
+
+uint4 NvFootprintFineBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, bias, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+uint4 NvFootprintCoarseBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, bias, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+
+
+uint4 NvFootprintFineLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, lodLevel, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+uint4 NvFootprintCoarseLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, lodLevel, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+
+
+uint4 NvFootprintFineGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, ddx, ddy, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+uint4 NvFootprintCoarseGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, out uint isSingleLod, int3 offset = int3(0, 0, 0))
+{
+    uint4 res = __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, ddx, ddy, offset);
+    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
+    return res;
+}
+
+
+// Active-lane mask of the current wave (ballot of constant 1)
+uint NvActiveThreads()
+{
+    return NvBallot(1);
+}
+
+
+//----------------------------------------------------------------------------//
+//------------------------------ WaveMultiPrefix functions -------------------//
+//----------------------------------------------------------------------------//
+
+// Following are the WaveMultiPrefix functions for different operations (Add, Bitand, BitOr, BitXOr) for different datatypes (uint, uint2, uint4)
+// This is a set of functions which implement multi-prefix operations
among the set of active lanes in the current wave (WARP). +// A multi-prefix operation comprises a set of prefix operations, executed in parallel within subsets of lanes identified with the provided bitmasks. +// These bitmasks represent partitioning of the set of active lanes in the current wave into N groups (where N is the number of unique masks across all lanes in the wave). +// N prefix operations are then performed each within its corresponding group. +// The groups are assumed to be non-intersecting (that is, a given lane can be a member of one and only one group), +// and bitmasks in all lanes belonging to the same group are required to be the same. +// There are 2 types of functions - Exclusive and Inclusive prefix operations. +// e.g. For NvWaveMultiPrefixInclusiveAdd(val, mask) operation - For each of the groups (for which mask input is same) following is the expected output : +// i^th thread in a group has value = sum(values of threads 0 to i) +// For Exclusive version of same operation - +// i^th thread in a group has value = sum(values of threads 0 to i-1) and 0th thread in the group has value 0 + +// Extensions for Add +uint NvWaveMultiPrefixInclusiveAdd(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + // As remainingThreads only has threads in group with smaller thread ids than its own thread-id nextLane can never be 31 for any thread in the group except the smallest one + // For smallest thread in the group, remainingThreads is 0 --> nextLane is ~0 (i.e. considering last 5 bits its 31) + // So passing maskClampValue=30 to __NvShflGeneric, it will return laneValid=false for the smallest thread in the group. So update val and nextLane based on laneValid. 
+ uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val + temp; + nextLane = newLane; + } + } + return val; +} + +uint NvWaveMultiPrefixExclusiveAdd(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : 0; + return NvWaveMultiPrefixInclusiveAdd(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveAdd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val + temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveAdd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : uint2(0, 0); + return NvWaveMultiPrefixInclusiveAdd(val, mask); +} + +uint4 NvWaveMultiPrefixInclusiveAdd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val + temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveAdd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0); + return NvWaveMultiPrefixInclusiveAdd(val, mask); +} + +// MultiPrefix extensions for Bitand +uint NvWaveMultiPrefixInclusiveAnd(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val & temp; + nextLane = newLane; + } + } + return val; +} + +uint NvWaveMultiPrefixExclusiveAnd(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : ~0; + return NvWaveMultiPrefixInclusiveAnd(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveAnd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val & temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveAnd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint2(~0, ~0); + return NvWaveMultiPrefixInclusiveAnd(val, mask); +} + + +uint4 NvWaveMultiPrefixInclusiveAnd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val & temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveAnd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : uint4(~0, ~0, ~0, ~0); + return NvWaveMultiPrefixInclusiveAnd(val, mask); +} + + +// MultiPrefix extensions for BitOr +uint NvWaveMultiPrefixInclusiveOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val | temp; + nextLane = newLane; + } + } + return val; +} + +uint NvWaveMultiPrefixExclusiveOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : 0; + return NvWaveMultiPrefixInclusiveOr(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val | temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : uint2(0, 0); + return NvWaveMultiPrefixInclusiveOr(val, mask); +} + + +uint4 NvWaveMultiPrefixInclusiveOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val | temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0); + return NvWaveMultiPrefixInclusiveOr(val, mask); +} + + +// MultiPrefix extensions for BitXOr +uint NvWaveMultiPrefixInclusiveXOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val ^ temp; + nextLane = newLane; + } + } + return val; +} + +uint NvWaveMultiPrefixExclusiveXOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : 0; + return NvWaveMultiPrefixInclusiveXOr(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveXOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val ^ temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveXOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint2(0, 0); + return NvWaveMultiPrefixInclusiveXOr(val, mask); +} + + +uint4 NvWaveMultiPrefixInclusiveXOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val ^ temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveXOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : uint4(0, 0, 0, 0); + return NvWaveMultiPrefixInclusiveXOr(val, mask); +} + + +//----------------------------------------------------------------------------// +//------------------------- DXR Micro-map Extension --------------------------// +//----------------------------------------------------------------------------// + +float3x3 NvRtTriangleObjectPositions() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_TRIANGLE_OBJECT_POSITIONS; + + float3x3 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float3x3 NvRtMicroTriangleObjectPositions() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_TRIANGLE_OBJECT_POSITIONS; + + float3x3 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float3x2 NvRtMicroTriangleBarycentrics() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_TRIANGLE_BARYCENTRICS; + + float3x2 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = 
asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +bool NvRtIsMicroTriangleHit() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_IS_MICRO_TRIANGLE_HIT; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +bool NvRtIsBackFacing() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_IS_BACK_FACING; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 5) + +float3 NvRtMicroVertexObjectPosition(RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, uint GeometryIndex, uint PrimitiveIndex, uint2 UV) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_VERTEX_OBJECT_POSITION; + g_NvidiaExt[index].src0u.x = InstanceIndex; + g_NvidiaExt[index].src0u.y = GeometryIndex; + g_NvidiaExt[index].src0u.z = PrimitiveIndex; + g_NvidiaExt[index].src0u.w = UV.x; + g_NvidiaExt[index].src1u.x = UV.y; + uint handle = g_NvidiaExt.IncrementCounter(); + float3 ret; + ret.x = asfloat(g_NvidiaExt.IncrementCounter()); + ret.y = asfloat(g_NvidiaExt.IncrementCounter()); + ret.z = asfloat(g_NvidiaExt.IncrementCounter()); + + RayQuery<0> rq; + rq.TraceRayInline(AccelerationStructure, 0, handle, (RayDesc)0); + + return ret; +} + +float2 NvRtMicroVertexBarycentrics(RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, uint GeometryIndex, uint PrimitiveIndex, uint2 UV) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_VERTEX_BARYCENTRICS; + g_NvidiaExt[index].src0u.x = InstanceIndex; + g_NvidiaExt[index].src0u.y = GeometryIndex; + 
g_NvidiaExt[index].src0u.z = PrimitiveIndex; + g_NvidiaExt[index].src0u.w = UV.x; + g_NvidiaExt[index].src1u.x = UV.y; + uint handle = g_NvidiaExt.IncrementCounter(); + float2 ret; + ret.x = asfloat(g_NvidiaExt.IncrementCounter()); + ret.y = asfloat(g_NvidiaExt.IncrementCounter()); + + RayQuery<0> rq; + rq.TraceRayInline(AccelerationStructure, 0, handle, (RayDesc)0); + + return ret; +} + +#endif + +//----------------------------------------------------------------------------// +//--------------------- DXR Cluster Geometry Extension -----------------------// +//----------------------------------------------------------------------------// + +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 3) + +uint NvRtGetClusterID() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_GET_CLUSTER_ID; + return g_NvidiaExt.IncrementCounter(); +} + +#endif + +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 5) + +#define NvRtGetCandidateClusterID(rq) __NvRtGetCandidateClusterID(rq.RayFlags()) + +#define NvRtGetCommittedClusterID(rq) __NvRtGetCommittedClusterID(rq.RayFlags()) + +#define NvRtCandidateTriangleObjectPositions(rq) __NvRtCandidateTriangleObjectPositions(rq.RayFlags()) + +#define NvRtCommittedTriangleObjectPositions(rq) __NvRtCommittedTriangleObjectPositions(rq.RayFlags()) + +#endif + +//----------------------------------------------------------------------------// +//--------------------- DXR Linear Swept Sphere Extension --------------------// +//----------------------------------------------------------------------------// + +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 3) + +float4 NvRtSphereObjectPositionAndRadius() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_SPHERE_OBJECT_POSITION_AND_RADIUS; + + float4 ret; + ret[0] = 
asfloat(g_NvidiaExt.IncrementCounter()); + ret[1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float2x4 NvRtLssObjectPositionsAndRadii() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_LSS_OBJECT_POSITIONS_AND_RADII; + + float2x4 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][3] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +bool NvRtIsSphereHit() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_IS_SPHERE_HIT; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +bool NvRtIsLssHit() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_IS_LSS_HIT; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +#endif + +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 5) + +#define NvRtCandidateIsNonOpaqueSphere(rq) __NvRtCandidateIsNonOpaqueSphere(rq.RayFlags()) + +#define NvRtCandidateIsNonOpaqueLss(rq) __NvRtCandidateIsNonOpaqueLss(rq.RayFlags()) + +#define NvRtCandidateLssHitParameter(rq) __NvRtCandidateLssHitParameter(rq.RayFlags()) + +#define NvRtCandidateSphereObjectPositionAndRadius(rq) __NvRtCandidateSphereObjectPositionAndRadius(rq.RayFlags()) + +#define NvRtCandidateLssObjectPositionsAndRadii(rq) __NvRtCandidateLssObjectPositionsAndRadii(rq.RayFlags()) + +#define NvRtCandidateBuiltinPrimitiveRayT(rq) __NvRtCandidateBuiltinPrimitiveRayT(rq.RayFlags()) + +#define 
NvRtCommittedIsSphere(rq) __NvRtCommittedIsSphere(rq.RayFlags()) + +#define NvRtCommittedIsLss(rq) __NvRtCommittedIsLss(rq.RayFlags()) + +#define NvRtCommittedLssHitParameter(rq) __NvRtCommittedLssHitParameter(rq.RayFlags()) + +#define NvRtCommittedSphereObjectPositionAndRadius(rq) __NvRtCommittedSphereObjectPositionAndRadius(rq.RayFlags()) + +#define NvRtCommittedLssObjectPositionsAndRadii(rq) __NvRtCommittedLssObjectPositionsAndRadii(rq.RayFlags()) + +#define NvRtCommitNonOpaqueBuiltinPrimitiveHit(rq) __NvRtCommitNonOpaqueBuiltinPrimitiveHit(rq.RayFlags()) + +#endif + +//----------------------------------------------------------------------------// +//------------------------- DXR HitObject Extension --------------------------// +//----------------------------------------------------------------------------// + +// Support for templates in HLSL requires HLSL 2021+. When using dxc, +// use the -HV 2021 command line argument to enable these versions. +#if defined(__HLSL_VERSION) && (__HLSL_VERSION >= 2021) && !defined(NV_HITOBJECT_USE_MACRO_API) + +struct NvHitObject { + uint _handle; + + bool IsMiss() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_MISS; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsHit() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_HIT; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsNop() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_NOP; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + uint GetInstanceID() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_ID; + g_NvidiaExt[index].src0u.x = _handle; + 
return g_NvidiaExt.IncrementCounter(); + } + + uint GetInstanceIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetPrimitiveIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_PRIMITIVE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetGeometryIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_GEOMETRY_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetHitKind() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_HIT_KIND; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + RayDesc GetRayDesc() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_RAY_DESC; + g_NvidiaExt[index].src0u.x = _handle; + + uint tmin = g_NvidiaExt.IncrementCounter(); + uint tmax = g_NvidiaExt.IncrementCounter(); + uint rayOrgX = g_NvidiaExt.IncrementCounter(); + uint rayOrgY = g_NvidiaExt.IncrementCounter(); + uint rayOrgZ = g_NvidiaExt.IncrementCounter(); + uint rayDirX = g_NvidiaExt.IncrementCounter(); + uint rayDirY = g_NvidiaExt.IncrementCounter(); + uint rayDirZ = g_NvidiaExt.IncrementCounter(); + + RayDesc ray; + ray.TMin = asfloat(tmin); + ray.TMax = asfloat(tmax); + ray.Origin.x = asfloat(rayOrgX); + ray.Origin.y = asfloat(rayOrgY); + ray.Origin.z = asfloat(rayOrgZ); + ray.Direction.x = asfloat(rayDirX); + ray.Direction.y = asfloat(rayDirY); + ray.Direction.z = asfloat(rayDirZ); + + return ray; + } + + template + T GetAttributes() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = 
NV_EXTN_OP_HIT_OBJECT_GET_ATTRIBUTES; + g_NvidiaExt[index].src0u.x = _handle; + uint callHandle = g_NvidiaExt.IncrementCounter(); + + T attrs; + CallShader(callHandle, attrs); + return attrs; + } + + uint GetShaderTableIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SHADER_TABLE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes) + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_LOAD_LOCAL_ROOT_TABLE_CONSTANT; + g_NvidiaExt[index].src0u.x = _handle; + g_NvidiaExt[index].src0u.y = RootConstantOffsetInBytes; + return g_NvidiaExt.IncrementCounter(); + } + + float4 GetSphereObjectPositionAndRadius() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SPHERE_OBJECT_POSITION_AND_RADIUS; + g_NvidiaExt[index].src0u.x = _handle; + + float4 ret; + ret[0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; + } + + float2x4 GetLssObjectPositionsAndRadii() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_LSS_OBJECT_POSITIONS_AND_RADII; + g_NvidiaExt[index].src0u.x = _handle; + + float2x4 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][3] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; + } + + bool IsSphereHit() + { + uint index = 
g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_SPHERE_HIT; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsLssHit() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_LSS_HIT; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + uint GetClusterID() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_CLUSTER_ID; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + float3x3 GetTriangleObjectPositions() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_TRIANGLE_OBJECT_POSITIONS; + g_NvidiaExt[index].src0u.x = _handle; + + float3x3 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; + } +}; + +template +NvHitObject NvTraceRayHitObject( + RaytracingAccelerationStructure AccelerationStructure, + uint RayFlags, + uint InstanceInclusionMask, + uint RayContributionToHitGroupIndex, + uint MultiplierForGeometryContributionToHitGroupIndex, + uint MissShaderIndex, + RayDesc Ray, + inout T Payload) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_TRACE_RAY; + g_NvidiaExt[index].numOutputsForIncCounter = 2; + g_NvidiaExt[index].src0u.x = MissShaderIndex; + uint hitHandle = g_NvidiaExt.IncrementCounter(); + 
uint traceHandle = g_NvidiaExt.IncrementCounter(); + + TraceRay(AccelerationStructure, RayFlags, InstanceInclusionMask, RayContributionToHitGroupIndex, MultiplierForGeometryContributionToHitGroupIndex, traceHandle, Ray, Payload); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +template +NvHitObject NvMakeHit( + RaytracingAccelerationStructure AccelerationStructure, + uint InstanceIndex, + uint GeometryIndex, + uint PrimitiveIndex, + uint HitKind, + uint RayContributionToHitGroupIndex, + uint MultiplierForGeometryContributionToHitGroupIndex, + RayDesc Ray, + T Attributes) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT; + g_NvidiaExt[index].numOutputsForIncCounter = 2; + g_NvidiaExt[index].src0u.x = InstanceIndex; + g_NvidiaExt[index].src0u.y = GeometryIndex; + g_NvidiaExt[index].src0u.z = PrimitiveIndex; + g_NvidiaExt[index].src0u.w = HitKind; + g_NvidiaExt[index].src1u.x = RayContributionToHitGroupIndex; + g_NvidiaExt[index].src1u.y = MultiplierForGeometryContributionToHitGroupIndex; + uint hitHandle = g_NvidiaExt.IncrementCounter(); + uint traceHandle = g_NvidiaExt.IncrementCounter(); + + struct AttrWrapper { T Attrs; }; + AttrWrapper wrapper; + wrapper.Attrs = Attributes; + CallShader(traceHandle, wrapper); + + struct DummyPayload { int a; }; + DummyPayload payload; + TraceRay(AccelerationStructure, 0, 0, 0, 0, traceHandle, Ray, payload); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +template +NvHitObject NvMakeHitWithRecordIndex( + uint HitGroupRecordIndex, + RaytracingAccelerationStructure AccelerationStructure, + uint InstanceIndex, + uint GeometryIndex, + uint PrimitiveIndex, + uint HitKind, + RayDesc Ray, + T Attributes) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT_WITH_RECORD_INDEX; + g_NvidiaExt[index].numOutputsForIncCounter = 2; + g_NvidiaExt[index].src0u.x = 
InstanceIndex; + g_NvidiaExt[index].src0u.y = GeometryIndex; + g_NvidiaExt[index].src0u.z = PrimitiveIndex; + g_NvidiaExt[index].src0u.w = HitKind; + g_NvidiaExt[index].src1u.x = HitGroupRecordIndex; + uint hitHandle = g_NvidiaExt.IncrementCounter(); + uint traceHandle = g_NvidiaExt.IncrementCounter(); + + struct AttrWrapper { T Attrs; }; + AttrWrapper wrapper; + wrapper.Attrs = Attributes; + CallShader(traceHandle, wrapper); + + struct DummyPayload { int a; }; + DummyPayload payload; + TraceRay(AccelerationStructure, 0, 0, 0, 0, traceHandle, Ray, payload); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +NvHitObject NvMakeMiss( + uint MissShaderIndex, + RayDesc Ray) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_MISS; + g_NvidiaExt[index].src0u.x = MissShaderIndex; + g_NvidiaExt[index].src0u.y = asuint(Ray.TMin); + g_NvidiaExt[index].src0u.z = asuint(Ray.TMax); + g_NvidiaExt[index].src1u.x = asuint(Ray.Origin.x); + g_NvidiaExt[index].src1u.y = asuint(Ray.Origin.y); + g_NvidiaExt[index].src1u.z = asuint(Ray.Origin.z); + g_NvidiaExt[index].src2u.x = asuint(Ray.Direction.x); + g_NvidiaExt[index].src2u.y = asuint(Ray.Direction.y); + g_NvidiaExt[index].src2u.z = asuint(Ray.Direction.z); + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +NvHitObject NvMakeNop() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_NOP; + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +void NvReorderThread(uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 0; + g_NvidiaExt[index].src0u.y = 0; + g_NvidiaExt[index].src0u.z = CoherenceHint; + 
g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj, uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 1; + g_NvidiaExt[index].src0u.y = HitObj._handle; + g_NvidiaExt[index].src0u.z = CoherenceHint; + g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj) +{ + NvReorderThread(HitObj, 0, 0); +} + +template +void NvInvokeHitObject( + RaytracingAccelerationStructure AccelerationStructure, + NvHitObject HitObj, + inout T Payload) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_INVOKE; + g_NvidiaExt[index].src0u.x = HitObj._handle; + uint handle = g_NvidiaExt.IncrementCounter(); + + TraceRay(AccelerationStructure, 0, 0, 0, 0, handle, (RayDesc)0, Payload); +} + +// Macro-based version of the HitObject API. Use this when HLSL 2021 is not available. +// Enable by specifying #define NV_HITOBJECT_USE_MACRO_API before including this header. 
+#elif defined(NV_HITOBJECT_USE_MACRO_API) + +struct NvHitObject { + uint _handle; + + bool IsMiss() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_MISS; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsHit() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_HIT; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsNop() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_NOP; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + uint GetInstanceID() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_ID; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetInstanceIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetPrimitiveIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_PRIMITIVE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetGeometryIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_GEOMETRY_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetHitKind() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_HIT_KIND; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + RayDesc GetRayDesc() + { + uint index 
= g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_RAY_DESC; + g_NvidiaExt[index].src0u.x = _handle; + + uint tmin = g_NvidiaExt.IncrementCounter(); + uint tmax = g_NvidiaExt.IncrementCounter(); + uint rayOrgX = g_NvidiaExt.IncrementCounter(); + uint rayOrgY = g_NvidiaExt.IncrementCounter(); + uint rayOrgZ = g_NvidiaExt.IncrementCounter(); + uint rayDirX = g_NvidiaExt.IncrementCounter(); + uint rayDirY = g_NvidiaExt.IncrementCounter(); + uint rayDirZ = g_NvidiaExt.IncrementCounter(); + + RayDesc ray; + ray.TMin = asfloat(tmin); + ray.TMax = asfloat(tmax); + ray.Origin.x = asfloat(rayOrgX); + ray.Origin.y = asfloat(rayOrgY); + ray.Origin.z = asfloat(rayOrgZ); + ray.Direction.x = asfloat(rayDirX); + ray.Direction.y = asfloat(rayDirY); + ray.Direction.z = asfloat(rayDirZ); + + return ray; + } + + uint GetShaderTableIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SHADER_TABLE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes) + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_LOAD_LOCAL_ROOT_TABLE_CONSTANT; + g_NvidiaExt[index].src0u.x = _handle; + g_NvidiaExt[index].src0u.y = RootConstantOffsetInBytes; + return g_NvidiaExt.IncrementCounter(); + } + + float4 GetSphereObjectPositionAndRadius() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SPHERE_OBJECT_POSITION_AND_RADIUS; + g_NvidiaExt[index].src0u.x = _handle; + + float4 ret; + ret[0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; + } + + float2x4 GetLssObjectPositionsAndRadii() + { + uint index = g_NvidiaExt.IncrementCounter(); + 
g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_LSS_OBJECT_POSITIONS_AND_RADII; + g_NvidiaExt[index].src0u.x = _handle; + + float2x4 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][3] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; + } + + bool IsSphereHit() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_SPHERE_HIT; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsLssHit() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_LSS_HIT; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + uint GetClusterID() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_CLUSTER_ID; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + float3x3 GetTriangleObjectPositions() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_TRIANGLE_OBJECT_POSITIONS; + g_NvidiaExt[index].src0u.x = _handle; + + float3x3 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter()); + 
ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; + } +}; + +#define NvTraceRayHitObject(AccelerationStructure,RayFlags,InstanceInclusionMask,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToHitGroupIndex,MissShaderIndex,Ray,Payload,ResultHitObj) \ +do { \ + uint _rayFlags = RayFlags; \ + uint _instanceInclusionMask = InstanceInclusionMask; \ + uint _rayContributionToHitGroupIndex = RayContributionToHitGroupIndex; \ + uint _multiplierForGeometryContributionToHitGroupIndex = MultiplierForGeometryContributionToHitGroupIndex; \ + uint _missShaderIndex = MissShaderIndex; \ + RayDesc _ray = Ray; \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_TRACE_RAY; \ + g_NvidiaExt[_index].numOutputsForIncCounter = 2; \ + g_NvidiaExt[_index].src0u.x = _missShaderIndex; \ + uint _hitHandle = g_NvidiaExt.IncrementCounter(); \ + uint _traceHandle = g_NvidiaExt.IncrementCounter(); \ + TraceRay(AccelerationStructure, _rayFlags, _instanceInclusionMask, _rayContributionToHitGroupIndex, _multiplierForGeometryContributionToHitGroupIndex, _traceHandle, _ray, Payload); \ + ResultHitObj._handle = _hitHandle; \ +} while(0) + +struct NvHitObjectMacroDummyPayloadType { int a; }; + +#define NvMakeHit(AccelerationStructure,InstanceIndex,GeometryIndex,PrimitiveIndex,HitKind,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToHitGroupIndex,Ray,Attributes,ResultHitObj) \ +do { \ + uint _instanceIndex = InstanceIndex; \ + uint _geometryIndex = GeometryIndex; \ + uint _primitiveIndex = PrimitiveIndex; \ + uint _hitKind = HitKind; \ + uint _rayContributionToHitGroupIndex = RayContributionToHitGroupIndex; \ + uint _multiplierForGeometryContributionToHitGroupIndex = MultiplierForGeometryContributionToHitGroupIndex; \ + RayDesc _ray = Ray; \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT; \ + g_NvidiaExt[_index].numOutputsForIncCounter = 2; 
\ + g_NvidiaExt[_index].src0u.x = _instanceIndex; \ + g_NvidiaExt[_index].src0u.y = _geometryIndex; \ + g_NvidiaExt[_index].src0u.z = _primitiveIndex; \ + g_NvidiaExt[_index].src0u.w = _hitKind; \ + g_NvidiaExt[_index].src1u.x = _rayContributionToHitGroupIndex; \ + g_NvidiaExt[_index].src1u.y = _multiplierForGeometryContributionToHitGroupIndex; \ + uint _hitHandle = g_NvidiaExt.IncrementCounter(); \ + uint _traceHandle = g_NvidiaExt.IncrementCounter(); \ + CallShader(_traceHandle, Attributes); \ + NvHitObjectMacroDummyPayloadType _payload; \ + TraceRay(AccelerationStructure, 0, 0, 0, 0, _traceHandle, _ray, _payload); \ + ResultHitObj._handle = _hitHandle; \ +} while(0) + +#define NvMakeHitWithRecordIndex(HitGroupRecordIndex,AccelerationStructure,InstanceIndex,GeometryIndex,PrimitiveIndex,HitKind,Ray,Attributes,ResultHitObj) \ +do { \ + uint _hitGroupRecordIndex = HitGroupRecordIndex; \ + uint _instanceIndex = InstanceIndex; \ + uint _geometryIndex = GeometryIndex; \ + uint _primitiveIndex = PrimitiveIndex; \ + uint _hitKind = HitKind; \ + RayDesc _ray = Ray; \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT_WITH_RECORD_INDEX; \ + g_NvidiaExt[_index].numOutputsForIncCounter = 2; \ + g_NvidiaExt[_index].src0u.x = _instanceIndex; \ + g_NvidiaExt[_index].src0u.y = _geometryIndex; \ + g_NvidiaExt[_index].src0u.z = _primitiveIndex; \ + g_NvidiaExt[_index].src0u.w = _hitKind; \ + g_NvidiaExt[_index].src1u.x = _hitGroupRecordIndex; \ + uint _hitHandle = g_NvidiaExt.IncrementCounter(); \ + uint _traceHandle = g_NvidiaExt.IncrementCounter(); \ + CallShader(_traceHandle, Attributes); \ + NvHitObjectMacroDummyPayloadType _payload; \ + TraceRay(AccelerationStructure, 0, 0, 0, 0, _traceHandle, _ray, _payload); \ + ResultHitObj._handle = _hitHandle; \ +} while(0) + +NvHitObject NvMakeMiss( + uint MissShaderIndex, + RayDesc Ray) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = 
NV_EXTN_OP_HIT_OBJECT_MAKE_MISS; + g_NvidiaExt[index].src0u.x = MissShaderIndex; + g_NvidiaExt[index].src0u.y = asuint(Ray.TMin); + g_NvidiaExt[index].src0u.z = asuint(Ray.TMax); + g_NvidiaExt[index].src1u.x = asuint(Ray.Origin.x); + g_NvidiaExt[index].src1u.y = asuint(Ray.Origin.y); + g_NvidiaExt[index].src1u.z = asuint(Ray.Origin.z); + g_NvidiaExt[index].src2u.x = asuint(Ray.Direction.x); + g_NvidiaExt[index].src2u.y = asuint(Ray.Direction.y); + g_NvidiaExt[index].src2u.z = asuint(Ray.Direction.z); + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +NvHitObject NvMakeNop() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_NOP; + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +#define NvGetAttributesFromHitObject(HitObj,ResultAttributes) \ +do { \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_ATTRIBUTES; \ + g_NvidiaExt[_index].src0u.x = HitObj._handle; \ + uint _callHandle = g_NvidiaExt.IncrementCounter(); \ + CallShader(_callHandle, ResultAttributes); \ +} while(0) + +void NvReorderThread(uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 0; + g_NvidiaExt[index].src0u.y = 0; + g_NvidiaExt[index].src0u.z = CoherenceHint; + g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj, uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 1; + g_NvidiaExt[index].src0u.y = HitObj._handle; + g_NvidiaExt[index].src0u.z = CoherenceHint; + 
g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj) +{ + NvReorderThread(HitObj, 0, 0); +} + +#define NvInvokeHitObject(AccelerationStructure,HitObj,Payload) \ +do { \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_INVOKE; \ + g_NvidiaExt[_index].src0u.x = HitObj._handle; \ + uint _handle = g_NvidiaExt.IncrementCounter(); \ + TraceRay(AccelerationStructure, 0, 0, 0, 0, _handle, (RayDesc)0, Payload); \ +} while(0) + +#endif diff --git a/Source/ThirdParty/nvapi/nvHLSLExtnsInternal.h b/Source/ThirdParty/nvapi/nvHLSLExtnsInternal.h new file mode 100644 index 000000000..4c1111a10 --- /dev/null +++ b/Source/ThirdParty/nvapi/nvHLSLExtnsInternal.h @@ -0,0 +1,955 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. *| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ +////////////////////////// NVIDIA SHADER EXTENSIONS ///////////////// +// internal functions +// Functions in this file are not expected to be called by apps directly + +#include "nvShaderExtnEnums.h" + +struct NvShaderExtnStruct +{ + uint opcode; // opcode + uint rid; // resource ID + uint sid; // sampler ID + + uint4 dst1u; // destination operand 1 (for instructions that need extra destination operands) + uint4 src3u; // source operand 3 + uint4 src4u; // source operand 4 + uint4 src5u; // source operand 5 + + uint4 src0u; // uint source operand 0 + uint4 src1u; // uint source operand 0 + uint4 src2u; // uint source operand 0 + uint4 dst0u; // uint destination operand + + uint markUavRef; // the next store to UAV is fake and is used only to identify the uav slot + uint numOutputsForIncCounter; // Used for output to IncrementCounter + float padding1[27]; // struct size: 256 bytes +}; + +// RW structured buffer for Nvidia shader extensions + +// Application needs to define NV_SHADER_EXTN_SLOT as a unused slot, which should be +// set using NvAPI_D3D11_SetNvShaderExtnSlot() call before creating the first shader that +// uses nvidia shader extensions. E.g before including this file in shader define it as: +// #define NV_SHADER_EXTN_SLOT u7 + +// For SM5.1, application needs to define NV_SHADER_EXTN_REGISTER_SPACE as register space +// E.g. 
before including this file in shader define it as: +// #define NV_SHADER_EXTN_REGISTER_SPACE space2 + +// Note that other operations to this UAV will be ignored so application +// should bind a null resource + +#ifdef NV_SHADER_EXTN_REGISTER_SPACE +RWStructuredBuffer g_NvidiaExt : register( NV_SHADER_EXTN_SLOT, NV_SHADER_EXTN_REGISTER_SPACE ); +#else +RWStructuredBuffer g_NvidiaExt : register( NV_SHADER_EXTN_SLOT ); +#endif + +//----------------------------------------------------------------------------// +// the exposed SHFL instructions accept a mask parameter in src2 +// To compute lane mask from width of segment: +// minLaneID : currentLaneId & src2[12:8] +// maxLaneID : minLaneId | (src2[4:0] & ~src2[12:8]) +// where [minLaneId, maxLaneId] defines the segment where currentLaneId belongs +// we always set src2[4:0] to 11111 (0x1F), and set src2[12:8] as (32 - width) +int __NvGetShflMaskFromWidth(uint width) +{ + return ((NV_WARP_SIZE - width) << 8) | 0x1F; +} + +//----------------------------------------------------------------------------// + +void __NvReferenceUAVForOp(RWByteAddressBuffer uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav.Store(index, 0); +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = float2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = float2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = float2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = float4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = 
g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = float4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = float4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = 0.0f; +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = 0.0f; +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = 0.0f; +} + + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = uint2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = uint2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = uint2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = uint4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = uint4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = uint4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + 
g_NvidiaExt[index].markUavRef = 1; + uav[index] = 0; +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = 0; +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = 0; +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = int2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = int2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = int2(0,0); +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = int4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = int4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = int4(0,0,0,0); +} + +void __NvReferenceUAVForOp(RWTexture1D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[index] = 0; +} + +void __NvReferenceUAVForOp(RWTexture2D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint2(index,index)] = 0; +} + +void __NvReferenceUAVForOp(RWTexture3D uav) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].markUavRef = 1; + uav[uint3(index,index,index)] = 0; +} + 
+//----------------------------------------------------------------------------// +// ATOMIC op sub-opcodes +#define NV_EXTN_ATOM_AND 0 +#define NV_EXTN_ATOM_OR 1 +#define NV_EXTN_ATOM_XOR 2 + +#define NV_EXTN_ATOM_ADD 3 +#define NV_EXTN_ATOM_MAX 6 +#define NV_EXTN_ATOM_MIN 7 + +#define NV_EXTN_ATOM_SWAP 8 +#define NV_EXTN_ATOM_CAS 9 + +//----------------------------------------------------------------------------// + +// performs Atomic operation on two consecutive fp16 values in the given UAV +// the uint paramater 'fp16x2Val' is treated as two fp16 values +// the passed sub-opcode 'op' should be an immediate constant +// byteAddress must be multiple of 4 +// the returned value are the two fp16 values packed into a single uint +uint __NvAtomicOpFP16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = byteAddress; + g_NvidiaExt[index].src1u.x = fp16x2Val; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + + return g_NvidiaExt[index].dst0u.x; +} + +//----------------------------------------------------------------------------// + +// performs Atomic operation on a R16G16_FLOAT UAV at the given address +// the uint paramater 'fp16x2Val' is treated as two fp16 values +// the passed sub-opcode 'op' should be an immediate constant +// the returned value are the two fp16 values (.x and .y components) packed into a single uint +// Warning: Behaviour of these set of functions is undefined if the UAV is not +// of R16G16_FLOAT format (might result in app crash or TDR) + +uint __NvAtomicOpFP16x2(RWTexture1D uav, uint address, uint fp16x2Val, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = address; + g_NvidiaExt[index].src1u.x = fp16x2Val; + g_NvidiaExt[index].src2u.x = atomicOpType; + 
g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + + return g_NvidiaExt[index].dst0u.x; +} + +uint __NvAtomicOpFP16x2(RWTexture2D uav, uint2 address, uint fp16x2Val, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xy = address; + g_NvidiaExt[index].src1u.x = fp16x2Val; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + + return g_NvidiaExt[index].dst0u.x; +} + +uint __NvAtomicOpFP16x2(RWTexture3D uav, uint3 address, uint fp16x2Val, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xyz = address; + g_NvidiaExt[index].src1u.x = fp16x2Val; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + + return g_NvidiaExt[index].dst0u.x; +} + +//----------------------------------------------------------------------------// + +// performs Atomic operation on a R16G16B16A16_FLOAT UAV at the given address +// the uint2 paramater 'fp16x2Val' is treated as four fp16 values +// i.e, fp16x2Val.x = uav.xy and fp16x2Val.y = uav.yz +// the passed sub-opcode 'op' should be an immediate constant +// the returned value are the four fp16 values (.xyzw components) packed into uint2 +// Warning: Behaviour of these set of functions is undefined if the UAV is not +// of R16G16B16A16_FLOAT format (might result in app crash or TDR) + +uint2 __NvAtomicOpFP16x2(RWTexture1D uav, uint address, uint2 fp16x2Val, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + + // break it down into two fp16x2 atomic ops + uint2 retVal; + + // first op has x-coordinate = x * 2 + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = address * 2; + g_NvidiaExt[index].src1u.x = fp16x2Val.x; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + retVal.x = g_NvidiaExt[index].dst0u.x; + + // second op 
has x-coordinate = x * 2 + 1 + index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = address * 2 + 1; + g_NvidiaExt[index].src1u.x = fp16x2Val.y; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + retVal.y = g_NvidiaExt[index].dst0u.x; + + return retVal; +} + +uint2 __NvAtomicOpFP16x2(RWTexture2D uav, uint2 address, uint2 fp16x2Val, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + + // break it down into two fp16x2 atomic ops + uint2 retVal; + + // first op has x-coordinate = x * 2 + uint2 addressTemp = uint2(address.x * 2, address.y); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xy = addressTemp; + g_NvidiaExt[index].src1u.x = fp16x2Val.x; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + retVal.x = g_NvidiaExt[index].dst0u.x; + + // second op has x-coordinate = x * 2 + 1 + addressTemp.x++; + index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xy = addressTemp; + g_NvidiaExt[index].src1u.x = fp16x2Val.y; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + retVal.y = g_NvidiaExt[index].dst0u.x; + + return retVal; +} + +uint2 __NvAtomicOpFP16x2(RWTexture3D uav, uint3 address, uint2 fp16x2Val, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + + // break it down into two fp16x2 atomic ops + uint2 retVal; + + // first op has x-coordinate = x * 2 + uint3 addressTemp = uint3(address.x * 2, address.y, address.z); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xyz = addressTemp; + g_NvidiaExt[index].src1u.x = fp16x2Val.x; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + retVal.x = g_NvidiaExt[index].dst0u.x; + + // second op has x-coordinate = x * 2 + 1 + addressTemp.x++; + index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xyz = addressTemp; + 
g_NvidiaExt[index].src1u.x = fp16x2Val.y; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC; + retVal.y = g_NvidiaExt[index].dst0u.x; + + return retVal; +} + +uint __fp32x2Tofp16x2(float2 val) +{ + return (f32tof16(val.y)<<16) | f32tof16(val.x) ; +} + +uint2 __fp32x4Tofp16x4(float4 val) +{ + return uint2( (f32tof16(val.y)<<16) | f32tof16(val.x), (f32tof16(val.w)<<16) | f32tof16(val.z) ) ; +} + +//----------------------------------------------------------------------------// + +// FP32 Atomic functions +// performs Atomic operation treating the uav as float (fp32) values +// the passed sub-opcode 'op' should be an immediate constant +// byteAddress must be multiple of 4 +float __NvAtomicAddFP32(RWByteAddressBuffer uav, uint byteAddress, float val) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = byteAddress; + g_NvidiaExt[index].src1u.x = asuint(val); // passing as uint to make it more convinient for the driver to translate + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC; + + return asfloat(g_NvidiaExt[index].dst0u.x); +} + +float __NvAtomicAddFP32(RWTexture1D uav, uint address, float val) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = address; + g_NvidiaExt[index].src1u.x = asuint(val); + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC; + + return asfloat(g_NvidiaExt[index].dst0u.x); +} + +float __NvAtomicAddFP32(RWTexture2D uav, uint2 address, float val) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xy = address; + g_NvidiaExt[index].src1u.x = asuint(val); + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC; + + return asfloat(g_NvidiaExt[index].dst0u.x); +} + +float 
__NvAtomicAddFP32(RWTexture3D uav, uint3 address, float val) +{ + __NvReferenceUAVForOp(uav); + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xyz = address; + g_NvidiaExt[index].src1u.x = asuint(val); + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD; + g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC; + + return asfloat(g_NvidiaExt[index].dst0u.x); +} + +//----------------------------------------------------------------------------// + +// UINT64 Atmoic Functions +// The functions below performs atomic operation on the given UAV treating the value as uint64 +// byteAddress must be multiple of 8 +// The returned value is the value present in memory location before the atomic operation +// uint2 vector type is used to represent a single uint64 value with the x component containing the low 32 bits and y component the high 32 bits. + +uint2 __NvAtomicCompareExchangeUINT64(RWByteAddressBuffer uav, uint byteAddress, uint2 compareValue, uint2 value) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = byteAddress; + g_NvidiaExt[index].src1u.xy = compareValue; + g_NvidiaExt[index].src1u.zw = value; + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + +uint2 __NvAtomicOpUINT64(RWByteAddressBuffer uav, uint byteAddress, uint2 value, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = byteAddress; + g_NvidiaExt[index].src1u.xy = value; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + +uint2 __NvAtomicCompareExchangeUINT64(RWTexture1D uav, uint address, uint2 compareValue, uint2 value) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = address; + 
g_NvidiaExt[index].src1u.xy = compareValue; + g_NvidiaExt[index].src1u.zw = value; + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + +uint2 __NvAtomicOpUINT64(RWTexture1D uav, uint address, uint2 value, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = address; + g_NvidiaExt[index].src1u.xy = value; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + +uint2 __NvAtomicCompareExchangeUINT64(RWTexture2D uav, uint2 address, uint2 compareValue, uint2 value) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xy = address; + g_NvidiaExt[index].src1u.xy = compareValue; + g_NvidiaExt[index].src1u.zw = value; + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + +uint2 __NvAtomicOpUINT64(RWTexture2D uav, uint2 address, uint2 value, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xy = address; + g_NvidiaExt[index].src1u.xy = value; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + +uint2 __NvAtomicCompareExchangeUINT64(RWTexture3D uav, uint3 address, uint2 compareValue, uint2 value) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xyz = address; + g_NvidiaExt[index].src1u.xy = compareValue; + g_NvidiaExt[index].src1u.zw = value; + g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + +uint2 __NvAtomicOpUINT64(RWTexture3D uav, 
uint3 address, uint2 value, uint atomicOpType) +{ + __NvReferenceUAVForOp(uav); + + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.xyz = address; + g_NvidiaExt[index].src1u.xy = value; + g_NvidiaExt[index].src2u.x = atomicOpType; + g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC; + + return g_NvidiaExt[index].dst0u.xy; +} + + +uint4 __NvFootprint(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, int3 offset = int3(0, 0, 0)) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = texIndex; + g_NvidiaExt[index].src0u.y = smpIndex; + g_NvidiaExt[index].src1u.xyz = asuint(location); + g_NvidiaExt[index].src1u.w = gran; + g_NvidiaExt[index].src3u.x = texSpace; + g_NvidiaExt[index].src3u.y = smpSpace; + g_NvidiaExt[index].src3u.z = texType; + g_NvidiaExt[index].src3u.w = footprintmode; + g_NvidiaExt[index].src4u.xyz = asuint(offset); + + g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT; + g_NvidiaExt[index].numOutputsForIncCounter = 4; + + // result is returned as the return value of IncrementCounter on fake UAV slot + uint4 op; + op.x = g_NvidiaExt.IncrementCounter(); + op.y = g_NvidiaExt.IncrementCounter(); + op.z = g_NvidiaExt.IncrementCounter(); + op.w = g_NvidiaExt.IncrementCounter(); + return op; +} + +uint4 __NvFootprintBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, float bias, int3 offset = int3(0, 0, 0)) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = texIndex; + g_NvidiaExt[index].src0u.y = smpIndex; + g_NvidiaExt[index].src1u.xyz = asuint(location); + g_NvidiaExt[index].src1u.w = gran; + g_NvidiaExt[index].src2u.x = asuint(bias); + g_NvidiaExt[index].src3u.x = texSpace; + g_NvidiaExt[index].src3u.y = smpSpace; + g_NvidiaExt[index].src3u.z = texType; + g_NvidiaExt[index].src3u.w = footprintmode; + g_NvidiaExt[index].src4u.xyz 
= asuint(offset); + + g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT_BIAS; + g_NvidiaExt[index].numOutputsForIncCounter = 4; + + // result is returned as the return value of IncrementCounter on fake UAV slot + uint4 op; + op.x = g_NvidiaExt.IncrementCounter(); + op.y = g_NvidiaExt.IncrementCounter(); + op.z = g_NvidiaExt.IncrementCounter(); + op.w = g_NvidiaExt.IncrementCounter(); + return op; +} + +uint4 __NvFootprintLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, float lodLevel, int3 offset = int3(0, 0, 0)) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = texIndex; + g_NvidiaExt[index].src0u.y = smpIndex; + g_NvidiaExt[index].src1u.xyz = asuint(location); + g_NvidiaExt[index].src1u.w = gran; + g_NvidiaExt[index].src2u.x = asuint(lodLevel); + g_NvidiaExt[index].src3u.x = texSpace; + g_NvidiaExt[index].src3u.y = smpSpace; + g_NvidiaExt[index].src3u.z = texType; + g_NvidiaExt[index].src3u.w = footprintmode; + g_NvidiaExt[index].src4u.xyz = asuint(offset); + + g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT_LEVEL; + g_NvidiaExt[index].numOutputsForIncCounter = 4; + + // result is returned as the return value of IncrementCounter on fake UAV slot + uint4 op; + op.x = g_NvidiaExt.IncrementCounter(); + op.y = g_NvidiaExt.IncrementCounter(); + op.z = g_NvidiaExt.IncrementCounter(); + op.w = g_NvidiaExt.IncrementCounter(); + return op; +} + +uint4 __NvFootprintGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0)) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = texIndex; + g_NvidiaExt[index].src0u.y = smpIndex; + g_NvidiaExt[index].src1u.xyz = asuint(location); + g_NvidiaExt[index].src1u.w = gran; + g_NvidiaExt[index].src2u.xyz = asuint(ddx); + g_NvidiaExt[index].src5u.xyz = asuint(ddy); + 
g_NvidiaExt[index].src3u.x = texSpace; + g_NvidiaExt[index].src3u.y = smpSpace; + g_NvidiaExt[index].src3u.z = texType; + g_NvidiaExt[index].src3u.w = footprintmode; + g_NvidiaExt[index].src4u.xyz = asuint(offset); + g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT_GRAD; + g_NvidiaExt[index].numOutputsForIncCounter = 4; + + // result is returned as the return value of IncrementCounter on fake UAV slot + uint4 op; + op.x = g_NvidiaExt.IncrementCounter(); + op.y = g_NvidiaExt.IncrementCounter(); + op.z = g_NvidiaExt.IncrementCounter(); + op.w = g_NvidiaExt.IncrementCounter(); + return op; +} + +// returns value of special register - specify subopcode from any of NV_SPECIALOP_* specified in nvShaderExtnEnums.h - other opcodes undefined behavior +uint __NvGetSpecial(uint subOpCode) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_SPECIAL; + g_NvidiaExt[index].src0u.x = subOpCode; + return g_NvidiaExt.IncrementCounter(); +} + +// predicate is returned in laneValid indicating if srcLane is in range and val from specified lane is returned. 
+int __NvShflGeneric(int val, uint srcLane, uint maskClampVal, out uint laneValid) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = srcLane; // source lane + g_NvidiaExt[index].src0u.z = maskClampVal; + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_GENERIC; + g_NvidiaExt[index].numOutputsForIncCounter = 2; + + laneValid = asuint(g_NvidiaExt.IncrementCounter()); + return g_NvidiaExt.IncrementCounter(); +} + +//----------------------------------------------------------------------------// + +// DXR RayQuery functions + +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 5) + +uint __NvRtGetCandidateClusterID(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_GET_CANDIDATE_CLUSTER_ID; + g_NvidiaExt[index].src0u.x = rqFlags; + return g_NvidiaExt.IncrementCounter(); +} + +uint __NvRtGetCommittedClusterID(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_GET_COMMITTED_CLUSTER_ID; + g_NvidiaExt[index].src0u.x = rqFlags; + return g_NvidiaExt.IncrementCounter(); +} + +float3x3 __NvRtCandidateTriangleObjectPositions(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_CANDIDATE_TRIANGLE_OBJECT_POSITIONS; + g_NvidiaExt[index].src0u.x = rqFlags; + + float3x3 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float3x3 
__NvRtCommittedTriangleObjectPositions(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_COMMITTED_TRIANGLE_OBJECT_POSITIONS; + g_NvidiaExt[index].src0u.x = rqFlags; + + float3x3 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +bool __NvRtCandidateIsNonOpaqueSphere(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_CANDIDATE_IS_NONOPAQUE_SPHERE; + g_NvidiaExt[index].src0u.x = rqFlags; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +bool __NvRtCandidateIsNonOpaqueLss(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_CANDIDATE_IS_NONOPAQUE_LSS; + g_NvidiaExt[index].src0u.x = rqFlags; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +float __NvRtCandidateLssHitParameter(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_CANDIDATE_LSS_HIT_PARAMETER; + g_NvidiaExt[index].src0u.x = rqFlags; + float ret = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float4 __NvRtCandidateSphereObjectPositionAndRadius(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_CANDIDATE_SPHERE_OBJECT_POSITION_AND_RADIUS; + g_NvidiaExt[index].src0u.x = rqFlags; + + float4 ret; + ret[0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2] = 
asfloat(g_NvidiaExt.IncrementCounter()); + ret[3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float2x4 __NvRtCandidateLssObjectPositionsAndRadii(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_CANDIDATE_LSS_OBJECT_POSITIONS_AND_RADII; + g_NvidiaExt[index].src0u.x = rqFlags; + + float2x4 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][3] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float __NvRtCandidateBuiltinPrimitiveRayT(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_CANDIDATE_BUILTIN_PRIMITIVE_RAY_T; + g_NvidiaExt[index].src0u.x = rqFlags; + float ret = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +bool __NvRtCommittedIsSphere(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_COMMITTED_IS_SPHERE; + g_NvidiaExt[index].src0u.x = rqFlags; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +bool __NvRtCommittedIsLss(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_COMMITTED_IS_LSS; + g_NvidiaExt[index].src0u.x = rqFlags; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; +} + +float __NvRtCommittedLssHitParameter(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_COMMITTED_LSS_HIT_PARAMETER; + g_NvidiaExt[index].src0u.x = rqFlags; + float ret = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float4 
__NvRtCommittedSphereObjectPositionAndRadius(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_COMMITTED_SPHERE_OBJECT_POSITION_AND_RADIUS; + g_NvidiaExt[index].src0u.x = rqFlags; + + float4 ret; + ret[0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +float2x4 __NvRtCommittedLssObjectPositionsAndRadii(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_COMMITTED_LSS_OBJECT_POSITIONS_AND_RADII; + g_NvidiaExt[index].src0u.x = rqFlags; + + float2x4 ret; + ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[0][3] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter()); + ret[1][3] = asfloat(g_NvidiaExt.IncrementCounter()); + return ret; +} + +void __NvRtCommitNonOpaqueBuiltinPrimitiveHit(uint rqFlags) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_COMMIT_NONOPAQUE_BUILTIN_PRIMITIVE_HIT; + g_NvidiaExt[index].src0u.x = rqFlags; + uint handle = g_NvidiaExt.IncrementCounter(); +} + +#endif diff --git a/Source/ThirdParty/nvapi/nvapi.Build.cs b/Source/ThirdParty/nvapi/nvapi.Build.cs new file mode 100644 index 000000000..3e7afe16a --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi.Build.cs @@ -0,0 +1,50 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+ +using System.IO; +using Flax.Build; +using Flax.Build.NativeCpp; + +/// +/// https://github.com/NVIDIA/nvapi +/// +public class nvapi : DepsModule +{ + public static bool Use(BuildOptions options) + { + return options.Platform.Target == TargetPlatform.Windows && options.Architecture == TargetArchitecture.x64; + } + + /// + public override void Init() + { + base.Init(); + + LicenseType = LicenseTypes.MIT; + LicenseFilePath = "License.txt"; + + // Merge third-party modules into engine binary + BinaryModuleName = "FlaxEngine"; + } + + /// + public override void Setup(BuildOptions options) + { + base.Setup(options); + + var depsRoot = options.DepsFolder; + options.PublicDefinitions.Add("COMPILE_WITH_NVAPI"); + switch (options.Platform.Target) + { + case TargetPlatform.Windows: + switch (options.Architecture) + { + case TargetArchitecture.x64: + options.OutputFiles.Add(Path.Combine(depsRoot, "nvapi64.lib")); + break; + default: throw new InvalidArchitectureException(options.Architecture); + } + break; + default: throw new InvalidPlatformException(options.Platform.Target); + } + } +} diff --git a/Source/ThirdParty/nvapi/nvapi.h b/Source/ThirdParty/nvapi/nvapi.h new file mode 100644 index 000000000..e35623a1a --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi.h @@ -0,0 +1,25533 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ +///////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Date: Jul 28, 2025 +// File: nvapi.h +// +// NvAPI provides an interface to NVIDIA devices. This file contains the +// interface constants, structure definitions and function prototypes. 
+// +// Target Profile: Open-Source +// Target Platform: windows +// +/////////////////////////////////////////////////////////////////////////////// +#include"nvapi_lite_salstart.h" +#include"nvapi_lite_common.h" +#include"nvapi_lite_sli.h" +#include"nvapi_lite_surround.h" +#include"nvapi_lite_stereo.h" +#include"nvapi_lite_d3dext.h" +#ifndef _NVAPI_H +#define _NVAPI_H + +#pragma pack(push,8) // Make sure we have consistent structure packings + +#ifdef __cplusplus +extern "C" { +#endif +// ==================================================== +// Universal NvAPI Definitions +// ==================================================== +#ifndef _WIN32 +#define __cdecl +#endif + + + +//! @} + + +//! \ingroup nvapistatus +#define NVAPI_API_NOT_INTIALIZED NVAPI_API_NOT_INITIALIZED //!< Fix typo in error code + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Initialize +// +//! This function initializes the NvAPI library (if not already initialized) but always increments the ref-counter. +//! This must be called before calling other NvAPI_ functions. +//! Note: It is now mandatory to call NvAPI_Initialize before calling any other NvAPI. +//! NvAPI_Unload should be called to unload the NVAPI Library. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! \retval NVAPI_LIBRARY_NOT_FOUND Failed to load the NVAPI support library +//! \sa nvapistatus +//! \ingroup nvapifunctions +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Initialize(); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Unload +// +//! 
DESCRIPTION: Decrements the ref-counter and when it reaches ZERO, unloads NVAPI library. +//! This must be called in pairs with NvAPI_Initialize. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! If the client wants unload functionality, it is recommended to always call NvAPI_Initialize and NvAPI_Unload in pairs. +//! +//! Unloading NvAPI library is not supported when the library is in a resource locked state. +//! Some functions in the NvAPI library initiates an operation or allocates certain resources +//! and there are corresponding functions available, to complete the operation or free the +//! allocated resources. All such function pairs are designed to prevent unloading NvAPI library. +//! +//! For example, if NvAPI_Unload is called after NvAPI_XXX which locks a resource, it fails with +//! NVAPI_ERROR. Developers need to call the corresponding NvAPI_YYY to unlock the resources, +//! before calling NvAPI_Unload again. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! \retval NVAPI_API_IN_USE Atleast an API is still being called hence cannot unload requested driver. +//! +//! \ingroup nvapifunctions +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Unload(); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetErrorMessage +// +//! This function converts an NvAPI error code into a null terminated string. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \param nr The error code to convert +//! \param szDesc The string corresponding to the error code +//! +//! \return NULL terminated string (always, never NULL) +//! 
\ingroup nvapifunctions +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetErrorMessage(NvAPI_Status nr,NvAPI_ShortString szDesc); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetInterfaceVersionString +// +//! This function returns a string describing the version of the NvAPI library. +//! The contents of the string are human readable. Do not assume a fixed +//! format. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \param szDesc User readable string giving NvAPI version information +//! +//! \return See \ref nvapistatus for the list of possible return values. +//! \ingroup nvapifunctions +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetInterfaceVersionString(NvAPI_ShortString szDesc); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetInterfaceVersionStringEx +// +//! This function returns a string describing the version of the NvAPI library. +//! The contents of the string are human readable. Do not assume a fixed format. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 450 +//! +//! \param szDesc User readable string giving NvAPI version information +//! +//! \return See \ref nvapistatus for the list of possible return values. +//! 
\ingroup nvapifunctions +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetInterfaceVersionStringEx(NvAPI_ShortString szDesc); + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// All display port related data types definition starts +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This category is intentionally added before the #ifdef. The #endif should also be in the same scope + +#ifndef DISPLAYPORT_STRUCTS_DEFINED +#define DISPLAYPORT_STRUCTS_DEFINED + +//! \ingroup dispcontrol +//! Used in NV_DISPLAY_PORT_INFO. +typedef enum _NV_DP_LINK_RATE +{ + NV_DP_1_62GBPS = 6, + NV_DP_2_70GBPS = 0xA, + NV_DP_5_40GBPS = 0x14, + NV_DP_8_10GBPS = 0x1E, +// Recommended Intermidiate Link Transfer Rates + NV_EDP_2_16GBPS = 8, + NV_EDP_2_43GBPS = 9, + NV_EDP_3_24GBPS = 0xC, + NV_EDP_4_32GBPS = 0x10 +} NV_DP_LINK_RATE; + +typedef enum _NV_DP2X_LINK_RATE +{ + NV_DP2X_1_62Gbps = 0x00A2, // 162 + NV_DP2X_2_16Gbps = 0x00D8, // 216 + NV_DP2X_2_43Gbps = 0x00F3, // 243 + NV_DP2X_2_50Gbps = 0x00FA, // 250 + NV_DP2X_2_70Gbps = 0x010E, // 270 + NV_DP2X_3_24Gbps = 0x0144, // 324 + NV_DP2X_4_32Gbps = 0x01B0, // 432 + NV_DP2X_5_40Gbps = 0x021C, // 540 + NV_DP2X_6_75Gbps = 0x02A3, // 675 + NV_DP2X_8_10Gbps = 0x032A, // 810 + NV_DP2X_10_0Gbps = 0x03E8, // 1000 + NV_DP2X_13_5Gbps = 0x0546, // 1350 + NV_DP2X_20_0Gbps = 0x07D0, // 2000 +} NV_DP2X_LINK_RATE; + +//! \ingroup dispcontrol +//! Used in NV_DISPLAY_PORT_INFO. +typedef enum _NV_DP_LANE_COUNT +{ + NV_DP_1_LANE = 1, + NV_DP_2_LANE = 2, + NV_DP_4_LANE = 4, +} NV_DP_LANE_COUNT; + + +//! \ingroup dispcontrol +//! Used in NV_DISPLAY_PORT_INFO. +typedef enum _NV_DP_COLOR_FORMAT +{ + NV_DP_COLOR_FORMAT_RGB = 0, + NV_DP_COLOR_FORMAT_YCbCr422, + NV_DP_COLOR_FORMAT_YCbCr444, +} NV_DP_COLOR_FORMAT; + + +//! 
\ingroup dispcontrol +//! Used in NV_DISPLAY_PORT_INFO. +typedef enum _NV_DP_COLORIMETRY +{ + NV_DP_COLORIMETRY_RGB = 0, + NV_DP_COLORIMETRY_YCbCr_ITU601, + NV_DP_COLORIMETRY_YCbCr_ITU709, +} NV_DP_COLORIMETRY; + + +//! \ingroup dispcontrol +//! Used in NV_DISPLAY_PORT_INFO. +typedef enum _NV_DP_DYNAMIC_RANGE +{ + NV_DP_DYNAMIC_RANGE_VESA = 0, + NV_DP_DYNAMIC_RANGE_CEA, +} NV_DP_DYNAMIC_RANGE; + + +//! \ingroup dispcontrol +//! Used in NV_DISPLAY_PORT_INFO. +typedef enum _NV_DP_BPC +{ + NV_DP_BPC_DEFAULT = 0, + NV_DP_BPC_6, + NV_DP_BPC_8, + NV_DP_BPC_10, + NV_DP_BPC_12, + NV_DP_BPC_16, +} NV_DP_BPC; + + +#endif //#ifndef DISPLAYPORT_STRUCTS_DEFINED + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// All display port related data types definitions end +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetEDID +// +//! \fn NvAPI_GPU_GetEDID(NvPhysicalGpuHandle hPhysicalGpu, NvU32 displayOutputId, NV_EDID *pEDID) +//! This function returns the EDID data for the specified GPU handle and connection bit mask. +//! displayOutputId should have exactly 1 bit set to indicate a single display. See \ref handles. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \retval NVAPI_INVALID_ARGUMENT pEDID is NULL; displayOutputId has 0 or > 1 bits set +//! \retval NVAPI_OK *pEDID contains valid data. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \retval NVAPI_DATA_NOT_FOUND The requested display does not contain an EDID. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +//! 
@{ + +#define NV_EDID_V1_DATA_SIZE 256 + +#define NV_EDID_DATA_SIZE NV_EDID_V1_DATA_SIZE + +typedef struct +{ + NvU32 version; //structure version + NvU8 EDID_Data[NV_EDID_DATA_SIZE]; +} NV_EDID_V1; + +//! Used in NvAPI_GPU_GetEDID() +typedef struct +{ + NvU32 version; //!< Structure version + NvU8 EDID_Data[NV_EDID_DATA_SIZE]; + NvU32 sizeofEDID; +} NV_EDID_V2; + +//! Used in NvAPI_GPU_GetEDID() +typedef struct +{ + NvU32 version; //!< Structure version + NvU8 EDID_Data[NV_EDID_DATA_SIZE]; + NvU32 sizeofEDID; + NvU32 edidId; //!< ID which always returned in a monotonically increasing counter. + //!< Across a split-EDID read we need to verify that all calls returned the same edidId. + //!< This counter is incremented if we get the updated EDID. + NvU32 offset; //!< Which 256-byte page of the EDID we want to read. Start at 0. + //!< If the read succeeds with edidSize > NV_EDID_DATA_SIZE, + //!< call back again with offset+256 until we have read the entire buffer +} NV_EDID_V3; + +typedef NV_EDID_V3 NV_EDID; + +#define NV_EDID_VER1 MAKE_NVAPI_VERSION(NV_EDID_V1,1) +#define NV_EDID_VER2 MAKE_NVAPI_VERSION(NV_EDID_V2,2) +#define NV_EDID_VER3 MAKE_NVAPI_VERSION(NV_EDID_V3,3) +#define NV_EDID_VER NV_EDID_VER3 + +//! @} + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_GetEDID(NvPhysicalGpuHandle hPhysicalGpu, NvU32 displayOutputId, NV_EDID *pEDID); + +//! \ingroup gpu + +//! Used in NvAPI_DISP_GetEdidData + +#define NV_EDID_DATA_SIZE_MAX 1024 //!< This is the current size supported by Nvidia Display Driver and may change in future. + +typedef enum +{ + NV_EDID_FLAG_DEFAULT = 0, //!< the EDID which is actively used by the driver, it could be _RAW/_COOKED/_FORCED/_INF. + NV_EDID_FLAG_RAW = 1, //!< the EDID which is not modified by the driver. If there's no _FORCED edid engaged, it + //!< will be the unmodified monitor EDID from the I2C bus. Otherwise it is original _FORCED edid. 
+ NV_EDID_FLAG_COOKED = 2, //!< the EDID has been modified by the driver for compatibility + NV_EDID_FLAG_FORCED = 3, //!< the EDID is forced by the end-user over s/w interface, + NV_EDID_FLAG_INF = 4, //!< the EDID is from monitor INF + NV_EDID_FLAG_HW = 5, //!< the EDID is from the monitor over I2C bus without any modification. +} NV_EDID_FLAG; + +//! \ingroup gpu +//! Used in NV_GPU_CONNECTOR_DATA +typedef enum _NV_GPU_CONNECTOR_TYPE +{ + NVAPI_GPU_CONNECTOR_VGA_15_PIN = 0x00000000, + NVAPI_GPU_CONNECTOR_TV_COMPOSITE = 0x00000010, + NVAPI_GPU_CONNECTOR_TV_SVIDEO = 0x00000011, + NVAPI_GPU_CONNECTOR_TV_HDTV_COMPONENT = 0x00000013, + NVAPI_GPU_CONNECTOR_TV_SCART = 0x00000014, + NVAPI_GPU_CONNECTOR_TV_COMPOSITE_SCART_ON_EIAJ4120 = 0x00000016, + NVAPI_GPU_CONNECTOR_TV_HDTV_EIAJ4120 = 0x00000017, + NVAPI_GPU_CONNECTOR_PC_POD_HDTV_YPRPB = 0x00000018, + NVAPI_GPU_CONNECTOR_PC_POD_SVIDEO = 0x00000019, + NVAPI_GPU_CONNECTOR_PC_POD_COMPOSITE = 0x0000001A, + NVAPI_GPU_CONNECTOR_DVI_I_TV_SVIDEO = 0x00000020, + NVAPI_GPU_CONNECTOR_DVI_I_TV_COMPOSITE = 0x00000021, + NVAPI_GPU_CONNECTOR_DVI_I = 0x00000030, + NVAPI_GPU_CONNECTOR_DVI_D = 0x00000031, + NVAPI_GPU_CONNECTOR_ADC = 0x00000032, + NVAPI_GPU_CONNECTOR_LFH_DVI_I_1 = 0x00000038, + NVAPI_GPU_CONNECTOR_LFH_DVI_I_2 = 0x00000039, + NVAPI_GPU_CONNECTOR_SPWG = 0x00000040, + NVAPI_GPU_CONNECTOR_OEM = 0x00000041, + NVAPI_GPU_CONNECTOR_DISPLAYPORT_EXTERNAL = 0x00000046, + NVAPI_GPU_CONNECTOR_DISPLAYPORT_INTERNAL = 0x00000047, + NVAPI_GPU_CONNECTOR_DISPLAYPORT_MINI_EXT = 0x00000048, + NVAPI_GPU_CONNECTOR_HDMI_A = 0x00000061, + NVAPI_GPU_CONNECTOR_HDMI_C_MINI = 0x00000063, + NVAPI_GPU_CONNECTOR_LFH_DISPLAYPORT_1 = 0x00000064, + NVAPI_GPU_CONNECTOR_LFH_DISPLAYPORT_2 = 0x00000065, + NVAPI_GPU_CONNECTOR_VIRTUAL_WFD = 0x00000070, //!< Deprecated. 
+ NVAPI_GPU_CONNECTOR_USB_C = 0x00000071, + NVAPI_GPU_CONNECTOR_UNKNOWN = 0xFFFFFFFF, +} NV_GPU_CONNECTOR_TYPE; + +//////////////////////////////////////////////////////////////////////////////// +// +// NvAPI_TVOutput Information +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup tvapi +//! Used in NV_DISPLAY_TV_OUTPUT_INFO +typedef enum _NV_DISPLAY_TV_FORMAT +{ + NV_DISPLAY_TV_FORMAT_NONE = 0, + NV_DISPLAY_TV_FORMAT_SD_NTSCM = 0x00000001, + NV_DISPLAY_TV_FORMAT_SD_NTSCJ = 0x00000002, + NV_DISPLAY_TV_FORMAT_SD_PALM = 0x00000004, + NV_DISPLAY_TV_FORMAT_SD_PALBDGH = 0x00000008, + NV_DISPLAY_TV_FORMAT_SD_PALN = 0x00000010, + NV_DISPLAY_TV_FORMAT_SD_PALNC = 0x00000020, + NV_DISPLAY_TV_FORMAT_SD_576i = 0x00000100, + NV_DISPLAY_TV_FORMAT_SD_480i = 0x00000200, + NV_DISPLAY_TV_FORMAT_ED_480p = 0x00000400, + NV_DISPLAY_TV_FORMAT_ED_576p = 0x00000800, + NV_DISPLAY_TV_FORMAT_HD_720p = 0x00001000, + NV_DISPLAY_TV_FORMAT_HD_1080i = 0x00002000, + NV_DISPLAY_TV_FORMAT_HD_1080p = 0x00004000, + NV_DISPLAY_TV_FORMAT_HD_720p50 = 0x00008000, + NV_DISPLAY_TV_FORMAT_HD_1080p24 = 0x00010000, + NV_DISPLAY_TV_FORMAT_HD_1080i50 = 0x00020000, + NV_DISPLAY_TV_FORMAT_HD_1080p50 = 0x00040000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp30 = 0x00080000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp30_3840 = NV_DISPLAY_TV_FORMAT_UHD_4Kp30, + NV_DISPLAY_TV_FORMAT_UHD_4Kp25 = 0x00100000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp25_3840 = NV_DISPLAY_TV_FORMAT_UHD_4Kp25, + NV_DISPLAY_TV_FORMAT_UHD_4Kp24 = 0x00200000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp24_3840 = NV_DISPLAY_TV_FORMAT_UHD_4Kp24, + NV_DISPLAY_TV_FORMAT_UHD_4Kp24_SMPTE = 0x00400000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp50_3840 = 0x00800000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp60_3840 = 0x00900000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp30_4096 = 0x00A00000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp25_4096 = 0x00B00000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp24_4096 = 0x00C00000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp50_4096 = 0x00D00000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp60_4096 = 
0x00E00000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp24_7680 = 0x01000000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp25_7680 = 0x02000000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp30_7680 = 0x04000000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp48_7680 = 0x08000000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp50_7680 = 0x09000000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp60_7680 = 0x0A000000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp100_7680 = 0x0B000000, + NV_DISPLAY_TV_FORMAT_UHD_8Kp120_7680 = 0x0C000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp48_3840 = 0x0D000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp48_4096 = 0x0E000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp100_4096 = 0x0F000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp100_3840 = 0x10000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp120_4096 = 0x11000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp120_3840 = 0x12000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp100_5120 = 0x13000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp120_5120 = 0x14000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp24_5120 = 0x15000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp25_5120 = 0x16000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp30_5120 = 0x17000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp48_5120 = 0x18000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp50_5120 = 0x19000000, + NV_DISPLAY_TV_FORMAT_UHD_4Kp60_5120 = 0x20000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp24_10240 = 0x21000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp25_10240 = 0x22000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp30_10240 = 0x23000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp48_10240 = 0x24000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp50_10240 = 0x25000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp60_10240 = 0x26000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp100_10240 = 0x27000000, + NV_DISPLAY_TV_FORMAT_UHD_10Kp120_10240 = 0x28000000, + + + NV_DISPLAY_TV_FORMAT_SD_OTHER = 0x30000000, + NV_DISPLAY_TV_FORMAT_ED_OTHER = 0x40000000, + NV_DISPLAY_TV_FORMAT_HD_OTHER = 0x50000000, + + NV_DISPLAY_TV_FORMAT_ANY = 0x80000000, + +} NV_DISPLAY_TV_FORMAT; + + +//! \ingroup dispcontrol +//! @{ +#define NVAPI_MAX_VIEW_TARGET 2 +#define NVAPI_ADVANCED_MAX_VIEW_TARGET 4 + +#ifndef _NV_TARGET_VIEW_MODE_ +#define _NV_TARGET_VIEW_MODE_ + +//! 
Used in NvAPI_SetView(). +typedef enum _NV_TARGET_VIEW_MODE +{ + NV_VIEW_MODE_STANDARD = 0, + NV_VIEW_MODE_CLONE = 1, + NV_VIEW_MODE_HSPAN = 2, + NV_VIEW_MODE_VSPAN = 3, + NV_VIEW_MODE_DUALVIEW = 4, + NV_VIEW_MODE_MULTIVIEW = 5, +} NV_TARGET_VIEW_MODE; +#endif + +//! @} + +// Following definitions are used in NvAPI_SetViewEx. + +//! Scaling modes - used in NvAPI_SetViewEx(). +//! \ingroup dispcontrol +typedef enum _NV_SCALING +{ + NV_SCALING_DEFAULT = 0, //!< No change + + // New Scaling Declarations + NV_SCALING_GPU_SCALING_TO_CLOSEST = 1, //!< Balanced - Full Screen + NV_SCALING_GPU_SCALING_TO_NATIVE = 2, //!< Force GPU - Full Screen + NV_SCALING_GPU_SCANOUT_TO_NATIVE = 3, //!< Force GPU - Centered\No Scaling + NV_SCALING_GPU_SCALING_TO_ASPECT_SCANOUT_TO_NATIVE = 5, //!< Force GPU - Aspect Ratio + NV_SCALING_GPU_SCALING_TO_ASPECT_SCANOUT_TO_CLOSEST = 6, //!< Balanced - Aspect Ratio + NV_SCALING_GPU_SCANOUT_TO_CLOSEST = 7, //!< Balanced - Centered\No Scaling + NV_SCALING_GPU_INTEGER_ASPECT_SCALING = 8, //!< Force GPU - Integer Scaling + + // Legacy Declarations + NV_SCALING_MONITOR_SCALING = NV_SCALING_GPU_SCALING_TO_CLOSEST, + NV_SCALING_ADAPTER_SCALING = NV_SCALING_GPU_SCALING_TO_NATIVE, + NV_SCALING_CENTERED = NV_SCALING_GPU_SCANOUT_TO_NATIVE, + NV_SCALING_ASPECT_SCALING = NV_SCALING_GPU_SCALING_TO_ASPECT_SCANOUT_TO_NATIVE, + + NV_SCALING_CUSTOMIZED = 255 //!< For future use +} NV_SCALING; + +//! Rotate modes- used in NvAPI_SetViewEx(). + +//! \ingroup dispcontrol +typedef enum _NV_ROTATE +{ + NV_ROTATE_0 = 0, + NV_ROTATE_90 = 1, + NV_ROTATE_180 = 2, + NV_ROTATE_270 = 3, + NV_ROTATE_IGNORED = 4, +} NV_ROTATE; + +//! Color formats- used in NvAPI_SetViewEx(). +//! \ingroup dispcontrol +#define NVFORMAT_MAKEFOURCC(ch0, ch1, ch2, ch3) \ + ((NvU32)(NvU8)(ch0) | ((NvU32)(NvU8)(ch1) << 8) | \ + ((NvU32)(NvU8)(ch2) << 16) | ((NvU32)(NvU8)(ch3) << 24 )) + + + +//! Color formats- used in NvAPI_SetViewEx(). +//! 
\ingroup dispcontrol
+typedef enum _NV_FORMAT
+{
+    NV_FORMAT_UNKNOWN = 0, //!< unknown. Driver will choose one as following value.
+    NV_FORMAT_P8 = 41, //!< for 8bpp mode
+    NV_FORMAT_R5G6B5 = 23, //!< for 16bpp mode
+    NV_FORMAT_A8R8G8B8 = 21, //!< for 32bpp mode
+    NV_FORMAT_A16B16G16R16F = 113, //!< for 64bpp(floating point) mode.
+
+} NV_FORMAT;
+
+// TV standard
+
+typedef struct
+{
+    float x; //!< x-coordinate of the viewport top-left point
+    float y; //!< y-coordinate of the viewport top-left point
+    float w; //!< Width of the viewport
+    float h; //!< Height of the viewport
+} NV_VIEWPORTF;
+
+
+
+//! \ingroup dispcontrol
+//! The timing override is not supported yet; must be set to _AUTO. \n
+
+
+typedef enum _NV_TIMING_OVERRIDE
+{
+    NV_TIMING_OVERRIDE_CURRENT = 0, //!< get the current timing
+    NV_TIMING_OVERRIDE_AUTO, //!< the timing the driver will use based on the current policy
+    NV_TIMING_OVERRIDE_EDID, //!< EDID timing
+    NV_TIMING_OVERRIDE_DMT, //!< VESA DMT timing
+    NV_TIMING_OVERRIDE_DMT_RB, //!< VESA DMT timing with reduced blanking
+    NV_TIMING_OVERRIDE_CVT, //!< VESA CVT timing
+    NV_TIMING_OVERRIDE_CVT_RB, //!< VESA CVT timing with reduced blanking
+    NV_TIMING_OVERRIDE_GTF, //!< VESA GTF timing
+    NV_TIMING_OVERRIDE_EIA861, //!< EIA 861x pre-defined timing
+    NV_TIMING_OVERRIDE_ANALOG_TV, //!< analog SD/HDTV timing
+    NV_TIMING_OVERRIDE_CUST, //!< NV custom timings
+    NV_TIMING_OVERRIDE_NV_PREDEFINED, //!< NV pre-defined timing (basically the PsF timings)
+    NV_TIMING_OVERRIDE_NV_PSF = NV_TIMING_OVERRIDE_NV_PREDEFINED,
+    NV_TIMING_OVERRIDE_NV_ASPR,
+    NV_TIMING_OVERRIDE_SDI, //!< Override for SDI timing
+
+    NV_TIMING_OVRRIDE_MAX,
+}NV_TIMING_OVERRIDE;
+
+
+#ifndef NV_TIMING_STRUCTS_DEFINED
+#define NV_TIMING_STRUCTS_DEFINED
+
+//***********************
+// The Timing Structure
+//***********************
+//
+//! \ingroup dispcontrol
+//! NVIDIA-specific timing extras \n
+//! Used in NV_TIMING.
+typedef struct tagNV_TIMINGEXT +{ + NvU32 flag; //!< Reserved for NVIDIA hardware-based enhancement, such as double-scan. + NvU16 rr; //!< Logical refresh rate to present + NvU32 rrx1k; //!< Physical vertical refresh rate in 0.001Hz + NvU32 aspect; //!< Display aspect ratio Hi(aspect):horizontal-aspect, Low(aspect):vertical-aspect + NvU16 rep; //!< Bit-wise pixel repetition factor: 0x1:no pixel repetition; 0x2:each pixel repeats twice horizontally,.. + NvU32 status; //!< Timing standard + NvU8 name[40]; //!< Timing name +}NV_TIMINGEXT; + + + +//! \ingroup dispcontrol +//!The very basic timing structure based on the VESA standard: +//! \code +//! |<----------------------------htotal--------------------------->| +//! ---------"active" video-------->|<-------blanking------>|<----- +//! |<-------hvisible-------->|<-hb->|<-hfp->|<-hsw->|<-hbp->|<-hb->| +//! --------- -+-------------------------+ | | | | | +//! A A | | | | | | | +//! : : | | | | | | | +//! : : | | | | | | | +//! :vertical| addressable video | | | | | | +//! : visible| | | | | | | +//! : : | | | | | | | +//! : : | | | | | | | +//! vertical V | | | | | | | +//! total --+-------------------------+ | | | | | +//! : vb border | | | | | +//! : -----------------------------------+ | | | | +//! : vfp front porch | | | | +//! : -------------------------------------------+ | | | +//! : vsw sync width | | | +//! : ---------------------------------------------------+ | | +//! : vbp back porch | | +//! : -----------------------------------------------------------+ | +//! V vb border | +//! ---------------------------------------------------------------------------+ +//! 
\endcode +typedef struct _NV_TIMING +{ + // VESA scan out timing parameters: + NvU16 HVisible; //!< horizontal visible + NvU16 HBorder; //!< horizontal border + NvU16 HFrontPorch; //!< horizontal front porch + NvU16 HSyncWidth; //!< horizontal sync width + NvU16 HTotal; //!< horizontal total + NvU8 HSyncPol; //!< horizontal sync polarity: 1-negative, 0-positive + + NvU16 VVisible; //!< vertical visible + NvU16 VBorder; //!< vertical border + NvU16 VFrontPorch; //!< vertical front porch + NvU16 VSyncWidth; //!< vertical sync width + NvU16 VTotal; //!< vertical total + NvU8 VSyncPol; //!< vertical sync polarity: 1-negative, 0-positive + + NvU16 interlaced; //!< 1-interlaced, 0-progressive + NvU32 pclk; //!< pixel clock in 10 kHz + + //other timing related extras + NV_TIMINGEXT etc; +}NV_TIMING; +#endif //NV_TIMING_STRUCTS_DEFINED + + +//! \addtogroup dispcontrol +//! Timing-related constants +//! @{ +#define NV_TIMING_H_SYNC_POSITIVE 0 +#define NV_TIMING_H_SYNC_NEGATIVE 1 +#define NV_TIMING_H_SYNC_DEFAULT NV_TIMING_H_SYNC_NEGATIVE +// +#define NV_TIMING_V_SYNC_POSITIVE 0 +#define NV_TIMING_V_SYNC_NEGATIVE 1 +#define NV_TIMING_V_SYNC_DEFAULT NV_TIMING_V_SYNC_POSITIVE +// +#define NV_TIMING_PROGRESSIVE 0 +#define NV_TIMING_INTERLACED 1 +#define NV_TIMING_INTERLACED_EXTRA_VBLANK_ON_FIELD2 1 +#define NV_TIMING_INTERLACED_NO_EXTRA_VBLANK_ON_FIELD2 2 +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_SetView +// +//! \fn NvAPI_SetView(NvDisplayHandle hNvDisplay, NV_VIEW_TARGET_INFO *pTargetInfo, NV_TARGET_VIEW_MODE targetView) +//! This function lets the caller modify the target display arrangement of the selected source display handle in any nView mode. +//! It can also modify or extend the source display in Dualview mode. +//! \note Maps the selected source to the associated target Ids. +//! \note Display PATH with this API is limited to single GPU. DUALVIEW across GPUs cannot be enabled with this API. 
+//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_SetDisplayConfig. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 90 +//! +//! \param [in] hNvDisplay NVIDIA Display selection. #NVAPI_DEFAULT_HANDLE is not allowed, it has to be a handle enumerated with NvAPI_EnumNVidiaDisplayHandle(). +//! \param [in] pTargetInfo Pointer to array of NV_VIEW_TARGET_INFO, specifying device properties in this view. +//! The first device entry in the array is the physical primary. +//! The device entry with the lowest source id is the desktop primary. +//! \param [in] targetCount Count of target devices specified in pTargetInfo. +//! \param [in] targetView Target view selected from NV_TARGET_VIEW_MODE. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! \retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup dispcontrol +//! Used in NvAPI_SetView() and NvAPI_GetView() +typedef struct +{ + NvU32 version; //!< (IN) structure version + NvU32 count; //!< (IN) target count + struct + { + NvU32 deviceMask; //!< (IN/OUT) Device mask + NvU32 sourceId; //!< (IN/OUT) Source ID - values will be based on the number of heads exposed per GPU. + NvU32 bPrimary:1; //!< (OUT) Indicates if this is the GPU's primary view target. This is not the desktop GDI primary. + //!< NvAPI_SetView automatically selects the first target in NV_VIEW_TARGET_INFO index 0 as the GPU's primary view. + NvU32 bInterlaced:1; //!< (IN/OUT) Indicates if the timing being used on this monitor is interlaced. + NvU32 bGDIPrimary:1; //!< (IN/OUT) Indicates if this is the desktop GDI primary. + NvU32 bForceModeSet:1;//!< (IN) Used only on Win7 and higher during a call to NvAPI_SetView(). Turns off optimization & forces OS to set supplied mode. 
+ } target[NVAPI_MAX_VIEW_TARGET]; +} NV_VIEW_TARGET_INFO; + +//! \ingroup dispcontrol +#define NV_VIEW_TARGET_INFO_VER MAKE_NVAPI_VERSION(NV_VIEW_TARGET_INFO,2) + +//! \ingroup dispcontrol +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_SetDisplayConfig.") +NVAPI_INTERFACE NvAPI_SetView(NvDisplayHandle hNvDisplay, NV_VIEW_TARGET_INFO *pTargetInfo, NV_TARGET_VIEW_MODE targetView); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_SetViewEx +// +//! \fn NvAPI_SetViewEx(NvDisplayHandle hNvDisplay, NV_DISPLAY_PATH_INFO *pPathInfo, NV_TARGET_VIEW_MODE displayView) +//! This function lets caller to modify the display arrangement for selected source display handle in any of the nview modes. +//! It also allows to modify or extend the source display in dualview mode. +//! \note Maps the selected source to the associated target Ids. +//! \note Display PATH with this API is limited to single GPU. DUALVIEW across GPUs cannot be enabled with this API. +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_SetDisplayConfig. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 95 +//! +//! \param [in] hNvDisplay NVIDIA Display selection. #NVAPI_DEFAULT_HANDLE is not allowed, it has to be a handle enumerated with +//! NvAPI_EnumNVidiaDisplayHandle(). +//! \param [in] pPathInfo Pointer to array of NV_VIEW_PATH_INFO, specifying device properties in this view. +//! The first device entry in the array is the physical primary. +//! The device entry with the lowest source id is the desktop primary. +//! \param [in] pathCount Count of paths specified in pPathInfo. +//! \param [in] displayView Display view selected from NV_TARGET_VIEW_MODE. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! 
\retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup dispcontrol +#define NVAPI_MAX_DISPLAY_PATH NVAPI_MAX_VIEW_TARGET + +//! \ingroup dispcontrol +#define NVAPI_ADVANCED_MAX_DISPLAY_PATH NVAPI_ADVANCED_MAX_VIEW_TARGET + + + +//! \ingroup dispcontrol +//! Used in NV_DISPLAY_PATH_INFO. +typedef struct +{ + NvU32 deviceMask; //!< (IN) Device mask + NvU32 sourceId; //!< (IN) Values will be based on the number of heads exposed per GPU(0, 1?) + NvU32 bPrimary:1; //!< (IN/OUT) Indicates if this is the GPU's primary view target. This is not the desktop GDI primary. + //!< NvAPI_SetViewEx() automatically selects the first target in NV_DISPLAY_PATH_INFO index 0 as the GPU's primary view. + NV_GPU_CONNECTOR_TYPE connector; //!< (IN) Specify connector type. For TV only. + + // source mode information + NvU32 width; //!< (IN) Width of the mode + NvU32 height; //!< (IN) Height of the mode + NvU32 depth; //!< (IN) Depth of the mode + NV_FORMAT colorFormat; //!< Color format if it needs to be specified. Not used now. + + //rotation setting of the mode + NV_ROTATE rotation; //!< (IN) Rotation setting. + + // the scaling mode + NV_SCALING scaling; //!< (IN) Scaling setting + + // Timing info + NvU32 refreshRate; //!< (IN) Refresh rate of the mode + NvU32 interlaced:1; //!< (IN) Interlaced mode flag + + NV_DISPLAY_TV_FORMAT tvFormat; //!< (IN) To choose the last TV format set this value to NV_DISPLAY_TV_FORMAT_NONE + + // Windows desktop position + NvU32 posx; //!< (IN/OUT) X-offset of this display on the Windows desktop + NvU32 posy; //!< (IN/OUT) Y-offset of this display on the Windows desktop + NvU32 bGDIPrimary:1; //!< (IN/OUT) Indicates if this is the desktop GDI primary. + + NvU32 bForceModeSet:1;//!< (IN) Used only on Win7 and higher during a call to NvAPI_SetViewEx(). Turns off optimization & forces OS to set supplied mode. 
+ NvU32 bFocusDisplay:1;//!< (IN) If set, this display path should have the focus after the GPU topology change + NvU32 gpuId:24; //!< (IN) the physical display/target Gpu id which is the owner of the scan out (for SLI multimon, display from the slave Gpu) + +} NV_DISPLAY_PATH; + +//! \ingroup dispcontrol +//! Used in NvAPI_SetViewEx() and NvAPI_GetViewEx(). +typedef struct +{ + NvU32 version; //!< (IN) Structure version + NvU32 count; //!< (IN) Path count + NV_DISPLAY_PATH path[NVAPI_MAX_DISPLAY_PATH]; +} NV_DISPLAY_PATH_INFO_V3; + +//! \ingroup dispcontrol +//! Used in NvAPI_SetViewEx() and NvAPI_GetViewEx(). +typedef struct +{ + NvU32 version; //!< (IN) Structure version + NvU32 count; //!< (IN) Path count + NV_DISPLAY_PATH path[NVAPI_ADVANCED_MAX_DISPLAY_PATH]; +} NV_DISPLAY_PATH_INFO; + +//! \addtogroup dispcontrol +//! Macro for constructing the version fields of NV_DISPLAY_PATH_INFO +//! @{ +#define NV_DISPLAY_PATH_INFO_VER NV_DISPLAY_PATH_INFO_VER4 +#define NV_DISPLAY_PATH_INFO_VER4 MAKE_NVAPI_VERSION(NV_DISPLAY_PATH_INFO,4) +#define NV_DISPLAY_PATH_INFO_VER3 MAKE_NVAPI_VERSION(NV_DISPLAY_PATH_INFO,3) +#define NV_DISPLAY_PATH_INFO_VER2 MAKE_NVAPI_VERSION(NV_DISPLAY_PATH_INFO,2) +#define NV_DISPLAY_PATH_INFO_VER1 MAKE_NVAPI_VERSION(NV_DISPLAY_PATH_INFO,1) +//! @} +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_SetViewEx +// +//! \fn NvAPI_SetViewEx(NvDisplayHandle hNvDisplay, NV_DISPLAY_PATH_INFO *pPathInfo, NV_TARGET_VIEW_MODE displayView) +//! This function lets caller to modify the display arrangement for selected source display handle in any of the nview modes. +//! It also allows to modify or extend the source display in dualview mode. +//! \note Maps the selected source to the associated target Ids. +//! \note Display PATH with this API is limited to single GPU. DUALVIEW across GPUs cannot be enabled with this API. +//! +//! 
\deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_SetDisplayConfig. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 95 +//! +//! \param [in] hNvDisplay NVIDIA Display selection. #NVAPI_DEFAULT_HANDLE is not allowed, it has to be a handle enumerated with +//! NvAPI_EnumNVidiaDisplayHandle(). +//! \param [in] pPathInfo Pointer to array of NV_VIEW_PATH_INFO, specifying device properties in this view. +//! The first device entry in the array is the physical primary. +//! The device entry with the lowest source id is the desktop primary. +//! \param [in] pathCount Count of paths specified in pPathInfo. +//! \param [in] displayView Display view selected from NV_TARGET_VIEW_MODE. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! \retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup dispcontrol +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_SetDisplayConfig.") +NVAPI_INTERFACE NvAPI_SetViewEx(NvDisplayHandle hNvDisplay, NV_DISPLAY_PATH_INFO *pPathInfo, NV_TARGET_VIEW_MODE displayView); + + + +/////////////////////////////////////////////////////////////////////////////// +// SetDisplayConfig/GetDisplayConfig +/////////////////////////////////////////////////////////////////////////////// +//! \ingroup dispcontrol + +typedef struct _NV_POSITION +{ + NvS32 x; + NvS32 y; +} NV_POSITION; + +//! \ingroup dispcontrol +typedef struct _NV_RESOLUTION +{ + NvU32 width; + NvU32 height; + NvU32 colorDepth; +} NV_RESOLUTION; + +//! \ingroup dispcontrol +typedef struct _NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO_V1 +{ + NvU32 version; + + // Rotation and Scaling + NV_ROTATE rotation; //!< (IN) rotation setting. + NV_SCALING scaling; //!< (IN) scaling setting. 
+ + // Refresh Rate + NvU32 refreshRate1K; //!< (IN) Non-interlaced Refresh Rate of the mode, multiplied by 1000, 0 = ignored + //!< This is the value which driver reports to the OS. + // Flags + NvU32 interlaced:1; //!< (IN) Interlaced mode flag, ignored if refreshRate == 0 + NvU32 primary:1; //!< (IN) Declares primary display in clone configuration. This is *NOT* GDI Primary. + //!< Only one target can be primary per source. If no primary is specified, the first + //!< target will automatically be primary. +#ifdef NV_PAN_AND_SCAN_DEFINED + NvU32 isPanAndScanTarget:1; //!< Whether on this target Pan and Scan is enabled or has to be enabled. Valid only + //!< when the target is part of clone topology. +#else + NvU32 reservedBit1:1; +#endif + NvU32 disableVirtualModeSupport:1; + NvU32 isPreferredUnscaledTarget:1; + NvU32 reserved:27; + // TV format information + NV_GPU_CONNECTOR_TYPE connector; //!< Specify connector type. For TV only, ignored if tvFormat == NV_DISPLAY_TV_FORMAT_NONE + NV_DISPLAY_TV_FORMAT tvFormat; //!< (IN) to choose the last TV format set this value to NV_DISPLAY_TV_FORMAT_NONE + //!< In case of NvAPI_DISP_GetDisplayConfig(), this field will indicate the currently applied TV format; + //!< if no TV format is applied, this field will have NV_DISPLAY_TV_FORMAT_NONE value. + //!< In case of NvAPI_DISP_SetDisplayConfig(), this field should only be set in case of TVs; + //!< for other displays this field will be ignored and resolution & refresh rate specified in input will be used to apply the TV format. + + // Backend (raster) timing standard + NV_TIMING_OVERRIDE timingOverride; //!< Ignored if timingOverride == NV_TIMING_OVERRIDE_CURRENT + NV_TIMING timing; //!< Scan out timing, valid only if timingOverride == NV_TIMING_OVERRIDE_CUST + //!< The value NV_TIMING::NV_TIMINGEXT::rrx1k is obtained from the EDID. The driver may + //!< tweak this value for HDTV, stereo, etc., before reporting it to the OS. 
+} NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO_V1; + +//! \ingroup dispcontrol +typedef NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO_V1 NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO; + +//! \ingroup dispcontrol +#define NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO_VER1 MAKE_NVAPI_VERSION(NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO_V1,1) + +//! \ingroup dispcontrol +#define NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO_VER NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO_VER1 + +//! \ingroup dispcontrol +typedef struct _NV_DISPLAYCONFIG_PATH_TARGET_INFO_V1 +{ + NvU32 displayId; //!< Display ID + NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO* details; //!< May be NULL if no advanced settings are required. NULL for Non-NVIDIA Display. +} NV_DISPLAYCONFIG_PATH_TARGET_INFO_V1; + +//! \ingroup dispcontrol +typedef struct _NV_DISPLAYCONFIG_PATH_TARGET_INFO_V2 +{ + NvU32 displayId; //!< Display ID + NV_DISPLAYCONFIG_PATH_ADVANCED_TARGET_INFO* details; //!< May be NULL if no advanced settings are required + NvU32 targetId; //!< Windows CCD target ID. Must be present only for non-NVIDIA adapter, for NVIDIA adapter this parameter is ignored. +} NV_DISPLAYCONFIG_PATH_TARGET_INFO_V2; + + +//! \ingroup dispcontrol +//! As version is not defined for this structure, we will be using version of NV_DISPLAYCONFIG_PATH_INFO +typedef NV_DISPLAYCONFIG_PATH_TARGET_INFO_V2 NV_DISPLAYCONFIG_PATH_TARGET_INFO; + + +//! \ingroup dispcontrol +typedef enum _NV_DISPLAYCONFIG_SPANNING_ORIENTATION +{ + NV_DISPLAYCONFIG_SPAN_NONE = 0, + NV_DISPLAYCONFIG_SPAN_HORIZONTAL = 1, + NV_DISPLAYCONFIG_SPAN_VERTICAL = 2, +} NV_DISPLAYCONFIG_SPANNING_ORIENTATION; + +//! 
\ingroup dispcontrol
+typedef struct _NV_DISPLAYCONFIG_SOURCE_MODE_INFO_V1
+{
+    NV_RESOLUTION resolution;
+    NV_FORMAT colorFormat; //!< Ignored at present, must be NV_FORMAT_UNKNOWN (0)
+    NV_POSITION position; //!< If all positions are 0 or invalid, displays will be automatically
+                          //!< positioned from left to right with GDI Primary at 0,0, and all
+                          //!< other displays in the order of the path array.
+    NV_DISPLAYCONFIG_SPANNING_ORIENTATION spanningOrientation; //!< Spanning is only supported on XP
+    NvU32 bGDIPrimary : 1;
+    NvU32 bSLIFocus : 1;
+    NvU32 reserved : 30; //!< Must be 0
+} NV_DISPLAYCONFIG_SOURCE_MODE_INFO_V1;
+
+
+
+//! \ingroup dispcontrol
+typedef struct _NV_DISPLAYCONFIG_PATH_INFO_V1
+{
+    NvU32 version;
+    NvU32 reserved_sourceId; //!< This field is reserved. There is ongoing debate if we need this field.
+                             //!< Identifies sourceIds used by Windows. If all sourceIds are 0,
+                             //!< these will be computed automatically.
+    NvU32 targetInfoCount; //!< Number of elements in targetInfo array
+    NV_DISPLAYCONFIG_PATH_TARGET_INFO_V1* targetInfo;
+    NV_DISPLAYCONFIG_SOURCE_MODE_INFO_V1* sourceModeInfo; //!< May be NULL if mode info is not important
+} NV_DISPLAYCONFIG_PATH_INFO_V1;
+
+//! \ingroup dispcontrol
+//! This define is temporary and must be removed once DVS failure is fixed.
+#define _NV_DISPLAYCONFIG_PATH_INFO_V2 _NV_DISPLAYCONFIG_PATH_INFO
+
+//! \ingroup dispcontrol
+typedef struct _NV_DISPLAYCONFIG_PATH_INFO_V2
+{
+    NvU32 version;
+    union {
+        NvU32 sourceId; //!< Identifies sourceId used by Windows CCD. This can be optionally set.
+        NvU32 reserved_sourceId; //!< Only for compatibility
+    };
+
+    NvU32 targetInfoCount; //!< Number of elements in targetInfo array
+    NV_DISPLAYCONFIG_PATH_TARGET_INFO_V2* targetInfo;
+    NV_DISPLAYCONFIG_SOURCE_MODE_INFO_V1* sourceModeInfo; //!< May be NULL if mode info is not important
+    NvU32 IsNonNVIDIAAdapter : 1; //!< True for non-NVIDIA adapter.
+ NvU32 reserved : 31; //!< Must be 0 + void *pOSAdapterID; //!< Used by Non-NVIDIA adapter for pointer to OS Adapter of LUID + //!< type, type casted to void *. +} NV_DISPLAYCONFIG_PATH_INFO_V2; + +//! \ingroup dispcontrol +#define NV_DISPLAYCONFIG_PATH_INFO_VER1 MAKE_NVAPI_VERSION(NV_DISPLAYCONFIG_PATH_INFO_V1,1) + +//! \ingroup dispcontrol +#define NV_DISPLAYCONFIG_PATH_INFO_VER2 MAKE_NVAPI_VERSION(NV_DISPLAYCONFIG_PATH_INFO_V2,2) + +#ifndef NV_DISPLAYCONFIG_PATH_INFO_VER + +typedef NV_DISPLAYCONFIG_PATH_INFO_V2 NV_DISPLAYCONFIG_PATH_INFO; + +#define NV_DISPLAYCONFIG_PATH_INFO_VER NV_DISPLAYCONFIG_PATH_INFO_VER2 + +typedef NV_DISPLAYCONFIG_SOURCE_MODE_INFO_V1 NV_DISPLAYCONFIG_SOURCE_MODE_INFO; + +#endif + + +//! \ingroup dispcontrol +typedef enum _NV_DISPLAYCONFIG_FLAGS +{ + NV_DISPLAYCONFIG_VALIDATE_ONLY = 0x00000001, + NV_DISPLAYCONFIG_SAVE_TO_PERSISTENCE = 0x00000002, + NV_DISPLAYCONFIG_DRIVER_RELOAD_ALLOWED = 0x00000004, //!< Driver reload is permitted if necessary + NV_DISPLAYCONFIG_FORCE_MODE_ENUMERATION = 0x00000008, //!< Refresh OS mode list. + NV_FORCE_COMMIT_VIDPN = 0x00000010, //!< Tell OS to avoid optimizing CommitVidPn call during a modeset +} NV_DISPLAYCONFIG_FLAGS; + + +#define NVAPI_UNICODE_STRING_MAX 2048 +#define NVAPI_BINARY_DATA_MAX 4096 + +typedef NvU16 NvAPI_UnicodeString[NVAPI_UNICODE_STRING_MAX]; +typedef const NvU16 *NvAPI_LPCWSTR; + +// Common + + + +//! \ingroup gpuclock +//! @{ +#define NVAPI_MAX_GPU_CLOCKS 32 +#define NVAPI_MAX_GPU_PUBLIC_CLOCKS 32 +#define NVAPI_MAX_GPU_PERF_CLOCKS 32 +#define NVAPI_MAX_GPU_PERF_VOLTAGES 16 +#define NVAPI_MAX_GPU_PERF_PSTATES 16 +//! @} + +//! \ingroup gpuclock +typedef enum _NV_GPU_PERF_VOLTAGE_INFO_DOMAIN_ID +{ + NVAPI_GPU_PERF_VOLTAGE_INFO_DOMAIN_CORE = 0, + NVAPI_GPU_PERF_VOLTAGE_INFO_DOMAIN_UNDEFINED = NVAPI_MAX_GPU_PERF_VOLTAGES, +} NV_GPU_PERF_VOLTAGE_INFO_DOMAIN_ID; + +//! 
\ingroup gpuclock +typedef enum _NV_GPU_PUBLIC_CLOCK_ID +{ + NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS = 0, + NVAPI_GPU_PUBLIC_CLOCK_MEMORY = 4, + NVAPI_GPU_PUBLIC_CLOCK_PROCESSOR = 7, + NVAPI_GPU_PUBLIC_CLOCK_VIDEO = 8, + NVAPI_GPU_PUBLIC_CLOCK_UNDEFINED = NVAPI_MAX_GPU_PUBLIC_CLOCKS, +} NV_GPU_PUBLIC_CLOCK_ID; + + +//! \addtogroup gpupstate +//! @{ + +typedef enum _NV_GPU_PERF_PSTATE_ID +{ + NVAPI_GPU_PERF_PSTATE_P0 = 0, + NVAPI_GPU_PERF_PSTATE_P1, + NVAPI_GPU_PERF_PSTATE_P2, + NVAPI_GPU_PERF_PSTATE_P3, + NVAPI_GPU_PERF_PSTATE_P4, + NVAPI_GPU_PERF_PSTATE_P5, + NVAPI_GPU_PERF_PSTATE_P6, + NVAPI_GPU_PERF_PSTATE_P7, + NVAPI_GPU_PERF_PSTATE_P8, + NVAPI_GPU_PERF_PSTATE_P9, + NVAPI_GPU_PERF_PSTATE_P10, + NVAPI_GPU_PERF_PSTATE_P11, + NVAPI_GPU_PERF_PSTATE_P12, + NVAPI_GPU_PERF_PSTATE_P13, + NVAPI_GPU_PERF_PSTATE_P14, + NVAPI_GPU_PERF_PSTATE_P15, + NVAPI_GPU_PERF_PSTATE_UNDEFINED = NVAPI_MAX_GPU_PERF_PSTATES, + NVAPI_GPU_PERF_PSTATE_ALL, + +} NV_GPU_PERF_PSTATE_ID; + +//! @} + + + +//! \addtogroup gpupstate +//! @{ + +#define NVAPI_MAX_GPU_PSTATE20_PSTATES 16 +#define NVAPI_MAX_GPU_PSTATE20_CLOCKS 8 +#define NVAPI_MAX_GPU_PSTATE20_BASE_VOLTAGES 4 + +//! Used to identify clock type +typedef enum +{ + //! Clock domains that use single frequency value within given pstate + NVAPI_GPU_PERF_PSTATE20_CLOCK_TYPE_SINGLE = 0, + + //! Clock domains that allow range of frequency values within given pstate + NVAPI_GPU_PERF_PSTATE20_CLOCK_TYPE_RANGE, +} NV_GPU_PERF_PSTATE20_CLOCK_TYPE_ID; + +//! Used to describe both voltage and frequency deltas +typedef struct +{ + //! Value of parameter delta (in respective units [kHz, uV]) + NvS32 value; + + struct + { + //! Min value allowed for parameter delta (in respective units [kHz, uV]) + NvS32 min; + + //! Max value allowed for parameter delta (in respective units [kHz, uV]) + NvS32 max; + } valueRange; +} NV_GPU_PERF_PSTATES20_PARAM_DELTA; + +//! Used to describe single clock entry +typedef struct +{ + //! 
ID of the clock domain + NV_GPU_PUBLIC_CLOCK_ID domainId; + + //! Clock type ID + NV_GPU_PERF_PSTATE20_CLOCK_TYPE_ID typeId; + NvU32 bIsEditable:1; + + //! These bits are reserved for future use (must be always 0) + NvU32 reserved:31; + + //! Current frequency delta from nominal settings in (kHz) + NV_GPU_PERF_PSTATES20_PARAM_DELTA freqDelta_kHz; + + //! Clock domain type dependant information + union + { + struct + { + //! Clock frequency within given pstate in (kHz) + NvU32 freq_kHz; + } single; + + struct + { + //! Min clock frequency within given pstate in (kHz) + NvU32 minFreq_kHz; + + //! Max clock frequency within given pstate in (kHz) + NvU32 maxFreq_kHz; + + //! Voltage domain ID and value range in (uV) required for this clock + NV_GPU_PERF_VOLTAGE_INFO_DOMAIN_ID domainId; + NvU32 minVoltage_uV; + NvU32 maxVoltage_uV; + } range; + } data; +} NV_GPU_PSTATE20_CLOCK_ENTRY_V1; + +//! Used to describe single base voltage entry +typedef struct +{ + //! ID of the voltage domain + NV_GPU_PERF_VOLTAGE_INFO_DOMAIN_ID domainId; + NvU32 bIsEditable:1; + + //! These bits are reserved for future use (must be always 0) + NvU32 reserved:31; + + //! Current base voltage settings in [uV] + NvU32 volt_uV; + + NV_GPU_PERF_PSTATES20_PARAM_DELTA voltDelta_uV; // Current base voltage delta from nominal settings in [uV] +} NV_GPU_PSTATE20_BASE_VOLTAGE_ENTRY_V1; + +//! Used in NvAPI_GPU_GetPstates20() interface call. + +typedef struct +{ + //! Version info of the structure (NV_GPU_PERF_PSTATES20_INFO_VER) + NvU32 version; + + NvU32 bIsEditable:1; + + //! These bits are reserved for future use (must be always 0) + NvU32 reserved:31; + + //! Number of populated pstates + NvU32 numPstates; + + //! Number of populated clocks (per pstate) + NvU32 numClocks; + + //! Number of populated base voltages (per pstate) + NvU32 numBaseVoltages; + + //! Performance state (P-State) settings + //! Valid index range is 0 to numPstates-1 + struct + { + //! 
ID of the P-State + NV_GPU_PERF_PSTATE_ID pstateId; + + NvU32 bIsEditable:1; + + //! These bits are reserved for future use (must be always 0) + NvU32 reserved:31; + + //! Array of clock entries + //! Valid index range is 0 to numClocks-1 + NV_GPU_PSTATE20_CLOCK_ENTRY_V1 clocks[NVAPI_MAX_GPU_PSTATE20_CLOCKS]; + + //! Array of baseVoltage entries + //! Valid index range is 0 to numBaseVoltages-1 + NV_GPU_PSTATE20_BASE_VOLTAGE_ENTRY_V1 baseVoltages[NVAPI_MAX_GPU_PSTATE20_BASE_VOLTAGES]; + } pstates[NVAPI_MAX_GPU_PSTATE20_PSTATES]; +} NV_GPU_PERF_PSTATES20_INFO_V1; + +//! Used in NvAPI_GPU_GetPstates20() interface call. + +typedef struct _NV_GPU_PERF_PSTATES20_INFO_V2 +{ + //! Version info of the structure (NV_GPU_PERF_PSTATES20_INFO_VER) + NvU32 version; + + NvU32 bIsEditable:1; + + //! These bits are reserved for future use (must be always 0) + NvU32 reserved:31; + + //! Number of populated pstates + NvU32 numPstates; + + //! Number of populated clocks (per pstate) + NvU32 numClocks; + + //! Number of populated base voltages (per pstate) + NvU32 numBaseVoltages; + + //! Performance state (P-State) settings + //! Valid index range is 0 to numPstates-1 + struct + { + //! ID of the P-State + NV_GPU_PERF_PSTATE_ID pstateId; + + NvU32 bIsEditable:1; + + //! These bits are reserved for future use (must be always 0) + NvU32 reserved:31; + + //! Array of clock entries + //! Valid index range is 0 to numClocks-1 + NV_GPU_PSTATE20_CLOCK_ENTRY_V1 clocks[NVAPI_MAX_GPU_PSTATE20_CLOCKS]; + + //! Array of baseVoltage entries + //! Valid index range is 0 to numBaseVoltages-1 + NV_GPU_PSTATE20_BASE_VOLTAGE_ENTRY_V1 baseVoltages[NVAPI_MAX_GPU_PSTATE20_BASE_VOLTAGES]; + } pstates[NVAPI_MAX_GPU_PSTATE20_PSTATES]; + + //! OV settings - Please refer to NVIDIA over-volting recommendation to understand impact of this functionality + //! Valid index range is 0 to numVoltages-1 + struct + { + //! Number of populated voltages + NvU32 numVoltages; + + //! Array of voltage entries + //! 
Valid index range is 0 to numVoltages-1 + NV_GPU_PSTATE20_BASE_VOLTAGE_ENTRY_V1 voltages[NVAPI_MAX_GPU_PSTATE20_BASE_VOLTAGES]; + } ov; +} NV_GPU_PERF_PSTATES20_INFO_V2; + +typedef NV_GPU_PERF_PSTATES20_INFO_V2 NV_GPU_PERF_PSTATES20_INFO; + +//! Macro for constructing the version field of NV_GPU_PERF_PSTATES20_INFO_V1 +#define NV_GPU_PERF_PSTATES20_INFO_VER1 MAKE_NVAPI_VERSION(NV_GPU_PERF_PSTATES20_INFO_V1,1) + +//! Macro for constructing the version field of NV_GPU_PERF_PSTATES20_INFO_V2 +#define NV_GPU_PERF_PSTATES20_INFO_VER2 MAKE_NVAPI_VERSION(NV_GPU_PERF_PSTATES20_INFO_V2,2) + +//! Macro for constructing the version field of NV_GPU_PERF_PSTATES20_INFO_V2 +#define NV_GPU_PERF_PSTATES20_INFO_VER3 MAKE_NVAPI_VERSION(NV_GPU_PERF_PSTATES20_INFO_V2,3) + +//! Macro for constructing the version field of NV_GPU_PERF_PSTATES20_INFO +#define NV_GPU_PERF_PSTATES20_INFO_VER NV_GPU_PERF_PSTATES20_INFO_VER3 + +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetDisplayDriverVersion +//! \fn NvAPI_GetDisplayDriverVersion(NvDisplayHandle hNvDisplay, NV_DISPLAY_DRIVER_VERSION *pVersion) +//! This function returns a struct that describes aspects of the display driver +//! build. +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_SYS_GetDriverAndBranchVersion. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \param [in] hNvDisplay NVIDIA display handle. +//! \param [out] pVersion Pointer to NV_DISPLAY_DRIVER_VERSION struc +//! +//! \retval NVAPI_ERROR +//! \retval NVAPI_OK +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup driverapi +//! 
Used in NvAPI_GetDisplayDriverVersion() +typedef struct +{ + NvU32 version; // Structure version + NvU32 drvVersion; + NvU32 bldChangeListNum; + NvAPI_ShortString szBuildBranchString; + NvAPI_ShortString szAdapterString; +} NV_DISPLAY_DRIVER_VERSION; + +//! \ingroup driverapi +#define NV_DISPLAY_DRIVER_VERSION_VER MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_VERSION,1) + + +//! \ingroup driverapi +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_SYS_GetDriverAndBranchVersion.") +NVAPI_INTERFACE NvAPI_GetDisplayDriverVersion(NvDisplayHandle hNvDisplay, NV_DISPLAY_DRIVER_VERSION *pVersion); + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_OGL_ExpertModeSet[Get] +// +//! \name NvAPI_OGL_ExpertModeSet[Get] Functions +//@{ +//! This function configures OpenGL Expert Mode, an API usage feedback and +//! advice reporting mechanism. The effects of this call are +//! applied only to the current context, and are reset to the +//! defaults when the context is destroyed. +//! +//! \note This feature is valid at runtime only when GLExpert +//! functionality has been built into the OpenGL driver +//! installed on the system. All Windows Vista OpenGL +//! drivers provided by NVIDIA have this instrumentation +//! included by default. Windows XP, however, requires a +//! special display driver available with the NVIDIA +//! PerfSDK found at developer.nvidia.com. +//! +//! \note These functions are valid only for the current OpenGL +//! context. Calling these functions prior to creating a +//! context and calling MakeCurrent with it will result +//! in errors and undefined behavior. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \param expertDetailMask Mask made up of NVAPI_OGLEXPERT_DETAIL bits, +//! this parameter specifies the detail level in +//! the feedback stream. +//! +//! 
\param expertReportMask Mask made up of NVAPI_OGLEXPERT_REPORT bits,
+//! this parameter specifies the areas of
+//! functional interest.
+//!
+//! \param expertOutputMask Mask made up of NVAPI_OGLEXPERT_OUTPUT bits,
+//! this parameter specifies the feedback output
+//! location.
+//!
+//! \param expertCallback Used in conjunction with OUTPUT_TO_CALLBACK,
+//! this is a simple callback function the user
+//! may use to obtain the feedback stream. The
+//! function will be called once per fully
+//! qualified feedback stream entry.
+//!
+//! \retval NVAPI_API_NOT_INITIALIZED NVAPI not initialized
+//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU found
+//! \retval NVAPI_OPENGL_CONTEXT_NOT_CURRENT No NVIDIA OpenGL context
+//! which supports GLExpert
+//! has been made current
+//! \retval NVAPI_ERROR OpenGL driver failed to load properly
+//! \retval NVAPI_OK Success
+//
+///////////////////////////////////////////////////////////////////////////////
+
+//! \addtogroup oglapi
+//!
@{ +#define NVAPI_OGLEXPERT_DETAIL_NONE 0x00000000 +#define NVAPI_OGLEXPERT_DETAIL_ERROR 0x00000001 +#define NVAPI_OGLEXPERT_DETAIL_SWFALLBACK 0x00000002 +#define NVAPI_OGLEXPERT_DETAIL_BASIC_INFO 0x00000004 +#define NVAPI_OGLEXPERT_DETAIL_DETAILED_INFO 0x00000008 +#define NVAPI_OGLEXPERT_DETAIL_PERFORMANCE_WARNING 0x00000010 +#define NVAPI_OGLEXPERT_DETAIL_QUALITY_WARNING 0x00000020 +#define NVAPI_OGLEXPERT_DETAIL_USAGE_WARNING 0x00000040 +#define NVAPI_OGLEXPERT_DETAIL_ALL 0xFFFFFFFF + +#define NVAPI_OGLEXPERT_REPORT_NONE 0x00000000 +#define NVAPI_OGLEXPERT_REPORT_ERROR 0x00000001 +#define NVAPI_OGLEXPERT_REPORT_SWFALLBACK 0x00000002 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_VERTEX 0x00000004 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_GEOMETRY 0x00000008 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_XFB 0x00000010 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_RASTER 0x00000020 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_FRAGMENT 0x00000040 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_ROP 0x00000080 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_FRAMEBUFFER 0x00000100 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_PIXEL 0x00000200 +#define NVAPI_OGLEXPERT_REPORT_PIPELINE_TEXTURE 0x00000400 +#define NVAPI_OGLEXPERT_REPORT_OBJECT_BUFFEROBJECT 0x00000800 +#define NVAPI_OGLEXPERT_REPORT_OBJECT_TEXTURE 0x00001000 +#define NVAPI_OGLEXPERT_REPORT_OBJECT_PROGRAM 0x00002000 +#define NVAPI_OGLEXPERT_REPORT_OBJECT_FBO 0x00004000 +#define NVAPI_OGLEXPERT_REPORT_FEATURE_SLI 0x00008000 +#define NVAPI_OGLEXPERT_REPORT_ALL 0xFFFFFFFF + + +#define NVAPI_OGLEXPERT_OUTPUT_TO_NONE 0x00000000 +#define NVAPI_OGLEXPERT_OUTPUT_TO_CONSOLE 0x00000001 +#define NVAPI_OGLEXPERT_OUTPUT_TO_DEBUGGER 0x00000004 +#define NVAPI_OGLEXPERT_OUTPUT_TO_CALLBACK 0x00000008 +#define NVAPI_OGLEXPERT_OUTPUT_TO_ALL 0xFFFFFFFF + +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION TYPE: NVAPI_OGLEXPERT_CALLBACK +// +//! 
DESCRIPTION: Used in conjunction with OUTPUT_TO_CALLBACK, this is a simple +//! callback function the user may use to obtain the feedback +//! stream. The function will be called once per fully qualified +//! feedback stream entry. +//! +//! \param categoryId Contains the bit from the NVAPI_OGLEXPERT_REPORT +//! mask that corresponds to the current message +//! \param messageId Unique ID for the current message +//! \param detailLevel Contains the bit from the NVAPI_OGLEXPERT_DETAIL +//! mask that corresponds to the current message +//! \param objectId Unique ID of the object that corresponds to the +//! current message +//! \param messageStr Text string from the current message +//! +//! \ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +typedef void (* NVAPI_OGLEXPERT_CALLBACK) (unsigned int categoryId, unsigned int messageId, unsigned int detailLevel, int objectId, const char *messageStr); + + + +//! \ingroup oglapi +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_OGL_ExpertModeSet(NvU32 expertDetailLevel, + NvU32 expertReportMask, + NvU32 expertOutputMask, + NVAPI_OGLEXPERT_CALLBACK expertCallback); + +//! \addtogroup oglapi +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_OGL_ExpertModeGet(NvU32 *pExpertDetailLevel, + NvU32 *pExpertReportMask, + NvU32 *pExpertOutputMask, + NVAPI_OGLEXPERT_CALLBACK *pExpertCallback); + +//@} +/////////////////////////////////////////////////////////////////////////////// +// +//! \name NvAPI_OGL_ExpertModeDefaultsSet[Get] Functions +//! +//@{ +//! This function configures OpenGL Expert Mode global defaults. These settings +//! apply to any OpenGL application which starts up after these +//! values are applied (i.e. these settings *do not* apply to +//! currently running applications). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! 
\param expertDetailLevel Value which specifies the detail level in +//! the feedback stream. This is a mask made up +//! of NVAPI_OGLEXPERT_LEVEL bits. +//! +//! \param expertReportMask Mask made up of NVAPI_OGLEXPERT_REPORT bits, +//! this parameter specifies the areas of +//! functional interest. +//! +//! \param expertOutputMask Mask made up of NVAPI_OGLEXPERT_OUTPUT bits, +//! this parameter specifies the feedback output +//! location. Note that using OUTPUT_TO_CALLBACK +//! here is meaningless and has no effect, but +//! using it will not cause an error. +//! +//! \return ::NVAPI_ERROR or ::NVAPI_OK +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup oglapi +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_OGL_ExpertModeDefaultsSet(NvU32 expertDetailLevel, + NvU32 expertReportMask, + NvU32 expertOutputMask); + +//! \addtogroup oglapi +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_OGL_ExpertModeDefaultsGet(NvU32 *pExpertDetailLevel, + NvU32 *pExpertReportMask, + NvU32 *pExpertOutputMask); +//@} + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_EnumTCCPhysicalGPUs +// +//! This function returns an array of physical GPU handles that are in TCC Mode. +//! Each handle represents a physical GPU present in the system in TCC Mode. +//! That GPU may not be visible to the OS directly. +//! +//! The array nvGPUHandle will be filled with physical GPU handle values. The returned +//! gpuCount determines how many entries in the array are valid. +//! +//! NOTE: Handles enumerated by this API are only valid for NvAPIs that are tagged as TCC_SUPPORTED +//! If handle is passed to any other API, it will fail with NVAPI_INVALID_HANDLE +//! +//! For WDDM GPU handles please use NvAPI_EnumPhysicalGPUs() +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! +//! 
\param [out] nvGPUHandle Physical GPU array that will contain all TCC Physical GPUs +//! \param [out] pGpuCount count represent the number of valid entries in nvGPUHandle +//! +//! +//! \retval NVAPI_INVALID_ARGUMENT nvGPUHandle or pGpuCount is NULL +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_EnumTCCPhysicalGPUs( NvPhysicalGpuHandle nvGPUHandle[NVAPI_MAX_PHYSICAL_GPUS], NvU32 *pGpuCount); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_EnumLogicalGPUs +// +//! This function returns an array of logical GPU handles. +//! +//! Each handle represents one or more GPUs acting in concert as a single graphics device. +//! +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! +//! The array nvGPUHandle will be filled with logical GPU handle values. The returned +//! gpuCount determines how many entries in the array are valid. +//! +//! \note All logical GPUs handles get invalidated on a GPU topology change, so the calling +//! application is required to renum the logical GPU handles to get latest physical handle +//! mapping after every GPU topology change activated by a call to NvAPI_SetGpuTopologies(). +//! +//! To detect if SLI rendering is enabled, use NvAPI_D3D_GetCurrentSLIState(). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT nvGPUHandle or pGpuCount is NULL +//! \retval NVAPI_OK One or more handles were returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_EnumLogicalGPUs(NvLogicalGpuHandle nvGPUHandle[NVAPI_MAX_LOGICAL_GPUS], NvU32 *pGpuCount); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetPhysicalGPUsFromDisplay +// +//! This function returns an array of physical GPU handles associated with the specified display. +//! +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! +//! The array nvGPUHandle will be filled with physical GPU handle values. The returned +//! gpuCount determines how many entries in the array are valid. +//! +//! If the display corresponds to more than one physical GPU, the first GPU returned +//! is the one with the attached active output. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT hNvDisp is not valid; nvGPUHandle or pGpuCount is NULL +//! \retval NVAPI_OK One or more handles were returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND no NVIDIA GPU driving a display was found +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetPhysicalGPUsFromDisplay(NvDisplayHandle hNvDisp, NvPhysicalGpuHandle nvGPUHandle[NVAPI_MAX_PHYSICAL_GPUS], NvU32 *pGpuCount); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetPhysicalGPUFromUnAttachedDisplay +// +//! This function returns a physical GPU handle associated with the specified unattached display. +//! The source GPU is a physical render GPU which renders the frame buffer but may or may not drive the scan out. +//! +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! 
\retval NVAPI_INVALID_ARGUMENT hNvUnAttachedDisp is not valid or pPhysicalGpu is NULL. +//! \retval NVAPI_OK One or more handles were returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetPhysicalGPUFromUnAttachedDisplay(NvUnAttachedDisplayHandle hNvUnAttachedDisp, NvPhysicalGpuHandle *pPhysicalGpu); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetLogicalGPUFromDisplay +// +//! This function returns the logical GPU handle associated with the specified display. +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! hNvDisp can be NVAPI_DEFAULT_HANDLE or a handle enumerated from NvAPI_EnumNVidiaDisplayHandle(). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT hNvDisp is not valid; pLogicalGPU is NULL +//! \retval NVAPI_OK One or more handles were returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetLogicalGPUFromDisplay(NvDisplayHandle hNvDisp, NvLogicalGpuHandle *pLogicalGPU); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetLogicalGPUFromPhysicalGPU +// +//! This function returns the logical GPU handle associated with specified physical GPU handle. +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGPU is not valid; pLogicalGPU is NULL +//! \retval NVAPI_OK One or more handles were returned +//! 
\retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetLogicalGPUFromPhysicalGPU(NvPhysicalGpuHandle hPhysicalGPU, NvLogicalGpuHandle *pLogicalGPU); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetPhysicalGPUsFromLogicalGPU +// +//! This function returns the physical GPU handles associated with the specified logical GPU handle. +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! +//! The array hPhysicalGPU will be filled with physical GPU handle values. The returned +//! gpuCount determines how many entries in the array are valid. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT hLogicalGPU is not valid; hPhysicalGPU is NULL +//! \retval NVAPI_OK One or more handles were returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_LOGICAL_GPU_HANDLE hLogicalGPU was not a logical GPU handle +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetPhysicalGPUsFromLogicalGPU(NvLogicalGpuHandle hLogicalGPU,NvPhysicalGpuHandle hPhysicalGPU[NVAPI_MAX_PHYSICAL_GPUS], NvU32 *pGpuCount); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetPhysicalGPUFromGPUID +// +//! Do not use this function for new software development. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT gpuId is zero or pPhysicalGPU is NULL +//! \retval NVAPI_OK Handle was returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetPhysicalGPUFromGPUID(NvU32 gpuId, NvPhysicalGpuHandle *pPhysicalGPU); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetGPUIDfromPhysicalGPU +// +//! Do not use this function for new software development. +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 95 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu is NULL or invalid. Re-enumerate the GPU handles. +//! \retval NVAPI_OK Handle was returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetGPUIDfromPhysicalGPU(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pGpuId); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetShaderSubPipeCount +// +//! DESCRIPTION: This function retrieves the number of Shader SubPipes on the GPU +//! On newer architectures, this corresponds to the number of SM units +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 170 +//! +//! \retval NVAPI_INVALID_ARGUMENT: pCount is NULL +//! \retval NVAPI_OK: *pCount is set +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND: no NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetShaderSubPipeCount(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pCount); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetGpuCoreCount +// +//! 
DESCRIPTION: Retrieves the total number of cores defined for a GPU. +//! Returns 0 on architectures that don't define GPU cores. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \retval ::NVAPI_INVALID_ARGUMENT pCount is NULL +//! \retval ::NVAPI_OK *pCount is set +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND no NVIDIA GPU driving a display was found +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle +//! \retval ::NVAPI_NOT_SUPPORTED API call is not supported on current architecture +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetGpuCoreCount(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pCount); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetAllOutputs +// +//! This function returns set of all GPU-output identifiers as a bitmask. +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetAllDisplayIds. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pOutputsMask is NULL. +//! \retval NVAPI_OK *pOutputsMask contains a set of GPU-output identifiers. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. 
Instead, use NvAPI_GPU_GetAllDisplayIds.") +NVAPI_INTERFACE NvAPI_GPU_GetAllOutputs(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pOutputsMask); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetConnectedOutputs +// +//! This function is the same as NvAPI_GPU_GetAllOutputs() but returns only the set of GPU output +//! identifiers that are connected to display devices. +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetConnectedDisplayIds. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pOutputsMask is NULL. +//! \retval NVAPI_OK *pOutputsMask contains a set of GPU-output identifiers. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetConnectedDisplayIds.") +NVAPI_INTERFACE NvAPI_GPU_GetConnectedOutputs(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pOutputsMask); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetConnectedSLIOutputs +// +//! DESCRIPTION: This function is the same as NvAPI_GPU_GetConnectedOutputs() but returns only the set of GPU-output +//! identifiers that can be selected in an SLI configuration. +//! NOTE: This function matches NvAPI_GPU_GetConnectedOutputs() +//! - On systems which are not SLI capable. +//! - If the queried GPU is not part of a valid SLI group. +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetConnectedDisplayIds. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! 
\since Release: 170 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pOutputsMask is NULL +//! \retval NVAPI_OK *pOutputsMask contains a set of GPU-output identifiers +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetConnectedDisplayIds.") +NVAPI_INTERFACE NvAPI_GPU_GetConnectedSLIOutputs(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pOutputsMask); + + + + +//! \ingroup gpu +typedef enum +{ + NV_MONITOR_CONN_TYPE_UNINITIALIZED = 0, + NV_MONITOR_CONN_TYPE_VGA, + NV_MONITOR_CONN_TYPE_COMPONENT, + NV_MONITOR_CONN_TYPE_SVIDEO, + NV_MONITOR_CONN_TYPE_HDMI, + NV_MONITOR_CONN_TYPE_DVI, + NV_MONITOR_CONN_TYPE_LVDS, + NV_MONITOR_CONN_TYPE_DP, + NV_MONITOR_CONN_TYPE_COMPOSITE, + NV_MONITOR_CONN_TYPE_UNKNOWN = -1 +} NV_MONITOR_CONN_TYPE; + + +//! \addtogroup gpu +//! @{ +#define NV_GPU_CONNECTED_IDS_FLAG_UNCACHED NV_BIT(0) //!< Get uncached connected devices +#define NV_GPU_CONNECTED_IDS_FLAG_SLI NV_BIT(1) //!< Get devices such that those can be selected in an SLI configuration +#define NV_GPU_CONNECTED_IDS_FLAG_LIDSTATE NV_BIT(2) //!< Get devices such that to reflect the Lid State +#define NV_GPU_CONNECTED_IDS_FLAG_FAKE NV_BIT(3) //!< Get devices that includes the fake connected monitors +#define NV_GPU_CONNECTED_IDS_FLAG_EXCLUDE_MST NV_BIT(4) //!< Excludes devices that are part of the multi stream topology. + +//! @} + +//! \ingroup gpu +typedef struct _NV_GPU_DISPLAYIDS +{ + NvU32 version; + NV_MONITOR_CONN_TYPE connectorType; //!< out: vga, tv, dvi, hdmi and dp. This is reserved for future use and clients should not rely on this information. 
Instead get the + //!< GPU connector type from NvAPI_GPU_GetConnectorInfo/NvAPI_GPU_GetConnectorInfoEx + NvU32 displayId; //!< this is a unique identifier for each device + + NvU32 isDynamic : 1; //!< if bit is set then this display is part of MST topology and it's a dynamic + NvU32 isMultiStreamRootNode : 1; //!< if bit is set then this displayID belongs to a multi stream enabled connector(root node). Note that when multi stream is enabled and + //!< a single multi stream capable monitor is connected to it, the monitor will share the display id with the RootNode. + //!< When there is more than one monitor connected in a multi stream topology, then the root node will have a separate displayId. + NvU32 isActive : 1; //!< if bit is set then this display is being actively driven + NvU32 isCluster : 1; //!< if bit is set then this display is the representative display + NvU32 isOSVisible : 1; //!< if bit is set, then this display is reported to the OS + NvU32 isWFD : 1; //!< Deprecated. Will always return 0. + NvU32 isConnected : 1; //!< if bit is set, then this display is connected + + NvU32 reservedInternal : 10; //!< Do not use + NvU32 isPhysicallyConnected : 1; //!< if bit is set, then this display is a phycially connected display; Valid only when isConnected bit is set + NvU32 reserved : 14; //!< must be zero +} NV_GPU_DISPLAYIDS; + +//! \ingroup gpu +//! Macro for constructing the version field of ::_NV_GPU_DISPLAYIDS +#define NV_GPU_DISPLAYIDS_VER1 MAKE_NVAPI_VERSION(NV_GPU_DISPLAYIDS,1) +#define NV_GPU_DISPLAYIDS_VER2 MAKE_NVAPI_VERSION(NV_GPU_DISPLAYIDS,3) + +#define NV_GPU_DISPLAYIDS_VER NV_GPU_DISPLAYIDS_VER2 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetConnectedDisplayIds +// +//! +//! DESCRIPTION: Due to space limitation NvAPI_GPU_GetConnectedOutputs can return maximum 32 devices, but +//! this is no longer true for DPMST. NvAPI_GPU_GetConnectedDisplayIds will return all +//! 
the connected display devices in the form of displayIds for the associated hPhysicalGpu. +//! This function can accept set of flags to request cached, uncached, sli and lid to get the connected devices. +//! Default value for flags will be cached . +//! +//! HOW TO USE: 1) for each PhysicalGpu, make a call to get the number of connected displayId's +//! using NvAPI_GPU_GetConnectedDisplayIds by passing the pDisplayIds as NULL +//! On call success: +//! +//! 2) If pDisplayIdCount is greater than 0, allocate memory based on pDisplayIdCount. Then make a call NvAPI_GPU_GetConnectedDisplayIds to populate DisplayIds. +//! However, if pDisplayIdCount is 0, do not make this call. +//! SUPPORTED OS: Windows 10 and higher +//! +//! \param [in] hPhysicalGpu - GPU selection +//! \param [in] flags - One or more defines from NV_GPU_CONNECTED_IDS_FLAG_* as valid flags. +//! \param [in,out] pDisplayIds - Pointer to an NV_GPU_DISPLAYIDS struct, each entry represents a one displayID and its attributes +//! \param [in] pDisplayIdCount - Number of displayId's. +//! +//! \retval NVAPI_INVALID_ARGUMENT: hPhysicalGpu or pDisplayIds or pDisplayIdCount is NULL +//! \retval NVAPI_OK: *pDisplayIds contains a set of GPU-output identifiers +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND: no NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetConnectedDisplayIds(__in NvPhysicalGpuHandle hPhysicalGpu, __inout_ecount_part_opt(*pDisplayIdCount, *pDisplayIdCount) NV_GPU_DISPLAYIDS* pDisplayIds, __inout NvU32* pDisplayIdCount, __in NvU32 flags); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetAllDisplayIds +// +//! DESCRIPTION: This API returns display IDs for all possible outputs on the GPU. +//! 
For DPMST connector, it will return display IDs for all the video sinks in the topology. \n +//! HOW TO USE: 1. The first call should be made to get the all display ID count. To get the display ID count, send in \n +//! a) hPhysicalGpu - a valid WDDM type GPU handle(enumerated using NvAPI_SYS_GetPhysicalGPUs()) as input, \n +//! b) pDisplayIds - NULL, as we just want to get the display ID count. \n +//! c) pDisplayIdCount - a valid pointer to NvU32, whose value is set to ZERO. \n +//! If all parameters are correct and this call is successful, this call will return the display ID's count. \n +//! 2. To get the display ID array, make the second call to NvAPI_GPU_GetAllDisplayIds() with \n +//! a) hPhysicalGpu - should be same value which was sent in first call, \n +//! b) pDisplayIds - pointer to the display ID array allocated by caller based on display ID count, \n +//! eg. malloc(sizeof(NV_GPU_DISPLAYIDS) * pDisplayIdCount). \n +//! c) pDisplayIdCount - a valid pointer to NvU32. This indicates for how many display IDs \n +//! the memory is allocated(pDisplayIds) by the caller. \n +//! If all parameters are correct and this call is successful, this call will return the display ID array and actual +//! display ID count (which was obtained in the first call to NvAPI_GPU_GetAllDisplayIds). If the input display ID count is +//! less than the actual display ID count, it will overwrite the input and give the pDisplayIdCount as actual count and the +//! API will return NVAPI_INSUFFICIENT_BUFFER. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hPhysicalGpu GPU selection. +//! \param [in,out] DisplayIds Pointer to an array of NV_GPU_DISPLAYIDS structures, each entry represents one displayID +//! and its attributes. +//! \param [in,out] pDisplayIdCount As input, this parameter indicates the number of display's id's for which caller has +//! allocated the memory. As output, it will return the actual number of display IDs. +//! +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \retval NVAPI_INSUFFICIENT_BUFFER When the input buffer(pDisplayIds) is less than the actual number of display IDs, this API +//! will return NVAPI_INSUFFICIENT_BUFFER. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetAllDisplayIds(__in NvPhysicalGpuHandle hPhysicalGpu, __inout_ecount_part_opt(*pDisplayIdCount, *pDisplayIdCount) NV_GPU_DISPLAYIDS* pDisplayIds, __inout NvU32* pDisplayIdCount); + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetConnectedOutputsWithLidState +// +//! This function is similar to NvAPI_GPU_GetConnectedOutputs(), and returns the connected display identifiers that are connected +//! as an output mask but unlike NvAPI_GPU_GetConnectedOutputs() this API "always" reflects the Lid State in the output mask. +//! Thus if you expect the LID close state to be available in the connection mask use this API. +//! - If LID is closed then this API will remove the LID panel from the connected display identifiers. +//! - If LID is open then this API will reflect the LID panel in the connected display identifiers. +//! +//! \note This API should be used on notebook systems and on systems where the LID state is required in the connection +//! output mask. On desktop systems the returned identifiers will match NvAPI_GPU_GetConnectedOutputs(). +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetConnectedDisplayIds. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 95 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pOutputsMask is NULL +//! \retval NVAPI_OK *pOutputsMask contains a set of GPU-output identifiers +//! 
\retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetConnectedDisplayIds.") +NVAPI_INTERFACE NvAPI_GPU_GetConnectedOutputsWithLidState(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pOutputsMask); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetConnectedSLIOutputsWithLidState +// +//! DESCRIPTION: This function is the same as NvAPI_GPU_GetConnectedOutputsWithLidState() but returns only the set +//! of GPU-output identifiers that can be selected in an SLI configuration. With SLI disabled, +//! this function matches NvAPI_GPU_GetConnectedOutputsWithLidState(). +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_GPU_GetConnectedDisplayIds. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 170 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pOutputsMask is NULL +//! \retval NVAPI_OK *pOutputsMask contains a set of GPU-output identifiers +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. 
Instead, use NvAPI_GPU_GetConnectedDisplayIds.") +NVAPI_INTERFACE NvAPI_GPU_GetConnectedSLIOutputsWithLidState(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pOutputsMask); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetSystemType +// +//! \fn NvAPI_GPU_GetSystemType(NvPhysicalGpuHandle hPhysicalGpu, NV_SYSTEM_TYPE *pSystemType) +//! This function identifies whether the GPU is a notebook GPU or a desktop GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 95 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pOutputsMask is NULL +//! \retval NVAPI_OK *pSystemType contains the GPU system type +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +//! Used in NvAPI_GPU_GetSystemType() +typedef enum +{ + NV_SYSTEM_TYPE_UNKNOWN = 0, + NV_SYSTEM_TYPE_LAPTOP = 1, + NV_SYSTEM_TYPE_DESKTOP = 2, + +} NV_SYSTEM_TYPE; + + + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_GetSystemType(NvPhysicalGpuHandle hPhysicalGpu, NV_SYSTEM_TYPE *pSystemType); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetActiveOutputs +// +//! This function is the same as NvAPI_GPU_GetAllOutputs but returns only the set of GPU output +//! identifiers that are actively driving display devices. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pOutputsMask is NULL. +//! \retval NVAPI_OK *pOutputsMask contains a set of GPU-output identifiers. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! 
\retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetActiveOutputs(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pOutputsMask); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_SetEDID +// +//! Thus function sets the EDID data for the specified GPU handle and connection bit mask. +//! User can either send (Gpu handle & output id) or only display Id in variable displayOutputId parameter & hPhysicalGpu parameter can be default handle (0). +//! \note The EDID will be cached across the boot session and will be enumerated to the OS in this call. +//! To remove the EDID set sizeofEDID to zero. +//! OS and NVAPI connection status APIs will reflect the newly set or removed EDID dynamically. +//! +//! This feature will NOT be supported on the following boards: +//! - GeForce +//! - Quadro VX +//! - Tesla +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 100 +//! +//! \requires Administrator privileges +//! +//! \retval NVAPI_INVALID_ARGUMENT pEDID is NULL; displayOutputId has 0 or > 1 bits set +//! \retval NVAPI_OK *pEDID data was applied to the requested displayOutputId. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle. +//! \retval NVAPI_NOT_SUPPORTED For the above mentioned GPUs +//! \retval NVAPI_INVALID_USER_PRIVILEGE The application will require Administrator privileges to access this API. +//! The application can be elevated to a higher permission level by selecting "Run as Administrator". +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_SetEDID(NvPhysicalGpuHandle hPhysicalGpu, NvU32 displayOutputId, NV_EDID *pEDID); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetOutputType +// +//! \fn NvAPI_GPU_GetOutputType(NvPhysicalGpuHandle hPhysicalGpu, NvU32 outputId, NV_GPU_OUTPUT_TYPE *pOutputType) +//! This function returns the output type. User can either specify both 'physical GPU handle and outputId (exactly 1 bit set - see \ref handles)' or +//! a valid displayId in the outputId parameter. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \Version Earliest supported ForceWare version: 82.61 +//! +//! \retval NVAPI_INVALID_ARGUMENT outputId, pOutputType is NULL; or if outputId parameter is not displayId and either it has > 1 bit set or hPhysicalGpu is NULL. +//! \retval NVAPI_OK *pOutputType contains a NvGpuOutputType value +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +//! used in NvAPI_GPU_GetOutputType() +typedef enum _NV_GPU_OUTPUT_TYPE +{ + NVAPI_GPU_OUTPUT_UNKNOWN = 0, + NVAPI_GPU_OUTPUT_CRT = 1, //!< CRT display device + NVAPI_GPU_OUTPUT_DFP = 2, //!< Digital Flat Panel display device + NVAPI_GPU_OUTPUT_TV = 3, //!< TV display device +} NV_GPU_OUTPUT_TYPE; + + + + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_GetOutputType(NvPhysicalGpuHandle hPhysicalGpu, NvU32 outputId, NV_GPU_OUTPUT_TYPE *pOutputType); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_ValidateOutputCombination +// +//! This function determines if a set of GPU outputs can be active +//! simultaneously. 
While a GPU may have outputs, typically they cannot +//! all be active at the same time due to internal resource sharing. +//! +//! Given a physical GPU handle and a mask of candidate outputs, this call +//! will return NVAPI_OK if all of the specified outputs can be driven +//! simultaneously. It will return NVAPI_INVALID_COMBINATION if they cannot. +//! +//! Use NvAPI_GPU_GetAllOutputs() to determine which outputs are candidates. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \retval NVAPI_OK Combination of outputs in outputsMask are valid (can be active simultaneously). +//! \retval NVAPI_INVALID_COMBINATION Combination of outputs in outputsMask are NOT valid. +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or outputsMask does not have at least 2 bits set. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ValidateOutputCombination(NvPhysicalGpuHandle hPhysicalGpu, NvU32 outputsMask); + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetFullName +// +//! This function retrieves the full GPU name as an ASCII string - for example, "Quadro FX 1400". +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \return NVAPI_ERROR or NVAPI_OK +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetFullName(NvPhysicalGpuHandle hPhysicalGpu, NvAPI_ShortString szName); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetPCIIdentifiers +// +//! 
This function returns the PCI identifiers associated with this GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \param DeviceId The internal PCI device identifier for the GPU. +//! \param SubSystemId The internal PCI subsystem identifier for the GPU. +//! \param RevisionId The internal PCI device-specific revision identifier for the GPU. +//! \param ExtDeviceId The external PCI device identifier for the GPU. +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or an argument is NULL +//! \retval NVAPI_OK Arguments are populated with PCI identifiers +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetPCIIdentifiers(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pDeviceId,NvU32 *pSubSystemId,NvU32 *pRevisionId,NvU32 *pExtDeviceId); + + + + +//! \ingroup gpu +//! Used in NvAPI_GPU_GetGPUType(). +typedef enum _NV_GPU_TYPE +{ + NV_SYSTEM_TYPE_GPU_UNKNOWN = 0, + NV_SYSTEM_TYPE_IGPU = 1, //!< Integrated GPU + NV_SYSTEM_TYPE_DGPU = 2, //!< Discrete GPU +} NV_GPU_TYPE; + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetGPUType +// +//! DESCRIPTION: This function returns the GPU type (integrated or discrete). +//! See ::NV_GPU_TYPE. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 173 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu +//! \retval NVAPI_OK *pGpuType contains the GPU type +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle +//! +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetGPUType(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_TYPE *pGpuType); + + + + +//! \ingroup gpu +//! Used in NvAPI_GPU_GetBusType() +typedef enum _NV_GPU_BUS_TYPE +{ + NVAPI_GPU_BUS_TYPE_UNDEFINED = 0, + NVAPI_GPU_BUS_TYPE_PCI = 1, + NVAPI_GPU_BUS_TYPE_AGP = 2, + NVAPI_GPU_BUS_TYPE_PCI_EXPRESS = 3, + NVAPI_GPU_BUS_TYPE_FPCI = 4, + NVAPI_GPU_BUS_TYPE_AXI = 5, +} NV_GPU_BUS_TYPE; +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetBusType +// +//! This function returns the type of bus associated with this GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pBusType is NULL. +//! \retval NVAPI_OK *pBusType contains bus identifier. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetBusType(NvPhysicalGpuHandle hPhysicalGpu,NV_GPU_BUS_TYPE *pBusType); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetBusId +// +//! DESCRIPTION: Returns the ID of the bus associated with this GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 167 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pBusId is NULL. +//! \retval NVAPI_OK *pBusId contains the bus ID. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetBusId(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pBusId); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetBusSlotId +// +//! DESCRIPTION: Returns the ID of the bus slot associated with this GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 167 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pBusSlotId is NULL. +//! \retval NVAPI_OK *pBusSlotId contains the bus slot ID. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetBusSlotId(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pBusSlotId); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetIRQ +// +//! This function returns the interrupt number associated with this GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pIRQ is NULL. +//! \retval NVAPI_OK *pIRQ contains interrupt number. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetIRQ(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pIRQ); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetVbiosRevision +// +//! 
This function returns the revision of the video BIOS associated with this GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pBiosRevision is NULL. +//! \retval NVAPI_OK *pBiosRevision contains revision number. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetVbiosRevision(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pBiosRevision); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetVbiosOEMRevision +// +//! This function returns the OEM revision of the video BIOS associated with this GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu or pBiosRevision is NULL +//! \retval NVAPI_OK *pBiosRevision contains revision number +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetVbiosOEMRevision(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pBiosRevision); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetVbiosVersionString +// +//! This function returns the full video BIOS version string in the form of xx.xx.xx.xx.yy where +//! - xx numbers come from NvAPI_GPU_GetVbiosRevision() and +//! - yy comes from NvAPI_GPU_GetVbiosOEMRevision(). +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_INVALID_ARGUMENT hPhysicalGpu is NULL. +//! \retval NVAPI_OK szBiosRevision contains version string. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetVbiosVersionString(NvPhysicalGpuHandle hPhysicalGpu,NvAPI_ShortString szBiosRevision); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetAGPAperture +// +//! This function returns the AGP aperture in megabytes. +//! +//! \deprecated Do not use this function - it is deprecated in release 455. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_NOT_SUPPORTED +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 455.") +NVAPI_INTERFACE NvAPI_GPU_GetAGPAperture(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pSize); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetCurrentAGPRate +// +//! This function returns the current AGP Rate (0 = AGP not present, 1 = 1x, 2 = 2x, etc.). +//! +//! \deprecated Do not use this function - it is deprecated in release 455. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_NOT_SUPPORTED +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 455.") +NVAPI_INTERFACE NvAPI_GPU_GetCurrentAGPRate(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pRate); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetCurrentPCIEDownstreamWidth +// +//! This function returns the number of PCIE lanes being used for the PCIE interface +//! downstream from the GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_INVALID_ARGUMENT pWidth is NULL. +//! \retval NVAPI_OK Call successful. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetCurrentPCIEDownstreamWidth(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pWidth); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetPhysicalFrameBufferSize +// +//! This function returns the physical size of framebuffer in KB. This does NOT include any +//! system RAM that may be dedicated for use by the GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_INVALID_ARGUMENT pSize is NULL +//! \retval NVAPI_OK Call successful +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetPhysicalFrameBufferSize(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pSize); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetVirtualFrameBufferSize +// +//! This function returns the virtual size of framebuffer in KB. This includes the physical RAM plus any +//! system RAM that has been dedicated for use by the GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 90 +//! +//! \retval NVAPI_INVALID_ARGUMENT pSize is NULL. +//! \retval NVAPI_OK Call successful. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu was not a physical GPU handle. +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetVirtualFrameBufferSize(NvPhysicalGpuHandle hPhysicalGpu,NvU32 *pSize); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetQuadroStatus +// +//! This function retrieves the Quadro status for the GPU (1 if Quadro, 0 if GeForce) +//! +//! This API is deprecated and it is recommended to instead query +//! NV_GPU_WORKSTATION_FEATURE_TYPE_PROVIZ support from the API +//! NvAPI_GPU_QueryWorkstationFeatureSupport. +//! \deprecated Do not use this function - it is deprecated in release 460. +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \since Release: 80 +//! +//! \return NVAPI_ERROR or NVAPI_OK +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 460.") +NVAPI_INTERFACE NvAPI_GPU_GetQuadroStatus(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pStatus); + + +//! \ingroup gpu +typedef struct _NV_BOARD_INFO +{ + NvU32 version; //!< structure version + NvU8 BoardNum[16]; //!< Board Serial Number + +}NV_BOARD_INFO_V1; + +#define NV_BOARD_INFO_VER1 MAKE_NVAPI_VERSION(NV_BOARD_INFO_V1,1) +#ifndef NV_BOARD_INFO_VER +//! \ingroup gpu +typedef NV_BOARD_INFO_V1 NV_BOARD_INFO; +//! \ingroup gpu +//! \ingroup gpu +#define NV_BOARD_INFO_VER NV_BOARD_INFO_VER1 +#endif + +//! SUPPORTED OS: Windows 10 and higher +//! +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetBoardInfo +// +//! DESCRIPTION: This API Retrieves the Board information (a unique GPU Board Serial Number) stored in the InfoROM. +//! +//! \param [in] hPhysicalGpu Physical GPU Handle. +//! \param [in,out] NV_BOARD_INFO Board Information. +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \retval ::NVAPI_OK completed request +//! \retval ::NVAPI_ERROR miscellaneous error occurred +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE handle passed is not a physical GPU handle +//! \retval ::NVAPI_API_NOT_INTIALIZED NVAPI not initialized +//! \retval ::NVAPI_INVALID_POINTER pBoardInfo is NULL +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION the version of the INFO struct is not supported +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetBoardInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_BOARD_INFO *pBoardInfo); + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetRamBusWidth +// +//! This function returns the width of the GPU's RAM memory bus. +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 100 +//! +//! \return NVAPI_ERROR or NVAPI_OK +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetRamBusWidth(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pBusWidth); + + + +//! Used in NvAPI_GPU_GetArchInfo() +typedef struct +{ + NvU32 version; + + NvU32 architecture; + NvU32 implementation; + NvU32 revision; + +} NV_GPU_ARCH_INFO_V1; + +//! NV_GPU_ARCH_INFO() values to identify Architecture level for the GPU. +typedef enum _NV_GPU_ARCHITECTURE_ID +{ + + NV_GPU_ARCHITECTURE_T2X = 0xE0000020, + NV_GPU_ARCHITECTURE_T3X = 0xE0000030, + NV_GPU_ARCHITECTURE_T4X = 0xE0000040, + NV_GPU_ARCHITECTURE_T12X = 0xE0000040, + NV_GPU_ARCHITECTURE_NV40 = 0x00000040, + NV_GPU_ARCHITECTURE_NV50 = 0x00000050, + NV_GPU_ARCHITECTURE_G78 = 0x00000060, + NV_GPU_ARCHITECTURE_G80 = 0x00000080, + NV_GPU_ARCHITECTURE_G90 = 0x00000090, + NV_GPU_ARCHITECTURE_GT200 = 0x000000A0, + NV_GPU_ARCHITECTURE_GF100 = 0x000000C0, + NV_GPU_ARCHITECTURE_GF110 = 0x000000D0, + NV_GPU_ARCHITECTURE_GK100 = 0x000000E0, + NV_GPU_ARCHITECTURE_GK110 = 0x000000F0, + NV_GPU_ARCHITECTURE_GK200 = 0x00000100, + NV_GPU_ARCHITECTURE_GM000 = 0x00000110, + NV_GPU_ARCHITECTURE_GM200 = 0x00000120, + NV_GPU_ARCHITECTURE_GP100 = 0x00000130, + NV_GPU_ARCHITECTURE_GV100 = 0x00000140, + NV_GPU_ARCHITECTURE_GV110 = 0x00000150, + NV_GPU_ARCHITECTURE_TU100 = 0x00000160, + NV_GPU_ARCHITECTURE_GA100 = 0x00000170, + NV_GPU_ARCHITECTURE_AD100 = 0x00000190, + NV_GPU_ARCHITECTURE_GB200 = 0x000001B0, + +}NV_GPU_ARCHITECTURE_ID; + +//! NV_GPU_ARCH_INFO() values to identify GPU Architecture Implementation. 
+typedef enum _NV_GPU_ARCH_IMPLEMENTATION_ID +{ + + NV_GPU_ARCH_IMPLEMENTATION_T20 = 0x00000000, + + NV_GPU_ARCH_IMPLEMENTATION_T30 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_T35 = 0x00000005, + + NV_GPU_ARCH_IMPLEMENTATION_T40 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_T124 = 0x00000000, + + NV_GPU_ARCH_IMPLEMENTATION_NV40 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_NV41 = 0x00000001, + NV_GPU_ARCH_IMPLEMENTATION_NV42 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_NV43 = 0x00000003, + NV_GPU_ARCH_IMPLEMENTATION_NV44 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_NV44A = 0x0000000A, + NV_GPU_ARCH_IMPLEMENTATION_NV46 = 0x00000006, + NV_GPU_ARCH_IMPLEMENTATION_NV47 = 0x00000007, + NV_GPU_ARCH_IMPLEMENTATION_NV49 = 0x00000009, + NV_GPU_ARCH_IMPLEMENTATION_NV4B = 0x0000000B, + NV_GPU_ARCH_IMPLEMENTATION_NV4C = 0x0000000C, + NV_GPU_ARCH_IMPLEMENTATION_NV4E = 0x0000000E, + + NV_GPU_ARCH_IMPLEMENTATION_NV50 = 0x00000000, + + NV_GPU_ARCH_IMPLEMENTATION_NV63 = 0x00000003, + NV_GPU_ARCH_IMPLEMENTATION_NV67 = 0x00000007, + + NV_GPU_ARCH_IMPLEMENTATION_G84 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_G86 = 0x00000006, + + NV_GPU_ARCH_IMPLEMENTATION_G92 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_G94 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_G96 = 0x00000006, + NV_GPU_ARCH_IMPLEMENTATION_G98 = 0x00000008, + + NV_GPU_ARCH_IMPLEMENTATION_GT200 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_GT212 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_GT214 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_GT215 = 0x00000003, + NV_GPU_ARCH_IMPLEMENTATION_GT216 = 0x00000005, + NV_GPU_ARCH_IMPLEMENTATION_GT218 = 0x00000008, + NV_GPU_ARCH_IMPLEMENTATION_MCP77 = 0x0000000A, + NV_GPU_ARCH_IMPLEMENTATION_GT21C = 0x0000000B, + NV_GPU_ARCH_IMPLEMENTATION_MCP79 = 0x0000000C, + NV_GPU_ARCH_IMPLEMENTATION_GT21A = 0x0000000D, + NV_GPU_ARCH_IMPLEMENTATION_MCP89 = 0x0000000F, + + NV_GPU_ARCH_IMPLEMENTATION_GF100 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_GF104 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_GF106 = 0x00000003, + 
NV_GPU_ARCH_IMPLEMENTATION_GF108 = 0x00000001, + + NV_GPU_ARCH_IMPLEMENTATION_GF110 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_GF116 = 0x00000006, + NV_GPU_ARCH_IMPLEMENTATION_GF117 = 0x00000007, + NV_GPU_ARCH_IMPLEMENTATION_GF118 = 0x00000008, + NV_GPU_ARCH_IMPLEMENTATION_GF119 = 0x00000009, + + NV_GPU_ARCH_IMPLEMENTATION_GK104 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_GK106 = 0x00000006, + NV_GPU_ARCH_IMPLEMENTATION_GK107 = 0x00000007, + NV_GPU_ARCH_IMPLEMENTATION_GK20A = 0x0000000A, + NV_GPU_ARCH_IMPLEMENTATION_GK110 = 0x00000000, + + NV_GPU_ARCH_IMPLEMENTATION_GK208 = 0x00000008, + + NV_GPU_ARCH_IMPLEMENTATION_GM204 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_GM206 = 0x00000006, + + NV_GPU_ARCH_IMPLEMENTATION_GP100 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_GP000 = 0x00000001, + NV_GPU_ARCH_IMPLEMENTATION_GP102 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_GP104 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_GP106 = 0x00000006, + NV_GPU_ARCH_IMPLEMENTATION_GP107 = 0x00000007, + NV_GPU_ARCH_IMPLEMENTATION_GP108 = 0x00000008, + + NV_GPU_ARCH_IMPLEMENTATION_GV100 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_GV10B = 0x0000000B, + + NV_GPU_ARCH_IMPLEMENTATION_TU100 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_TU102 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_TU104 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_TU106 = 0x00000006, + NV_GPU_ARCH_IMPLEMENTATION_TU116 = 0x00000008, + NV_GPU_ARCH_IMPLEMENTATION_TU117 = 0x00000007, + NV_GPU_ARCH_IMPLEMENTATION_TU000 = 0x00000001, + + NV_GPU_ARCH_IMPLEMENTATION_GA100 = 0x00000000, + NV_GPU_ARCH_IMPLEMENTATION_GA102 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_GA104 = 0x00000004, + + NV_GPU_ARCH_IMPLEMENTATION_AD102 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_AD103 = 0x00000003, + NV_GPU_ARCH_IMPLEMENTATION_AD104 = 0x00000004, + + NV_GPU_ARCH_IMPLEMENTATION_GB202 = 0x00000002, + +}NV_GPU_ARCH_IMPLEMENTATION_ID; + +typedef enum _NV_GPU_CHIP_REVISION +{ + NV_GPU_CHIP_REV_EMULATION_QT = 0x00000000, //!< QT chip + NV_GPU_CHIP_REV_EMULATION_FPGA = 
0x00000001, //!< FPGA implementation of the chipset + NV_GPU_CHIP_REV_A01 = 0x00000011, //!< First silicon chipset revision + NV_GPU_CHIP_REV_A02 = 0x00000012, //!< Second Silicon chipset revision + NV_GPU_CHIP_REV_A03 = 0x00000013, //!< Third Silicon chipset revision + NV_GPU_CHIP_REV_UNKNOWN = 0xffffffff, //!< Unknown chip revision +}NV_GPU_CHIP_REVISION; + +//! \ingroup gpu +//! Used in NvAPI_GPU_GetArchInfo() +typedef struct +{ + NvU32 version; + + union + { + NvU32 architecture; //!< architecture and architecture_id are the same. The former is NvU32 while the latter is an enum made for readability. + NV_GPU_ARCHITECTURE_ID architecture_id; //!< specifies the architecture level for the GPU. + }; + union + { + NvU32 implementation; //!< implementation and implementation_id are the same. The former is NvU32 while the latter is an enum made for readability. + NV_GPU_ARCH_IMPLEMENTATION_ID implementation_id; //!< specifies the implementation of the architecture for the GPU. + }; + union + { + NvU32 revision; //!< revision and revision_id are the same. The former is NvU32 while the latter is an enum made for readability. + NV_GPU_CHIP_REVISION revision_id; //!< specifies the architecture revision of the GPU. + }; + +} NV_GPU_ARCH_INFO_V2; + +//! \ingroup gpu +typedef NV_GPU_ARCH_INFO_V2 NV_GPU_ARCH_INFO; + +//! \ingroup gpu +#define NV_GPU_ARCH_INFO_VER_1 MAKE_NVAPI_VERSION(NV_GPU_ARCH_INFO_V1,1) +#define NV_GPU_ARCH_INFO_VER_2 MAKE_NVAPI_VERSION(NV_GPU_ARCH_INFO_V2,2) +#define NV_GPU_ARCH_INFO_VER NV_GPU_ARCH_INFO_VER_2 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetArchInfo +// +//! \fn NvAPI_GPU_GetArchInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_ARCH_INFO *pGpuArchInfo) +//! This function retrieves the architecture, implementation and chip revision for the specified GPU . +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! 
\since Release: 85 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_GetArchInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_ARCH_INFO *pGpuArchInfo); + + + +/////////////////////////////////////////////////////////////////////////////////// +// I2C API +// Provides ability to read or write data using I2C protocol. +// These APIs allow I2C access only to DDC monitors + + +//! \ingroup i2capi +//! @{ +#define NVAPI_MAX_SIZEOF_I2C_DATA_BUFFER 4096 +#define NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS 4 +#define NVAPI_DISPLAY_DEVICE_MASK_MAX 24 +#define NVAPI_I2C_SPEED_DEPRECATED 0xFFFF + +typedef enum +{ + NVAPI_I2C_SPEED_DEFAULT, //!< Set i2cSpeedKhz to I2C_SPEED_DEFAULT if default I2C speed is to be chosen, ie.use the current frequency setting. + NVAPI_I2C_SPEED_3KHZ, + NVAPI_I2C_SPEED_10KHZ, + NVAPI_I2C_SPEED_33KHZ, + NVAPI_I2C_SPEED_100KHZ, + NVAPI_I2C_SPEED_200KHZ, + NVAPI_I2C_SPEED_400KHZ, +} NV_I2C_SPEED; + +//! Used in NvAPI_I2CRead() and NvAPI_I2CWrite() +typedef struct +{ + NvU32 version; //!< The structure version. + NvU32 displayMask; //!< The Display Mask of the concerned display. + NvU8 bIsDDCPort; //!< This flag indicates either the DDC port (TRUE) or the communication port + //!< (FALSE) of the concerned display. + NvU8 i2cDevAddress; //!< The address of the I2C slave. The address should be shifted left by one. For + //!< example, the I2C address 0x50, often used for reading EDIDs, would be stored + //!< here as 0xA0. This matches the position within the byte sent by the master, as + //!< the last bit is reserved to specify the read or write direction. + NvU8* pbI2cRegAddress; //!< The I2C target register address. May be NULL, which indicates no register + //!< address should be sent. 
+ NvU32 regAddrSize; //!< The size in bytes of target register address. If pbI2cRegAddress is NULL, this + //!< field must be 0. + NvU8* pbData; //!< The buffer of data which is to be read or written (depending on the command). + NvU32 cbSize; //!< The size of the data buffer, pbData, to be read or written. + NvU32 i2cSpeed; //!< The target speed of the transaction (between 28Kbps to 40Kbps; not guaranteed). +} NV_I2C_INFO_V1; + +//! Used in NvAPI_I2CRead() and NvAPI_I2CWrite() +typedef struct +{ + NvU32 version; //!< The structure version. + NvU32 displayMask; //!< The Display Mask of the concerned display. + NvU8 bIsDDCPort; //!< This flag indicates either the DDC port (TRUE) or the communication port + //!< (FALSE) of the concerned display. + NvU8 i2cDevAddress; //!< The address of the I2C slave. The address should be shifted left by one. For + //!< example, the I2C address 0x50, often used for reading EDIDs, would be stored + //!< here as 0xA0. This matches the position within the byte sent by the master, as + //!< the last bit is reserved to specify the read or write direction. + NvU8* pbI2cRegAddress; //!< The I2C target register address. May be NULL, which indicates no register + //!< address should be sent. + NvU32 regAddrSize; //!< The size in bytes of target register address. If pbI2cRegAddress is NULL, this + //!< field must be 0. + NvU8* pbData; //!< The buffer of data which is to be read or written (depending on the command). + NvU32 cbSize; //!< The size of the data buffer, pbData, to be read or written. + NvU32 i2cSpeed; //!< Deprecated, Must be set to NVAPI_I2C_SPEED_DEPRECATED. + NV_I2C_SPEED i2cSpeedKhz; //!< The target speed of the transaction in (kHz) (Chosen from the enum NV_I2C_SPEED). +} NV_I2C_INFO_V2; + +//! Used in NvAPI_I2CRead() and NvAPI_I2CWrite() +typedef struct +{ + NvU32 version; //!< The structure version. + NvU32 displayMask; //!< The Display Mask of the concerned display. 
+ NvU8 bIsDDCPort; //!< This flag indicates either the DDC port (TRUE) or the communication port + //!< (FALSE) of the concerned display. + NvU8 i2cDevAddress; //!< The address of the I2C slave. The address should be shifted left by one. For + //!< example, the I2C address 0x50, often used for reading EDIDs, would be stored + //!< here as 0xA0. This matches the position within the byte sent by the master, as + //!< the last bit is reserved to specify the read or write direction. + NvU8* pbI2cRegAddress; //!< The I2C target register address. May be NULL, which indicates no register + //!< address should be sent. + NvU32 regAddrSize; //!< The size in bytes of target register address. If pbI2cRegAddress is NULL, this + //!< field must be 0. + NvU8* pbData; //!< The buffer of data which is to be read or written (depending on the command). + NvU32 cbSize; //!< The size of the data buffer, pbData, to be read or written. + NvU32 i2cSpeed; //!< Deprecated, Must be set to NVAPI_I2C_SPEED_DEPRECATED. + NV_I2C_SPEED i2cSpeedKhz; //!< The target speed of the transaction in (kHz) (Chosen from the enum NV_I2C_SPEED). + NvU8 portId; //!< The portid on which device is connected (remember to set bIsPortIdSet if this value is set) + //!< Optional for pre-Kepler + NvU32 bIsPortIdSet; //!< set this flag on if and only if portid value is set +} NV_I2C_INFO_V3; + +typedef NV_I2C_INFO_V3 NV_I2C_INFO; + +#define NV_I2C_INFO_VER3 MAKE_NVAPI_VERSION(NV_I2C_INFO_V3,3) +#define NV_I2C_INFO_VER2 MAKE_NVAPI_VERSION(NV_I2C_INFO_V2,2) +#define NV_I2C_INFO_VER1 MAKE_NVAPI_VERSION(NV_I2C_INFO_V1,1) + +#define NV_I2C_INFO_VER NV_I2C_INFO_VER3 +//! @} + +/***********************************************************************************/ + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_I2CRead +// +//! This function reads the data buffer from the I2C port. +//! The I2C request must be for a DDC port: pI2cInfo->bIsDDCPort = 1. +//! +//! 
A data buffer size larger than 16 bytes may be rejected if a register address is specified. In such a case, +//! NVAPI_ARGUMENT_EXCEED_MAX_SIZE would be returned. +//! +//! If a register address is specified (i.e. regAddrSize is positive), then the transaction will be performed in +//! the combined format described in the I2C specification. The register address will be written, followed by +//! reading into the data buffer. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \param [in] hPhysicalGPU GPU selection. +//! \param [out] NV_I2C_INFO *pI2cInfo The I2C data input structure +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred. +//! \retval NVAPI_HANDLE_INVALIDATED Handle passed has been invalidated (see user guide). +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE Handle passed is not a physical GPU handle. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION Structure version is not supported. +//! \retval NVAPI_INVALID_ARGUMENT - argument does not meet specified requirements +//! \retval NVAPI_ARGUMENT_EXCEED_MAX_SIZE - an argument exceeds the maximum +//! +//! \ingroup i2capi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_I2CRead(NvPhysicalGpuHandle hPhysicalGpu, NV_I2C_INFO *pI2cInfo); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_I2CWrite +// +//! This function writes the data buffer to the I2C port. +//! +//! The I2C request must be for a DDC port: pI2cInfo->bIsDDCPort = 1. +//! +//! A data buffer size larger than 16 bytes may be rejected if a register address is specified. In such a case, +//! NVAPI_ARGUMENT_EXCEED_MAX_SIZE would be returned. +//! +//! If a register address is specified (i.e. regAddrSize is positive), then the register address will be written +//! and the data buffer will immediately follow without a restart. +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \param [in] hPhysicalGPU GPU selection. +//! \param [in] pI2cInfo The I2C data input structure +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred. +//! \retval NVAPI_HANDLE_INVALIDATED Handle passed has been invalidated (see user guide). +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE Handle passed is not a physical GPU handle. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION Structure version is not supported. +//! \retval NVAPI_INVALID_ARGUMENT Argument does not meet specified requirements +//! \retval NVAPI_ARGUMENT_EXCEED_MAX_SIZE Argument exceeds the maximum +//! +//! \ingroup i2capi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_I2CWrite(NvPhysicalGpuHandle hPhysicalGpu, NV_I2C_INFO *pI2cInfo); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_WorkstationFeatureSetup +// +//! \fn NvAPI_GPU_WorkstationFeatureSetup(NvPhysicalGpuHandle hPhysicalGpu, NvU32 featureEnableMask, NvU32 featureDisableMask) +//! DESCRIPTION: This API configures the driver for a set of workstation features. +//! The driver can allocate the memory resources accordingly. +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \param [in] hPhysicalGpu Physical GPU Handle of the display adapter to be configured. GPU handles may be retrieved +//! using NvAPI_EnumPhysicalGPUs. A value of NULL is permitted and applies the same operation +//! to all GPU handles enumerated by NvAPI_EnumPhysicalGPUs. +//! \param [in] featureEnableMask Mask of features the caller requests to enable for use +//! \param [in] featureDisableMask Mask of features the caller requests to disable +//! +//! 
As a general rule, features in the enable and disable masks are expected to be disjoint, although the disable +//! mask has precedence and a feature flagged in both masks will be disabled. +//! +//! \retval ::NVAPI_OK configuration request succeeded +//! \retval ::NVAPI_ERROR configuration request failed +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu is not a physical GPU handle. +//! \retval ::NVAPI_GPU_WORKSTATION_FEATURE_INCOMPLETE requested feature set does not have all resources allocated for completeness. +//! \retval ::NVAPI_NO_IMPLEMENTATION OS below Win7, implemented only for Win7 but returns NVAPI_OK on OS above Win7 to +//! keep compatibility with apps written against Win7. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +typedef enum +{ + NVAPI_GPU_WORKSTATION_FEATURE_MASK_SWAPGROUP = 0x00000001, + NVAPI_GPU_WORKSTATION_FEATURE_MASK_STEREO = 0x00000010, + NVAPI_GPU_WORKSTATION_FEATURE_MASK_WARPING = 0x00000100, + NVAPI_GPU_WORKSTATION_FEATURE_MASK_PIXINTENSITY = 0x00000200, + NVAPI_GPU_WORKSTATION_FEATURE_MASK_GRAYSCALE = 0x00000400, + NVAPI_GPU_WORKSTATION_FEATURE_MASK_BPC10 = 0x00001000 +} NVAPI_GPU_WORKSTATION_FEATURE_MASK; + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_WorkstationFeatureSetup(__in NvPhysicalGpuHandle hPhysicalGpu, __in NvU32 featureEnableMask, __in NvU32 featureDisableMask); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_WorkstationFeatureQuery +// +//! DESCRIPTION: This API queries the current set of workstation features. +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \param [in] hPhysicalGpu Physical GPU Handle of the display adapter to be configured. GPU handles may be retrieved +//! using NvAPI_EnumPhysicalGPUs. +//! \param [out] pConfiguredFeatureMask Mask of features requested for use by client drivers +//! 
\param [out] pConsistentFeatureMask Mask of features that have all resources allocated for completeness. +//! +//! \retval ::NVAPI_OK configuration request succeeded +//! \retval ::NVAPI_ERROR configuration request failed +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE hPhysicalGpu is not a physical GPU handle. +//! \retval ::NVAPI_NO_IMPLEMENTATION OS below Win7, implemented only for Win7 but returns NVAPI_OK on OS above Win7 to +//! keep compatibility with apps written against Win7. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_WorkstationFeatureQuery(__in NvPhysicalGpuHandle hPhysicalGpu, __out_opt NvU32 *pConfiguredFeatureMask, __out_opt NvU32 *pConsistentFeatureMask); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetHDCPSupportStatus +// +//! \fn NvAPI_GPU_GetHDCPSupportStatus(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_GET_HDCP_SUPPORT_STATUS *pGetHDCPSupportStatus) +//! DESCRIPTION: This function returns a GPU's HDCP support status. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 175 +//! +//! \retval ::NVAPI_OK +//! \retval ::NVAPI_ERROR +//! \retval ::NVAPI_INVALID_ARGUMENT +//! \retval ::NVAPI_HANDLE_INVALIDATED +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION +// +//////////////////////////////////////////////////////////////////////////////// + + +//! \addtogroup gpu +//! @{ + + +//! HDCP fuse states - used in NV_GPU_GET_HDCP_SUPPORT_STATUS +typedef enum _NV_GPU_HDCP_FUSE_STATE +{ + NV_GPU_HDCP_FUSE_STATE_UNKNOWN = 0, + NV_GPU_HDCP_FUSE_STATE_DISABLED = 1, + NV_GPU_HDCP_FUSE_STATE_ENABLED = 2, +} NV_GPU_HDCP_FUSE_STATE; + + +//! 
HDCP key sources - used in NV_GPU_GET_HDCP_SUPPORT_STATUS
+typedef enum _NV_GPU_HDCP_KEY_SOURCE
+{
+    NV_GPU_HDCP_KEY_SOURCE_UNKNOWN    = 0,
+    NV_GPU_HDCP_KEY_SOURCE_NONE       = 1,
+    NV_GPU_HDCP_KEY_SOURCE_CRYPTO_ROM = 2,
+    NV_GPU_HDCP_KEY_SOURCE_SBIOS      = 3,
+    NV_GPU_HDCP_KEY_SOURCE_I2C_ROM    = 4,
+    NV_GPU_HDCP_KEY_SOURCE_FUSES      = 5,
+} NV_GPU_HDCP_KEY_SOURCE;
+
+
+//! HDCP key source states - used in NV_GPU_GET_HDCP_SUPPORT_STATUS
+typedef enum _NV_GPU_HDCP_KEY_SOURCE_STATE
+{
+    NV_GPU_HDCP_KEY_SOURCE_STATE_UNKNOWN = 0,
+    NV_GPU_HDCP_KEY_SOURCE_STATE_ABSENT  = 1,
+    NV_GPU_HDCP_KEY_SOURCE_STATE_PRESENT = 2,
+} NV_GPU_HDCP_KEY_SOURCE_STATE;
+
+
+//! HDCP support status - used in NvAPI_GPU_GetHDCPSupportStatus()
+typedef struct
+{
+    NvU32                        version;               //! Structure version constructed by macro #NV_GPU_GET_HDCP_SUPPORT_STATUS
+    NV_GPU_HDCP_FUSE_STATE       hdcpFuseState;         //! GPU's HDCP fuse state
+    NV_GPU_HDCP_KEY_SOURCE       hdcpKeySource;         //! GPU's HDCP key source
+    NV_GPU_HDCP_KEY_SOURCE_STATE hdcpKeySourceState;    //! GPU's HDCP key source state
+} NV_GPU_GET_HDCP_SUPPORT_STATUS;
+
+
+//! Macro for constructing the version for structure NV_GPU_GET_HDCP_SUPPORT_STATUS
+#define NV_GPU_GET_HDCP_SUPPORT_STATUS_VER  MAKE_NVAPI_VERSION(NV_GPU_GET_HDCP_SUPPORT_STATUS,1)
+
+
+//! @}
+
+
+//! \ingroup gpu
+NVAPI_INTERFACE NvAPI_GPU_GetHDCPSupportStatus(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_GET_HDCP_SUPPORT_STATUS *pGetHDCPSupportStatus);
+
+
+
+
+//! \ingroup gpucuda
+//! @{
+
+//! defgroup nvcomp_gpu_top NVIDIA Compute GPU Topology Flags
+//! @{
+
+#define NV_COMPUTE_GPU_TOPOLOGY_PHYSICS_CAPABLE       NV_BIT(0)    //!< This is Read only GPU flag - Physics capable GPU.
+#define NV_COMPUTE_GPU_TOPOLOGY_PHYSICS_ENABLE        NV_BIT(1)    //!< This is Read/Write GPU flag - Indicates GPU is already enabled for physics.
+#define NV_COMPUTE_GPU_TOPOLOGY_PHYSICS_DEDICATED NV_BIT(2) //!< This is Read only GPU flag - Physics enabled GPU which is *dedicated* for physics indicates + //!< this compute capable GPU was automatically made usable for Physx on CUDA. +#define NV_COMPUTE_GPU_TOPOLOGY_PHYSICS_RECOMMENDED NV_BIT(3) //!< This is Read only GPU flag - Physics GPU which is recommended by the driver. +#define NV_COMPUTE_GPU_TOPOLOGY_CUDA_AVAILABLE NV_BIT(4) //!< This is Read only GPU flag. +#define NV_COMPUTE_GPU_TOPOLOGY_CUDA_CAPABLE NV_BIT(16) //!< This is Read only GPU flag. +#define NV_COMPUTE_GPU_TOPOLOGY_CUDA_DISABLED NV_BIT(17) //!< This is Read only GPU flag. +#define NV_COMPUTE_GPU_TOPOLOGY_PHYSICS_AVAILABLE NV_BIT(21) //!< This is Read only GPU flag - PhysX is able to use this GPU (PhysX is installed, etc) + +//! @} + +typedef struct +{ + NvU32 version; //!< Structure version + NvU32 gpuCount; //!< Total number of compute-capable GPUs + + struct + { + NvPhysicalGpuHandle hPhysicalGpu; //!< Compute-capable physical GPU handle + NvU32 flags; //!< One or more flags from \ref nvcomp_gpu_top + + } computeGpus[NVAPI_MAX_GPU_PER_TOPOLOGY]; //!< Array of compute-capable physical GPUs. + +} NV_COMPUTE_GPU_TOPOLOGY_V1; + + +typedef struct _NV_COMPUTE_GPU +{ + NvPhysicalGpuHandle hPhysicalGpu; //!< Compute-capable physical GPU handle + NvU32 flags; //!< One or more flags from \ref nvcomp_gpu_top + +} NV_COMPUTE_GPU; + +typedef struct _NV_COMPUTE_GPU_TOPOLOGY_V2 +{ + NvU32 version; //!< Structure version + NvU32 gpuCount; //!< Size of array + NV_COMPUTE_GPU *computeGpus; //!< Array of compute-capable physical GPUs (allocate memory of size of Physical gpuCount of system). + +} NV_COMPUTE_GPU_TOPOLOGY_V2; + +//! 
Macro for constructing the version field of NV_COMPUTE_GPU_TOPOLOGY
+#define NV_COMPUTE_GPU_TOPOLOGY_VER1  MAKE_NVAPI_VERSION(NV_COMPUTE_GPU_TOPOLOGY_V1,1)
+
+#define NV_COMPUTE_GPU_TOPOLOGY_VER   MAKE_NVAPI_VERSION(NV_COMPUTE_GPU_TOPOLOGY_V2,2)
+typedef NV_COMPUTE_GPU_TOPOLOGY_V2    NV_COMPUTE_GPU_TOPOLOGY;
+
+//! @}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_GPU_CudaEnumComputeCapableGpus
+//
+//! DESCRIPTION: This API enumerates one or more compute-capable GPUs.
+//! Note that this API is deprecated and should no longer be used for new software development.
+//! \deprecated Do not use this function - it is deprecated in release 319.
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 180
+//!
+//! \param [in,out] pComputeTopo  Pointer to the structure NV_COMPUTE_GPU_TOPOLOGY.
+//!
+//! \retval ::NVAPI_OK  The request was completed successfully.
+//!                     The gpuCount indicates if one or more compute-capable GPUs are found.
+//!                     gpuCount >= 1 means at least one compute-capable GPU is found.
+//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION  Structure version is not supported, initialize to NV_COMPUTE_GPU_TOPOLOGY_VER.
+//! \retval ::NVAPI_ERROR  Enumeration of the GPUs failed. Make sure at least one NVIDIA GPU exists in the system.
+//!
+//! \ingroup gpucuda
+///////////////////////////////////////////////////////////////////////////////
+__nvapi_deprecated_function("Do not use this function - it is deprecated in release 319.")
+NVAPI_INTERFACE NvAPI_GPU_CudaEnumComputeCapableGpus(__inout NV_COMPUTE_GPU_TOPOLOGY *pComputeTopo);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_GPU_GetTachReading
+//
+//! DESCRIPTION: This API retrieves the fan speed tachometer reading for the specified physical GPU.
+//!
+//! HOW TO USE:
+//!     - NvU32 Value = 0;
+//!     - ret = NvAPI_GPU_GetTachReading(hPhysicalGpu, &Value);
+//!
- On call success: +//! - Value contains the tachometer reading +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \param [in] hPhysicalGpu GPU selection. +//! \param [out] pValue Pointer to a variable to get the tachometer reading +//! +//! \retval ::NVAPI_OK - completed request +//! \retval ::NVAPI_ERROR - miscellaneous error occurred +//! \retval ::NVAPI_NOT_SUPPORTED - functionality not supported +//! \retval ::NVAPI_API_NOT_INTIALIZED - nvapi not initialized +//! \retval ::NVAPI_INVALID_ARGUMENT - invalid argument passed +//! \retval ::NVAPI_HANDLE_INVALIDATED - handle passed has been invalidated (see user guide) +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE - handle passed is not a physical GPU handle +//! +//! \ingroup gpucooler +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetTachReading(NvPhysicalGpuHandle hPhysicalGPU, NvU32 *pValue); + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetECCStatusInfo +// +//! \fn NvAPI_GPU_GetECCStatusInfo(NvPhysicalGpuHandle hPhysicalGpu, +//! NV_GPU_ECC_STATUS_INFO *pECCStatusInfo); +//! DESCRIPTION: This function returns ECC memory status information. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \param [in] hPhysicalGpu A handle identifying the physical GPU for which ECC +//! status information is to be retrieved. +//! \param [out] pECCStatusInfo A pointer to an ECC status structure. +//! +//! \retval ::NVAPI_OK The request was completed successfully. +//! \retval ::NVAPI_ERROR An unknown error occurred. +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE The provided GPU handle is not a physical GPU handle. +//! \retval ::NVAPI_INVALID_HANDLE The provided GPU handle is invalid. +//! \retval ::NVAPI_HANDLE_INVALIDATED The provided GPU handle is no longer valid. 
+//! \retval ::NVAPI_INVALID_POINTER An invalid argument pointer was provided. +//! \retval ::NVAPI_NOT_SUPPORTED The request is not supported. +//! \retval ::NVAPI_API_NOT_INTIALIZED NvAPI was not yet initialized. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \addtogroup gpuecc +//! Used in NV_GPU_ECC_STATUS_INFO. +typedef enum _NV_ECC_CONFIGURATION +{ + NV_ECC_CONFIGURATION_NOT_SUPPORTED = 0, + NV_ECC_CONFIGURATION_DEFERRED, //!< Changes require a POST to take effect + NV_ECC_CONFIGURATION_IMMEDIATE, //!< Changes can optionally be made to take effect immediately +} NV_ECC_CONFIGURATION; + +//! \ingroup gpuecc +//! Used in NvAPI_GPU_GetECCStatusInfo(). +typedef struct +{ + NvU32 version; //!< Structure version + NvU32 isSupported : 1; //!< ECC memory feature support + NV_ECC_CONFIGURATION configurationOptions; //!< Supported ECC memory feature configuration options + NvU32 isEnabled : 1; //!< Active ECC memory setting +} NV_GPU_ECC_STATUS_INFO; + +//! \ingroup gpuecc +//! Macro for constructing the version field of NV_GPU_ECC_STATUS_INFO +#define NV_GPU_ECC_STATUS_INFO_VER MAKE_NVAPI_VERSION(NV_GPU_ECC_STATUS_INFO,1) + +//! \ingroup gpuecc +NVAPI_INTERFACE NvAPI_GPU_GetECCStatusInfo(NvPhysicalGpuHandle hPhysicalGpu, + NV_GPU_ECC_STATUS_INFO *pECCStatusInfo); + +//! \ingroup gpuecc +//! Used in NvAPI_GPU_GetECCErrorInfo()/ +typedef struct +{ + NvU32 version; //!< Structure version + struct + { + NvU64 singleBitErrors; //!< Number of single-bit ECC errors detected since last boot + NvU64 doubleBitErrors; //!< Number of double-bit ECC errors detected since last boot + } current; + struct + { + NvU64 singleBitErrors; //!< Number of single-bit ECC errors detected since last counter reset + NvU64 doubleBitErrors; //!< Number of double-bit ECC errors detected since last counter reset + } aggregate; +} NV_GPU_ECC_ERROR_INFO; + +//! \ingroup gpuecc +//! 
Macro for constructing the version field of NV_GPU_ECC_ERROR_INFO +#define NV_GPU_ECC_ERROR_INFO_VER MAKE_NVAPI_VERSION(NV_GPU_ECC_ERROR_INFO,1) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetECCErrorInfo +// +//! \fn NvAPI_GPU_GetECCErrorInfo(NvPhysicalGpuHandle hPhysicalGpu, +//! NV_GPU_ECC_ERROR_INFO *pECCErrorInfo); +//! +//! DESCRIPTION: This function returns ECC memory error information. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \param [in] hPhysicalGpu A handle identifying the physical GPU for +//! which ECC error information is to be +//! retrieved. +//! \param [out] pECCErrorInfo A pointer to an ECC error structure. +//! +//! \retval ::NVAPI_OK The request was completed successfully. +//! \retval ::NVAPI_ERROR An unknown error occurred. +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE The provided GPU handle is not a physical GPU handle. +//! \retval ::NVAPI_INVALID_ARGUMENT incorrect param value +//! \retval ::NVAPI_INVALID_POINTER An invalid argument pointer was provided. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION structure version is not supported, initialize to NV_GPU_ECC_ERROR_INFO_VER. +//! \retval ::NVAPI_HANDLE_INVALIDATED The provided GPU handle is no longer valid. +//! \retval ::NVAPI_NOT_SUPPORTED The request is not supported. +//! \retval ::NVAPI_API_NOT_INTIALIZED NvAPI was not yet initialized. +// +/////////////////////////////////////////////////////////////////////////////// +//! \ingroup gpuecc + +NVAPI_INTERFACE NvAPI_GPU_GetECCErrorInfo(NvPhysicalGpuHandle hPhysicalGpu, + NV_GPU_ECC_ERROR_INFO *pECCErrorInfo); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_ResetECCErrorInfo +// +//! DESCRIPTION: This function resets ECC memory error counters. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! 
+//! MCDM_SUPPORTED +//! +//! \requires Administrator privileges since release 430.39 +//! +//! \param [in] hPhysicalGpu A handle identifying the physical GPU for +//! which ECC error information is to be +//! cleared. +//! \param [in] bResetCurrent Reset the current ECC error counters. +//! \param [in] bResetAggregate Reset the aggregate ECC error counters. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE - The application will require Administrator privileges to access this API. +//! The application can be elevated to a higher permission level by selecting "Run as Administrator". +//! +//! \ingroup gpuecc +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ResetECCErrorInfo(NvPhysicalGpuHandle hPhysicalGpu, NvU8 bResetCurrent, + NvU8 bResetAggregate); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetECCConfigurationInfo +// +//! \fn NvAPI_GPU_GetECCConfigurationInfo(NvPhysicalGpuHandle hPhysicalGpu, +//! NV_GPU_ECC_CONFIGURATION_INFO *pECCConfigurationInfo); +//! DESCRIPTION: This function returns ECC memory configuration information. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \param [in] hPhysicalGpu A handle identifying the physical GPU for +//! which ECC configuration information +//! is to be retrieved. +//! \param [out] pECCConfigurationInfo A pointer to an ECC +//! configuration structure. +//! +//! \retval ::NVAPI_OK The request was completed successfully. +//! \retval ::NVAPI_ERROR An unknown error occurred. +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE The provided GPU handle is not a physical GPU handle. +//! \retval ::NVAPI_INVALID_HANDLE The provided GPU handle is invalid. 
+//! \retval ::NVAPI_HANDLE_INVALIDATED The provided GPU handle is no longer valid.
+//! \retval ::NVAPI_INVALID_POINTER An invalid argument pointer was provided.
+//! \retval ::NVAPI_NOT_SUPPORTED The request is not supported.
+//! \retval ::NVAPI_API_NOT_INTIALIZED NvAPI was not yet initialized.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+//! \ingroup gpuecc
+//! Used in NvAPI_GPU_GetECCConfigurationInfo().
+typedef struct
+{
+    NvU32 version; //!< Structure version
+    NvU32 isEnabled : 1; //!< Current ECC configuration stored in non-volatile memory
+    NvU32 isEnabledByDefault : 1; //!< Factory default ECC configuration (static)
+} NV_GPU_ECC_CONFIGURATION_INFO;
+
+//! \ingroup gpuecc
+//! Macro for constructing the version field of NV_GPU_ECC_CONFIGURATION_INFO
+#define NV_GPU_ECC_CONFIGURATION_INFO_VER MAKE_NVAPI_VERSION(NV_GPU_ECC_CONFIGURATION_INFO,1)
+
+//! \ingroup gpuecc
+NVAPI_INTERFACE NvAPI_GPU_GetECCConfigurationInfo(NvPhysicalGpuHandle hPhysicalGpu,
+                                                  NV_GPU_ECC_CONFIGURATION_INFO *pECCConfigurationInfo);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_GPU_SetECCConfiguration
+//
+//! DESCRIPTION:     This function updates the ECC memory configuration setting.
+//!
+//! SUPPORTED OS:  Windows 10 and higher
+//!
+//!
+//! TCC_SUPPORTED
+//!
+//! MCDM_SUPPORTED
+//!
+//! \requires Administrator privileges since release 430.39
+//!
+//! \param [in]      hPhysicalGpu      A handle identifying the physical GPU for
+//!                                    which to update the ECC configuration
+//!                                    setting.
+//! \param [in]      bEnable           The new ECC configuration setting.
+//! \param [in]      bEnableImmediately Request that the new setting take effect immediately.
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//!
+//! 
\retval ::NVAPI_INVALID_CONFIGURATION - Possibly SLI is enabled. Disable SLI and retry. +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE - The application will require Administrator privileges to access this API. +//! The application can be elevated to a higher permission level by selecting "Run as Administrator". +//! +//! \ingroup gpuecc +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_SetECCConfiguration(NvPhysicalGpuHandle hPhysicalGpu, NvU8 bEnable, + NvU8 bEnableImmediately); + + + +// The following enum is providing definitions for events signaled by a Quadro Sync (QSYNC) device. +// QSYNC event broadcast is supported for Windows 10 and later OS. +typedef enum +{ + NV_QSYNC_EVENT_NONE = 0, + NV_QSYNC_EVENT_SYNC_LOSS = 1, // Frame Lock sync loss event + NV_QSYNC_EVENT_SYNC_GAIN = 2, // Frame Lock sync gain event + NV_QSYNC_EVENT_HOUSESYNC_GAIN = 3, // House cable gain(plug in) event + NV_QSYNC_EVENT_HOUSESYNC_LOSS = 4, // House cable loss(plug out) event + NV_QSYNC_EVENT_RJ45_GAIN = 5, // RJ45 cable gain(plug in) event + NV_QSYNC_EVENT_RJ45_LOSS = 6, // RJ45 cable loss(plug out) event +} NV_QSYNC_EVENT; + +typedef struct +{ + NV_QSYNC_EVENT qsyncEvent; // One of the value of the enum NV_QSYNC_EVENT + NvU32 reserved[7]; // Reserved for future use. Do not use this. +} NV_QSYNC_EVENT_DATA; + +//! Callback for QSYNC event +typedef void(__cdecl *NVAPI_CALLBACK_QSYNCEVENT)(NV_QSYNC_EVENT_DATA qyncEventData, void *callbackParam); + + +typedef struct _NV_DISPLAY_OUTPUT_MODE_CHANGE_EVENT_DATA +{ + NvU32 displayId; //!< displayId of the display + NvU32 outputMode; //!< display output mode +} NV_DISPLAY_OUTPUT_MODE_CHANGE_EVENT_DATA; + +//! 
Callback for Display Output Mode change events +typedef void(__cdecl *NVAPI_CALLBACK_DISPLAY_OUTPUT_MODE_CHANGE_EVENT)(NV_DISPLAY_OUTPUT_MODE_CHANGE_EVENT_DATA* pEventData, void *callbackParam); + +typedef struct _NV_DISPLAY_COLORIMETRY_CHANGE_EVENT_DATA +{ + NvU32 displayId; //!< displayId of the monitor + + float min_luminance; //!< min luminance, cd/m^2 + float max_full_frame_luminance; //!< max 100% frame luminance, cd/m^2 + float max_luminance; //!< max 10% frame luminance, cd/m^2 + + float hdrBrightnessLuminanceScalingFactor; //!< HDR brightness luminance scaling factor applied by GPU on output pixels + + float red_primary_x; //!< red primary chromaticity coordinate x + float red_primary_y; //!< red primary chromaticity coordinate y + float green_primary_x; //!< green primary chromaticity coordinate x + float green_primary_y; //!< green primary chromaticity coordinate y + float blue_primary_x; //!< blue primary chromaticity coordinate x + float blue_primary_y; //!< blue primary chromaticity coordinate y + float white_point_x; //!< white point chromaticity coordinate x + float white_point_y; //!< white point chromaticity coordinate y +} NV_DISPLAY_COLORIMETRY_CHANGE_EVENT_DATA; + +//! Callback for Display Colorimetry change events +typedef void(__cdecl *NVAPI_CALLBACK_DISPLAY_COLORIMETRY_CHANGE_EVENT)(NV_DISPLAY_COLORIMETRY_CHANGE_EVENT_DATA* pEventData, void *callbackParam); + + +//! Enum for Event IDs +typedef enum +{ + NV_EVENT_TYPE_NONE = 0, + NV_EVENT_TYPE_QSYNC = 6, + NV_EVENT_TYPE_DISPLAY_OUTPUT_MODE_CHANGE = 103, + NV_EVENT_TYPE_DISPLAY_COLORIMETRY_CHANGE = 104, +} NV_EVENT_TYPE; + +//! 
Core NV_EVENT_REGISTER_CALLBACK structure declaration +typedef struct +{ + NvU32 version; //!< version field to ensure minimum version compatibility + NV_EVENT_TYPE eventId; //!< ID of the event being sent + void *callbackParam; //!< This value will be passed back to the callback function when an event occurs + union + { + NVAPI_CALLBACK_QSYNCEVENT nvQSYNCEventCallback; //!< Callback function pointer for QSYNC events + NVAPI_CALLBACK_DISPLAY_OUTPUT_MODE_CHANGE_EVENT nvDisplayOutputModeChangeEventCallback; //!< Callback function pointer for Display Output Mode change events + NVAPI_CALLBACK_DISPLAY_COLORIMETRY_CHANGE_EVENT nvDisplayColorimetryChangeEventCallback; //!< Callback function pointer for Display Colorimetry change events + }nvCallBackFunc; + +} NV_EVENT_REGISTER_CALLBACK, *PNV_EVENT_REGISTER_CALLBACK; + +//! Macro for constructing the version field of ::NV_EVENT_REGISTER_CALLBACK +#define NV_EVENT_REGISTER_CALLBACK_VERSION MAKE_NVAPI_VERSION(NV_EVENT_REGISTER_CALLBACK,1) + + +//! \ingroup gpu +typedef enum _NV_GPU_WORKSTATION_FEATURE_TYPE +{ + NV_GPU_WORKSTATION_FEATURE_TYPE_NVIDIA_RTX_VR_READY = 1, //!< NVIDIA RTX VR Ready + NV_GPU_WORKSTATION_FEATURE_TYPE_QUADRO_VR_READY = NV_GPU_WORKSTATION_FEATURE_TYPE_NVIDIA_RTX_VR_READY, //!< DEPRECATED name - do not use + NV_GPU_WORKSTATION_FEATURE_TYPE_PROVIZ = 2, +} NV_GPU_WORKSTATION_FEATURE_TYPE; + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GPU_QueryWorkstationFeatureSupport +// +//! \fn NvAPI_GPU_QueryWorkstationFeatureSupport(NvPhysicalGpuHandle physicalGpu, NV_GPU_WORKSTATION_FEATURE_TYPE gpuWorkstationFeature) +//! \code +//! DESCRIPTION: Indicates whether a queried workstation feature is supported by the requested GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 440 +//! +//! 
DESCRIPTION: This API, when called with a valid physical gpu handle as Input, lets caller know whether the given workstation feature is supported by this GPU. +//! +//! PARAMETERS: physicalGpu(IN) : The handle of the GPU for the which caller wants to get the support information. +//! gpuWorkstationFeature(IN ) : The feature for the GPU in question. One of the values from enum NV_GPU_WORKSTATION_FEATURE_TYPE. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status listed below +//! +//! \retval ::NVAPI_OK the queried workstation feature is supported on the given GPU. +//! \retval ::NVAPI_NO_IMPLEMENTATION the current driver doesn't support this interface. +//! \retval ::NVAPI_INVALID_HANDLE the incoming physicalGpu handle is invalid. +//! \retval ::NVAPI_NOT_SUPPORTED the requested gpuWorkstationFeature is not supported in the selected GPU. +//! \retval ::NVAPI_SETTING_NOT_FOUND the requested gpuWorkstationFeature is unknown to the current driver version. +//! +//! \endcode +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_QueryWorkstationFeatureSupport(NvPhysicalGpuHandle physicalGpu, NV_GPU_WORKSTATION_FEATURE_TYPE gpuWorkstationFeature); + + + +//! \ingroup gpu +typedef struct +{ + NvU32 version; //!< version of this structure + NvU32 width; //!< width of the input texture + NvU32 height; //!< height of the input texture + float* blendingTexture; //!< array of floating values building an intensity RGB texture +} NV_SCANOUT_INTENSITY_DATA_V1; + +//! 
\ingroup gpu +typedef struct +{ + NvU32 version; //!< version of this structure + NvU32 width; //!< width of the input texture + NvU32 height; //!< height of the input texture + float* blendingTexture; //!< array of floating values building an intensity RGB texture + float* offsetTexture; //!< array of floating values building an offset texture + NvU32 offsetTexChannels; //!< number of channels per pixel in the offset texture +} NV_SCANOUT_INTENSITY_DATA_V2; + +typedef NV_SCANOUT_INTENSITY_DATA_V2 NV_SCANOUT_INTENSITY_DATA; + +//! \ingroup gpu +#define NV_SCANOUT_INTENSITY_DATA_VER1 MAKE_NVAPI_VERSION(NV_SCANOUT_INTENSITY_DATA_V1, 1) +#define NV_SCANOUT_INTENSITY_DATA_VER2 MAKE_NVAPI_VERSION(NV_SCANOUT_INTENSITY_DATA_V2, 2) +#define NV_SCANOUT_INTENSITY_DATA_VER NV_SCANOUT_INTENSITY_DATA_VER2 + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GPU_SetScanoutIntensity +// +//! DESCRIPTION: This API enables and sets up per-pixel intensity feature on the specified display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId combined physical display and GPU identifier of the display to apply the intensity control. +//! \param [in] scanoutIntensityData the intensity texture info. +//! \param [out] pbSticky(OUT) indicates whether the settings will be kept over a reboot. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameters. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized. +//! \retval ::NVAPI_NOT_SUPPORTED Interface not supported by the driver used, or only supported on selected GPUs +//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input data. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION NV_SCANOUT_INTENSITY_DATA structure version mismatch. +//! \retval ::NVAPI_OK Feature enabled. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_SetScanoutIntensity(NvU32 displayId, NV_SCANOUT_INTENSITY_DATA* scanoutIntensityData, int *pbSticky); + + +//! \ingroup gpu +typedef struct _NV_SCANOUT_INTENSITY_STATE_DATA +{ + NvU32 version; //!< version of this structure + NvU32 bEnabled; //!< intensity is enabled or not +} NV_SCANOUT_INTENSITY_STATE_DATA; + +//! \ingroup gpu +#define NV_SCANOUT_INTENSITY_STATE_VER MAKE_NVAPI_VERSION(NV_SCANOUT_INTENSITY_STATE_DATA, 1) + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GPU_GetScanoutIntensityState +// +//! DESCRIPTION: This API queries current state of the intensity feature on the specified display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId combined physical display and GPU identifier of the display to query the configuration. +//! \param [in,out] scanoutIntensityStateData intensity state data. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameters. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized. +//! \retval ::NVAPI_NOT_SUPPORTED Interface not supported by the driver used, or only supported on selected GPUs. +//! \retval ::NVAPI_OK Feature enabled. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetScanoutIntensityState(__in NvU32 displayId, __inout NV_SCANOUT_INTENSITY_STATE_DATA* scanoutIntensityStateData); + + +//! \ingroup gpu +typedef enum +{ + NV_GPU_WARPING_VERTICE_FORMAT_TRIANGLESTRIP_XYUVRQ = 0, + NV_GPU_WARPING_VERTICE_FORMAT_TRIANGLES_XYUVRQ = 1, +} NV_GPU_WARPING_VERTICE_FORMAT; + +//! 
\ingroup gpu
+typedef struct
+{
+    NvU32 version; //!< version of this structure
+    float* vertices; //!< array of the input warping vertices
+    NV_GPU_WARPING_VERTICE_FORMAT vertexFormat; //!< format of the input vertices
+    int numVertices; //!< number of the input vertices
+    NvSBox* textureRect; //!< rectangle in desktop coordinates describing the source area for the warping
+} NV_SCANOUT_WARPING_DATA;
+
+//! \ingroup gpu
+#define NV_SCANOUT_WARPING_VER MAKE_NVAPI_VERSION(NV_SCANOUT_WARPING_DATA, 1)
+
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_GPU_SetScanoutWarping
+//
+//! DESCRIPTION:     This API enables and sets up the warping feature on the specified display.
+//!
+//! SUPPORTED OS:  Windows 10 and higher
+//!
+//!
+//! \param [in]     displayId             Combined physical display and GPU identifier of the display to apply the intensity control
+//! \param [in]     scanoutWarpingData    The warping data info
+//! \param [out]    pbSticky              Indicates whether the settings will be kept over a reboot.
+//!
+//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameters.
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized.
+//! \retval ::NVAPI_NOT_SUPPORTED Interface not supported by the driver used, or only supported on selected GPUs
+//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input data.
+//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION NV_SCANOUT_WARPING_DATA structure version mismatch.
+//! \retval ::NVAPI_OK Feature enabled.
+//! \retval ::NVAPI_ERROR Miscellaneous error occurred.
+//!
+//! \ingroup gpu
+///////////////////////////////////////////////////////////////////////////////
+
+NVAPI_INTERFACE NvAPI_GPU_SetScanoutWarping(NvU32 displayId, NV_SCANOUT_WARPING_DATA* scanoutWarpingData, int* piMaxNumVertices, int* pbSticky);
+
+
+//! 
\ingroup gpu +typedef struct _NV_SCANOUT_WARPING_STATE_DATA +{ + NvU32 version; //!< version of this structure + NvU32 bEnabled; //!< warping is enabled or not +} NV_SCANOUT_WARPING_STATE_DATA; + +//! \ingroup gpu +#define NV_SCANOUT_WARPING_STATE_VER MAKE_NVAPI_VERSION(NV_SCANOUT_WARPING_STATE_DATA, 1) + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GPU_GetScanoutWarpingState +// +//! DESCRIPTION: This API queries current state of the warping feature on the specified display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId combined physical display and GPU identifier of the display to query the configuration. +//! \param [in,out] scanoutWarpingStateData warping state data. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameters. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized. +//! \retval ::NVAPI_NOT_SUPPORTED Interface not supported by the driver used, or only supported on selected GPUs. +//! \retval ::NVAPI_OK Feature enabled. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetScanoutWarpingState(__in NvU32 displayId, __inout NV_SCANOUT_WARPING_STATE_DATA* scanoutWarpingStateData); + +typedef enum +{ + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_WARPING_RESAMPLING_METHOD = 0, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_SCANOUT_INTENSITY_METHOD = 1 +} NV_GPU_SCANOUT_COMPOSITION_PARAMETER; + +//! This enum defines a collection of scanout composition values that can be used to configure +//! 
the supported scanout composition settings +typedef enum +{ + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_SET_TO_DEFAULT = 0, // Set parameter to default value. + // WARPING_RESAMPLING_METHOD section: + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_WARPING_RESAMPLING_METHOD_BILINEAR = 0x100, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_WARPING_RESAMPLING_METHOD_BICUBIC_TRIANGULAR = 0x101, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_WARPING_RESAMPLING_METHOD_BICUBIC_BELL_SHAPED = 0x102, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_WARPING_RESAMPLING_METHOD_BICUBIC_BSPLINE = 0x103, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_WARPING_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_TRIANGULAR = 0x104, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_WARPING_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_BELL_SHAPED = 0x105, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_WARPING_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_BSPLINE = 0x106, + // SCANOUT_INTENSITY_METHOD section: + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_SCANOUT_INTENSITY_NO_GAMMA = 0x200, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_SCANOUT_INTENSITY_COLOR_GAMMA = 0x201, + NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_SCANOUT_INTENSITY_COLOR_AND_OFFSET_GAMMA = 0x202, +} NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE; + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GPU_SetScanoutCompositionParameter +// +//! DESCRIPTION: This API sets various parameters that configure the scanout composition feature on the specified display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Combined physical display and GPU identifier of the display to apply the intensity control +//! \param [in] parameter The scanout composition parameter to be set +//! \param [in] parameterValue The data to be set for the specified parameter +//! \param [in] pContainer Additional container for data associated with the specified parameter. If parameterValue +//! 
is WARPING_RESAMPLING_METHOD, pContainer is not in use. If parameterValue is SCANOUT_INTENSITY_METHOD,
+//!                                     pContainer is a pointer to a floating-point value, in the range of (0.0f, 5.0f], which specifies the
+//!                                     gamma value used in the gamma and degamma calculation. If pContainer data is NULL and parameterValue is
+//!                                     NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_SCANOUT_INTENSITY_COLOR_GAMMA or
+//!                                     NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE_SCANOUT_INTENSITY_COLOR_AND_OFFSET_GAMMA,
+//!                                     the default gamma value used in driver implementation is 2.2f
+//!
+//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameters.
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized.
+//! \retval ::NVAPI_NOT_SUPPORTED Interface not supported by the driver used, or only supported on selected GPUs
+//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input data.
+//! \retval ::NVAPI_OK Feature enabled.
+//! \retval ::NVAPI_ERROR Miscellaneous error occurred.
+//!
+//! \ingroup gpu
+///////////////////////////////////////////////////////////////////////////////
+
+NVAPI_INTERFACE NvAPI_GPU_SetScanoutCompositionParameter(NvU32 displayId, NV_GPU_SCANOUT_COMPOSITION_PARAMETER parameter,
+                                                         NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE parameterValue, float *pContainer);
+
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_GPU_GetScanoutCompositionParameter
+//
+//! DESCRIPTION:     This API queries current state of one of the various scanout composition parameters on the specified display.
+//!
+//! SUPPORTED OS:  Windows 10 and higher
+//!
+//!
+//! \param [in]     displayId            combined physical display and GPU identifier of the display to query the configuration.
+//! \param [in]     parameter            scanout composition parameter to be queried.
+//! \param [out]    parameterData        scanout composition parameter data.
+//! \param [out]    pContainer           Additional container for returning data associated with the specified parameter
+//!
+//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameters. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized. +//! \retval ::NVAPI_NOT_SUPPORTED Interface not supported by the driver used, or only supported on selected GPUs. +//! \retval ::NVAPI_OK Feature enabled. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetScanoutCompositionParameter(__in NvU32 displayId, __in NV_GPU_SCANOUT_COMPOSITION_PARAMETER parameter, + __out NV_GPU_SCANOUT_COMPOSITION_PARAMETER_VALUE *parameterData, __out float *pContainer); + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GPU_GetScanoutConfiguration +// +//! DESCRIPTION: This API queries the desktop and scanout portion of the specified display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId combined physical display and GPU identifier of the display to query the configuration. +//! \param [in,out] desktopRect desktop area of the display in desktop coordinates. +//! \param [in,out] scanoutRect scanout area of the display relative to desktopRect. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameters. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized. +//! \retval ::NVAPI_NOT_SUPPORTED Interface not supported by the driver used, or only supported on selected GPUs. +//! \retval ::NVAPI_OK Feature enabled. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetScanoutConfiguration(NvU32 displayId, NvSBox* desktopRect, NvSBox* scanoutRect); + + + +//! \ingroup gpu +//! Used in NvAPI_GPU_GetScanoutConfigurationEx(). +typedef struct _NV_SCANOUT_INFORMATION +{ + NvU32 version; //!< Structure version, needs to be initialized with NV_SCANOUT_INFORMATION_VER. + + NvSBox sourceDesktopRect; //!< Operating system display device rect in desktop coordinates displayId is scanning out from. + NvSBox sourceViewportRect; //!< Area inside the sourceDesktopRect which is scanned out to the display. + NvSBox targetViewportRect; //!< Area inside the rect described by targetDisplayWidth/Height sourceViewportRect is scanned out to. + NvU32 targetDisplayWidth; //!< Horizontal size of the active resolution scanned out to the display. + NvU32 targetDisplayHeight; //!< Vertical size of the active resolution scanned out to the display. + NvU32 cloneImportance; //!< If targets are cloned views of the sourceDesktopRect the cloned targets have an importance assigned (0:primary,1 secondary,...). + NV_ROTATE sourceToTargetRotation; //!< Rotation performed between the sourceViewportRect and the targetViewportRect. +} NV_SCANOUT_INFORMATION; + +#define NV_SCANOUT_INFORMATION_VER MAKE_NVAPI_VERSION(NV_SCANOUT_INFORMATION,1) + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GPU_GetScanoutConfigurationEx +// +//! DESCRIPTION: This API queries the desktop and scanout portion of the specified display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 331 +//! +//! \param [in] displayId combined physical display and GPU identifier of the display to query the configuration. +//! \param [in,out] pScanoutInformation desktop area to displayId mapping information. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetScanoutConfigurationEx(__in NvU32 displayId, __inout NV_SCANOUT_INFORMATION *pScanoutInformation); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetAdapterIdFromPhysicalGpu +// +//! DESCRIPTION: This API returns the OS-AdapterID from physicalGpu Handle. OS-AdapterID +//! is the Adapter ID that is used by Win7 CCD APIs. +//! This API is deprecated. Please use NvAPI_GPU_GetLogicalGpuInfo to get the OS-AdapterID. +//! NvAPI_GetLogicalGPUFromPhysicalGPU can be used to get the logical GPU handle associated with specified physical GPU handle. +//! +//! \deprecated Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetLogicalGpuInfo. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hPhysicalGpu PhysicalGpu Handle +//! \param [out] pOSAdapterId Returns OS-AdapterId, it is typed as void * to +//! avoid dependency with windows.h. User must type cast it to LUID. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT pOSAdapterId is NULL; hPhysicalGpu is invalid +//! \retval ::NVAPI_OK *pOSAdapterId contains valid data. +//! \retval ::NVAPI_NOT_SUPPORTED This API is not supported on the system. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetLogicalGpuInfo.") +NVAPI_INTERFACE NvAPI_GPU_GetAdapterIdFromPhysicalGpu(NvPhysicalGpuHandle hPhysicalGpu, void *pOSAdapterId); + + + + +//! 
\ingroup gpu +typedef enum _NV_VIRTUALIZATION_MODE +{ + NV_VIRTUALIZATION_MODE_NONE = 0, //!< Represents Bare Metal GPU + NV_VIRTUALIZATION_MODE_NMOS = 1, //!< Represents GPU-Passthrough + NV_VIRTUALIZATION_MODE_VGX = 2, //!< Represents vGPU inside virtual machine + NV_VIRTUALIZATION_MODE_HOST_VGPU = 3 //!< Represents VGX hypervisor in vGPU mode +} NV_VIRTUALIZATION_MODE; + +//! \ingroup gpu +typedef struct _NV_GPU_VIRTUALIZATION_INFO +{ + NvU32 version; //!< Structure version + + NV_VIRTUALIZATION_MODE virtualizationMode; //!< one of NV_VIRTUALIZATION_MODE. + NvU32 reserved; //!< reserved for future use. Should be set to ZERO. +} NV_GPU_VIRTUALIZATION_INFO_V1; + +//! \ingroup gpu +typedef NV_GPU_VIRTUALIZATION_INFO_V1 NV_GPU_VIRTUALIZATION_INFO; + +//! \ingroup gpu +#define NV_GPU_VIRTUALIZATION_INFO_VER1 MAKE_NVAPI_VERSION(NV_GPU_VIRTUALIZATION_INFO_V1,1) + +//! \ingroup gpu +#define NV_GPU_VIRTUALIZATION_INFO_VER NV_GPU_VIRTUALIZATION_INFO_VER1 + + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_GetVirtualizationInfo +//! +//! DESCRIPTION: This API returns virtualization information of the GPU +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 367 +//! +//! \param [in,out] pVirtualizationInfo Pointer to NV_GPU_VIRTUALIZATION_INFO structure. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetVirtualizationInfo(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_VIRTUALIZATION_INFO *pVirtualizationInfo); + + +//! \ingroup gpu +typedef struct _NV_LOGICAL_GPU_DATA_V1 +{ + NvU32 version; //!< [in] Structure version. 
+ void *pOSAdapterId; //!< [out] Returns OS-AdapterId. User must send memory buffer of size atleast equal to the size of LUID structure before calling the NVAPI. + NvU32 physicalGpuCount; //!< [out] Number of physical GPU handles associated with the specified logical GPU handle. + + NvPhysicalGpuHandle physicalGpuHandles[NVAPI_MAX_PHYSICAL_GPUS]; //!< [out] This array will be filled with physical GPU handles associated with the given logical GPU handle. + //!< The array index refers to the Physical Gpu Index (Idx). + //!< Idx value is the same as D3D11 MultiGPUDevice GPU index, D3D12 node index, OpenGL GL_NV_gpu_multicast GPU index. + //!< When converted to a bit mask (1 << Idx), it matches: + //!< 1. Vulkan deviceNodeMask in VkPhysicalDeviceIDProperties + //!< 2. CUDA deviceNodeMask returned by cuDeviceGetLuid + NvU32 reserved[8]; //!< Reserved for future use. Should be set to ZERO. +} NV_LOGICAL_GPU_DATA_V1; + +//! \ingroup gpu +typedef NV_LOGICAL_GPU_DATA_V1 NV_LOGICAL_GPU_DATA; +#define NV_LOGICAL_GPU_DATA_VER1 MAKE_NVAPI_VERSION(NV_LOGICAL_GPU_DATA_V1,1) +#define NV_LOGICAL_GPU_DATA_VER NV_LOGICAL_GPU_DATA_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetLogicalGpuInfo +// +//! This function is used to query Logical GPU information. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 421 +//! +//! \param [in] hLogicalGpu logical GPU Handle. +//! \param [in,out] pLogicalGpuData Pointer to NV_LOGICAL_GPU_DATA structure. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetLogicalGpuInfo(__in NvLogicalGpuHandle hLogicalGpu, __inout NV_LOGICAL_GPU_DATA *pLogicalGpuData); + + +//! 
\ingroup gridlicense +//! @{ + +//! Maximum number of supported Feature License +#define NV_LICENSE_MAX_COUNT 3 + +//! Signature length for GRID License +#define NV_LICENSE_SIGNATURE_SIZE (128) + +//! Length for Grid License +#define NV_LICENSE_INFO_MAX_LENGTH (128) + +//! License string +typedef char NvAPI_LicenseString[NV_LICENSE_INFO_MAX_LENGTH]; + +//! Used in NV_LICENSE_FEATURE_DETAILS +typedef enum _NV_LICENSE_FEATURE_TYPE +{ + NV_LICENSE_FEATURE_UNKNOWN = 0, + NV_LICENSE_FEATURE_VGPU = 1, + NV_LICENSE_FEATURE_NVIDIA_RTX = 2, + NV_LICENSE_FEATURE_QUADRO = NV_LICENSE_FEATURE_NVIDIA_RTX, //!< DEPRECATED name - do not use + NV_LICENSE_FEATURE_GAMING = 3, + NV_LICENSE_FEATURE_COMPUTE = 4, +} NV_LICENSE_FEATURE_TYPE; + +//! Used in NV_LICENSE_FEATURE_DETAILS +//! +//!< The 'status' variable holds any one of the following license expiry status codes : +//!< 0 - Expiry information not available +//!< 1 - Invalid expiry or error fetching expiry +//!< 2 - Valid expiry +//!< 3 - Expiry not applicable +//!< 4 - Permanent expiry +typedef struct _NV_LICENSE_EXPIRY_DETAILS +{ + NvU32 year; //!< Year value of license expiry + NvU16 month; //!< Month value of license expiry + NvU16 day; //!< Day value of license expiry + NvU16 hour; //!< Hour value of license expiry + NvU16 min; //!< Minutes value of license expiry + NvU16 sec; //!< Seconds value of license expiry + + NvU8 status; //!< License expiry status +}NV_LICENSE_EXPIRY_DETAILS; + +//! Used in NV_LICENSABLE_FEATURES +typedef struct _NV_LICENSE_FEATURE_DETAILS_V1 +{ + NvU32 version; //!< IN - Structure version. + NvU32 isEnabled : 1; //!< The current state of the licensed feature, true=enabled, false=disabled. + NvU32 reserved : 31; //!< Reserved + NV_LICENSE_FEATURE_TYPE featureCode; //!< Feature code that corresponds to the licensable feature. + NvAPI_LicenseString licenseInfo; //!< Deprecated +}NV_LICENSE_FEATURE_DETAILS_V1; + +typedef struct _NV_LICENSE_FEATURE_DETAILS_V2 +{ + NvU32 version; //!< Unused. 
+ NvU32 isEnabled : 1; //!< The current state of the licensed feature, true=enabled, false=disabled. + NvU32 reserved : 31; //!< Reserved + NV_LICENSE_FEATURE_TYPE featureCode; //!< Feature code that corresponds to the licensable feature. + NvAPI_LicenseString licenseInfo; //!< Deprecated + NvAPI_LicenseString productName; //!< Nvidia Grid licensable product name. +}NV_LICENSE_FEATURE_DETAILS_V2; + +typedef struct _NV_LICENSE_FEATURE_DETAILS_V3 +{ + NvU32 version; //!< Unused. + NvU32 isEnabled : 1; //!< The current state of license, true=licensed, false=unlicensed. + NvU32 isFeatureEnabled : 1; //!< The current state of feature, true=enabled, false=disabled. + NvU32 reserved : 30; //!< Reserved + NV_LICENSE_FEATURE_TYPE featureCode; //!< Feature code that corresponds to the licensable feature. + NvAPI_LicenseString licenseInfo; //!< Deprecated + NvAPI_LicenseString productName; //!< Nvidia Grid licensable product name. +}NV_LICENSE_FEATURE_DETAILS_V3; + +typedef struct _NV_LICENSE_FEATURE_DETAILS_V4 +{ + NvU32 version; //!< Unused. + NvU32 isEnabled : 1; //!< The current state of license, true=licensed, false=unlicensed. + NvU32 isFeatureEnabled : 1; //!< The current state of feature, true=enabled, false=disabled. + NvU32 reserved : 30; //!< Reserved + NV_LICENSE_FEATURE_TYPE featureCode; //!< Feature code that corresponds to the licensable feature. + NvAPI_LicenseString licenseInfo; //!< Deprecated + NvAPI_LicenseString productName; //!< Nvidia Grid licensable product name. + NV_LICENSE_EXPIRY_DETAILS licenseExpiry; //!< License expiry information. +}NV_LICENSE_FEATURE_DETAILS_V4; + +typedef NV_LICENSE_FEATURE_DETAILS_V1 NV_LICENSE_FEATURE_DETAILS; + +#define NV_LICENSE_FEATURE_DETAILS_VER1 MAKE_NVAPI_VERSION(NV_LICENSE_FEATURE_DETAILS_V1, 1) +#define NV_LICENSE_FEATURE_DETAILS_VER NV_LICENSE_FEATURE_DETAILS_VER1 + +//! Licensable features +typedef struct _NV_LICENSABLE_FEATURES_V1 +{ + NvU32 version; //!< IN - Structure version. 
+ NvU32 isLicenseSupported : 1; //!< True if Software Licensing is supported. + NvU32 reserved : 31; //!< Reserved + NvU32 licensableFeatureCount; //!< The number of licensable features. + NvU8 signature[NV_LICENSE_SIGNATURE_SIZE]; //!< Dynamic signature required for Authentication of the components, + //!< signature length limited to NV_LICENSE_SIGNATURE_SIZE + NV_LICENSE_FEATURE_DETAILS_V1 licenseDetails[NV_LICENSE_MAX_COUNT]; //!< Array of licensable features +}NV_LICENSABLE_FEATURES_V1; + +typedef struct _NV_LICENSABLE_FEATURES_V2 +{ + NvU32 version; //!< IN - Structure version. + NvU32 isLicenseSupported : 1; //!< True if Software Licensing is supported. + NvU32 reserved : 31; //!< Reserved + NvU32 licensableFeatureCount; //!< The number of licensable features. + NvU8 signature[NV_LICENSE_SIGNATURE_SIZE]; //!< Dynamic signature required for Authentication of the components, + //!< signature length limited to NV_LICENSE_SIGNATURE_SIZE + NV_LICENSE_FEATURE_DETAILS_V2 licenseDetails[NV_LICENSE_MAX_COUNT]; //!< Array of licensable features +}NV_LICENSABLE_FEATURES_V2; + +typedef struct _NV_LICENSABLE_FEATURES_V3 +{ + NvU32 version; //!< IN - Structure version. + NvU32 isLicenseSupported : 1; //!< True if Software Licensing is supported. + NvU32 reserved : 31; //!< Reserved + NvU32 licensableFeatureCount; //!< The number of licensable features. + NvU8 signature[NV_LICENSE_SIGNATURE_SIZE]; //!< Dynamic signature required for Authentication of the components, + //!< signature length limited to NV_LICENSE_SIGNATURE_SIZE + NV_LICENSE_FEATURE_DETAILS_V3 licenseDetails[NV_LICENSE_MAX_COUNT]; //!< Array of licensable features +}NV_LICENSABLE_FEATURES_V3; + +typedef struct _NV_LICENSABLE_FEATURES_V4 +{ + NvU32 version; //!< IN - Structure version. + NvU32 isLicenseSupported : 1; //!< True if Software Licensing is supported. + NvU32 reserved : 31; //!< Reserved + NvU32 licensableFeatureCount; //!< The number of licensable features. 
+ NvU8 signature[NV_LICENSE_SIGNATURE_SIZE]; //!< Dynamic signature required for Authentication of the components, + //!< signature length limited to NV_LICENSE_SIGNATURE_SIZE + NV_LICENSE_FEATURE_DETAILS_V4 licenseDetails[NV_LICENSE_MAX_COUNT]; //!< Array of licensable features +}NV_LICENSABLE_FEATURES_V4; + +typedef NV_LICENSABLE_FEATURES_V4 NV_LICENSABLE_FEATURES; + +#define NV_LICENSABLE_FEATURES_VER1 MAKE_NVAPI_VERSION(NV_LICENSABLE_FEATURES_V1, 1) +#define NV_LICENSABLE_FEATURES_VER2 MAKE_NVAPI_VERSION(NV_LICENSABLE_FEATURES_V2, 2) +#define NV_LICENSABLE_FEATURES_VER3 MAKE_NVAPI_VERSION(NV_LICENSABLE_FEATURES_V3, 3) +#define NV_LICENSABLE_FEATURES_VER4 MAKE_NVAPI_VERSION(NV_LICENSABLE_FEATURES_V4, 4) +#define NV_LICENSABLE_FEATURES_VER NV_LICENSABLE_FEATURES_VER4 + +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetLicensableFeatures +// +//! DESCRIPTION: This function call identifies whether licenses are supported on this system and if +//! they are supported, returns the details of the features that can be licensed. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \param [in] hPhysicalGpu GPU selection +//! \param [in,out] pLicensableFeatures Licensable features information. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup gridlicense +///////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetLicensableFeatures(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_LICENSABLE_FEATURES *pLicensableFeatures); + + +#define NVAPI_NVLINK_COUNTER_MAX_TYPES 32 +#define NVAPI_NVLINK_MAX_LINKS 32 + +//! \ingroup nvlink +//! @{ +//! 
Used in NvAPI_GPU_NVLINK_GetCaps() + +/* caps format is byte_index:bit_mask */ +#define NVAPI_NVLINK_CAPS_SUPPORTED 0x00000001 //!< Set if NVLink is present and supported on this GPU.This field is used for *global* caps only and NOT for per-link caps +#define NVAPI_NVLINK_CAPS_P2P_SUPPORTED 0x00000002 //!< Set if P2P over NVLink is supported on this GPU. +#define NVAPI_NVLINK_CAPS_SYSMEM_ACCESS 0x00000004 //!< Set if sysmem can be accessed over NVLink on this GPU. +#define NVAPI_NVLINK_CAPS_P2P_ATOMICS 0x00000008 //!< Set if P2P atomics are supported over NVLink on this GPU. +#define NVAPI_NVLINK_CAPS_SYSMEM_ATOMICS 0x00000010 //!< Set if sysmem atomic transactions are supported over NVLink on this GPU. +#define NVAPI_NVLINK_CAPS_PEX_TUNNELING 0x00000020 //!< Set if PEX tunneling over NVLink is supported on this GPU. +#define NVAPI_NVLINK_CAPS_SLI_BRIDGE 0x00000040 //!< Set if SLI over NVLink is supported on this GPU. +#define NVAPI_NVLINK_CAPS_SLI_BRIDGE_SENSABLE 0x00000080 //!< This bit is set if capable of sensing SLI bridges. +#define NVAPI_NVLINK_CAPS_POWER_STATE_L0 0x00000100 //!< This bit is set if L0 is a supported power state on this GPU. +#define NVAPI_NVLINK_CAPS_POWER_STATE_L1 0x00000200 //!< This bit is set if L1 is a supported power state on this GPU. +#define NVAPI_NVLINK_CAPS_POWER_STATE_L2 0x00000400 //!< This bit is set if L2 is a supported power state on this GPU. +#define NVAPI_NVLINK_CAPS_POWER_STATE_L3 0x00000800 //!< This bit is set if L3 is a supported power state on this GPU. + +#define NVAPI_NVLINK_CAPS_VALID 0x00001000 //!< Set if this link is supported on this GPU.This field is used for *per-link* caps only and NOT for global caps.
+ +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_INVALID (0x00000000) +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_1_0 (0x00000001) +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_2_0 (0x00000002) +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_2_2 (0x00000004U) +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_3_0 (0x00000005U) +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_3_1 (0x00000006U) +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_4_0 (0x00000007U) +#define NVAPI_NVLINK_CAPS_NVLINK_VERSION_5_0 (0x00000008U) + +#define NVAPI_NVLINK_CAPS_NCI_VERSION_INVALID (0x00000000) +#define NVAPI_NVLINK_CAPS_NCI_VERSION_1_0 (0x00000001) +#define NVAPI_NVLINK_CAPS_NCI_VERSION_2_0 (0x00000002) +#define NVAPI_NVLINK_CAPS_NCI_VERSION_2_2 (0x00000004U) +#define NVAPI_NVLINK_CAPS_NCI_VERSION_3_0 (0x00000005U) +#define NVAPI_NVLINK_CAPS_NCI_VERSION_3_1 (0x00000006U) +#define NVAPI_NVLINK_CAPS_NCI_VERSION_4_0 (0x00000007U) +#define NVAPI_NVLINK_CAPS_NCI_VERSION_5_0 (0x00000008U) + +typedef struct +{ + NvU32 version; //!< Version of this structure. Must always be first element in this structure. + NvU32 capsTbl; //!< This is bit field for getting different global caps.The individual bitfields are specified by NVAPI_NVLINK_CAPS_* + NvU8 lowestNvlinkVersion; //!< This field specifies the lowest supported NVLink version for this GPU. + NvU8 highestNvlinkVersion; //!< This field specifies the highest supported NVLink version for this GPU. + NvU8 lowestNciVersion; //!< This field specifies the lowest supported NCI version for this GPU. + NvU8 highestNciVersion; //!< This field specifies the highest supported NCI version for this GPU. + NvU32 linkMask; //!< This field provides a bitfield mask of NVLink links enabled on this GPU. +}NVLINK_GET_CAPS_V1; + +typedef NVLINK_GET_CAPS_V1 NVLINK_GET_CAPS; +#define NVLINK_GET_CAPS_VER1 MAKE_NVAPI_VERSION(NVLINK_GET_CAPS_V1, 1) + +#define NVLINK_GET_CAPS_VER NVLINK_GET_CAPS_VER1 +//! 
@} +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_NVLINK_GetCaps +// +//! DESCRIPTION: This function returns the NVLink capabilities supported by the GPU. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 361 +//! +//! \param [in] hPhysicalGpu GPU selection +//! +//! \param [in,out] NVLINK_GET_CAPS This structure contains the output parameters. +//! Also need to specify the version. +//! +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE - The caller does not have administrative privileges +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \ingroup nvlink +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_NVLINK_GetCaps(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NVLINK_GET_CAPS *capsParams); + + +//! \ingroup nvlink +//! @{ +//! Used in NvAPI_GPU_NVLINK_GetStatus() + +#define NVAPI_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_NONE (0x00000000) +#define NVAPI_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_PCI (0x00000001) +#define NVAPI_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_UUID (0x00000002) + +typedef enum _NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE +{ + NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE_EBRIDGE, + NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE_NPU, + NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU, + NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE_SWITCH, + NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE_TEGRA, + NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE_NONE, + NVAPI_NVLINK_DEVICE_INFO_DEVICE_UUID_INVALID, +} NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE; + +typedef struct +{ + NvU32 deviceIdFlags; //!< ID Flags, Bitmask that specifies which IDs are valid for the GPU. Refer NVAPI_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_* for possible values. + //!< If NVAPI_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_PCI is set, PCI information is valid. 
+ //!< If NVAPI_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_UUID is set, UUID is valid. + NvU16 domain; //!< domain, bus, device, function, pciDeviceId : PCI information for the GPU. + NvU16 bus; + NvU16 device; + NvU16 function; + NvU32 pciDeviceId; + NvU64 deviceType; //!< GPU Type. See NVAPI_NVLINK_DEVICE_INFO_DEVICE_TYPE_* for possible values. + NvU8 deviceUUID[16]; //!< GPU UUID +}NVLINK_DEVICE_INFO_V1; + +typedef enum _NVAPI_NVLINK_STATUS_LINK_STATE +{ + NVAPI_NVLINK_STATUS_LINK_STATE_UNKNOWN, + NVAPI_NVLINK_STATUS_LINK_STATE_INIT, + NVAPI_NVLINK_STATUS_LINK_STATE_HWCFG, + NVAPI_NVLINK_STATUS_LINK_STATE_SWCFG, + NVAPI_NVLINK_STATUS_LINK_STATE_ACTIVE, + NVAPI_NVLINK_STATUS_LINK_STATE_FAULT, + NVAPI_NVLINK_STATUS_LINK_STATE_RECOVERY, + NVAPI_NVLINK_STATUS_LINK_STATE_RECOVERY_AC, + NVAPI_NVLINK_STATUS_LINK_STATE_RECOVERY_AX, + NVAPI_NVLINK_STATUS_LINK_STATE_INVALID = 0xFFFFFFFF, +}NVAPI_NVLINK_STATUS_LINK_STATE; + +typedef enum _NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE +{ + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_UNKNOWN, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_HIGH_SPEED_1, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_LOW_POWER, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_TRAINING, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_SAFE_MODE, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_OFF, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_TEST, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_FAULT, + NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE_INVALID = 0xFF, +}NVAPI_NVLINK_STATUS_SUBLINK_RX_STATE; + +typedef enum _NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE +{ + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_UNKNOWN, + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_HIGH_SPEED_1, + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_LOW_POWER, + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_TRAINING, + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_SAFE_MODE, + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_OFF, + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_TEST, + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_FAULT, + + NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE_INVALID= 0xFF, +} NVAPI_NVLINK_STATUS_SUBLINK_TX_STATE; + + 
+#define NVAPI_NVLINK_STATUS_PHY_NVHS (0x00000001) +#define NVAPI_NVLINK_STATUS_PHY_GRS (0x00000002) +#define NVAPI_NVLINK_STATUS_PHY_INVALID (0x000000FF) + +#define NVAPI_NVLINK_STATUS_NVLINK_VERSION_1_0 (0x00000001) +#define NVAPI_NVLINK_STATUS_NVLINK_VERSION_2_0 (0x00000002) +#define NVAPI_NVLINK_STATUS_NVLINK_VERSION_INVALID (0x000000FF) + +#define NVAPI_NVLINK_STATUS_NCI_VERSION_1_0 (0x00000001) +#define NVAPI_NVLINK_STATUS_NCI_VERSION_2_0 (0x00000002) +#define NVAPI_NVLINK_STATUS_NCI_VERSION_INVALID (0x000000FF) + +#define NVAPI_NVLINK_STATUS_NVHS_VERSION_1_0 (0x00000001) +#define NVAPI_NVLINK_STATUS_NVHS_VERSION_INVALID (0x000000FF) + +#define NVAPI_NVLINK_STATUS_GRS_VERSION_1_0 (0x00000001) +#define NVAPI_NVLINK_STATUS_GRS_VERSION_INVALID (0x000000FF) + +#define NVAPI_NVLINK_STATUS_CONNECTED_TRUE (0x00000001) +#define NVAPI_NVLINK_STATUS_CONNECTED_FALSE (0x00000000) + +#define NVAPI_NVLINK_STATUS_LOOP_PROPERTY_LOOPBACK (0x00000001) +#define NVAPI_NVLINK_STATUS_LOOP_PROPERTY_LOOPOUT (0x00000002) +#define NVAPI_NVLINK_STATUS_LOOP_PROPERTY_NONE (0x00000000) + +#define NVAPI_NVLINK_STATUS_REMOTE_LINK_NUMBER_INVALID (0x000000FF) + +#define NVAPI_NVLINK_REFCLK_TYPE_INVALID (0x00) +#define NVAPI_NVLINK_REFCLK_TYPE_NVHS (0x01) +#define NVAPI_NVLINK_REFCLK_TYPE_PEX (0x02) + + +typedef struct +{ + NvU32 capsTbl; //!< This is bit field for getting different global caps.The individual bitfields are specified by NVAPI_NVLINK_CAPS_*. + NvU8 phyType; //!< This field specifies the type of PHY (NVHS or GRS) being used for this link. + NvU8 subLinkWidth; //!< This field specifies the no. of lanes per sublink. + NvU32 linkState; //!< This field specifies the current state of the link.See NVAPI_NVLINK_GET_NVLINK_STATUS_LINK_STATE_* for possible values. + NvU8 rxSublinkStatus; //!< This field specifies the current state of RX sublink.See NVAPI_NVLINK_GET_NVLINK_STATUS_SUBLINK_RX_STATE_* for possible values. 
+ NvU8 txSublinkStatus; //!< This field specifies the current state of TX sublink.See NVAPI_NVLINK_GET_NVLINK_STATUS_SUBLINK_TX_STATE_* for possible values. + NvU8 nvlinkVersion; //!< This field specifies the NVLink version supported by the link. + NvU8 nciVersion; //!< This field specifies the NCI version supported by the link. + NvU8 phyVersion; //!< This field specifies the version of PHY being used by the link. + NvU32 nvlinkCommonClockSpeedMhz; //!< This field gives the value of nvlink common clock in MHz. + NvU32 nvlinkRefClkSpeedMhz; //!< This field gives the value of nvlink refclk clock in MHz. + NvU8 nvlinkRefClkType; //!< This field specifies whether refclk is taken from NVHS refclk or PEX refclk for the current GPU.See NVAPI_NVLINK_REFCLK_TYPE_INVALID* for possible values. + NvU32 nvlinkLinkClockMhz; //!< This field gives the actual clock/speed at which links is running in MHz. + NvU32 connected:1 ; //!< This field specifies if any device is connected on the other end of the link. + NvU32 reserved:31; //!< Reserved for future use. + NvU8 loopProperty; //!< This field specifies if the link is a loopback/loopout link. See NVAPI_NVLINK_STATUS_LOOP_PROPERTY_* for possible values. + NvU8 remoteDeviceLinkNumber; //!< This field specifies the link number on the remote end of the link. + NVLINK_DEVICE_INFO_V1 remoteDeviceInfo; //!< This field stores the GPU information for the remote end of the link +}NVLINK_LINK_STATUS_INFO_V1; + +typedef struct +{ + NvU32 capsTbl; //!< This is bit field for getting different global caps.The individual bitfields are specified by NVAPI_NVLINK_CAPS_*. + NvU8 phyType; //!< This field specifies the type of PHY (NVHS or GRS) being used for this link. + NvU8 subLinkWidth; //!< This field specifies the no. of lanes per sublink. + NvU32 linkState; //!< This field specifies the current state of the link.See NVAPI_NVLINK_GET_NVLINK_STATUS_LINK_STATE_* for possible values.
+ NvU8 rxSublinkStatus; //!< This field specifies the current state of RX sublink.See NVAPI_NVLINK_GET_NVLINK_STATUS_SUBLINK_RX_STATE_* for possible values. + NvU8 txSublinkStatus; //!< This field specifies the current state of TX sublink.See NVAPI_NVLINK_GET_NVLINK_STATUS_SUBLINK_TX_STATE_* for possible values. + NvU8 nvlinkVersion; //!< This field specifies the NVLink version supported by the link. + NvU8 nciVersion; //!< This field specifies the NCI version supported by the link. + NvU8 phyVersion; //!< This field specifies the version of PHY being used by the link. + NvU32 nvlinkCommonClockSpeedMhz; //!< This field gives the value of nvlink common clock in MHz. + NvU32 nvlinkRefClkSpeedMhz; //!< This field gives the value of nvlink refclk clock in MHz. + NvU8 nvlinkRefClkType; //!< This field specifies whether refclk is taken from NVHS refclk or PEX refclk for the current GPU.See NVAPI_NVLINK_REFCLK_TYPE_INVALID* for possible values. + NvU32 nvlinkLinkClockMhz; //!< This field gives the actual clock/speed at which links is running in MHz. + NvU32 connected:1 ; //!< This field specifies if any device is connected on the other end of the link. + NvU32 reserved:31; //!< Reserved for future use. + NvU8 loopProperty; //!< This field specifies if the link is a loopback/loopout link. See NVAPI_NVLINK_STATUS_LOOP_PROPERTY_* for possible values. + NvU8 remoteDeviceLinkNumber; //!< This field specifies the link number on the remote end of the link. + NVLINK_DEVICE_INFO_V1 remoteDeviceInfo; //!< This field stores the device information for the remote end of the link + NvU8 localDeviceLinkNumber; //!< This field specifies the link number on the local end of the link. + NVLINK_DEVICE_INFO_V1 localDeviceInfo; //!< This field stores the device information for the local end of the link. + NvU32 nvlinkLineRateMbps; //!< Bit rate at which bits toggle on wires in megabits per second. + NvU32 reservedEx[8]; //!< Reserved for future use to avoid versioning.
+}NVLINK_LINK_STATUS_INFO_V2; + +typedef struct +{ + NvU32 version; //!< Version of this structure. Must always be first element in this structure. + NvU32 linkMask; //!< This parameter specifies for which links we want the status. + NVLINK_LINK_STATUS_INFO_V1 linkInfo[NVAPI_NVLINK_MAX_LINKS]; //!< This structure stores the per-link status of different NVLink parameters. The link is identified by the index. +}NVLINK_GET_STATUS_V1; + +typedef struct +{ + NvU32 version; //!< Version of this structure. Must always be first element in this structure. + NvU32 linkMask; //!< This parameter specifies for which links we want the status. + NVLINK_LINK_STATUS_INFO_V2 linkInfo[NVAPI_NVLINK_MAX_LINKS]; //!< This structure stores the per-link status of different NVLink parameters. The link is identified by the index. +}NVLINK_GET_STATUS_V2; + + +typedef NVLINK_GET_STATUS_V2 NVLINK_GET_STATUS; +#define NVLINK_GET_STATUS_VER1 MAKE_NVAPI_VERSION(NVLINK_GET_STATUS_V1, 1) +#define NVLINK_GET_STATUS_VER2 MAKE_NVAPI_VERSION(NVLINK_GET_STATUS_V2, 2) + +#define NVLINK_GET_STATUS_VER NVLINK_GET_STATUS_VER2 +//! @} +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_NVLINK_GetStatus +// +//! DESCRIPTION: This function returns the NVLink status. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 361 +//! +//! \param [in] hPhysicalGpu GPU selection +//! +//! \param [in,out] NVLINK_GET_STATUS This structure contains the input and output parameters. +//! linkMask is the input param while others are output parameters. +//! Also need to specify the version. +//! +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE - The caller does not have administrative privileges +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! 
\ingroup nvlink +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_NVLINK_GetStatus(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NVLINK_GET_STATUS* statusParams); + + + +typedef struct _NV_ENCODER_STATISTICS_V1 +{ + NvU32 version; //!< [in] Structure version value. + NvU32 sessionsCount; //!< [out] Count of active encoder sessions. + NvU32 averageFps; //!< [out] Trailing average FPS of all active sessions. + NvU32 averageLatency; //!< [out] Encode latency in milliseconds. +} NV_ENCODER_STATISTICS_V1; + +typedef NV_ENCODER_STATISTICS_V1 NV_ENCODER_STATISTICS; + +#define NV_ENCODER_STATISTICS_VER1 MAKE_NVAPI_VERSION(NV_ENCODER_STATISTICS_V1, 1) +#define NNV_ENCODER_STATISTICS_VER NV_ENCODER_STATISTICS_VER1 + + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_GetEncoderStatistics +//! +//! DESCRIPTION: This API can be used to get encoder statistics for the specified Nvidia GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 384 +//! +//! \param [in] hPhysicalGpu - Handle of the Nvidia GPU. +//! \param [in,out] pEncoderStatistics - Pointer to the structure containing encoder statistics data. +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (None) +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetEncoderStatistics(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_ENCODER_STATISTICS *pEncoderStatistics); + + +typedef enum _NV_ENCODER_TYPE +{ + NV_ENCODER_H264, + NV_ENCODER_HEVC, + + NV_ENCODER_UNKNOWN = 0xFFFFFFFF, +}NV_ENCODER_TYPE; + +typedef struct _NV_ENCODER_PER_SESSION_INFO_V1 +{ + NvU32 sessionId; //!< Unique session ID. 
+ NvU32 processId; //!< Owning process ID. + NvU32 vgpuInstance; //!< Owning vGPU instance ID (only valid on vGPU hosts, otherwise zero). + NV_ENCODER_TYPE codecType; //!< Video encoder type. + NvU32 hResolution; //!< Current encode horizontal resolution. + NvU32 vResolution; //!< Current encode vertical resolution. + NvU32 averageEncodeFps; //!< Moving average encode frames per second. + NvU32 averageEncodeLatency; //!< Moving average encode latency in milliseconds. +}NV_ENCODER_PER_SESSION_INFO_V1; + +#define NV_ENCODER_SESSION_INFO_MAX_ENTRIES_V1 0x200 //!< 512 entries. + +typedef struct _NV_ENCODER_SESSIONS_INFO_V1 +{ + NvU32 version; //!< [in] Structure version value. + + NvU32 sessionsCount; //!< [out] Count of active encoder sessions. + NV_ENCODER_PER_SESSION_INFO_V1 *pSessionInfo; //!< [out] Array of session info. Caller should allocate memory for this field before making + //!< the NvAPI_GPU_GetEncoderSessionsInfo() call, memory allocated should be of + //!< size = sizeof(NV_ENCODER_PER_SESSION_INFO_V1) * NV_ENCODER_SESSION_INFO_MAX_ENTRIES_V1. +} NV_ENCODER_SESSIONS_INFO_V1; + +typedef NV_ENCODER_SESSIONS_INFO_V1 NV_ENCODER_SESSIONS_INFO; + +#define NV_ENCODER_SESSIONS_INFO_VER1 MAKE_NVAPI_VERSION(NV_ENCODER_SESSIONS_INFO_V1, 1) +#define NV_ENCODER_SESSIONS_INFO_VER NV_ENCODER_SESSIONS_INFO_VER1 + + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_GetEncoderSessionsInfo +//! +//! DESCRIPTION: This API can be used to retrieve information about active encoder sessions on the specified GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 384 +//! +//! \param [in] hPhysicalGpu - Handle of the Nvidia GPU. +//! \param [in,out] pEncoderSessionsInfo - Pointer to the structure containing information related to encoder sessions. +//! +//! 
RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (None) +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetEncoderSessionsInfo(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_ENCODER_SESSIONS_INFO *pEncoderSessionsInfo); + +typedef struct _NV_GPU_INFO_V1 +{ + NvU32 version; //!< Structure Version. + NvU32 bIsExternalGpu:1; //!< This flag is set for external GPU. + NvU32 reserved:31; //!< Reserved for future use +} NV_GPU_INFO_V1; + +typedef struct _NV_GPU_INFO_V2 +{ + NvU32 version; //!< Structure Version. + NvU32 bIsExternalGpu:1; //!< This flag is set for external GPU. + NvU32 reserved0:31; //!< Reserved for future use + NvU64 reserved1; //!< Reserved for future use + NvU32 rayTracingCores; //!< Number of "Ray Tracing Cores" supported by the GPU. + NvU32 tensorCores; //!< Number of "Tensor Cores" supported by the GPU. + NvU32 reserved2[14]; //!< Reserved for future use. +} NV_GPU_INFO_V2; + +#define NV_GPU_INFO_VER1 MAKE_NVAPI_VERSION(NV_GPU_INFO_V1, 1) +#define NV_GPU_INFO_VER2 MAKE_NVAPI_VERSION(NV_GPU_INFO_V2, 2) +#define NV_GPU_INFO_VER NV_GPU_INFO_VER2 +typedef NV_GPU_INFO_V2 NV_GPU_INFO; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetGPUInfo +// +//! DESCRIPTION: This API will return NVIDIA GPU related information. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 400 +//! +//! \param [in,out] pGpuInfo - This structure will be filled with required information. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetGPUInfo(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_INFO *pGpuInfo); + + + +typedef struct _NV_GPU_VR_READY_V1 +{ + NvU32 version; //!< Structure Version. + NvU32 isVRReady : 1; //!< Is the requested GPU VR ready. + NvU32 reserved : 31; +} NV_GPU_VR_READY_V1; + +#define NV_GPU_VR_READY_VER1 MAKE_NVAPI_VERSION(NV_GPU_VR_READY_V1, 1) +#define NV_GPU_VR_READY_VER NV_GPU_VR_READY_VER1 +typedef NV_GPU_VR_READY_V1 NV_GPU_VR_READY; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetVRReadyData +// +//! DESCRIPTION: This API will return NVIDIA GPU VR Ready state. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 465 +//! +//! \param [in,out] pGpuVrReadyData - This structure will be filled with required information. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetVRReadyData(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_VR_READY *pGpuVrReadyData); + + +typedef enum _NV_ADAPTER_TYPE +{ + NV_ADAPTER_TYPE_UNKNOWN = 0x0, + NV_ADAPTER_TYPE_WDDM = NV_BIT(0), //BATT event + NV_GPU_PERF_DECREASE_REASON_API_TRIGGERED = 0x00000008, //!< API triggered slowdown + NV_GPU_PERF_DECREASE_REASON_INSUFFICIENT_POWER = 0x00000010, //!< Power connector missing + NV_GPU_PERF_DECREASE_REASON_UNKNOWN = 0x80000000, //!< Unknown reason +} NVAPI_GPU_PERF_DECREASE; + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetPerfDecreaseInfo +// +//! 
DESCRIPTION: This function retrieves - in NvU32 variable - reasons for the current performance decrease. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \param [in] hPhysicalGPU (IN) - GPU for which performance decrease is to be evaluated. +//! \param [out] pPerfDecrInfo (OUT) - Pointer to a NvU32 variable containing performance decrease info +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup gpuPerf +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetPerfDecreaseInfo(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NvU32 *pPerfDecrInfo); + + +//! \ingroup gpupstate +//! Used in NvAPI_GPU_GetPstatesInfoEx() +typedef struct +{ + NvU32 version; + NvU32 flags; //!< - bit 0 indicates if perfmon is enabled or not + //!< - bit 1 indicates if dynamic Pstate is capable or not + //!< - bit 2 indicates if dynamic Pstate is enable or not + //!< - all other bits must be set to 0 + NvU32 numPstates; //!< The number of available p-states + NvU32 numClocks; //!< The number of clock domains supported by each P-State + struct + { + NV_GPU_PERF_PSTATE_ID pstateId; //!< ID of the p-state. + NvU32 flags; //!< - bit 0 indicates if the PCIE limit is GEN1 or GEN2 + //!< - bit 1 indicates if the Pstate is overclocked or not + //!< - bit 2 indicates if the Pstate is overclockable or not + //!< - all other bits must be set to 0 + struct + { + NV_GPU_PUBLIC_CLOCK_ID domainId; //!< ID of the clock domain + NvU32 flags; //!< Reserved. Must be set to 0 + NvU32 freq; //!< Clock frequency in kHz + + } clocks[NVAPI_MAX_GPU_PERF_CLOCKS]; + } pstates[NVAPI_MAX_GPU_PERF_PSTATES]; + +} NV_GPU_PERF_PSTATES_INFO_V1; + + +//! 
\ingroup gpupstate +typedef struct +{ + NvU32 version; + NvU32 flags; //!< - bit 0 indicates if perfmon is enabled or not + //!< - bit 1 indicates if dynamic Pstate is capable or not + //!< - bit 2 indicates if dynamic Pstate is enable or not + //!< - all other bits must be set to 0 + NvU32 numPstates; //!< The number of available p-states + NvU32 numClocks; //!< The number of clock domains supported by each P-State + NvU32 numVoltages; + struct + { + NV_GPU_PERF_PSTATE_ID pstateId; //!< ID of the p-state. + NvU32 flags; //!< - bit 0 indicates if the PCIE limit is GEN1 or GEN2 + //!< - bit 1 indicates if the Pstate is overclocked or not + //!< - bit 2 indicates if the Pstate is overclockable or not + //!< - all other bits must be set to 0 + struct + { + NV_GPU_PUBLIC_CLOCK_ID domainId; + NvU32 flags; //!< bit 0 indicates if this clock is overclockable + //!< all other bits must be set to 0 + NvU32 freq; + + } clocks[NVAPI_MAX_GPU_PERF_CLOCKS]; + struct + { + NV_GPU_PERF_VOLTAGE_INFO_DOMAIN_ID domainId; //!< ID of the voltage domain, containing flags and mvolt info + NvU32 flags; //!< Reserved for future use. Must be set to 0 + NvU32 mvolt; //!< Voltage in mV + + } voltages[NVAPI_MAX_GPU_PERF_VOLTAGES]; + + } pstates[NVAPI_MAX_GPU_PERF_PSTATES]; //!< Valid index range is 0 to numVoltages-1 + +} NV_GPU_PERF_PSTATES_INFO_V2; + +//! \ingroup gpupstate +typedef NV_GPU_PERF_PSTATES_INFO_V2 NV_GPU_PERF_PSTATES_INFO; + + +//! \ingroup gpupstate +//! @{ + +//! Macro for constructing the version field of NV_GPU_PERF_PSTATES_INFO_V1 +#define NV_GPU_PERF_PSTATES_INFO_VER1 MAKE_NVAPI_VERSION(NV_GPU_PERF_PSTATES_INFO_V1,1) + +//! Macro for constructing the version field of NV_GPU_PERF_PSTATES_INFO_V2 +#define NV_GPU_PERF_PSTATES_INFO_VER2 MAKE_NVAPI_VERSION(NV_GPU_PERF_PSTATES_INFO_V2,2) + +//! Macro for constructing the version field of NV_GPU_PERF_PSTATES_INFO_V2 +#define NV_GPU_PERF_PSTATES_INFO_VER3 MAKE_NVAPI_VERSION(NV_GPU_PERF_PSTATES_INFO_V2,3) + +//! 
Macro for constructing the version field of NV_GPU_PERF_PSTATES_INFO +#define NV_GPU_PERF_PSTATES_INFO_VER NV_GPU_PERF_PSTATES_INFO_VER3 + +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetPstatesInfoEx +// +//! DESCRIPTION: This API retrieves all performance states (P-States) information. +//! +//! P-States are GPU active/executing performance capability and power consumption states. +//! +//! P-States ranges from P0 to P15, with P0 being the highest performance/power state, and +//! P15 being the lowest performance/power state. Each P-State, if available, maps to a +//! performance level. Not all P-States are available on a given system. The definitions +//! of each P-State are currently as follows: \n +//! - P0/P1 - Maximum 3D performance +//! - P2/P3 - Balanced 3D performance-power +//! - P8 - Basic HD video playback +//! - P10 - DVD playback +//! - P12 - Minimum idle power consumption +//! +//! \deprecated Do not use this function - it is deprecated in release 304. Instead, use NvAPI_GPU_GetPstates20. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hPhysicalGPU GPU selection. +//! \param [out] pPerfPstatesInfo P-States information retrieved, as detailed below: \n +//! - flags is reserved for future use. +//! - numPstates is the number of available P-States +//! - numClocks is the number of clock domains supported by each P-State +//! - pstates has valid index range from 0 to numPstates - 1 +//! - pstates[i].pstateId is the ID of the P-State, +//! containing the following info: +//! - pstates[i].flags containing the following info: +//! - bit 0 indicates if the PCIE limit is GEN1 or GEN2 +//! - bit 1 indicates if the Pstate is overclocked or not +//! - bit 2 indicates if the Pstate is overclockable or not +//! - pstates[i].clocks has valid index range from 0 to numClocks -1 +//! - pstates[i].clocks[j].domainId is the public ID of the clock domain, +//! 
containing the following info: +//! - pstates[i].clocks[j].flags containing the following info: +//! bit 0 indicates if the clock domain is overclockable or not +//! - pstates[i].clocks[j].freq is the clock frequency in kHz +//! - pstates[i].voltages has a valid index range from 0 to numVoltages - 1 +//! - pstates[i].voltages[j].domainId is the ID of the voltage domain, +//! containing the following info: +//! - pstates[i].voltages[j].flags is reserved for future use. +//! - pstates[i].voltages[j].mvolt is the voltage in mV +//! inputFlags(IN) - This can be used to select various options: +//! - if bit 0 is set, pPerfPstatesInfo would contain the default settings +//! instead of the current, possibily overclocked settings. +//! - if bit 1 is set, pPerfPstatesInfo would contain the maximum clock +//! frequencies instead of the nominal frequencies. +//! - if bit 2 is set, pPerfPstatesInfo would contain the minimum clock +//! frequencies instead of the nominal frequencies. +//! - all other bits must be set to 0. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_ERROR Miscellaneous error occurred +//! \retval ::NVAPI_HANDLE_INVALIDATED Handle passed has been invalidated (see user guide) +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE Handle passed is not a physical GPU handle +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the NV_GPU_PERF_PSTATES struct is not supported +//! +//! \ingroup gpupstate +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 304. Instead, use NvAPI_GPU_GetPstates20.") +NVAPI_INTERFACE NvAPI_GPU_GetPstatesInfoEx(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_PERF_PSTATES_INFO *pPerfPstatesInfo, NvU32 inputFlags); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetPstates20 +// +//! 
DESCRIPTION: This API retrieves all performance states (P-States) 2.0 information. +//! +//! P-States are GPU active/executing performance capability states. +//! They range from P0 to P15, with P0 being the highest performance state, +//! and P15 being the lowest performance state. Each P-State, if available, +//! maps to a performance level. Not all P-States are available on a given system. +//! The definition of each P-States are currently as follow: +//! - P0/P1 - Maximum 3D performance +//! - P2/P3 - Balanced 3D performance-power +//! - P8 - Basic HD video playback +//! - P10 - DVD playback +//! - P12 - Minimum idle power consumption +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 295 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hPhysicalGPU GPU selection +//! \param [out] pPstatesInfo P-States information retrieved, as documented in declaration above +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +//! +//! \ingroup gpupstate +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetPstates20(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_PERF_PSTATES20_INFO *pPstatesInfo); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetCurrentPstate +// +//! DESCRIPTION: This function retrieves the current performance state (P-State). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 165 +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \param [in] hPhysicalGPU GPU selection +//! \param [out] pCurrentPstate The ID of the current P-State of the GPU - see \ref NV_GPU_PERF_PSTATES. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred. +//! 
\retval NVAPI_HANDLE_INVALIDATED Handle passed has been invalidated (see user guide). +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE Handle passed is not a physical GPU handle. +//! \retval NVAPI_NOT_SUPPORTED P-States is not supported on this setup. +//! +//! \ingroup gpupstate +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetCurrentPstate(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_PERF_PSTATE_ID *pCurrentPstate); + + + + +//! \ingroup gpupstate +#define NVAPI_MAX_GPU_UTILIZATIONS 8 + + + +//! \ingroup gpupstate +//! Used in NvAPI_GPU_GetDynamicPstatesInfoEx(). +typedef struct +{ + NvU32 version; //!< Structure version + NvU32 flags; //!< bit 0 indicates if the dynamic Pstate is enabled or not + struct + { + NvU32 bIsPresent:1; //!< Set if this utilization domain is present on this GPU + NvU32 percentage; //!< Percentage of time where the domain is considered busy in the last 1 second interval + } utilization[NVAPI_MAX_GPU_UTILIZATIONS]; +} NV_GPU_DYNAMIC_PSTATES_INFO_EX; + +//! \ingroup gpupstate +//! Macro for constructing the version field of NV_GPU_DYNAMIC_PSTATES_INFO_EX +#define NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER MAKE_NVAPI_VERSION(NV_GPU_DYNAMIC_PSTATES_INFO_EX,1) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetDynamicPstatesInfoEx +// +//! DESCRIPTION: This API retrieves the NV_GPU_DYNAMIC_PSTATES_INFO_EX structure for the specified physical GPU. +//! Each domain's info is indexed in the array. For example: +//! - pDynamicPstatesInfo->utilization[NVAPI_GPU_UTILIZATION_DOMAIN_GPU] holds the info for the GPU domain. \p +//! There are currently 4 domains for which GPU utilization and dynamic P-State thresholds can be retrieved: +//! graphic engine (GPU), frame buffer (FB), video engine (VID), and bus interface (BUS). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! 
\since Release: 185 +//! +//! \retval ::NVAPI_OK +//! \retval ::NVAPI_ERROR +//! \retval ::NVAPI_INVALID_ARGUMENT pDynamicPstatesInfo is NULL +//! \retval ::NVAPI_HANDLE_INVALIDATED +//! \retval ::NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the INFO struct is not supported +//! +//! \ingroup gpupstate +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetDynamicPstatesInfoEx(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_DYNAMIC_PSTATES_INFO_EX *pDynamicPstatesInfoEx); +/////////////////////////////////////////////////////////////////////////////////// +// Thermal API +// Provides ability to get temperature levels from the various thermal sensors associated with the GPU + +//! \ingroup gputhermal +#define NVAPI_MAX_THERMAL_SENSORS_PER_GPU 3 + +//! \ingroup gputhermal +//! Used in NV_GPU_THERMAL_SETTINGS +typedef enum +{ + NVAPI_THERMAL_TARGET_NONE = 0, + NVAPI_THERMAL_TARGET_GPU = 1, //!< GPU core temperature requires NvPhysicalGpuHandle + NVAPI_THERMAL_TARGET_MEMORY = 2, //!< GPU memory temperature requires NvPhysicalGpuHandle + NVAPI_THERMAL_TARGET_POWER_SUPPLY = 4, //!< GPU power supply temperature requires NvPhysicalGpuHandle + NVAPI_THERMAL_TARGET_BOARD = 8, //!< GPU board ambient temperature requires NvPhysicalGpuHandle + NVAPI_THERMAL_TARGET_VCD_BOARD = 9, //!< Visual Computing Device Board temperature requires NvVisualComputingDeviceHandle + NVAPI_THERMAL_TARGET_VCD_INLET = 10, //!< Visual Computing Device Inlet temperature requires NvVisualComputingDeviceHandle + NVAPI_THERMAL_TARGET_VCD_OUTLET = 11, //!< Visual Computing Device Outlet temperature requires NvVisualComputingDeviceHandle + + NVAPI_THERMAL_TARGET_ALL = 15, + NVAPI_THERMAL_TARGET_UNKNOWN = -1, +} NV_THERMAL_TARGET; + +//! \ingroup gputhermal +//! 
Used in NV_GPU_THERMAL_SETTINGS +typedef enum +{ + NVAPI_THERMAL_CONTROLLER_NONE = 0, + NVAPI_THERMAL_CONTROLLER_GPU_INTERNAL, + NVAPI_THERMAL_CONTROLLER_ADM1032, + NVAPI_THERMAL_CONTROLLER_MAX6649, + NVAPI_THERMAL_CONTROLLER_MAX1617, + NVAPI_THERMAL_CONTROLLER_LM99, + NVAPI_THERMAL_CONTROLLER_LM89, + NVAPI_THERMAL_CONTROLLER_LM64, + NVAPI_THERMAL_CONTROLLER_ADT7473, + NVAPI_THERMAL_CONTROLLER_SBMAX6649, + NVAPI_THERMAL_CONTROLLER_VBIOSEVT, + NVAPI_THERMAL_CONTROLLER_OS, + NVAPI_THERMAL_CONTROLLER_UNKNOWN = -1, +} NV_THERMAL_CONTROLLER; + +//! \ingroup gputhermal +//! Used in NvAPI_GPU_GetThermalSettings() +typedef struct +{ + NvU32 version; //!< structure version + NvU32 count; //!< number of associated thermal sensors + struct + { + NV_THERMAL_CONTROLLER controller; //!< internal, ADM1032, MAX6649... + NvU32 defaultMinTemp; //!< The min default temperature value of the thermal sensor in degree Celsius + NvU32 defaultMaxTemp; //!< The max default temperature value of the thermal sensor in degree Celsius + NvU32 currentTemp; //!< The current temperature value of the thermal sensor in degree Celsius + NV_THERMAL_TARGET target; //!< Thermal sensor targeted @ GPU, memory, chipset, powersupply, Visual Computing Device, etc. + } sensor[NVAPI_MAX_THERMAL_SENSORS_PER_GPU]; + +} NV_GPU_THERMAL_SETTINGS_V1; + +//! \ingroup gputhermal +typedef struct +{ + NvU32 version; //!< structure version + NvU32 count; //!< number of associated thermal sensors + struct + { + NV_THERMAL_CONTROLLER controller; //!< internal, ADM1032, MAX6649... 
+ NvS32 defaultMinTemp; //!< Minimum default temperature value of the thermal sensor in degree Celsius + NvS32 defaultMaxTemp; //!< Maximum default temperature value of the thermal sensor in degree Celsius + NvS32 currentTemp; //!< Current temperature value of the thermal sensor in degree Celsius + NV_THERMAL_TARGET target; //!< Thermal sensor targeted - GPU, memory, chipset, powersupply, Visual Computing Device, etc + } sensor[NVAPI_MAX_THERMAL_SENSORS_PER_GPU]; + +} NV_GPU_THERMAL_SETTINGS_V2; + +//! \ingroup gputhermal +typedef NV_GPU_THERMAL_SETTINGS_V2 NV_GPU_THERMAL_SETTINGS; + +//! \ingroup gputhermal +//! @{ + +//! Macro for constructing the version field of NV_GPU_THERMAL_SETTINGS_V1 +#define NV_GPU_THERMAL_SETTINGS_VER_1 MAKE_NVAPI_VERSION(NV_GPU_THERMAL_SETTINGS_V1,1) + +//! Macro for constructing the version field of NV_GPU_THERMAL_SETTINGS_V2 +#define NV_GPU_THERMAL_SETTINGS_VER_2 MAKE_NVAPI_VERSION(NV_GPU_THERMAL_SETTINGS_V2,2) + +//! Macro for constructing the version field of NV_GPU_THERMAL_SETTINGS +#define NV_GPU_THERMAL_SETTINGS_VER NV_GPU_THERMAL_SETTINGS_VER_2 +//! @} + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetThermalSettings +// +//! This function retrieves the thermal information of all thermal sensors or specific thermal sensor associated with the selected GPU. +//! Thermal sensors are indexed 0 to NVAPI_MAX_THERMAL_SENSORS_PER_GPU-1. +//! +//! - To retrieve specific thermal sensor info, set the sensorIndex to the required thermal sensor index. +//! - To retrieve info for all sensors, set sensorIndex to NVAPI_THERMAL_TARGET_ALL. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 85 +//! +//! \param [in] hPhysicalGPU GPU selection. +//! \param [in] sensorIndex Explicit thermal sensor index selection. +//! \param [out] pThermalSettings Array of thermal settings. +//! +//! 
\retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred. +//! \retval NVAPI_INVALID_ARGUMENT pThermalInfo is NULL. +//! \retval NVAPI_HANDLE_INVALIDATED Handle passed has been invalidated (see user guide). +//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE Handle passed is not a physical GPU handle. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the INFO struct is not supported. +//! \ingroup gputhermal +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetThermalSettings(NvPhysicalGpuHandle hPhysicalGpu, NvU32 sensorIndex, NV_GPU_THERMAL_SETTINGS *pThermalSettings); + + + +//! \ingroup gpuclock +//! Used in NvAPI_GPU_GetAllClockFrequencies() +typedef struct +{ + NvU32 version; //!< Structure version + NvU32 reserved; //!< These bits are reserved for future use. + struct + { + NvU32 bIsPresent:1; //!< Set if this domain is present on this GPU + NvU32 reserved:31; //!< These bits are reserved for future use. + NvU32 frequency; //!< Clock frequency (kHz) + }domain[NVAPI_MAX_GPU_PUBLIC_CLOCKS]; +} NV_GPU_CLOCK_FREQUENCIES_V1; + +#ifndef NV_GPU_MAX_CLOCK_FREQUENCIES + #define NV_GPU_MAX_CLOCK_FREQUENCIES 3 +#endif + +//! \ingroup gpuclock +//! Used in NvAPI_GPU_GetAllClockFrequencies() +typedef enum +{ + NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ = 0, + NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK = 1, + NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK = 2, + NV_GPU_CLOCK_FREQUENCIES_CLOCK_TYPE_NUM = NV_GPU_MAX_CLOCK_FREQUENCIES +} NV_GPU_CLOCK_FREQUENCIES_CLOCK_TYPE; + +//! \ingroup gpuclock +//! Used in NvAPI_GPU_GetAllClockFrequencies() +typedef struct +{ + NvU32 version; //!< Structure version + NvU32 ClockType:4; //!< One of NV_GPU_CLOCK_FREQUENCIES_CLOCK_TYPE. Used to specify the type of clock to be returned. + NvU32 reserved:20; //!< These bits are reserved for future use. Must be set to 0. + NvU32 reserved1:8; //!< These bits are reserved. 
+ struct + { + NvU32 bIsPresent:1; //!< Set if this domain is present on this GPU + NvU32 reserved:31; //!< These bits are reserved for future use. + NvU32 frequency; //!< Clock frequency (kHz) + }domain[NVAPI_MAX_GPU_PUBLIC_CLOCKS]; +} NV_GPU_CLOCK_FREQUENCIES_V2; + +//! \ingroup gpuclock +//! Used in NvAPI_GPU_GetAllClockFrequencies() +typedef NV_GPU_CLOCK_FREQUENCIES_V2 NV_GPU_CLOCK_FREQUENCIES; + +//! \addtogroup gpuclock +//! @{ +#define NV_GPU_CLOCK_FREQUENCIES_VER_1 MAKE_NVAPI_VERSION(NV_GPU_CLOCK_FREQUENCIES_V1,1) +#define NV_GPU_CLOCK_FREQUENCIES_VER_2 MAKE_NVAPI_VERSION(NV_GPU_CLOCK_FREQUENCIES_V2,2) +#define NV_GPU_CLOCK_FREQUENCIES_VER_3 MAKE_NVAPI_VERSION(NV_GPU_CLOCK_FREQUENCIES_V2,3) +#define NV_GPU_CLOCK_FREQUENCIES_VER NV_GPU_CLOCK_FREQUENCIES_VER_3 +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetAllClockFrequencies +// +//! This function retrieves the NV_GPU_CLOCK_FREQUENCIES structure for the specified physical GPU. +//! +//! For each clock domain: +//! - bIsPresent is set for each domain that is present on the GPU +//! - frequency is the domain's clock freq in kHz +//! +//! Each domain's info is indexed in the array. For example: +//! clkFreqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY] holds the info for the MEMORY domain. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 295 +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +//! \retval NVAPI_INVALID_ARGUMENT pClkFreqs is NULL. +//! 
\ingroup gpuclock +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetAllClockFrequencies(__in NvPhysicalGpuHandle hPhysicalGPU, __inout NV_GPU_CLOCK_FREQUENCIES *pClkFreqs); + + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_QueryIlluminationSupport +//! +//! \fn NvAPI_GPU_QueryIlluminationSupport(__inout NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM *pIlluminationSupportInfo) +//! DESCRIPTION: This function reports if the specified illumination attribute is supported. +//! +//! \note Only a single GPU can manage an given attribute on a given HW element, +//! regardless of how many are attatched. I.E. only one GPU will be used to control +//! the brightness of the LED on an SLI bridge, regardless of how many are physicaly attached. +//! You should enumerate thru the GPUs with this call to determine which GPU is managing the attribute. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 300.05 +//! +//! \param [in] hPhysicalGpu Physical GPU handle +//! \param Attribute An enumeration value specifying the Illumination attribute to be querried +//! \param [out] pSupported A boolean indicating if the attribute is supported. +//! +//! \return See \ref nvapistatus for the list of possible return values. +// +////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +typedef enum _NV_GPU_ILLUMINATION_ATTRIB +{ + NV_GPU_IA_LOGO_BRIGHTNESS = 0, + NV_GPU_IA_SLI_BRIGHTNESS = 1, +} NV_GPU_ILLUMINATION_ATTRIB; + +//! \ingroup gpu +typedef struct _NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM_V1 { + + // IN + NvU32 version; //!< Version of this structure + NvPhysicalGpuHandle hPhysicalGpu; //!< The handle of the GPU that you are checking for the specified attribute. + //!< note that this is the GPU that is managing the attribute. 
+ //!< Only a single GPU can manage an given attribute on a given HW element, + //!< regardless of how many are attatched. + //!< I.E. only one GPU will be used to control the brightness of the LED on an SLI bridge, + //!< regardless of how many are physicaly attached. + //!< You enumerate thru the GPUs with this call to determine which GPU is managing the attribute. + NV_GPU_ILLUMINATION_ATTRIB Attribute; //!< An enumeration value specifying the Illumination attribute to be querried. + //!< refer to enum \ref NV_GPU_ILLUMINATION_ATTRIB. + + // OUT + NvU32 bSupported; //!< A boolean indicating if the attribute is supported. + +} NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM_V1; + +//! \ingroup gpu +typedef NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM_V1 NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM; +//! \ingroup gpu +#define NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM_VER_1 MAKE_NVAPI_VERSION(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM_V1,1) +//! \ingroup gpu +#define NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM_VER NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM_VER_1 + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_QueryIlluminationSupport(__inout NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM *pIlluminationSupportInfo); + + + + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_GetIllumination +//! +//! \fn NvAPI_GPU_GetIllumination(NV_GPU_GET_ILLUMINATION_PARM *pIlluminationInfo) +//! DESCRIPTION: This function reports value of the specified illumination attribute. +//! +//! \note Only a single GPU can manage an given attribute on a given HW element, +//! regardless of how many are attatched. I.E. only one GPU will be used to control +//! the brightness of the LED on an SLI bridge, regardless of how many are physicaly attached. +//! You should enumerate thru the GPUs with the \ref NvAPI_GPU_QueryIlluminationSupport call to +//! determine which GPU is managing the attribute. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! 
TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 300.05 +//! +//! \param [in] hPhysicalGpu Physical GPU handle +//! \param Attribute An enumeration value specifying the Illumination attribute to be querried +//! \param [out] Value A DWORD containing the current value for the specified attribute. +//! This is specified as a percentage of the full range of the attribute +//! (0-100; 0 = off, 100 = full brightness) +//! +//! \return See \ref nvapistatus for the list of possible return values. Return values of special interest are: +//! NVAPI_INVALID_ARGUMENT The specified attibute is not known to the driver. +//! NVAPI_NOT_SUPPORTED: The specified attribute is not supported on the specified GPU +// +////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +typedef struct _NV_GPU_GET_ILLUMINATION_PARM_V1 { + + // IN + NvU32 version; //!< Version of this structure + NvPhysicalGpuHandle hPhysicalGpu; //!< The handle of the GPU that you are checking for the specified attribute. + //!< Note that this is the GPU that is managing the attribute. + //!< Only a single GPU can manage an given attribute on a given HW element, + //!< regardless of how many are attatched. + //!< I.E. only one GPU will be used to control the brightness of the LED on an SLI bridge, + //!< regardless of how many are physicaly attached. + //!< You enumerate thru the GPUs with this call to determine which GPU is managing the attribute. + NV_GPU_ILLUMINATION_ATTRIB Attribute; //!< An enumeration value specifying the Illumination attribute to be querried. + //!< refer to enum \ref NV_GPU_ILLUMINATION_ATTRIB. + + // OUT + NvU32 Value; //!< A DWORD that will contain the current value of the specified attribute. + //! This is specified as a percentage of the full range of the attribute + //! (0-100; 0 = off, 100 = full brightness) + +} NV_GPU_GET_ILLUMINATION_PARM_V1; + +//! 
\ingroup gpu +typedef NV_GPU_GET_ILLUMINATION_PARM_V1 NV_GPU_GET_ILLUMINATION_PARM; +//! \ingroup gpu +#define NV_GPU_GET_ILLUMINATION_PARM_VER_1 MAKE_NVAPI_VERSION(NV_GPU_GET_ILLUMINATION_PARM_V1,1) +//! \ingroup gpu +#define NV_GPU_GET_ILLUMINATION_PARM_VER NV_GPU_GET_ILLUMINATION_PARM_VER_1 + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_GetIllumination(NV_GPU_GET_ILLUMINATION_PARM *pIlluminationInfo); + + + + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_SetIllumination +//! +//! \fn NvAPI_GPU_SetIllumination(NV_GPU_SET_ILLUMINATION_PARM *pIlluminationInfo) +//! DESCRIPTION: This function sets the value of the specified illumination attribute. +//! +//! \note Only a single GPU can manage an given attribute on a given HW element, +//! regardless of how many are attatched. I.E. only one GPU will be used to control +//! the brightness of the LED on an SLI bridge, regardless of how many are physicaly attached. +//! You should enumerate thru the GPUs with the \ref NvAPI_GPU_QueryIlluminationSupport call to +//! determine which GPU is managing the attribute. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 300.05 +//! +//! \param [in] hPhysicalGpu Physical GPU handle +//! \param Attribute An enumeration value specifying the Illumination attribute to be set +//! \param Value The new value for the specified attribute. +//! This should be specified as a percentage of the full range of the attribute +//! (0-100; 0 = off, 100 = full brightness) +//! If a value is specified outside this range, NVAPI_INVALID_ARGUMENT will be returned. +//! +//! \return See \ref nvapistatus for the list of possible return values. Return values of special interest are: +//! NVAPI_INVALID_ARGUMENT The specified attibute is not known to the driver, or the specified value is out of range. +//! 
NVAPI_NOT_SUPPORTED The specified attribute is not supported on the specified GPU. +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup gpu +typedef struct _NV_GPU_SET_ILLUMINATION_PARM_V1 { + + // IN + NvU32 version; //!< Version of this structure + NvPhysicalGpuHandle hPhysicalGpu; //!< The handle of the GPU that you are checking for the specified attribute. + //!< Note that this is the GPU that is managing the attribute. + //!< Only a single GPU can manage an given attribute on a given HW element, + //!< regardless of how many are attatched. + //!< I.E. only one GPU will be used to control the brightness of the LED on an SLI bridge, + //!< regardless of how many are physicaly attached. + //!< You enumerate thru the GPUs with this call to determine which GPU is managing the attribute. + NV_GPU_ILLUMINATION_ATTRIB Attribute; //!< An enumeration value specifying the Illumination attribute to be querried. + //!< refer to enum \ref NV_GPU_ILLUMINATION_ATTRIB. + NvU32 Value; //!< A DWORD containing the new value for the specified attribute. + //!< This should be specified as a percentage of the full range of the attribute + //!< (0-100; 0 = off, 100 = full brightness) + //!< If a value is specified outside this range, NVAPI_INVALID_ARGUMENT will be returned. + + // OUT + +} NV_GPU_SET_ILLUMINATION_PARM_V1; + +//! \ingroup gpu +typedef NV_GPU_SET_ILLUMINATION_PARM_V1 NV_GPU_SET_ILLUMINATION_PARM; +//! \ingroup gpu +#define NV_GPU_SET_ILLUMINATION_PARM_VER_1 MAKE_NVAPI_VERSION(NV_GPU_SET_ILLUMINATION_PARM_V1,1) +//! \ingroup gpu +#define NV_GPU_SET_ILLUMINATION_PARM_VER NV_GPU_SET_ILLUMINATION_PARM_VER_1 + +//! \ingroup gpu +NVAPI_INTERFACE NvAPI_GPU_SetIllumination(NV_GPU_SET_ILLUMINATION_PARM *pIlluminationInfo); + + + +/*! + * Enumeration of control modes that can be applied to Illumination Zones. 
+ */ +typedef enum +{ + NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL_RGB = 0, // deprecated + NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGB, // deprecated + + NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL = 0, + NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR, + + // Strictly add new control modes above this. + NV_GPU_CLIENT_ILLUM_CTRL_MODE_INVALID = 0xFF, +} NV_GPU_CLIENT_ILLUM_CTRL_MODE; + +/*! + * Enumeration of locations where an Illumination Zone might be present. + * Encoding used - + * 1:0 - Number specifier (0) + * 4:2 - Location (TOP) + * 7:5 - Type (GPU/SLI) + */ +typedef enum +{ + NV_GPU_CLIENT_ILLUM_ZONE_LOCATION_GPU_TOP_0 = 0x00, + NV_GPU_CLIENT_ILLUM_ZONE_LOCATION_GPU_FRONT_0 = 0x08, + NV_GPU_CLIENT_ILLUM_ZONE_LOCATION_GPU_BACK_0 = 0x0C, + NV_GPU_CLIENT_ILLUM_ZONE_LOCATION_SLI_TOP_0 = 0x20, + NV_GPU_CLIENT_ILLUM_ZONE_LOCATION_INVALID = 0xFFFFFFFF, +} NV_GPU_CLIENT_ILLUM_ZONE_LOCATION; + +/*! + * Enumeration of ILLUM_DEVICEs. + */ +typedef enum +{ + NV_GPU_CLIENT_ILLUM_DEVICE_TYPE_INVALID = 0, + NV_GPU_CLIENT_ILLUM_DEVICE_TYPE_MCUV10, + NV_GPU_CLIENT_ILLUM_DEVICE_TYPE_GPIO_PWM_RGBW_V10, + NV_GPU_CLIENT_ILLUM_DEVICE_TYPE_GPIO_PWM_SINGLE_COLOR_V10, +} NV_GPU_CLIENT_ILLUM_DEVICE_TYPE; + +/*! + * Enumeration of ILLUM_ZONEs. + */ +typedef enum +{ + NV_GPU_CLIENT_ILLUM_ZONE_TYPE_INVALID = 0, + NV_GPU_CLIENT_ILLUM_ZONE_TYPE_RGB, + NV_GPU_CLIENT_ILLUM_ZONE_TYPE_COLOR_FIXED, + NV_GPU_CLIENT_ILLUM_ZONE_TYPE_RGBW, + NV_GPU_CLIENT_ILLUM_ZONE_TYPE_SINGLE_COLOR, +} NV_GPU_CLIENT_ILLUM_ZONE_TYPE; + +/*! + * Number of color points for the piecewise linear control mode. + */ +#define NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_COLOR_ENDPOINTS 2 + +/*! + * Enumeration of Cycle types for piecewise linear control mode. 
+ */ +typedef enum +{ + NV_GPU_CLIENT_ILLUM_PIECEWISE_LINEAR_CYCLE_HALF_HALT = 0, + NV_GPU_CLIENT_ILLUM_PIECEWISE_LINEAR_CYCLE_FULL_HALT, + NV_GPU_CLIENT_ILLUM_PIECEWISE_LINEAR_CYCLE_FULL_REPEAT, + NV_GPU_CLIENT_ILLUM_PIECEWISE_LINEAR_CYCLE_INVALID = 0xFF, +} NV_GPU_CLIENT_ILLUM_PIECEWISE_LINEAR_CYCLE_TYPE; + +#define NV_GPU_CLIENT_ILLUM_DEVICE_NUM_DEVICES_MAX 32 + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_DEVICE_INFO_V1 + * Describes the static information of illumination device type MCUV10. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_MCUV10 +{ + /*! + * I2C Device Index: Pointing to the illumination device in I2C Devices Table. + */ + NvU8 i2cDevIdx; +} NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_MCUV10; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_DEVICE_INFO_V1 + * Describes the static information of illum device type GPIO_PWM_RGBW. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_GPIO_PWM_RGBW +{ + /*! + * Red drive GPIO pin. + */ + NvU8 gpioPinRed; + + /*! + * Green drive GPIO pin. + */ + NvU8 gpioPinGreen; + + /*! + * Blue drive GPIO pin. + */ + NvU8 gpioPinBlue; + + /*! + * White drive GPIO pin. + */ + NvU8 gpioPinWhite; +} NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_GPIO_PWM_RGBW; +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_DEVICE_INFO_V1 + * Describes the static information of illum device type GPIO_PWM_SINGLE_COLOR. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_GPIO_PWM_SINGLE_COLOR +{ + /*! + * Single Color GPIO pin. + */ + NvU8 gpioPinSingleColor; +} NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_GPIO_PWM_SINGLE_COLOR; + +/*! + */ +typedef struct _NV_GPU_CLIENT_ILLUM_DEVICE_INFO_V1 +{ + /*! + * Type of the illumination device. + */ + NV_GPU_CLIENT_ILLUM_DEVICE_TYPE type; + + /*! + * Supported control modes for this illumination device. + */ + NvU32 ctrlModeMask; + + /*! + * Union of illumination device info data. 
Interpreted as per + * @ref NV_GPU_CLIENT_ILLUM_DEVICE_INFO_V1::type + */ + union + { + // + // Need to be careful when add/expanding types in this union. If any type + // exceeds sizeof(rsvd) then rsvd has failed its purpose. + // + NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_MCUV10 mcuv10; + NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_GPIO_PWM_RGBW gpioPwmRgbwv10; + NV_GPU_CLIENT_ILLUM_DEVICE_INFO_DATA_GPIO_PWM_SINGLE_COLOR gpioPwmSingleColorv10; + + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + } data; + + /*! + * Reserved for future. + */ + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_DEVICE_INFO_V1; + +/*! + */ +typedef struct _NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS_V1 +{ + /*! + * Version of structure. Must always be first member. + */ + NvU32 version; + + /*! + * Number of illumination devices present. + */ + NvU32 numIllumDevices; + + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + + /*! + */ + NV_GPU_CLIENT_ILLUM_DEVICE_INFO_V1 devices[NV_GPU_CLIENT_ILLUM_DEVICE_NUM_DEVICES_MAX]; +} NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS_V1; + +#define NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS_VER_1 MAKE_NVAPI_VERSION(NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS_V1, 1) +#define NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS_VER NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS_VER_1 +typedef NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS_V1 NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS; + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_ClientIllumDevicesGetInfo +//! +//! DESCRIPTION: This API returns static information about illumination devices on the +//! given GPU. +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 400 +//! \param [in] hPhysicalGpu The physical GPU handle +//! \param [out] pIllumDevicesInfo Pointer to structure containing static +//! information about illumination devices. +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ClientIllumDevicesGetInfo(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_CLIENT_ILLUM_DEVICE_INFO_PARAMS *pIllumDevicesInfo); + + +/*! + * Structure representing the data required for synchronization. + */ +typedef struct +{ + /*! + * Boolean representing the need for synchronization. + */ + NvBool bSync; + + /*! + * Time stamp value required for synchronization. + */ + NvU64 timeStampms; + + /*! + * Reserved for future. + */ + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_DEVICE_SYNC_V1; + +/*! + * Structure representing the device control parameters of each ILLUM_DEVICE. + */ +typedef struct +{ + /*! + * Type of the illum device. + */ + NV_GPU_CLIENT_ILLUM_DEVICE_TYPE type; + + /*! + * Structure containing the synchronization data for the illumination device. + */ + NV_GPU_CLIENT_ILLUM_DEVICE_SYNC_V1 syncData; + + /*! + * Reserved for future. + */ + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_V1; + +typedef NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_V1 NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL; + +/*! + * Structure representing the control parameters of ILLUM_DEVICE-s. + */ +typedef struct +{ + /*! + * Version of structure. Must always be first member. + */ + NvU32 version; + + /*! + * Number of illumination devices present. + */ + NvU32 numIllumDevices; + + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + + /*! 
+ */ + NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_V1 devices[NV_GPU_CLIENT_ILLUM_DEVICE_NUM_DEVICES_MAX]; +} NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS_V1; + +#define NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS_VER_1 MAKE_NVAPI_VERSION(NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS_V1, 1) +#define NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS_VER NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS_VER_1 +typedef NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS_V1 NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS; + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_ClientIllumDevicesGetControl +//! +//! DESCRIPTION: This API gets control parameters about illumination devices on the +//! given GPU. +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 400 +//! \param [in] hPhysicalGpu The physical GPU handle +//! \param [inout] pIllumDevicesControl Pointer to structure containing control +//! information about illum devices. +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ClientIllumDevicesGetControl(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS *pClientIllumDevicesControl); + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_ClientIllumDevicesSetControl +//! +//! DESCRIPTION: This API sets control parameters about illumination devices on the +//! given GPU. +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 400 +//! \param [in] hPhysicalGpu The physical GPU handle +//! \param [inout] pClientIllumDevicesControl Pointer to structure containing control +//! 
information about illum devices. +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ClientIllumDevicesSetControl(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_CLIENT_ILLUM_DEVICE_CONTROL_PARAMS *pClientIllumDevicesControl); + + +#define NV_GPU_CLIENT_ILLUM_ZONE_NUM_ZONES_MAX 32 + +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_RGB +{ + NvU8 rsvd; +} NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_RGB; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_INFO_V1 + * Describes the static information of illum zone type RGBW. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_RGBW +{ + NvU8 rsvd; +} NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_RGBW; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_INFO_V1 + * Describes the static information of illum zone type SINGLE_COLOR. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_SINGLE_COLOR +{ + NvU8 rsvd; +} NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_SINGLE_COLOR; + +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_INFO_V1 +{ + NV_GPU_CLIENT_ILLUM_ZONE_TYPE type; + + /*! + * Index pointing to an Illumination Device that controls this zone. + */ + NvU8 illumDeviceIdx; + + /*! + * Provider index for representing logical to physical zone mapping. + */ + NvU8 provIdx; + + /*! + * Location of the zone on the board. + */ + NV_GPU_CLIENT_ILLUM_ZONE_LOCATION zoneLocation; + + union + { + // + // Need to be careful when add/expanding types in this union. If any type + // exceeds sizeof(rsvd) then rsvd has failed its purpose. + // + NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_RGB rgb; + NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_RGBW rgbw; + NV_GPU_CLIENT_ILLUM_ZONE_INFO_DATA_SINGLE_COLOR singleColor; + + /*! + * Reserved bytes for possible future extension of this struct. 
+ */ + NvU8 rsvd[64]; + } data; + + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_ZONE_INFO_V1; + +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS_V1 +{ + /*! + * Version of structure. Must always be first member. + */ + NvU32 version; + + /*! + * Number of illumination zones present. + */ + NvU32 numIllumZones; + + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + NV_GPU_CLIENT_ILLUM_ZONE_INFO_V1 zones[NV_GPU_CLIENT_ILLUM_ZONE_NUM_ZONES_MAX]; +} NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS_V1; + +#define NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS_VER_1 MAKE_NVAPI_VERSION(NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS_V1, 1) +#define NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS_VER NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS_VER_1 +typedef NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS_V1 NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS; + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_ClientIllumZonesGetInfo +//! +//! DESCRIPTION: This API returns static information about illumination zones on the +//! given GPU. +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 400 +//! \param [in] hPhysicalGpu The physical GPU handle +//! \param [out] pIllumZonesInfo Pointer to structure containing static +//! information about illumination devices. +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ClientIllumZonesGetInfo(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_CLIENT_ILLUM_ZONE_INFO_PARAMS *pIllumZonesInfo); + + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB + * Parameters required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL_RGB. 
+ */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB_PARAMS +{ + /*! + * Red component of color applied to the zone. + */ + NvU8 colorR; + + /*! + * Green component of color applied to the zone. + */ + NvU8 colorG; + + /*! + * Blue component of color applied to the zone. + */ + NvU8 colorB; + + /*! + * Brightness percentage value of the zone. + */ + NvU8 brightnessPct; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB_PARAMS; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGB + * Data required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL_RGB. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL_RGB. + */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB_PARAMS rgbParams; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_RGB + * Data required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGB. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR +{ + /*! + * Type of cycle effect to apply. + */ + NV_GPU_CLIENT_ILLUM_PIECEWISE_LINEAR_CYCLE_TYPE cycleType; + + /*! + * Number of times to repeat function within group period. + */ + NvU8 grpCount; + + /*! + * Time in ms to transition from color A to color B. + */ + NvU16 riseTimems; + + /*! + * Time in ms to transition from color B to color A. + */ + NvU16 fallTimems; + + /*! + * Time in ms to remain at color A before color A to color B transition. + */ + NvU16 ATimems; + + /*! + * Time in ms to remain at color B before color B to color A transition. + */ + NvU16 BTimems; + + /*! + * Time in ms to remain idle before next group of repeated function cycles. + */ + NvU16 grpIdleTimems; + + /*! + * Time in ms to offset the cycle relative to other zones. 
+ */ + NvU16 phaseOffsetms; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGB + * Data required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGB. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_RGB +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGB. + */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB_PARAMS rgbParams[NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_COLOR_ENDPOINTS]; + + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR piecewiseLinearData; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_RGB; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_V1 + * Describes the control data for illumination zone of type + * \ref NV_GPU_CLIENT_ILLUM_ZONE_TYPE_RGB. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGB +{ + /*! + * Union of illumination zone control data for zone of type NV_GPU_CLIENT_ILLUM_ZONE_TYPE_RGB. + * Interpreted as per ctrlMode. + */ + union + { + // + // Need to be careful when add/expanding types in this union. If any type + // exceeds sizeof(rsvd) then rsvd has failed its purpose. + // + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGB manualRGB; + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_RGB piecewiseLinearRGB; + + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + } data; + + /*! + * Reserved for future. + */ + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGB; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED + * Parameters required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_MANUAL_RGB. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED_PARAMS +{ + /*! + * Brightness percentage value of the zone. 
+ */ + NvU8 brightnessPct; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED_PARAMS; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_COLOR_FIXED + * Data required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL_RGB. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL_RGB. + */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED_PARAMS colorFixedParams; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_COLOR_FIXED + * Data required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGB. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_COLOR_FIXED +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGB. + */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED_PARAMS colorFixedParams[NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_COLOR_ENDPOINTS]; + + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR piecewiseLinearData; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_COLOR_FIXED; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_V1 + * Describes the control data for illum zone of type + * \ref NV_GPU_CLIENT_ILLUM_ZONE_TYPE_COLOR_FIXED. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_COLOR_FIXED +{ + /*! + * Union of illum zone control data for zone of type NV_GPU_CLIENT_ILLUM_ZONE_TYPE_COLOR_FIXED. + * Interpreted as per ctrlMode. + */ + union + { + // + // Need to be careful when add/expanding types in this union. If any type + // exceeds sizeof(rsvd) then rsvd has failed its purpose. 
+ // + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_COLOR_FIXED manualColorFixed; + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_COLOR_FIXED piecewiseLinearColorFixed; + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + } data; + + /*! + * Reserved for future. + */ + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_COLOR_FIXED; + +/*! + * Used in \ref NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW + * Parameters required to represent control mode of type + * \ref NV_GPU_CLIENT_ILLUM_CTRL_MODE_MANUAL_RGBW. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW_PARAMS +{ + /*! + * Red component of color applied to the zone. + */ + NvU8 colorR; + + /*! + * Green component of color applied to the zone. + */ + NvU8 colorG; + + /*! + * Blue component of color applied to the zone. + */ + NvU8 colorB; + + /*! + * White component of color applied to the zone. + */ + NvU8 colorW; + + /*! + * Brightness percentage value of the zone. + */ + NvU8 brightnessPct; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW_PARAMS; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_DATA_RGBW + * Data required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_MANUAL_RGBW. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_MANUAL_RGBW. + */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW_PARAMS rgbwParams; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_DATA_RGBW + * Data required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGBW. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_RGBW +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_RGBW. 
+ */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW_PARAMS rgbwParams[NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_COLOR_ENDPOINTS]; + + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR piecewiseLinearData; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_RGBW; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_V1 + * Describes the control data for illum zone of type + * \ref NV_GPU_ILLUM_ZONE_TYPE_RGBW. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGBW +{ + /*! + * Union of illum zone control data for zone of type NV_GPU_ILLUM_ZONE_TYPE_RGBW. + * Interpreted as per ctrlMode. + */ + union + { + // + // Need to be careful when add/expanding types in this union. If any type + // exceeds sizeof(rsvd) then rsvd has failed its purpose. + // + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_RGBW manualRGBW; + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_RGBW piecewiseLinearRGBW; + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + } data; + + /*! + * Reserved for future. + */ + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGBW; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR + * Parameters required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_MANUAL_SINGLE_COLOR. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR_PARAMS +{ + /*! + * Brightness percentage value of the zone. + */ + NvU8 brightnessPct; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR_PARAMS; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_DATA_SINGLE_COLOR + * Data required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_MANUAL_SINGLE_COLOR. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_MANUAL_SINGLE_COLOR. 
+ */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR_PARAMS singleColorParams; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_DATA_SINGLE_COLOR + * Data required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_SINGLE_COLOR. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_SINGLE_COLOR +{ + /*! + * Parameters required to represent control mode of type + * \ref NV_GPU_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_SINGLE_COLOR. + */ + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR_PARAMS singleColorParams[NV_GPU_CLIENT_ILLUM_CTRL_MODE_PIECEWISE_LINEAR_COLOR_ENDPOINTS]; + + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR piecewiseLinearData; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_SINGLE_COLOR; + +/*! + * Used in \ref NV_GPU_ILLUM_ZONE_CONTROL_V1 + * Describes the control data for illum zone of type + * \ref NV_GPU_ILLUM_ZONE_TYPE_SINGLE_COLOR. + */ +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_SINGLE_COLOR +{ + /*! + * Union of illum zone control data for zone of type NV_GPU_ILLUM_ZONE_TYPE_SINGLE_COLOR. + * Interpreted as per ctrlMode. + */ + union + { + // + // Need to be careful when add/expanding types in this union. If any type + // exceeds sizeof(rsvd) then rsvd has failed its purpose. + // + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_MANUAL_SINGLE_COLOR manualSingleColor; + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_PIECEWISE_LINEAR_SINGLE_COLOR piecewiseLinearSingleColor; + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + } data; + + /*! + * Reserved for future. 
+ */ + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_SINGLE_COLOR; + +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_V1 +{ + NV_GPU_CLIENT_ILLUM_ZONE_TYPE type; + NV_GPU_CLIENT_ILLUM_CTRL_MODE ctrlMode; + union + { + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGB rgb; + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_COLOR_FIXED colorFixed; + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_RGBW rgbw; + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_DATA_SINGLE_COLOR singleColor; + NvU8 rsvd[64]; + } data; + NvU8 rsvd[64]; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_V1; + +typedef struct _NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS_V1 +{ + NvU32 version; + + /*! + * Bit field specifying the set of values to retrieve or set + * - default (NV_TRUE) + * - currently active (NV_FALSE). + */ + NvU32 bDefault : 1; + NvU32 rsvdField : 31; + + /*! + * Number of illumination zones present. + */ + NvU32 numIllumZonesControl; + + /*! + * Reserved bytes for possible future extension of this struct. + */ + NvU8 rsvd[64]; + + NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_V1 zones[NV_GPU_CLIENT_ILLUM_ZONE_NUM_ZONES_MAX]; +} NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS_V1; + +#define NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS_VER_1 MAKE_NVAPI_VERSION(NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS_V1, 1) +#define NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS_VER NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS_VER_1 +typedef NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS_V1 NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS; + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_ClientIllumZonesGetControl +//! +//! DESCRIPTION: Accessor for control information about illumination zones on the +//! given GPU. +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 400 +//! \param [in] hPhysicalGpu The physical GPU handle +//! \param [out] pIllumZonesControl Pointer to structure containing control +//! information about illumination zones. +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ClientIllumZonesGetControl(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS *pIllumZonesControl); + +/////////////////////////////////////////////////////////////////////////////// +//! +//! FUNCTION NAME: NvAPI_GPU_ClientIllumZonesSetControl +//! +//! DESCRIPTION: Mutator for control information about illumination zones on the +//! given GPU. +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 400 +//! \param [in] hPhysicalGpu The physical GPU handle +//! \param [out] pIllumZonesControl Pointer to structure containing control +//! information about illumination zones. +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_ClientIllumZonesSetControl(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_CLIENT_ILLUM_ZONE_CONTROL_PARAMS *pIllumZonesControl); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Event_RegisterCallback +// +//! DESCRIPTION: This API registers the process for events. This API should be called for each eventcallback. +//! The handle returned to the client will be common across all eventCallbacks. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] eventCallback Pointer to NV_EVENT_REGISTER_CALLBACK structure to call +//! on new events +//! \param [out] phClient Handle to client for use with +//! unregister function +//! +//! 
\retval ::NVAPI_OK - completed request +//! \retval ::NVAPI_API_NOT_INTIALIZED - NvAPI not initialized +//! \retval ::NVAPI_INVALID_ARGUMENT - Invalid argument +//! \retval ::NVAPI_ERROR - miscellaneous error occurred +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Event_RegisterCallback(PNV_EVENT_REGISTER_CALLBACK eventCallback, + NvEventHandle* phClient); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Event_UnregisterCallback +// +//! DESCRIPTION: This API unregister an event handle. +//! This API should be called only once per process(irrespective of the number of callbacks registered). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hClient Handle associated with this listeners +//! event queue. Same as returned from +//! NvAPI_Event_RegisterCallback(). +//! +//! \retval ::NVAPI_OK - completed request +//! \retval ::NVAPI_API_NOT_INTIALIZED - NvAPI not initialized +//! \retval ::NVAPI_INVALID_ARGUMENT - Invalid argument +//! \retval ::NVAPI_ERROR - miscellaneous error occurred +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Event_UnregisterCallback(NvEventHandle hClient); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_EnumNvidiaDisplayHandle +// +//! This function returns the handle of the NVIDIA display specified by the enum +//! index (thisEnum). The client should keep enumerating until it +//! returns error. +//! +//! Note: Display handles can get invalidated on a modeset, so the calling applications need to +//! renum the handles after every modeset. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \param [in] thisEnum The index of the NVIDIA display. +//! 
\param [out] pNvDispHandle Pointer to the NVIDIA display handle. +//! +//! \retval NVAPI_INVALID_ARGUMENT Either the handle pointer is NULL or enum index too big +//! \retval NVAPI_OK Return a valid NvDisplayHandle based on the enum index +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA device found in the system +//! \retval NVAPI_END_ENUMERATION No more display device to enumerate +//! \ingroup disphandle +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_EnumNvidiaDisplayHandle(NvU32 thisEnum, NvDisplayHandle *pNvDispHandle); + + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_EnumNvidiaUnAttachedDisplayHandle +// +//! This function returns the handle of the NVIDIA unattached display specified by the enum +//! index (thisEnum). The client should keep enumerating until it +//! returns error. +//! Note: Display handles can get invalidated on a modeset, so the calling applications need to +//! renum the handles after every modeset. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \param [in] thisEnum The index of the NVIDIA display. +//! \param [out] pNvUnAttachedDispHandle Pointer to the NVIDIA display handle of the unattached display. +//! +//! \retval NVAPI_INVALID_ARGUMENT Either the handle pointer is NULL or enum index too big +//! \retval NVAPI_OK Return a valid NvDisplayHandle based on the enum index +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA device found in the system +//! \retval NVAPI_END_ENUMERATION No more display device to enumerate. +//! 
\ingroup disphandle +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_EnumNvidiaUnAttachedDisplayHandle(NvU32 thisEnum, NvUnAttachedDisplayHandle *pNvUnAttachedDispHandle); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_CreateDisplayFromUnAttachedDisplay +// +//! This function converts the unattached display handle to an active attached display handle. +//! +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT hNvUnAttachedDisp is not valid or pNvDisplay is NULL. +//! \retval NVAPI_OK One or more handles were returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_CreateDisplayFromUnAttachedDisplay(NvUnAttachedDisplayHandle hNvUnAttachedDisp, NvDisplayHandle *pNvDisplay); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetAssociatedNVidiaDisplayHandle +// +//! This function returns the handle of the NVIDIA display that is associated +//! with the given display "name" (such as "\\.\DISPLAY1"). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT Either argument is NULL +//! \retval NVAPI_OK *pNvDispHandle is now valid +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA device maps to that display name +//! 
\ingroup disphandle +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetAssociatedNvidiaDisplayHandle(const char *szDisplayName, NvDisplayHandle *pNvDispHandle); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_GetAssociatedUnAttachedNvidiaDisplayHandle +// +//! DESCRIPTION: This function returns the handle of an unattached NVIDIA display that is +//! associated with the given display name (such as "\\DISPLAY1"). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \retval ::NVAPI_INVALID_ARGUMENT Either argument is NULL. +//! \retval ::NVAPI_OK *pNvUnAttachedDispHandle is now valid. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA device maps to that display name. +//! +//! \ingroup disphandle +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetAssociatedUnAttachedNvidiaDisplayHandle(const char *szDisplayName, NvUnAttachedDisplayHandle *pNvUnAttachedDispHandle); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetAssociatedNVidiaDisplayName +// +//! For a given NVIDIA display handle, this function returns a string (such as "\\.\DISPLAY1") to identify the display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT Either argument is NULL +//! \retval NVAPI_OK *pNvDispHandle is now valid +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA device maps to that display name +//! 
\ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetAssociatedNvidiaDisplayName(NvDisplayHandle NvDispHandle, NvAPI_ShortString szDisplayName); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetUnAttachedAssociatedDisplayName +// +//! This function returns the display name given, for example, "\\DISPLAY1", using the unattached NVIDIA display handle +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 95 +//! +//! \retval NVAPI_INVALID_ARGUMENT Either argument is NULL +//! \retval NVAPI_OK *pNvDispHandle is now valid +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA device maps to that display name +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetUnAttachedAssociatedDisplayName(NvUnAttachedDisplayHandle hNvUnAttachedDisp, NvAPI_ShortString szDisplayName); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_EnableHWCursor +// +//! This function enables hardware cursor support +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! +//! \since Release: 80 +//! +//! \return NVAPI_ERROR or NVAPI_OK +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_EnableHWCursor(NvDisplayHandle hNvDisplay); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DisableHWCursor +// +//! This function disables hardware cursor support +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \since Release: 80 +//! +//! \return NVAPI_ERROR or NVAPI_OK +//! 
\ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DisableHWCursor(NvDisplayHandle hNvDisplay); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetVBlankCounter +// +//! This function gets the V-blank counter +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 80 +//! +//! \return NVAPI_ERROR or NVAPI_OK +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetVBlankCounter(NvDisplayHandle hNvDisplay, NvU32 *pCounter); + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_SetRefreshRateOverride +// +//! This function overrides the refresh rate on the given display/outputsMask. +//! The new refresh rate can be applied right away in this API call or deferred to be applied with the +//! next OS modeset. The override is good for only one modeset (regardless whether it's deferred or immediate). +//! +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \since Release: 80 +//! +//! \param [in] hNvDisplay The NVIDIA display handle. It can be NVAPI_DEFAULT_HANDLE or a handle +//! enumerated from NvAPI_EnumNVidiaDisplayHandle(). +//! \param [in] outputsMask A set of bits that identify all target outputs which are associated with the NVIDIA +//! display handle to apply the refresh rate override. When SLI is enabled, the +//! outputsMask only applies to the GPU that is driving the display output. +//! \param [in] refreshRate The override value. "0.0" means cancel the override. +//! \param [in] bSetDeferred +//! - "0": Apply the refresh rate override immediately in this API call.\p +//! - "1": Apply refresh rate at the next OS modeset. +//! +//! \retval NVAPI_INVALID_ARGUMENT hNvDisplay or outputsMask is invalid +//! 
\retval NVAPI_OK The refresh rate override is correct set +//! \retval NVAPI_ERROR The operation failed +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SetRefreshRateOverride(NvDisplayHandle hNvDisplay, NvU32 outputsMask, float refreshRate, NvU32 bSetDeferred); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetAssociatedDisplayOutputId +// +//! This function gets the active outputId associated with the display handle. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 90 +//! +//! \param [in] hNvDisplay NVIDIA Display selection. It can be NVAPI_DEFAULT_HANDLE or a handle enumerated from NvAPI_EnumNVidiaDisplayHandle(). +//! \param [out] outputId The active display output ID associated with the selected display handle hNvDisplay. +//! The outputid will have only one bit set. In the case of Clone or Span mode, this will indicate the +//! display outputId of the primary display that the GPU is driving. See \ref handles. +//! +//! \retval NVAPI_OK Call successful. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_EXPECTED_DISPLAY_HANDLE hNvDisplay is not a valid display handle. +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetAssociatedDisplayOutputId(NvDisplayHandle hNvDisplay, NvU32 *pOutputId); + + +//! \ingroup dispcontrol +//! Used in NvAPI_GetDisplayPortInfo(). 
+typedef struct _NV_DISPLAY_PORT_INFO_V1 +{ + NvU32 version; //!< Structure version + NvU32 dpcd_ver; //!< DPCD version of the monitor + NV_DP_LINK_RATE maxLinkRate; //!< Maximum supported link rate + NV_DP_LANE_COUNT maxLaneCount; //!< Maximum supported lane count + NV_DP_LINK_RATE curLinkRate; //!< Current link rate + NV_DP_LANE_COUNT curLaneCount; //!< Current lane count + NV_DP_COLOR_FORMAT colorFormat; //!< Current color format + NV_DP_DYNAMIC_RANGE dynamicRange; //!< Dynamic range + NV_DP_COLORIMETRY colorimetry; //!< Ignored in RGB space + NV_DP_BPC bpc; //!< Current bit-per-component + NvU32 isDp : 1; //!< If the monitor is driven by a DisplayPort + NvU32 isInternalDp : 1; //!< If the monitor is driven by an NV Dp transmitter + NvU32 isColorCtrlSupported : 1; //!< If the color format change is supported + NvU32 is6BPCSupported : 1; //!< If 6 bpc is supported + NvU32 is8BPCSupported : 1; //!< If 8 bpc is supported + NvU32 is10BPCSupported : 1; //!< If 10 bpc is supported + NvU32 is12BPCSupported : 1; //!< If 12 bpc is supported + NvU32 is16BPCSupported : 1; //!< If 16 bpc is supported + NvU32 isYCrCb420Supported : 1; //!< If YCrCb420 is supported + NvU32 isYCrCb422Supported : 1; //!< If YCrCb422 is supported + NvU32 isYCrCb444Supported : 1; //!< If YCrCb444 is supported + NvU32 isRgb444SupportedOnCurrentMode : 1; //!< If Rgb444 is supported on the current mode + NvU32 isYCbCr444SupportedOnCurrentMode : 1; //!< If YCbCr444 is supported on the current mode + NvU32 isYCbCr422SupportedOnCurrentMode : 1; //!< If YCbCr422 is supported on the current mode + NvU32 isYCbCr420SupportedOnCurrentMode : 1; //!< If YCbCr420 is supported on the current mode + NvU32 is6BPCSupportedOnCurrentMode : 1; // if 6 bpc is supported On Current Mode + NvU32 is8BPCSupportedOnCurrentMode : 1; // if 8 bpc is supported On Current Mode + NvU32 is10BPCSupportedOnCurrentMode : 1; // if 10 bpc is supported On Current Mode + NvU32 is12BPCSupportedOnCurrentMode : 1; // if 12 bpc is supported 
On Current Mode + NvU32 is16BPCSupportedOnCurrentMode : 1; // if 16 bpc is supported On Current Mode + NvU32 isMonxvYCC601Capable : 1; // if xvYCC 601 extended colorimetry is supported + NvU32 isMonxvYCC709Capable : 1; // if xvYCC 709 extended colorimetry is supported + NvU32 isMonsYCC601Capable : 1; // if sYCC601 extended colorimetry is supported + NvU32 isMonAdobeYCC601Capable : 1; // if AdobeYCC601 extended colorimetry is supported + NvU32 isMonAdobeRGBCapable : 1; // if AdobeRGB extended colorimetry is supported + NvU32 isMonBT2020RGBCapable : 1; // if BT2020 RGB extended colorimetry is supported + NvU32 isMonBT2020YCCCapable : 1; // if BT2020 Y'CbCr extended colorimetry is supported + NvU32 isMonBT2020cYCCCapable : 1; // if BT2020 cYCbCr (constant luminance) extended colorimetry is supported + + NvU32 reserved : 4; //!< reserved + } NV_DISPLAY_PORT_INFO_V1; + + typedef NV_DISPLAY_PORT_INFO_V1 NV_DISPLAY_PORT_INFO; + +//! Macro for constructing the version field of NV_DISPLAY_PORT_INFO. +#define NV_DISPLAY_PORT_INFO_VER1 MAKE_NVAPI_VERSION(NV_DISPLAY_PORT_INFO,1) +#define NV_DISPLAY_PORT_INFO_VER2 MAKE_NVAPI_VERSION(NV_DISPLAY_PORT_INFO,2) +#define NV_DISPLAY_PORT_INFO_VER NV_DISPLAY_PORT_INFO_VER2 + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GetDisplayPortInfo +// +//! \fn NvAPI_GetDisplayPortInfo(__in_opt NvDisplayHandle hNvDisplay, __in NvU32 outputId, __inout NV_DISPLAY_PORT_INFO *pInfo) +//! DESCRIPTION: This function returns the current DisplayPort-related information on the specified device (monitor). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 165 +//! +//! \param [in] hvDisplay NVIDIA Display selection. It can be NVAPI_DEFAULT_HANDLE or a handle enumerated from NvAPI_EnumNVidiaDisplayHandle(). +//! This parameter is ignored when the outputId is a NvAPI displayId. +//! \param [in] outputId This can either be the connection bit mask or the NvAPI displayId. 
When the legacy connection bit mask is passed, +//! it should have exactly 1 bit set to indicate a single display. If it's "0" then the default outputId from +//! NvAPI_GetAssociatedDisplayOutputId() will be used. See \ref handles. +//! \param [out] pInfo The DisplayPort information +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! \retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +// +/////////////////////////////////////////////////////////////////////////////// +//! \ingroup dispcontrol +NVAPI_INTERFACE NvAPI_GetDisplayPortInfo(__in_opt NvDisplayHandle hNvDisplay, __in NvU32 outputId, __inout NV_DISPLAY_PORT_INFO *pInfo); + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_SetDisplayPort +// +//! \fn NvAPI_SetDisplayPort(NvDisplayHandle hNvDisplay, NvU32 outputId, NV_DISPLAY_PORT_CONFIG *pCfg) +//! DESCRIPTION: This function sets up DisplayPort-related configurations. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 165 +//! +//! \param [in] hNvDisplay NVIDIA display handle. It can be NVAPI_DEFAULT_HANDLE or a handle enumerated from +//! NvAPI_EnumNVidiaDisplayHandle(). +//! \param [in] outputId This display output ID, when it's "0" it means the default outputId generated from the return of +//! NvAPI_GetAssociatedDisplayOutputId(). See \ref handles. +//! \param [in] pCfg The display port config structure. If pCfg is NULL, it means to use the driver's default value to setup. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! \retval NVAPI_INVALID_ARGUMENT Invalid input parameter +/////////////////////////////////////////////////////////////////////////////// + + +//! \ingroup dispcontrol +//! DisplayPort configuration settings - used in NvAPI_SetDisplayPort(). 
+typedef struct +{ + NvU32 version; //!< Structure version - 2 is the latest + NV_DP_LINK_RATE linkRate; //!< Link rate + NV_DP_LANE_COUNT laneCount; //!< Lane count + NV_DP_COLOR_FORMAT colorFormat; //!< Color format to set + NV_DP_DYNAMIC_RANGE dynamicRange; //!< Dynamic range + NV_DP_COLORIMETRY colorimetry; //!< Ignored in RGB space + NV_DP_BPC bpc; //!< Bit-per-component + NvU32 isHPD : 1; //!< If the control panel is making this call due to HPD + NvU32 isSetDeferred : 1; //!< Requires an OS modeset to finalize the setup if set + NvU32 isChromaLpfOff : 1; //!< Force the chroma low_pass_filter to be off + NvU32 isDitherOff : 1; //!< Force to turn off dither + NvU32 testLinkTrain : 1; //!< If testing mode, skip validation + NvU32 testColorChange : 1; //!< If testing mode, skip validation + +} NV_DISPLAY_PORT_CONFIG; + +//! \addtogroup dispcontrol +//! @{ +//! Macro for constructing the version field of NV_DISPLAY_PORT_CONFIG +#define NV_DISPLAY_PORT_CONFIG_VER MAKE_NVAPI_VERSION(NV_DISPLAY_PORT_CONFIG,2) +//! Macro for constructing the version field of NV_DISPLAY_PORT_CONFIG +#define NV_DISPLAY_PORT_CONFIG_VER_1 MAKE_NVAPI_VERSION(NV_DISPLAY_PORT_CONFIG,1) +//! Macro for constructing the version field of NV_DISPLAY_PORT_CONFIG +#define NV_DISPLAY_PORT_CONFIG_VER_2 MAKE_NVAPI_VERSION(NV_DISPLAY_PORT_CONFIG,2) +//! @} + + +//! \ingroup dispcontrol +NVAPI_INTERFACE NvAPI_SetDisplayPort(NvDisplayHandle hNvDisplay, NvU32 outputId, NV_DISPLAY_PORT_CONFIG *pCfg); + + + + +//! \ingroup dispcontrol +//! Used in NvAPI_GetHDMISupportInfo(). 
+typedef struct _NV_HDMI_SUPPORT_INFO_V1 +{ + NvU32 version; //!< Structure version + + NvU32 isGpuHDMICapable : 1; //!< If the GPU can handle HDMI + NvU32 isMonUnderscanCapable : 1; //!< If the monitor supports underscan + NvU32 isMonBasicAudioCapable : 1; //!< If the monitor supports basic audio + NvU32 isMonYCbCr444Capable : 1; //!< If YCbCr 4:4:4 is supported + NvU32 isMonYCbCr422Capable : 1; //!< If YCbCr 4:2:2 is supported + NvU32 isMonxvYCC601Capable : 1; //!< If xvYCC 601 is supported + NvU32 isMonxvYCC709Capable : 1; //!< If xvYCC 709 is supported + NvU32 isMonHDMI : 1; //!< If the monitor is HDMI (with IEEE's HDMI registry ID) + NvU32 reserved : 24; //!< Reserved. + + NvU32 EDID861ExtRev; //!< Revision number of the EDID 861 extension + } NV_HDMI_SUPPORT_INFO_V1; + +typedef struct _NV_HDMI_SUPPORT_INFO_V2 +{ + NvU32 version; //!< Structure version + + NvU32 isGpuHDMICapable : 1; //!< If the GPU can handle HDMI + NvU32 isMonUnderscanCapable : 1; //!< If the monitor supports underscan + NvU32 isMonBasicAudioCapable : 1; //!< If the monitor supports basic audio + NvU32 isMonYCbCr444Capable : 1; //!< If YCbCr 4:4:4 is supported + NvU32 isMonYCbCr422Capable : 1; //!< If YCbCr 4:2:2 is supported + NvU32 isMonxvYCC601Capable : 1; //!< If xvYCC extended colorimetry 601 is supported + NvU32 isMonxvYCC709Capable : 1; //!< If xvYCC extended colorimetry 709 is supported + NvU32 isMonHDMI : 1; //!< If the monitor is HDMI (with IEEE's HDMI registry ID) + NvU32 isMonsYCC601Capable : 1; //!< if sYCC601 extended colorimetry is supported + NvU32 isMonAdobeYCC601Capable : 1; //!< if AdobeYCC601 extended colorimetry is supported + NvU32 isMonAdobeRGBCapable : 1; //!< if AdobeRGB extended colorimetry is supported + NvU32 reserved : 21; //!< Reserved. 
+ + NvU32 EDID861ExtRev; //!< Revision number of the EDID 861 extension + } NV_HDMI_SUPPORT_INFO_V2; + +#define NV_HDMI_SUPPORT_INFO_VER1 MAKE_NVAPI_VERSION(NV_HDMI_SUPPORT_INFO_V1, 1) +#define NV_HDMI_SUPPORT_INFO_VER2 MAKE_NVAPI_VERSION(NV_HDMI_SUPPORT_INFO_V2, 2) + + + +#ifndef NV_HDMI_SUPPORT_INFO_VER + +typedef NV_HDMI_SUPPORT_INFO_V2 NV_HDMI_SUPPORT_INFO; +#define NV_HDMI_SUPPORT_INFO_VER NV_HDMI_SUPPORT_INFO_VER2 + +#endif + + +//! SUPPORTED OS: Windows 10 and higher +//! +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GetHDMISupportInfo +// +//! \fn NvAPI_GetHDMISupportInfo(__in_opt NvDisplayHandle hNvDisplay, __in NvU32 outputId, __inout NV_HDMI_SUPPORT_INFO *pInfo) +//! This API returns the current infoframe data on the specified device(monitor). +//! +//! \since Release: 95 +//! +//! \param [in] hvDisplay NVIDIA Display selection. It can be NVAPI_DEFAULT_HANDLE or a handle enumerated from NvAPI_EnumNVidiaDisplayHandle(). +//! This parameter is ignored when the outputId is a NvAPI displayId. +//! \param [in] outputId This can either be the connection bit mask or the NvAPI displayId. When the legacy connection bit mask is passed, +//! it should have exactly 1 bit set to indicate a single display. If it's "0" then the default outputId from +//! NvAPI_GetAssociatedDisplayOutputId() will be used. See \ref handles. +//! \param [out] pInfo The monitor and GPU's HDMI support info +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! \retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +/////////////////////////////////////////////////////////////////////////////// + + +//! \ingroup dispcontrol +NVAPI_INTERFACE NvAPI_GetHDMISupportInfo(__in_opt NvDisplayHandle hNvDisplay, __in NvU32 outputId, __inout NV_HDMI_SUPPORT_INFO *pInfo); + + +//! 
\ingroup dispcontrol + +typedef enum +{ + NV_INFOFRAME_CMD_GET_DEFAULT = 0, //!< Returns the fields in the infoframe with values set by the manufacturer - NVIDIA/OEM. + NV_INFOFRAME_CMD_RESET, //!< Sets the fields in the infoframe to auto, and infoframe to the default infoframe for use in a set. + NV_INFOFRAME_CMD_GET, //!< Get the current infoframe state. + NV_INFOFRAME_CMD_SET, //!< Set the current infoframe state (flushed to the monitor), the values are one time and do not persist. + NV_INFOFRAME_CMD_GET_OVERRIDE, //!< Get the override infoframe state, non-override fields will be set to value = AUTO, overridden fields will have the current override values. + NV_INFOFRAME_CMD_SET_OVERRIDE, //!< Set the override infoframe state, non-override fields will be set to value = AUTO, other values indicate override; persist across modeset/reboot + NV_INFOFRAME_CMD_GET_PROPERTY, //!< get properties associated with infoframe (each of the infoframe type will have properties) + NV_INFOFRAME_CMD_SET_PROPERTY, //!< set properties associated with infoframe +} NV_INFOFRAME_CMD; + + +typedef enum +{ + NV_INFOFRAME_PROPERTY_MODE_AUTO = 0, //!< Driver determines whether to send infoframes. + NV_INFOFRAME_PROPERTY_MODE_ENABLE, //!< Driver always sends infoframe. + NV_INFOFRAME_PROPERTY_MODE_DISABLE, //!< Driver never sends infoframe. + NV_INFOFRAME_PROPERTY_MODE_ALLOW_OVERRIDE, //!< Driver only sends infoframe when client requests it via infoframe escape call. +} NV_INFOFRAME_PROPERTY_MODE; + + +//! Returns whether the current monitor is in blacklist or force this monitor to be in blacklist. +typedef enum +{ + NV_INFOFRAME_PROPERTY_BLACKLIST_FALSE = 0, + NV_INFOFRAME_PROPERTY_BLACKLIST_TRUE, +} NV_INFOFRAME_PROPERTY_BLACKLIST; + +typedef struct +{ + NvU32 mode : 4; + NvU32 blackList : 2; + NvU32 reserved : 10; + NvU32 version : 8; + NvU32 length : 8; +} NV_INFOFRAME_PROPERTY; + +//! 
Byte1 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_SCANINFO_NODATA = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_SCANINFO_OVERSCAN, + NV_INFOFRAME_FIELD_VALUE_AVI_SCANINFO_UNDERSCAN, + NV_INFOFRAME_FIELD_VALUE_AVI_SCANINFO_FUTURE, + NV_INFOFRAME_FIELD_VALUE_AVI_SCANINFO_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_SCANINFO; + + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_BARDATA_NOT_PRESENT = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_BARDATA_VERTICAL_PRESENT, + NV_INFOFRAME_FIELD_VALUE_AVI_BARDATA_HORIZONTAL_PRESENT, + NV_INFOFRAME_FIELD_VALUE_AVI_BARDATA_BOTH_PRESENT, + NV_INFOFRAME_FIELD_VALUE_AVI_BARDATA_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_BARDATA; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_AFI_ABSENT = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_AFI_PRESENT, + NV_INFOFRAME_FIELD_VALUE_AVI_AFI_AUTO = 3 +} NV_INFOFRAME_FIELD_VALUE_AVI_ACTIVEFORMATINFO; + + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_COLORFORMAT_RGB = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORFORMAT_YCbCr422, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORFORMAT_YCbCr444, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORFORMAT_FUTURE, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORFORMAT_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_COLORFORMAT; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_F17_FALSE = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_F17_TRUE, + NV_INFOFRAME_FIELD_VALUE_AVI_F17_AUTO = 3 +} NV_INFOFRAME_FIELD_VALUE_AVI_F17; + +//! 
Byte2 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_NO_AFD = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_RESERVE01, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_RESERVE02, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_RESERVE03, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_LETTERBOX_GT16x9, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_RESERVE05, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_RESERVE06, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_RESERVE07, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_EQUAL_CODEDFRAME = 8, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_CENTER_4x3, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_CENTER_16x9, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_CENTER_14x9, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_RESERVE12, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_4x3_ON_14x9, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_16x9_ON_14x9, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_16x9_ON_4x3, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION_AUTO = 31, +} NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOACTIVEPORTION; + + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOCODEDFRAME_NO_DATA = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOCODEDFRAME_4x3, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOCODEDFRAME_16x9, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOCODEDFRAME_FUTURE, + NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOCODEDFRAME_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_ASPECTRATIOCODEDFRAME; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_COLORIMETRY_NO_DATA = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORIMETRY_SMPTE_170M, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORIMETRY_ITUR_BT709, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORIMETRY_USE_EXTENDED_COLORIMETRY, + NV_INFOFRAME_FIELD_VALUE_AVI_COLORIMETRY_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_COLORIMETRY; + +//! 
Byte 3 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_NONUNIFORMPICTURESCALING_NO_DATA = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_NONUNIFORMPICTURESCALING_HORIZONTAL, + NV_INFOFRAME_FIELD_VALUE_AVI_NONUNIFORMPICTURESCALING_VERTICAL, + NV_INFOFRAME_FIELD_VALUE_AVI_NONUNIFORMPICTURESCALING_BOTH, + NV_INFOFRAME_FIELD_VALUE_AVI_NONUNIFORMPICTURESCALING_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_NONUNIFORMPICTURESCALING; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_RGBQUANTIZATION_DEFAULT = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_RGBQUANTIZATION_LIMITED_RANGE, + NV_INFOFRAME_FIELD_VALUE_AVI_RGBQUANTIZATION_FULL_RANGE, + NV_INFOFRAME_FIELD_VALUE_AVI_RGBQUANTIZATION_RESERVED, + NV_INFOFRAME_FIELD_VALUE_AVI_RGBQUANTIZATION_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_RGBQUANTIZATION; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_XVYCC601 = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_XVYCC709, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_SYCC601, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_ADOBEYCC601, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_ADOBERGB, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_RESERVED05, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_RESERVED06, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_RESERVED07, + NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY_AUTO = 15 +} NV_INFOFRAME_FIELD_VALUE_AVI_EXTENDEDCOLORIMETRY; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_ITC_VIDEO_CONTENT = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_ITC_ITCONTENT, + NV_INFOFRAME_FIELD_VALUE_AVI_ITC_AUTO = 3 +} NV_INFOFRAME_FIELD_VALUE_AVI_ITC; + +//! 
Byte 4 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_NONE = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X02, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X03, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X04, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X05, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X06, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X07, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X08, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X09, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_X10, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_RESERVED10, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_RESERVED11, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_RESERVED12, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_RESERVED13, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_RESERVED14, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_RESERVED15, + NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION_AUTO = 31 +} NV_INFOFRAME_FIELD_VALUE_AVI_PIXELREPETITION; + + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_CONTENTTYPE_GRAPHICS = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_CONTENTTYPE_PHOTO, + NV_INFOFRAME_FIELD_VALUE_AVI_CONTENTTYPE_CINEMA, + NV_INFOFRAME_FIELD_VALUE_AVI_CONTENTTYPE_GAME, + NV_INFOFRAME_FIELD_VALUE_AVI_CONTENTTYPE_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_CONTENTTYPE; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AVI_YCCQUANTIZATION_LIMITED_RANGE = 0, + NV_INFOFRAME_FIELD_VALUE_AVI_YCCQUANTIZATION_FULL_RANGE, + NV_INFOFRAME_FIELD_VALUE_AVI_YCCQUANTIZATION_RESERVED02, + NV_INFOFRAME_FIELD_VALUE_AVI_YCCQUANTIZATION_RESERVED03, + NV_INFOFRAME_FIELD_VALUE_AVI_YCCQUANTIZATION_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AVI_YCCQUANTIZATION; + +//! 
Adding an Auto bit to each field +typedef struct +{ + NvU32 vic : 8; + NvU32 pixelRepeat : 5; + NvU32 colorSpace : 3; + NvU32 colorimetry : 3; + NvU32 extendedColorimetry : 4; + NvU32 rgbQuantizationRange : 3; + NvU32 yccQuantizationRange : 3; + NvU32 itContent : 2; + NvU32 contentTypes : 3; + NvU32 scanInfo : 3; + NvU32 activeFormatInfoPresent : 2; + NvU32 activeFormatAspectRatio : 5; + NvU32 picAspectRatio : 3; + NvU32 nonuniformScaling : 3; + NvU32 barInfo : 3; + NvU32 top_bar : 17; + NvU32 bottom_bar : 17; + NvU32 left_bar : 17; + NvU32 right_bar : 17; + NvU32 Future17 : 2; + NvU32 Future47 : 2; +} NV_INFOFRAME_VIDEO; + +//! Byte 1 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_IN_HEADER = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_2, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_3, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_4, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_5, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_6, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_7, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_8, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT_AUTO = 15 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELCOUNT; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_IN_HEADER = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_PCM, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_AC3, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_MPEG1, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_MP3, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_MPEG2, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_AACLC, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_DTS, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_ATRAC, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_DSD, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_EAC3, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_DTSHD, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_MLP, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_DST, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_WMAPRO, + 
NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_USE_CODING_EXTENSION_TYPE, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE_AUTO = 31 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGTYPE; + +//! Byte 2 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLESIZE_IN_HEADER = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLESIZE_16BITS, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLESIZE_20BITS, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLESIZE_24BITS, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLESIZE_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLESIZE; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_IN_HEADER = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_32000HZ, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_44100HZ, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_48000HZ, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_88200KHZ, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_96000KHZ, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_176400KHZ, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_192000KHZ, + NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY_AUTO = 15 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_SAMPLEFREQUENCY; + + + +//! 
Byte 3 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_USE_CODING_TYPE = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_HEAAC, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_HEAACV2, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_MPEGSURROUND, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE04, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE05, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE06, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE07, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE08, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE09, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE10, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE11, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE12, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE13, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE14, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE15, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE16, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE17, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE18, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE19, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE20, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE21, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE22, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE23, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE24, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE25, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE26, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE27, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE28, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE29, + 
NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE30, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_RESERVE31, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE_AUTO = 63 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_CODINGEXTENSIONTYPE; + + +//! Byte 4 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_X_X_X_FR_FL =0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_X_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_X_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_X_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_RC_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_RC_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_RC_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_X_RC_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_RR_RL_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_RR_RL_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_X_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_RC_RR_RL_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_RC_RR_RL_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_RC_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_RC_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_RRC_RLC_RR_RL_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_RRC_RLC_RR_RL_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_RRC_RLC_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_RRC_RLC_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_X_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_X_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_X_FC_X_FR_FL, + 
NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_X_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_RC_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_RC_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_RC_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_X_RC_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_RR_RL_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_RR_RL_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRC_FLC_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_FCH_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_X_FCH_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_TC_X_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_TC_X_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRH_FLH_RR_RL_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRH_FLH_RR_RL_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRW_FLW_RR_RL_X_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRW_FLW_RR_RL_X_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_TC_RC_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_TC_RC_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FCH_RC_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FCH_RC_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_TC_FCH_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_TC_FCH_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRH_FLH_RR_RL_FC_X_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRH_FLH_RR_RL_FC_LFE_FR_FL, + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRW_FLW_RR_RL_FC_X_FR_FL, + 
NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_FRW_FLW_RR_RL_FC_LFE_FR_FL = 0X31, + // all other values should default to auto + NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION_AUTO = 0x1FF +} NV_INFOFRAME_FIELD_VALUE_AUDIO_CHANNELALLOCATION; + +//! Byte 5 related +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_LFEPLAYBACKLEVEL_NO_DATA = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LFEPLAYBACKLEVEL_0DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LFEPLAYBACKLEVEL_PLUS10DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LFEPLAYBACKLEVEL_RESERVED03, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LFEPLAYBACKLEVEL_AUTO = 7 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_LFEPLAYBACKLEVEL; + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_0DB = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_1DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_2DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_3DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_4DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_5DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_6DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_7DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_8DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_9DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_10DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_11DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_12DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_13DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_14DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_15DB, + NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES_AUTO = 31 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_LEVELSHIFTVALUES; + + +typedef enum +{ + NV_INFOFRAME_FIELD_VALUE_AUDIO_DOWNMIX_PERMITTED = 0, + NV_INFOFRAME_FIELD_VALUE_AUDIO_DOWNMIX_PROHIBITED, + NV_INFOFRAME_FIELD_VALUE_AUDIO_DOWNMIX_AUTO = 3 +} NV_INFOFRAME_FIELD_VALUE_AUDIO_DOWNMIX; + +typedef struct +{ + NvU32 codingType : 5; + NvU32 codingExtensionType : 6; + NvU32 sampleSize : 3; + NvU32 
sampleRate : 4; + NvU32 channelCount : 4; + NvU32 speakerPlacement : 9; + NvU32 downmixInhibit : 2; + NvU32 lfePlaybackLevel : 3; + NvU32 levelShift : 5; + NvU32 Future12 : 2; + NvU32 Future2x : 4; + NvU32 Future3x : 4; + NvU32 Future52 : 2; + NvU32 Future6 : 9; + NvU32 Future7 : 9; + NvU32 Future8 : 9; + NvU32 Future9 : 9; + NvU32 Future10 : 9; +} NV_INFOFRAME_AUDIO; + +typedef struct +{ + NvU32 version; //!< version of this structure + NvU16 size; //!< size of this structure + NvU8 cmd; //!< The actions to perform from NV_INFOFRAME_CMD + NvU8 type; //!< type of infoframe + + union + { + NV_INFOFRAME_PROPERTY property; //!< This is NVIDIA-specific and corresponds to the property cmds and associated infoframe. + NV_INFOFRAME_AUDIO audio; + NV_INFOFRAME_VIDEO video; + } infoframe; +} NV_INFOFRAME_DATA; + +//! Macro for constructing the version field of ::NV_INFOFRAME_DATA +#define NV_INFOFRAME_DATA_VER MAKE_NVAPI_VERSION(NV_INFOFRAME_DATA,1) + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_Disp_InfoFrameControl +// +//! DESCRIPTION: This API controls the InfoFrame values. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Monitor Identifier +//! \param [in,out] pInfoframeData Contains data corresponding to InfoFrame +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_InfoFrameControl(__in NvU32 displayId, __inout NV_INFOFRAME_DATA *pInfoframeData); + + + + + + +//! \ingroup dispcontrol +//! @{ +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_Disp_ColorControl +// +//! \fn NvAPI_Disp_ColorControl(NvU32 displayId, NV_COLOR_DATA *pColorData) +//! 
DESCRIPTION: This API controls the Color values. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Monitor Identifier +//! \param [in,out] pColorData Contains data corresponding to color information +//! +//! \return RETURN STATUS: +//! ::NVAPI_OK, +//! ::NVAPI_ERROR, +//! ::NVAPI_INVALID_ARGUMENT +// +/////////////////////////////////////////////////////////////////////////////// + +typedef enum +{ + NV_COLOR_CMD_GET = 1, + NV_COLOR_CMD_SET, + NV_COLOR_CMD_IS_SUPPORTED_COLOR, + NV_COLOR_CMD_GET_DEFAULT +} NV_COLOR_CMD; + +//! See Table 14 of CEA-861E. Not all of this is supported by the GPU. +typedef enum +{ + NV_COLOR_FORMAT_RGB = 0, + NV_COLOR_FORMAT_YUV422, + NV_COLOR_FORMAT_YUV444, + NV_COLOR_FORMAT_YUV420, + + NV_COLOR_FORMAT_DEFAULT = 0xFE, + NV_COLOR_FORMAT_AUTO = 0xFF +} NV_COLOR_FORMAT; + + + +typedef enum +{ + NV_COLOR_COLORIMETRY_RGB = 0, + NV_COLOR_COLORIMETRY_YCC601, + NV_COLOR_COLORIMETRY_YCC709, + NV_COLOR_COLORIMETRY_XVYCC601, + NV_COLOR_COLORIMETRY_XVYCC709, + NV_COLOR_COLORIMETRY_SYCC601, + NV_COLOR_COLORIMETRY_ADOBEYCC601, + NV_COLOR_COLORIMETRY_ADOBERGB, + NV_COLOR_COLORIMETRY_BT2020RGB, + NV_COLOR_COLORIMETRY_BT2020YCC, + NV_COLOR_COLORIMETRY_BT2020cYCC, + + NV_COLOR_COLORIMETRY_DEFAULT = 0xFE, + NV_COLOR_COLORIMETRY_AUTO = 0xFF +} NV_COLOR_COLORIMETRY; + +typedef enum _NV_DYNAMIC_RANGE +{ + NV_DYNAMIC_RANGE_VESA = 0x0, + NV_DYNAMIC_RANGE_CEA = 0x1, + + NV_DYNAMIC_RANGE_AUTO = 0xFF +} NV_DYNAMIC_RANGE; + +typedef enum _NV_BPC +{ + NV_BPC_DEFAULT = 0, + NV_BPC_6 = 1, + NV_BPC_8 = 2, + NV_BPC_10 = 3, + NV_BPC_12 = 4, + NV_BPC_16 = 5, +} NV_BPC; + +typedef enum _NV_COLOR_SELECTION_POLICY +{ + NV_COLOR_SELECTION_POLICY_USER = 0, //!< app/nvcpl make decision to select the desire color format + NV_COLOR_SELECTION_POLICY_BEST_QUALITY = 1, //!< driver/ OS make decision to select the best color format + NV_COLOR_SELECTION_POLICY_DEFAULT = NV_COLOR_SELECTION_POLICY_BEST_QUALITY, + NV_COLOR_SELECTION_POLICY_UNKNOWN = 
0xFF, +} NV_COLOR_SELECTION_POLICY; + +typedef enum _NV_DESKTOP_COLOR_DEPTH +{ + NV_DESKTOP_COLOR_DEPTH_DEFAULT = 0x0, // set if the current setting should be kept + NV_DESKTOP_COLOR_DEPTH_8BPC = 0x1, //8 bit int per color component (8 bit int alpha) + NV_DESKTOP_COLOR_DEPTH_10BPC = 0x2, //10 bit int per color component (2 bit int alpha) + NV_DESKTOP_COLOR_DEPTH_16BPC_FLOAT = 0x3, //16 bit float per color component (16 bit float alpha) + NV_DESKTOP_COLOR_DEPTH_16BPC_FLOAT_WCG = 0x4, //16 bit float per color component (16 bit float alpha) wide color gamut + NV_DESKTOP_COLOR_DEPTH_16BPC_FLOAT_HDR = 0x5, //16 bit float per color component (16 bit float alpha) HDR + NV_DESKTOP_COLOR_DEPTH_MAX_VALUE = NV_DESKTOP_COLOR_DEPTH_16BPC_FLOAT_HDR, // must be set to highest enum value +} NV_DESKTOP_COLOR_DEPTH; + +typedef struct _NV_COLOR_DATA_V1 +{ + NvU32 version; //!< Version of this structure + NvU16 size; //!< Size of this structure + NvU8 cmd; + struct + { + NvU8 colorFormat; //!< One of NV_COLOR_FORMAT enum values. + NvU8 colorimetry; //!< One of NV_COLOR_COLORIMETRY enum values. + } data; +} NV_COLOR_DATA_V1; + +typedef struct _NV_COLOR_DATA_V2 +{ + NvU32 version; //!< Version of this structure + NvU16 size; //!< Size of this structure + NvU8 cmd; + struct + { + NvU8 colorFormat; //!< One of NV_COLOR_FORMAT enum values. + NvU8 colorimetry; //!< One of NV_COLOR_COLORIMETRY enum values. + NvU8 dynamicRange; //!< One of NV_DYNAMIC_RANGE enum values. + } data; +} NV_COLOR_DATA_V2; + +typedef struct _NV_COLOR_DATA_V3 +{ + NvU32 version; //!< Version of this structure + NvU16 size; //!< Size of this structure + NvU8 cmd; + struct + { + NvU8 colorFormat; //!< One of NV_COLOR_FORMAT enum values. + NvU8 colorimetry; //!< One of NV_COLOR_COLORIMETRY enum values. + NvU8 dynamicRange; //!< One of NV_DYNAMIC_RANGE enum values. + NV_BPC bpc; //!< One of NV_BPC enum values. 
+ } data; +} NV_COLOR_DATA_V3; + +typedef struct _NV_COLOR_DATA_V4 +{ + NvU32 version; //!< Version of this structure + NvU16 size; //!< Size of this structure + NvU8 cmd; + struct + { + NvU8 colorFormat; //!< One of NV_COLOR_FORMAT enum values. + NvU8 colorimetry; //!< One of NV_COLOR_COLORIMETRY enum values. + NvU8 dynamicRange; //!< One of NV_DYNAMIC_RANGE enum values. + NV_BPC bpc; //!< One of NV_BPC enum values. + NV_COLOR_SELECTION_POLICY colorSelectionPolicy; //!< One of the color selection policy + } data; +} NV_COLOR_DATA_V4; + +typedef struct _NV_COLOR_DATA_V5 +{ + NvU32 version; //!< Version of this structure + NvU16 size; //!< Size of this structure + NvU8 cmd; + struct + { + NvU8 colorFormat; //!< One of NV_COLOR_FORMAT enum values. + NvU8 colorimetry; //!< One of NV_COLOR_COLORIMETRY enum values. + NvU8 dynamicRange; //!< One of NV_DYNAMIC_RANGE enum values. + NV_BPC bpc; //!< One of NV_BPC enum values. + NV_COLOR_SELECTION_POLICY colorSelectionPolicy; //!< One of the color selection policy + NV_DESKTOP_COLOR_DEPTH depth; //!< One of NV_DESKTOP_COLOR_DEPTH enum values. + } data; +} NV_COLOR_DATA_V5; + +typedef NV_COLOR_DATA_V5 NV_COLOR_DATA; + +#define NV_COLOR_DATA_VER1 MAKE_NVAPI_VERSION(NV_COLOR_DATA_V1, 1) +#define NV_COLOR_DATA_VER2 MAKE_NVAPI_VERSION(NV_COLOR_DATA_V2, 2) +#define NV_COLOR_DATA_VER3 MAKE_NVAPI_VERSION(NV_COLOR_DATA_V3, 3) +#define NV_COLOR_DATA_VER4 MAKE_NVAPI_VERSION(NV_COLOR_DATA_V4, 4) +#define NV_COLOR_DATA_VER5 MAKE_NVAPI_VERSION(NV_COLOR_DATA_V5, 5) +#define NV_COLOR_DATA_VER NV_COLOR_DATA_VER5 + +NVAPI_INTERFACE NvAPI_Disp_ColorControl(NvU32 displayId, NV_COLOR_DATA *pColorData); + +//! @} + + +typedef enum +{ + NV_STATIC_METADATA_TYPE_1 = 0 //!< Tells the type of structure used to define the Static Metadata Descriptor block. 
+}NV_STATIC_METADATA_DESCRIPTOR_ID; + +typedef struct _NV_HDR_CAPABILITIES_V1 +{ + NvU32 version; //!< Version of this structure + + NvU32 isST2084EotfSupported :1; //!< HDMI2.0a UHDA HDR with ST2084 EOTF (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isTraditionalHdrGammaSupported :1; //!< HDMI2.0a traditional HDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isEdrSupported :1; //!< Extended Dynamic Range on SDR displays. Boolean: 0 = not supported, 1 = supported; + NvU32 driverExpandDefaultHdrParameters :1; //!< If set, driver will expand default (=zero) HDR capabilities parameters contained in display's EDID. + //!< Boolean: 0 = report actual HDR parameters, 1 = expand default HDR parameters; + NvU32 isTraditionalSdrGammaSupported :1; //!< HDMI2.0a traditional SDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 reserved :27; + + NV_STATIC_METADATA_DESCRIPTOR_ID static_metadata_descriptor_id; //!< Static Metadata Descriptor Id (0 for static metadata type 1) + + struct //!< Static Metadata Descriptor Type 1, CEA-861.3, SMPTE ST2086 + { + NvU16 displayPrimary_x0; //!< x coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y0; //!< y coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x1; //!< x coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y1; //!< y coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x2; //!< x coordinate of color primary 2 (e.g. Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y2; //!< y coordinate of color primary 2 (e.g. 
Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayWhitePoint_x; //!< x coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayWhitePoint_y; //!< y coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 desired_content_max_luminance; //!< Maximum display luminance = desired max luminance of HDR content ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + NvU16 desired_content_min_luminance; //!< Minimum display luminance = desired min luminance of HDR content ([0x0001-0xFFFF] = [1.0 - 6.55350] cd/m^2) + NvU16 desired_content_max_frame_average_luminance; //!< Desired maximum Frame-Average Light Level (MaxFALL) of HDR content ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + }display_data; +} NV_HDR_CAPABILITIES_V1; + +typedef struct _NV_HDR_CAPABILITIES_V2 +{ + NvU32 version; //!< Version of this structure + + NvU32 isST2084EotfSupported :1; //!< HDMI2.0a UHDA HDR with ST2084 EOTF (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isTraditionalHdrGammaSupported :1; //!< HDMI2.0a traditional HDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isEdrSupported :1; //!< Extended Dynamic Range on SDR displays. Boolean: 0 = not supported, 1 = supported; + NvU32 driverExpandDefaultHdrParameters :1; //!< If set, driver will expand default (=zero) HDR capabilities parameters contained in display's EDID. + //!< Boolean: 0 = report actual HDR parameters, 1 = expand default HDR parameters; + NvU32 isTraditionalSdrGammaSupported :1; //!< HDMI2.0a traditional SDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isDolbyVisionSupported :1; //!< Dolby Vision Support. 
Boolean: 0 = not supported, 1 = supported; + NvU32 reserved :26; + + NV_STATIC_METADATA_DESCRIPTOR_ID static_metadata_descriptor_id; //!< Static Metadata Descriptor Id (0 for static metadata type 1) + + struct //!< Static Metadata Descriptor Type 1, CEA-861.3, SMPTE ST2086 + { + NvU16 displayPrimary_x0; //!< x coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y0; //!< y coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x1; //!< x coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y1; //!< y coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x2; //!< x coordinate of color primary 2 (e.g. Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y2; //!< y coordinate of color primary 2 (e.g. Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayWhitePoint_x; //!< x coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayWhitePoint_y; //!< y coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 desired_content_max_luminance; //!< Maximum display luminance = desired max luminance of HDR content ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + NvU16 desired_content_min_luminance; //!< Minimum display luminance = desired min luminance of HDR content ([0x0001-0xFFFF] = [1.0 - 6.55350] cd/m^2) + NvU16 desired_content_max_frame_average_luminance; //!< Desired maximum Frame-Average Light Level (MaxFALL) of HDR content ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + }display_data; + + struct + { + NvU32 VSVDB_version : 3; //!< Version of Vendor Data block,Version 0: 25 bytes Version 1: 14 bytes + NvU32 dm_version : 8; //!< Upper Nibble represents major version of Display Management(DM) while lower represents minor version of DM + NvU32 
supports_2160p60hz : 1; //!< If set sink is capable of 4kx2k @ 60hz + NvU32 supports_YUV422_12bit : 1; //!< If set, sink is capable of YUV422-12 bit + NvU32 supports_global_dimming : 1; //!< Indicates if sink supports global dimming + NvU32 colorimetry : 1; //!< If set indicates sink supports DCI P3 colorimetry, REc709 otherwise + NvU32 supports_backlight_control : 2; //!< This is set when sink is using lowlatency interface and can control its backlight. + NvU32 backlt_min_luma : 2; //!< It is the level for Backlt min luminance value. + NvU32 interface_supported_by_sink : 2; //!< Indicates the interface (standard or low latency) supported by the sink. + NvU32 supports_10b_12b_444 : 2; //!< It is set when interface supported is low latency, it tells whether it supports 10 bit or 12 bit RGB 4:4:4 or YCbCr 4:4:4 or both. + NvU32 reserved : 9; //!< Should be set to zero + //!< All values below are encoded use DolbyVisionHDMITransmissionSpecification document to decode + NvU16 target_min_luminance; //!< Represents min luminance level of Sink + NvU16 target_max_luminance; //!< Represents max luminance level of sink + NvU16 cc_red_x; //!< Red primary chromaticity coordinate x + NvU16 cc_red_y; //!< Red primary chromaticity coordinate y + NvU16 cc_green_x; //!< Green primary chromaticity coordinate x + NvU16 cc_green_y; //!< Green primary chromaticity coordinate Y + NvU16 cc_blue_x; //!< Blue primary chromaticity coordinate x + NvU16 cc_blue_y; //!< Blue primary chromaticity coordinate y + NvU16 cc_white_x; //!< White primary chromaticity coordinate x + NvU16 cc_white_y; //!< White primary chromaticity coordinate y + }dv_static_metadata; + +} NV_HDR_CAPABILITIES_V2; + +typedef struct _NV_HDR_CAPABILITIES_V3 +{ + NvU32 version; //!< Version of this structure + + NvU32 isST2084EotfSupported :1; //!< HDMI2.0a UHDA HDR with ST2084 EOTF (CEA861.3). 
Boolean: 0 = not supported, 1 = supported; + NvU32 isTraditionalHdrGammaSupported :1; //!< HDMI2.0a traditional HDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isEdrSupported :1; //!< Extended Dynamic Range on SDR displays. Boolean: 0 = not supported, 1 = supported; + NvU32 driverExpandDefaultHdrParameters :1; //!< If set, driver will expand default (=zero) HDR capabilities parameters contained in display's EDID. + //!< Boolean: 0 = report actual HDR parameters, 1 = expand default HDR parameters; + NvU32 isTraditionalSdrGammaSupported :1; //!< HDMI2.0a traditional SDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isDolbyVisionSupported :1; //!< Dolby Vision Support. Boolean: 0 = not supported, 1 = supported; + NvU32 isHdr10PlusSupported :1; //!< HDR10+ (Sink Side Tonemapping) is supported + NvU32 isHdr10PlusGamingSupported :1; //!< HDR10+ Gaming, a.k.a HDR10+ Source Side Tonemapping (SSTM), is supported + NvU32 isNvidiaCertifiedDisplay :1; //!< Nvidia certified HDR display + NvU32 reserved :23; + + NV_STATIC_METADATA_DESCRIPTOR_ID static_metadata_descriptor_id; //!< Static Metadata Descriptor Id (0 for static metadata type 1) + + struct //!< Static Metadata Descriptor Type 1, CEA-861.3, SMPTE ST2086 + { + NvU16 displayPrimary_x0; //!< x coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y0; //!< y coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x1; //!< x coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y1; //!< y coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x2; //!< x coordinate of color primary 2 (e.g. Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y2; //!< y coordinate of color primary 2 (e.g. 
Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayWhitePoint_x; //!< x coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayWhitePoint_y; //!< y coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 desired_content_max_luminance; //!< Maximum display luminance = desired max luminance of HDR content ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2) + NvU16 desired_content_min_luminance; //!< Minimum display luminance = desired min luminance of HDR content ([0x0000-0xFFFF] = [0.0 - 6.55350] cd/m^2, in units of 0.0001 cd/m^2) + NvU16 desired_content_max_frame_average_luminance; //!< Desired maximum Frame-Average Light Level (MaxFALL) of HDR content ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2) + }display_data; + + struct + { + NvU32 VSVDB_version : 3; //!< Version of Vendor Data block,Version 0: 25 bytes Version 1: 14 bytes + NvU32 dm_version : 8; //!< Upper Nibble represents major version of Display Management(DM) while lower represents minor version of DM + NvU32 supports_2160p60hz : 1; //!< If set sink is capable of 4kx2k @ 60hz + NvU32 supports_YUV422_12bit : 1; //!< If set, sink is capable of YUV422-12 bit + NvU32 supports_global_dimming : 1; //!< Indicates if sink supports global dimming + NvU32 colorimetry : 1; //!< If set indicates sink supports DCI P3 colorimetry, REc709 otherwise + NvU32 supports_backlight_control : 2; //!< This is set when sink is using lowlatency interface and can control its backlight. + NvU32 backlt_min_luma : 2; //!< It is the level for Backlt min luminance value (reserved = 0x3 in latest DV spec). + NvU32 interface_supported_by_sink : 2; //!< Indicates the interface (standard or low latency) supported by the sink. + NvU32 supports_10b_12b_444 : 2; //!< It is set when interface supported is low latency, it tells whether it supports 10 bit or 12 bit RGB 4:4:4 or YCbCr 4:4:4 or both. 
+ NvU32 parity : 1; //!< resolution and frame-rate relationships between Dolby Vision and other video processing + NvU32 reserved : 8; //!< Should be set to zero + + //!< All values below are encoded use DolbyVisionHDMITransmissionSpecification document to decode + NvU16 target_min_luminance; //!< Represents min luminance level of Sink + NvU16 target_max_luminance; //!< Represents max luminance level of sink + NvU16 cc_red_x; //!< Red primary chromaticity coordinate x + NvU16 cc_red_y; //!< Red primary chromaticity coordinate y + NvU16 cc_green_x; //!< Green primary chromaticity coordinate x + NvU16 cc_green_y; //!< Green primary chromaticity coordinate Y + NvU16 cc_blue_x; //!< Blue primary chromaticity coordinate x + NvU16 cc_blue_y; //!< Blue primary chromaticity coordinate y + NvU16 cc_white_x; //!< White primary chromaticity coordinate x + NvU16 cc_white_y; //!< White primary chromaticity coordinate y + }dv_static_metadata; + + struct + { + NvU16 application_version : 2; //!< Application version of HDR10+ Vendor Specific Video Data Block + NvU16 full_frame_peak_luminance_index : 2; //!< Full frame peak luminance index + NvU16 peak_luminance_index : 4; //!< Peak luminance index + NvU16 reserved : 8; + }hdr10plus_vsvdb; + +} NV_HDR_CAPABILITIES_V3; + +#define NV_HDR_CAPABILITIES_VER1 MAKE_NVAPI_VERSION(NV_HDR_CAPABILITIES_V1, 1) +#define NV_HDR_CAPABILITIES_VER2 MAKE_NVAPI_VERSION(NV_HDR_CAPABILITIES_V2, 2) +#define NV_HDR_CAPABILITIES_VER3 MAKE_NVAPI_VERSION(NV_HDR_CAPABILITIES_V3, 3) +#define NV_HDR_CAPABILITIES_VER NV_HDR_CAPABILITIES_VER3 +typedef NV_HDR_CAPABILITIES_V3 NV_HDR_CAPABILITIES; + +//! \ingroup dispcontrol +//! @{ +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_Disp_GetHdrCapabilities +// +//! \fn NvAPI_Disp_GetHdrCapabilities(NvU32 displayId, NV_HDR_CAPABILITIES *pHdrCapabilities) +//! DESCRIPTION: This API gets High Dynamic Range (HDR) capabilities of the display. +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Monitor Identifier +//! \param [in,out] pHdrCapabilities display's HDR capabilities +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_GetHdrCapabilities(__in NvU32 displayId, __inout NV_HDR_CAPABILITIES *pHdrCapabilities); + +//! @} + + +typedef enum +{ + NV_HDR_CMD_GET = 0, //!< Get current HDR output configuration + NV_HDR_CMD_SET = 1 //!< Set HDR output configuration +} NV_HDR_CMD; + +typedef enum +{ + // Official production-ready HDR modes + NV_HDR_MODE_OFF = 0, //!< Turn off HDR + NV_HDR_MODE_UHDA = 2, //!< Source: CCCS [a.k.a FP16 scRGB, linear, sRGB primaries, [-65504,0, 65504] range, RGB(1,1,1) = 80nits] Output : UHDA HDR [a.k.a HDR10, RGB/YCC 10/12bpc ST2084(PQ) EOTF RGB(1,1,1) = 10000 nits, Rec2020 color primaries, ST2086 static HDR metadata]. This is the only supported production HDR mode. + + // Experimental + NV_HDR_MODE_UHDA_PASSTHROUGH = 5, //!< Experimental mode only, not for production! Source: HDR10 RGB 10bpc Output: HDR10 RGB 10 bpc - signal UHDA HDR mode (PQ + Rec2020) to the sink but send source pixel values unmodified (no PQ or Rec2020 conversions) - assumes source is already in HDR10 format. + NV_HDR_MODE_DOLBY_VISION = 7, //!< Experimental mode only, not for production! Source: RGB8 Dolby Vision encoded (12 bpc YCbCr422 packed into RGB8) Output: Dolby Vision encoded : Application is to encoded frames in DV format and embed DV dynamic metadata as described in Dolby Vision specification. + + // Unsupported/obsolete HDR modes + NV_HDR_MODE_EDR = 3, //!< Do not use! Internal test mode only, to be removed. 
Source: CCCS (a.k.a FP16 scRGB) Output : EDR (Extended Dynamic Range) - HDR content is tonemapped and gamut mapped to output on regular SDR display set to max luminance ( ~300 nits ). + NV_HDR_MODE_SDR = 4, //!< Do not use! Internal test mode only, to be removed. Source: any Output: SDR (Standard Dynamic Range), we continuously send SDR EOTF InfoFrame signaling, HDMI compliance testing. + NV_HDR_MODE_UHDA_NB = 6, //!< Do not use! Internal test mode only, to be removed. Source: CCCS (a.k.a FP16 scRGB) Output : notebook HDR + NV_HDR_MODE_UHDBD = 2 //!< Do not use! Obsolete, to be removed. NV_HDR_MODE_UHDBD == NV_HDR_MODE_UHDA, reflects obsolete pre-UHDA naming convention. + +} NV_HDR_MODE; + +typedef struct _NV_HDR_COLOR_DATA_V1 +{ + NvU32 version; //!< Version of this structure + NV_HDR_CMD cmd; //!< Command get/set + NV_HDR_MODE hdrMode; //!< HDR mode + NV_STATIC_METADATA_DESCRIPTOR_ID static_metadata_descriptor_id; //!< Static Metadata Descriptor Id (0 for static metadata type 1) + + struct //!< Static Metadata Descriptor Type 1, CEA-861.3, SMPTE ST2086 + { + NvU16 displayPrimary_x0; //!< x coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y0; //!< y coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x1; //!< x coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y1; //!< y coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x2; //!< x coordinate of color primary 2 (e.g. Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y2; //!< y coordinate of color primary 2 (e.g. 
Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayWhitePoint_x; //!< x coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayWhitePoint_y; //!< y coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 max_display_mastering_luminance; //!< Maximum display mastering luminance ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + NvU16 min_display_mastering_luminance; //!< Minimum display mastering luminance ([0x0001-0xFFFF] = [1.0 - 6.55350] cd/m^2) + + NvU16 max_content_light_level; //!< Maximum Content Light level (MaxCLL) ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + NvU16 max_frame_average_light_level; //!< Maximum Frame-Average Light Level (MaxFALL) ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + } mastering_display_data; +} NV_HDR_COLOR_DATA_V1; + +typedef struct _NV_HDR_COLOR_DATA_V2 +{ + NvU32 version; //!< Version of this structure + NV_HDR_CMD cmd; //!< Command get/set + NV_HDR_MODE hdrMode; //!< HDR mode + NV_STATIC_METADATA_DESCRIPTOR_ID static_metadata_descriptor_id; //!< Static Metadata Descriptor Id (0 for static metadata type 1) + + struct //!< Static Metadata Descriptor Type 1, CEA-861.3, SMPTE ST2086 + { + NvU16 displayPrimary_x0; //!< x coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y0; //!< y coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x1; //!< x coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y1; //!< y coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayPrimary_x2; //!< x coordinate of color primary 2 (e.g. Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayPrimary_y2; //!< y coordinate of color primary 2 (e.g. 
Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 displayWhitePoint_x; //!< x coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + NvU16 displayWhitePoint_y; //!< y coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0]) + + NvU16 max_display_mastering_luminance; //!< Maximum display mastering luminance ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + NvU16 min_display_mastering_luminance; //!< Minimum display mastering luminance ([0x0001-0xFFFF] = [1.0 - 6.55350] cd/m^2) + + NvU16 max_content_light_level; //!< Maximum Content Light level (MaxCLL) ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + NvU16 max_frame_average_light_level; //!< Maximum Frame-Average Light Level (MaxFALL) ([0x0001-0xFFFF] = [1.0 - 65535.0] cd/m^2) + } mastering_display_data; + + NV_COLOR_FORMAT hdrColorFormat; //!< Optional, One of NV_COLOR_FORMAT enum values, if set it will apply requested color format for HDR session + NV_DYNAMIC_RANGE hdrDynamicRange; //!< Optional, One of NV_DYNAMIC_RANGE enum values, if set it will apply requested dynamic range for HDR session + NV_BPC hdrBpc; //!< Optional, One of NV_BPC enum values, if set it will apply requested color depth + //!< Dolby Vision mode: DV supports specific combinations of colorformat, dynamic range and bpc. Please refer Dolby Vision specification. + //!< If invalid or no combination is passed driver will force default combination of RGB format + full range + 8bpc. + //!< HDR mode: These fields are ignored in hdr mode +} NV_HDR_COLOR_DATA_V2; + +#define NV_HDR_COLOR_DATA_VER1 MAKE_NVAPI_VERSION(NV_HDR_COLOR_DATA_V1, 1) +#define NV_HDR_COLOR_DATA_VER2 MAKE_NVAPI_VERSION(NV_HDR_COLOR_DATA_V2, 2) + +#ifndef NV_HDR_COLOR_DATA_VER +#define NV_HDR_COLOR_DATA_VER NV_HDR_COLOR_DATA_VER2 +typedef NV_HDR_COLOR_DATA_V2 NV_HDR_COLOR_DATA; +#endif + +//! \ingroup dispcontrol +//! 
@{ + /////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_Disp_HdrColorControl +// +//! \fn NvAPI_Disp_HdrColorControl(NvU32 displayId, NV_HDR_COLOR_DATA *pHdrColorData) +//! DESCRIPTION: This API configures High Dynamic Range (HDR) and Extended Dynamic Range (EDR) output. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Monitor Identifier +//! \param [in,out] pHdrColorData HDR configuration data +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_HdrColorControl(__in NvU32 displayId, __inout NV_HDR_COLOR_DATA *pHdrColorData); + +typedef enum _NV_COLORSPACE_TYPE +{ + NV_COLORSPACE_sRGB = 0, //!< sRGB IEC 61966-2-1:1999 == DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 + NV_COLORSPACE_xRGB = 1, //!< FP16 linear with sRGB color primaries == DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709 + NV_COLORSPACE_REC2100 = 12, //!< ITU-R Rec BT.2100 (HDR10) == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 +} NV_COLORSPACE_TYPE; + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_Disp_SetSourceColorSpace +// +//! \fn NvAPI_Disp_SetSourceColorSpace(__in NvU32 displayId, __in NV_SOURCE_COLORSPACE colorSpace) +//! DESCRIPTION: This API sets colorspace of the source identified by the process id of the caller +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 525 +//! +//! \param [in] displayId Monitor Identifier +//! \param [in] colorSpaceType Source colorspace type +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. 
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_SetSourceColorSpace(__in NvU32 displayId, __in NV_COLORSPACE_TYPE colorSpaceType);
+
+#define NV_SOURCE_PID_CURRENT 0
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_Disp_GetSourceColorSpace
+//
+//! \fn NvAPI_Disp_GetSourceColorSpace(__in NvU32 displayId, __inout NV_COLORSPACE_TYPE* pColorSpaceType, NvU64 sourcePID)
+//! DESCRIPTION: This API gets colorspace of the source identified by the process id.
+//!              Set sourcePID = NV_SOURCE_PID_CURRENT to use the process id of the caller.
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]  displayId        Monitor Identifier
+//! \param [out] pColorSpaceType  Source colorspace type
+//! \param [in]  sourcePID        Source process id (PID)
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!         specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_GetSourceColorSpace(__in NvU32 displayId, __inout NV_COLORSPACE_TYPE* pColorSpaceType, NvU64 sourcePID);
+
+typedef struct _NV_HDR_METADATA_V1
+{
+    NvU32 version;                                 //!< Version of this structure
+
+    NvU16 displayPrimary_x0;                       //!< x coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayPrimary_y0;                       //!< y coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 displayPrimary_x1;                       //!< x coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayPrimary_y1;                       //!< y coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 displayPrimary_x2;                       //!< x coordinate of color primary 2 (e.g. Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayPrimary_y2;                       //!< y coordinate of color primary 2 (e.g. Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 displayWhitePoint_x;                     //!< x coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayWhitePoint_y;                     //!< y coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 max_display_mastering_luminance;         //!< Maximum display mastering luminance ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+    NvU16 min_display_mastering_luminance;         //!< Minimum display mastering luminance ([0x0000-0xFFFF] = [0.0 - 6.55350] cd/m^2, in units of 0.0001 cd/m^2)
+
+    NvU16 max_content_light_level;                 //!< Maximum Content Light level (MaxCLL) ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+    NvU16 max_frame_average_light_level;           //!< Maximum Frame-Average Light Level (MaxFALL) ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+} NV_HDR_METADATA_V1;
+
+#define NV_HDR_METADATA_VER1 MAKE_NVAPI_VERSION(NV_HDR_METADATA_V1, 1)
+#define NV_HDR_METADATA_VER NV_HDR_METADATA_VER1
+typedef NV_HDR_METADATA_V1 NV_HDR_METADATA;
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_Disp_SetSourceHdrMetadata
+//
+//! \fn NvAPI_Disp_SetSourceHdrMetadata(__in NvU32 displayId, __in NV_HDR_METADATA* pMetadata)
+//! DESCRIPTION: This API sets HDR metadata of the source identified by the process id of the caller
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in] displayId        Monitor Identifier
+//! \param [in] pMetadata        HDR metadata
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!         specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_SetSourceHdrMetadata(__in NvU32 displayId, __in NV_HDR_METADATA* pMetadata);
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_Disp_GetSourceHdrMetadata
+//
+//! \fn NvAPI_Disp_GetSourceHdrMetadata(__in NvU32 displayId, __inout NV_HDR_METADATA* pMetadata, NvU64 sourcePID)
+//! DESCRIPTION: This API gets HDR metadata of the source identified by the process id of the caller
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]  displayId       Monitor Identifier
+//! \param [out] pMetadata       HDR metadata
+//! \param [in]  sourcePID       Source process id (PID)
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!         specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_GetSourceHdrMetadata(__in NvU32 displayId, __inout NV_HDR_METADATA* pMetadata, NvU64 sourcePID);
+
+typedef enum _NV_DISPLAY_OUTPUT_MODE
+{
+    NV_DISPLAY_OUTPUT_MODE_SDR              = 0,
+    NV_DISPLAY_OUTPUT_MODE_HDR10            = 1,
+    NV_DISPLAY_OUTPUT_MODE_HDR10PLUS_GAMING = 2
+} NV_DISPLAY_OUTPUT_MODE;
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_Disp_SetOutputMode
+//
+//! \fn NvAPI_Disp_SetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode)
+//! DESCRIPTION: This API sets display output mode and returns the display output mode used by the OS before the API call.
+//!              Only one application at a time can override OS display output mode.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]      displayId     Display identifier
+//! \param [in, out] pDisplayMode  New/original display output mode
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!         specific meaning for this API, they are listed below.
+//!
+//! \retval ::NVAPI_RESOURCE_IN_USE the output mode can't be changed as it is already overridden by another application.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_SetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode);
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_Disp_GetOutputMode
+//
+//! \fn NvAPI_Disp_GetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode)
+//! DESCRIPTION: This API gets display output mode.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]  displayId     Display identifier
+//! \param [out] pDisplayMode  Current display output mode
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!         specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_GetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode);
+
+typedef enum _NV_HDR_TONEMAPPING_METHOD
+{
+    NV_HDR_TONEMAPPING_APP = 0,
+    NV_HDR_TONEMAPPING_GPU = 1
+} NV_HDR_TONEMAPPING_METHOD;
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_Disp_SetHdrToneMapping
+//
+//! \fn NvAPI_Disp_SetHdrToneMapping(__in NvU32 displayId, __in NV_HDR_TONEMAPPING_METHOD hdrTonemapping)
+//! DESCRIPTION: This API sets HDR tonemapping method for the display
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in] displayId       Display identifier
+//! \param [in] hdrTonemapping  HDR tonemapping method
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_SetHdrToneMapping(__in NvU32 displayId, __in NV_HDR_TONEMAPPING_METHOD hdrTonemapping); + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_Disp_GetHdrToneMapping +// +//! \fn NvAPI_Disp_GetHdrToneMapping(__in NvU32 displayId, __inout NV_HDR_TONEMAPPING_METHOD* pHdrTonemapping) +//! DESCRIPTION: This API gets HDR tonemapping method for the display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 525 +//! +//! \param [in] displayId display identifier +//! \param [out] pHdrTonemapping HDR tonemapping method +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. 
+// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_GetHdrToneMapping(__in NvU32 displayId, __inout NV_HDR_TONEMAPPING_METHOD* pHdrTonemapping); + +typedef struct _NV_DISPLAY_COLORIMETRY_V1 +{ + NvU32 version; //!< Version of this structure + + float min_luminance; //!< min luminance, cd/m^2 + float max_full_frame_luminance; //!< max 100% frame luminance, cd/m^2 + float max_luminance; //!< max 10% frame luminance, cd/m^2 + + float hdrBrightnessLuminanceScalingFactor; //!< HDR brightness luminance scaling factor applied by GPU on output pixels + + float red_primary_x; //!< red primary chromaticity coordinate x + float red_primary_y; //!< red primary chromaticity coordinate y + float green_primary_x; //!< green primary chromaticity coordinate x + float green_primary_y; //!< green primary chromaticity coordinate y + float blue_primary_x; //!< blue primary chromaticity coordinate x + float blue_primary_y; //!< blue primary chromaticity coordinate y + float white_point_x; //!< white point chromaticity coordinate x + float white_point_y; //!< white point chromaticity coordinate y +} NV_DISPLAY_COLORIMETRY_V1; + +#define NV_DISPLAY_COLORIMETRY_VER1 MAKE_NVAPI_VERSION(NV_DISPLAY_COLORIMETRY_V1, 1) +#define NV_DISPLAY_COLORIMETRY_VER NV_DISPLAY_COLORIMETRY_VER1 +typedef NV_DISPLAY_COLORIMETRY_V1 NV_DISPLAY_COLORIMETRY; + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_Disp_GetColorimetry +// +//! \fn NvAPI_Disp_GetColorimetry(__in NvU32 displayId, __inout NV_DISPLAY_COLORIMETRY* pColorimetry) +//! DESCRIPTION: This API gets colorimetry parameters for the display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 580 +//! +//! \param [in] displayId display identifier +//! \param [out] pColorimetry display colorimetry +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. 
If there are return error codes with +//! specific meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_GetColorimetry(__in NvU32 displayId, __inout NV_DISPLAY_COLORIMETRY* pColorimetry); +//! @} + +//! \ingroup dispcontrol +//! Used in NvAPI_DISP_GetTiming(). +typedef struct +{ + NvU32 isInterlaced : 4; //!< To retrieve interlaced/progressive timing + NvU32 reserved0 : 12; + union + { + NvU32 tvFormat : 8; //!< The actual analog HD/SDTV format. Used when the timing type is + //! NV_TIMING_OVERRIDE_ANALOG_TV and width==height==rr==0. + NvU32 ceaId : 8; //!< The EIA/CEA 861B/D predefined short timing descriptor ID. + //! Used when the timing type is NV_TIMING_OVERRIDE_EIA861 + //! and width==height==rr==0. + NvU32 nvPsfId : 8; //!< The NV predefined PsF format Id. + //! Used when the timing type is NV_TIMING_OVERRIDE_NV_PREDEFINED. + }; + NvU32 scaling : 8; //!< Define preferred scaling +}NV_TIMING_FLAG; + +//! \ingroup dispcontrol +//! Used in NvAPI_DISP_GetTiming(). +typedef struct _NV_TIMING_INPUT +{ + NvU32 version; //!< (IN) structure version + + NvU32 width; //!< Visible horizontal size + NvU32 height; //!< Visible vertical size + float rr; //!< Timing refresh rate + + NV_TIMING_FLAG flag; //!< Flag containing additional info for timing calculation. + + NV_TIMING_OVERRIDE type; //!< Timing type(formula) to use for calculating the timing +}NV_TIMING_INPUT; + +#define NV_TIMING_INPUT_VER MAKE_NVAPI_VERSION(NV_TIMING_INPUT,1) + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_GetTiming +// +//! DESCRIPTION: This function calculates the timing from the visible width/height/refresh-rate and timing type info. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! +//! \param [in] displayId Display ID of the display. +//! 
\param [in] timingInput Inputs used for calculating the timing. +//! \param [out] pTiming Pointer to the NV_TIMING structure. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetTiming( __in NvU32 displayId,__in NV_TIMING_INPUT *timingInput, __out NV_TIMING *pTiming); + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_GetMonitorCapabilities +// +//! \fn NvAPI_DISP_GetMonitorCapabilities(NvU32 displayId, NV_MONITOR_CAPABILITIES *pMonitorCapabilities) +//! DESCRIPTION: This API returns the Monitor capabilities +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Monitor Identifier +//! \param [out] pMonitorCapabilities The monitor support info +//! +//! \return ::NVAPI_OK, +//! ::NVAPI_ERROR, +//! ::NVAPI_INVALID_ARGUMENT +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup dispcontrol +//! @{ + + +//! HDMI-related and extended CAPs +typedef enum +{ + // hdmi related caps + NV_MONITOR_CAPS_TYPE_HDMI_VSDB = 0x1000, + NV_MONITOR_CAPS_TYPE_HDMI_VCDB = 0x1001, + // backend caps + NV_MONITOR_CAPS_TYPE_GENERIC = 0x1002, +} NV_MONITOR_CAPS_TYPE; + + + +typedef struct _NV_MONITOR_CAPS_VCDB +{ + NvU8 quantizationRangeYcc : 1; + NvU8 quantizationRangeRgb : 1; + NvU8 scanInfoPreferredVideoFormat : 2; + NvU8 scanInfoITVideoFormats : 2; + NvU8 scanInfoCEVideoFormats : 2; +} NV_MONITOR_CAPS_VCDB; + + +//! See NvAPI_DISP_GetMonitorCapabilities(). 
+typedef struct _NV_MONITOR_CAPS_VSDB +{ + // byte 1 + NvU8 sourcePhysicalAddressB : 4; //!< Byte 1 + NvU8 sourcePhysicalAddressA : 4; //!< Byte 1 + // byte 2 + NvU8 sourcePhysicalAddressD : 4; //!< Byte 2 + NvU8 sourcePhysicalAddressC : 4; //!< Byte 2 + // byte 3 + NvU8 supportDualDviOperation : 1; //!< Byte 3 + NvU8 reserved6 : 2; //!< Byte 3 + NvU8 supportDeepColorYCbCr444 : 1; //!< Byte 3 + NvU8 supportDeepColor30bits : 1; //!< Byte 3 + NvU8 supportDeepColor36bits : 1; //!< Byte 3 + NvU8 supportDeepColor48bits : 1; //!< Byte 3 + NvU8 supportAI : 1; //!< Byte 3 + // byte 4 + NvU8 maxTmdsClock; //!< Bye 4 + // byte 5 + NvU8 cnc0SupportGraphicsTextContent : 1; //!< Byte 5 + NvU8 cnc1SupportPhotoContent : 1; //!< Byte 5 + NvU8 cnc2SupportCinemaContent : 1; //!< Byte 5 + NvU8 cnc3SupportGameContent : 1; //!< Byte 5 + NvU8 reserved8 : 1; //!< Byte 5 + NvU8 hasVicEntries : 1; //!< Byte 5 + NvU8 hasInterlacedLatencyField : 1; //!< Byte 5 + NvU8 hasLatencyField : 1; //!< Byte 5 + // byte 6 + NvU8 videoLatency; //!< Byte 6 + // byte 7 + NvU8 audioLatency; //!< Byte 7 + // byte 8 + NvU8 interlacedVideoLatency; //!< Byte 8 + // byte 9 + NvU8 interlacedAudioLatency; //!< Byte 9 + // byte 10 + NvU8 reserved13 : 7; //!< Byte 10 + NvU8 has3dEntries : 1; //!< Byte 10 + // byte 11 + NvU8 hdmi3dLength : 5; //!< Byte 11 + NvU8 hdmiVicLength : 3; //!< Byte 11 + // Remaining bytes + NvU8 hdmi_vic[7]; //!< Keeping maximum length for 3 bits + NvU8 hdmi_3d[31]; //!< Keeping maximum length for 5 bits +} NV_MONITOR_CAPS_VSDB; + +typedef struct _NV_MONITOR_CAPS_GENERIC +{ + NvU8 supportVRR : 1; //!< monitor supports variable refresh rate. Valid for NV_MONITOR_CAPS_TYPE_GENERIC only. + NvU8 supportULMB : 1; //!< monitor supports ULMB with variable refresh rate. Valid for NV_MONITOR_CAPS_TYPE_GENERIC only. + NvU8 isTrueGsync : 1; //!< whether the monitor is actually GSYNC or adaptive sync monitor : 0 for adaptive sync. 
+ NvU8 isRLACapable : 1; //!< whether monitor supports RLA + NvU8 currentlyCapableOfVRR : 1; //!< monitor currently supports VRR on applied display settings. Valid for NV_MONITOR_CAPS_TYPE_GENERIC only. + NvU8 reserved : 3; +} NV_MONITOR_CAPS_GENERIC; + +//! See NvAPI_DISP_GetMonitorCapabilities(). +typedef struct _NV_MONITOR_CAPABILITIES_V1 +{ + NvU32 version; + NvU16 size; + NvU32 infoType; + NvU32 connectorType; //!< Out: VGA, TV, DVI, HDMI, DP + NvU8 bIsValidInfo : 1; //!< Boolean : Returns invalid if requested info is not present such as VCDB not present + union { + NV_MONITOR_CAPS_VSDB vsdb; + NV_MONITOR_CAPS_VCDB vcdb; + NV_MONITOR_CAPS_GENERIC caps; + } data; +} NV_MONITOR_CAPABILITIES_V1; + +typedef NV_MONITOR_CAPABILITIES_V1 NV_MONITOR_CAPABILITIES; + +//! Macro for constructing the version field of ::NV_MONITOR_CAPABILITIES_V1 +#define NV_MONITOR_CAPABILITIES_VER1 MAKE_NVAPI_VERSION(NV_MONITOR_CAPABILITIES_V1,1) +#define NV_MONITOR_CAPABILITIES_VER NV_MONITOR_CAPABILITIES_VER1 + +//! @} + +//! SUPPORTED OS: Windows 10 and higher +//! +//! \ingroup dispcontrol +NVAPI_INTERFACE NvAPI_DISP_GetMonitorCapabilities(__in NvU32 displayId, __inout NV_MONITOR_CAPABILITIES *pMonitorCapabilities); + +//! \ingroup dispcontrol +typedef struct _NV_MONITOR_COLOR_DATA +{ + NvU32 version; +// We are only supporting DP monitors for now. We need to extend this to HDMI panels as well + NV_DP_COLOR_FORMAT colorFormat; //!< One of the supported color formats + NV_DP_BPC backendBitDepths; //!< One of the supported bit depths +} NV_MONITOR_COLOR_CAPS_V1; + +typedef NV_MONITOR_COLOR_CAPS_V1 NV_MONITOR_COLOR_CAPS; + +//! \ingroup dispcontrol +#define NV_MONITOR_COLOR_CAPS_VER1 MAKE_NVAPI_VERSION(NV_MONITOR_COLOR_CAPS_V1,1) +#define NV_MONITOR_COLOR_CAPS_VER NV_MONITOR_COLOR_CAPS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_GetMonitorColorCapabilities +// +//! 
DESCRIPTION: This API returns all the color formats and bit depth values supported by a given DP monitor. +//! +//! USAGE: Sequence of calls which caller should make to get the information. +//! 1. First call NvAPI_DISP_GetMonitorColorCapabilities() with pMonitorColorCapabilities as NULL to get the count. +//! 2. Allocate memory for color caps(NV_MONITOR_COLOR_CAPS) array. +//! 3. Call NvAPI_DISP_GetMonitorColorCapabilities() again with the pointer to the memory allocated to get all the +//! color capabilities. +//! +//! Note : +//! 1. pColorCapsCount should never be NULL, else the API will fail with NVAPI_INVALID_ARGUMENT. +//! 2. *pColorCapsCount returned from the API will always be the actual count in any/every call. +//! 3. Memory size to be allocated should be (*pColorCapsCount * sizeof(NV_MONITOR_COLOR_CAPS)). +//! 4. If the memory allocated is less than what is required to return all the timings, this API will return the +//! amount of information which can fit in user provided buffer and API will return NVAPI_INSUFFICIENT_BUFFER. +//! 5. If the caller specifies a greater value for *pColorCapsCount in second call to NvAPI_DISP_GetMonitorColorCapabilities() +//! than what was returned from first call, the API will return only the actual number of elements in the color +//! capabilities array and the extra buffer will remain unused. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Monitor Identifier +//! \param [in, out] pMonitorColorCapabilities The monitor color capabilities information +//! \param [in, out] pColorCapsCount - During input, the number of elements allocated for the pMonitorColorCapabilities pointer +//! - During output, the actual number of color data elements the monitor supports +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! 
\retval NVAPI_INSUFFICIENT_BUFFER The input buffer size is not sufficient to hold the total contents. In this case +//! *pColorCapsCount will hold the required amount of elements. +//! \retval NVAPI_INVALID_DISPLAY_ID The input monitor is either not connected or is not a DP panel. +//! +//! \ingroup dispcontrol +//! +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetMonitorColorCapabilities(__in NvU32 displayId, __inout_ecount_part_opt(*pColorCapsCount, *pColorCapsCount) NV_MONITOR_COLOR_CAPS *pMonitorColorCapabilities, __inout NvU32 *pColorCapsCount); + +//! \ingroup dispcontrol +//! Used in NvAPI_DISP_EnumCustomDisplay() and NvAPI_DISP_TryCustomDisplay(). +typedef struct +{ + NvU32 version; + + // the source mode information + NvU32 width; //!< Source surface(source mode) width + NvU32 height; //!< Source surface(source mode) height + NvU32 depth; //!< Source surface color depth."0" means all 8/16/32bpp + NV_FORMAT colorFormat; //!< Color format (optional) + + NV_VIEWPORTF srcPartition; //!< For multimon support, should be set to (0,0,1.0,1.0) for now. + + float xRatio; //!< Horizontal scaling ratio + float yRatio; //!< Vertical scaling ratio + + NV_TIMING timing; //!< Timing used to program TMDS/DAC/LVDS/HDMI/TVEncoder, etc. + NvU32 hwModeSetOnly : 1; //!< If set, it means a hardware modeset without OS update + +}NV_CUSTOM_DISPLAY; + +//! \ingroup dispcontrol +//! Used in NV_CUSTOM_DISPLAY. +#define NV_CUSTOM_DISPLAY_VER MAKE_NVAPI_VERSION(NV_CUSTOM_DISPLAY,1) + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_EnumCustomDisplay +// +//! DESCRIPTION: This API enumerates the custom timing specified by the enum index. +//! The client should keep enumerating until it returns NVAPI_END_ENUMERATION. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] displayId Dispaly ID of the display. +//! 
\param [in] index Enum index +//! \param [inout] pCustDisp Pointer to the NV_CUSTOM_DISPLAY structure +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! \retval NVAPI_INVALID_DISPLAY_ID: Custom Timing is not supported on the Display, whose display id is passed +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_EnumCustomDisplay( __in NvU32 displayId, __in NvU32 index, __inout NV_CUSTOM_DISPLAY *pCustDisp); + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_TryCustomDisplay +// +//! DESCRIPTION: This API is used to set up a custom display without saving the configuration on multiple displays. +//! +//! \note +//! All the members of srcPartition, present in NV_CUSTOM_DISPLAY structure, should have their range in (0.0,1.0). +//! In clone mode the timings can applied to both the target monitors but only one target at a time. \n +//! For the secondary target the applied timings works under the following conditions: +//! - If the secondary monitor EDID supports the selected timing, OR +//! - If the selected custom timings can be scaled by the secondary monitor for the selected source resolution on the primary, OR +//! - If the selected custom timings matches the existing source resolution on the primary. +//! Setting up a custom display on non-active but connected monitors is supported only for Win7 and above. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! +//! \param [in] pDisplayIds Array of the target display Dispaly IDs - See \ref handles. +//! \param [in] count Total number of the incoming Display IDs and corresponding NV_CUSTOM_DISPLAY structure. This is for the multi-head support. +//! 
\param [in] pCustDisp Pointer to the NV_CUSTOM_DISPLAY structure array.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//! specific meaning for this API, they are listed below.
+//! \retval NVAPI_INVALID_DISPLAY_ID: Custom Timing is not supported on the Display, whose display id is passed
+//!
+//! \ingroup dispcontrol
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DISP_TryCustomDisplay( __in_ecount(count) NvU32 *pDisplayIds, __in NvU32 count, __in_ecount(count) NV_CUSTOM_DISPLAY *pCustDisp);
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_DISP_DeleteCustomDisplay
+//
+//! DESCRIPTION: This function deletes the custom display configuration, specified from the registry for all the displays whose display IDs are passed.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 313
+//!
+//!
+//! \param [in] pDisplayIds Array of Display IDs on which custom display configuration is to be deleted.
+//! \param [in] count Total number of the incoming Display IDs. This is for the multi-head support.
+//! \param [in] pCustDisp Pointer to the NV_CUSTOM_DISPLAY structure
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//! specific meaning for this API, they are listed below.
+//! \retval NVAPI_INVALID_DISPLAY_ID: Custom Timing is not supported on the Display, whose display id is passed
+//!
+//! \ingroup dispcontrol
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DISP_DeleteCustomDisplay( __in_ecount(count) NvU32 *pDisplayIds, __in NvU32 count, __in NV_CUSTOM_DISPLAY *pCustDisp);
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_DISP_SaveCustomDisplay
+//
+//! 
DESCRIPTION: This function saves the current hardware display configuration on the specified Display IDs as a custom display configuration. +//! This function should be called right after NvAPI_DISP_TryCustomDisplay() to save the custom display from the current +//! hardware context. This function will not do anything if the custom display configuration is not tested on the hardware. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! +//! \param [in] pDisplayIds Array of Dispaly IDs on which custom display configuration is to be saved. +//! \param [in] count Total number of the incoming Dispaly IDs. This is for the multi-head support. +//! \param [in] isThisOutputIdOnly If set, the saved custom display will only be applied on the monitor with the same outputId (see \ref handles). +//! \param [in] isThisMonitorIdOnly If set, the saved custom display will only be applied on the monitor with the same EDID ID or +//! the same TV connector in case of analog TV. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! \retval NVAPI_INVALID_DISPLAY_ID: Custom Timing is not supported on the Display, whose display id is passed +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_SaveCustomDisplay( __in_ecount(count) NvU32 *pDisplayIds, __in NvU32 count, __in NvU32 isThisOutputIdOnly, __in NvU32 isThisMonitorIdOnly); + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_RevertCustomDisplayTrial +// +//! DESCRIPTION: This API is used to restore the display configuration, that was changed by calling NvAPI_DISP_TryCustomDisplay(). This function +//! 
must be called only after a custom display configuration is tested on the hardware, using NvAPI_DISP_TryCustomDisplay(), +//! otherwise no action is taken. On Vista, NvAPI_DISP_RevertCustomDisplayTrial should be called with an active display that +//! was affected during the NvAPI_DISP_TryCustomDisplay() call, per GPU. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! +//! \param [in] pDisplayIds Pointer to display Id, of an active display. +//! \param [in] count Total number of incoming Display IDs. For future use only. Currently it is expected to be passed as 1. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_RevertCustomDisplayTrial( __in_ecount(count) NvU32* pDisplayIds, __in NvU32 count); + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GetView +// +//! This API lets caller retrieve the target display arrangement for selected source display handle. +//! \note Display PATH with this API is limited to single GPU. DUALVIEW across GPUs will be returned as STANDARD VIEW. +//! Use NvAPI_SYS_GetDisplayTopologies() to query views across GPUs. +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_GetDisplayConfig. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \param [in] hNvDisplay NVIDIA Display selection. It can be #NVAPI_DEFAULT_HANDLE or a handle enumerated from +//! NvAPI_EnumNVidiaDisplayHandle(). +//! \param [out] pTargets User allocated storage to retrieve an array of NV_VIEW_TARGET_INFO. Can be NULL to retrieve +//! the targetCount. +//! 
\param [in,out] targetMaskCount Count of target device mask specified in pTargetMask. +//! \param [out] targetView Target view selected from NV_TARGET_VIEW_MODE. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! \retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_GetDisplayConfig.") +NVAPI_INTERFACE NvAPI_GetView(NvDisplayHandle hNvDisplay, NV_VIEW_TARGET_INFO *pTargets, NvU32 *pTargetMaskCount, NV_TARGET_VIEW_MODE *pTargetView); + + + + + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GetViewEx +// +//! DESCRIPTION: This API lets caller retrieve the target display arrangement for selected source display handle. +//! \note Display PATH with this API is limited to single GPU. DUALVIEW across GPUs will be returned as STANDARD VIEW. +//! Use NvAPI_SYS_GetDisplayTopologies() to query views across GPUs. +//! +//! \deprecated Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_GetDisplayConfig. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 165 +//! +//! \param [in] hNvDisplay NVIDIA Display selection. #NVAPI_DEFAULT_HANDLE is not allowed, it has to be a handle enumerated with +//! NvAPI_EnumNVidiaDisplayHandle(). +//! \param [in,out] pPathInfo Count field should be set to NVAPI_MAX_DISPLAY_PATH. Can be NULL to retrieve just the pathCount. +//! \param [in,out] pPathCount Number of elements in array pPathInfo->path. +//! \param [out] pTargetViewMode Display view selected from NV_TARGET_VIEW_MODE. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_API_NOT_INTIALIZED NVAPI not initialized +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! 
\retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +//! \retval NVAPI_EXPECTED_DISPLAY_HANDLE hNvDisplay is not a valid display handle. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_GetDisplayConfig.") +NVAPI_INTERFACE NvAPI_GetViewEx(NvDisplayHandle hNvDisplay, NV_DISPLAY_PATH_INFO *pPathInfo, NvU32 *pPathCount, NV_TARGET_VIEW_MODE *pTargetViewMode); + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_GetSupportedViews +// +//! This API lets caller enumerate all the supported NVIDIA display views - nView and Dualview modes. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 85 +//! +//! \param [in] hNvDisplay NVIDIA Display selection. It can be #NVAPI_DEFAULT_HANDLE or a handle enumerated from +//! NvAPI_EnumNVidiaDisplayHandle(). +//! \param [out] pTargetViews Array of supported views. Can be NULL to retrieve the pViewCount first. +//! \param [in,out] pViewCount Count of supported views. +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Miscellaneous error occurred +//! \retval NVAPI_INVALID_ARGUMENT Invalid input parameter. +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetSupportedViews(NvDisplayHandle hNvDisplay, NV_TARGET_VIEW_MODE *pTargetViews, NvU32 *pViewCount); + + +//! SUPPORTED OS: Windows 10 and higher +//! +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_GetDisplayIdByDisplayName +// +//! DESCRIPTION: This API retrieves the Display Id of a given display by +//! display name. The display must be active to retrieve the +//! displayId. In the case of clone mode or Surround gaming, +//! 
the primary or top-left display will be returned.
+//!
+//! \param [in] displayName Name of display (Eg: "\\DISPLAY1") to
+//! retrieve the displayId for.
+//! \param [out] displayId Display ID of the requested display.
+//!
+//! \retval ::NVAPI_OK: The Display ID was successfully returned.
+//! \retval ::NVAPI_INVALID_ARGUMENT: One or more args passed in are invalid.
+//! \retval ::NVAPI_API_NOT_INTIALIZED: The NvAPI API needs to be initialized first
+//! \retval ::NVAPI_NO_IMPLEMENTATION: This entrypoint not available
+//! \retval ::NVAPI_ERROR: Miscellaneous error occurred
+//!
+//! \ingroup dispcontrol
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DISP_GetDisplayIdByDisplayName(const char *displayName, NvU32* displayId);
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME: NvAPI_DISP_GetDisplayConfig
+//
+//! DESCRIPTION: This API lets caller retrieve the current global display
+//! configuration.
+//! USAGE: The caller might have to call this three times to fetch all the required configuration details as follows:
+//! First Pass: Caller should Call NvAPI_DISP_GetDisplayConfig() with pathInfo set to NULL to fetch pathInfoCount.
+//! Second Pass: Allocate memory for pathInfo with respect to the number of pathInfoCount(from First Pass) to fetch
+//! targetInfoCount. If sourceModeInfo is needed allocate memory or it can be initialized to NULL.
+//! Third Pass(Optional, only required if target information is required): Allocate memory for targetInfo with respect
+//! to number of targetInfoCount(from Second Pass).
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \param [in,out] pathInfoCount Number of elements in pathInfo array, returns number of valid topologies, this cannot be null.
+//! \param [in,out] pathInfo Array of path information
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status. 
If there are return error codes with +//! specific meaning for this API, they are listed below. +//! +//! \retval NVAPI_INVALID_ARGUMENT - Invalid input parameter. Following can be the reason for this return value: +//! -# pathInfoCount is NULL. +//! -# *pathInfoCount is 0 and pathInfo is not NULL. +//! -# *pathInfoCount is not 0 and pathInfo is NULL. +//! \retval NVAPI_DEVICE_BUSY - ModeSet has not yet completed. Please wait and call it again. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetDisplayConfig(__inout NvU32 *pathInfoCount, __out_ecount_full_opt(*pathInfoCount) NV_DISPLAYCONFIG_PATH_INFO *pathInfo); + + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_SetDisplayConfig +// +// +//! DESCRIPTION: This API lets caller apply a global display configuration +//! across multiple GPUs. +//! +//! If all sourceIds are zero, then NvAPI will pick up sourceId's based on the following criteria : +//! - If user provides sourceModeInfo then we are trying to assign 0th sourceId always to GDIPrimary. +//! This is needed since active windows always moves along with 0th sourceId. +//! - For rest of the paths, we are incrementally assigning the sourceId per adapter basis. +//! - If user doesn't provide sourceModeInfo then NVAPI just picks up some default sourceId's in incremental order. +//! Note : NVAPI will not intelligently choose the sourceIDs for any configs that does not need a modeset. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pathInfoCount Number of supplied elements in pathInfo +//! \param [in] pathInfo Array of path information +//! \param [in] flags A bitwise OR of supported flags from NV_DISPLAYCONFIG_FLAGS. +//! +//! \retval ::NVAPI_OK - completed request +//! \retval ::NVAPI_API_NOT_INTIALIZED - NVAPI not initialized +//! 
\retval ::NVAPI_ERROR - miscellaneous error occurred +//! \retval ::NVAPI_INVALID_ARGUMENT - Invalid input parameter. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_SetDisplayConfig(__in NvU32 pathInfoCount, __in_ecount(pathInfoCount) NV_DISPLAYCONFIG_PATH_INFO* pathInfo, __in NvU32 flags); + + + +//! \ingroup dispcontrol +//! @{ +typedef struct _NV_EDID_DATA_V1 +{ + NvU32 version; //!< Structure version. + NvU8 *pEDID; //!< Pointer to EDID data. + NvU32 sizeOfEDID; //!< Size of EDID data. +} NV_EDID_DATA_V1; + +typedef struct _NV_EDID_DATA_V2 +{ + NvU32 version; //!< Structure version. + NvU8 *pEDID; //!< Pointer to EDID data. + NvU32 sizeOfEDID; //!< Size of EDID data. + NvU32 reserved[8]; //!< Reserved for future use. +} NV_EDID_DATA_V2; + +typedef NV_EDID_DATA_V2 NV_EDID_DATA; +#define NV_EDID_DATA_VER1 MAKE_NVAPI_VERSION(NV_EDID_DATA_V1, 1) +#define NV_EDID_DATA_VER2 MAKE_NVAPI_VERSION(NV_EDID_DATA_V2, 2) + +#define NV_EDID_DATA_VER NV_EDID_DATA_VER2 +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_GetEdidData +// +//! \code +//! DESCRIPTION: This API returns the EDID data for the specified display Id. +//! (NvAPI_GPU_GetConnectedDisplayIds() can be used to get the DisplayIds). +//! +//! USAGE: The caller has to call this API two times to fetch the required details as follows: +//! First Pass : Caller should call NvAPI_DISP_GetEdidData() with pEdidParams->pEDID set to NULL, +//! to get the size of the EDID buffer in pEdidParams->sizeOfEDID. +//! Second Pass: Allocate memory for the EDID buffer of the size - pEdidParams->sizeOfEDID, +//! and call the API again to get the EDID buffer populated. +//! +//! +//! \param [in] displayId - NVIDIA Display ID +//! \param [inout] pEdidParams - Pointer to the structure that contains - pointer to EDID buffer and its size +//! 
\param [inout] pFlag           - The type of EDID to be retrieved (IN). +//!                                  To only retrieve the EDID type, the user should send pEdidParams->pEDID as NULL and +//!                                  pEdidParams->sizeOfEDID as 0. +//! +//! SUPPORTED OS:  Windows 10 and higher +//! +//! +//! \since Release: 400 +//! +//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. +//!          If there are return error codes with specific meaning for this API, they are listed below. +//!          NVAPI_INSUFFICIENT_BUFFER: Reallocate buffer with pEdidParams->sizeOfEDID and call again to get complete data. +//!                                     In this case pEdidParams->pEDID contains undefined data. +//!                                     This error occurs only when pEdidParams->pEDID is present. +//! \endcode +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetEdidData(__in NvU32 displayId, __inout NV_EDID_DATA *pEdidParams, __inout NV_EDID_FLAG *pFlag); +//! @} + +typedef struct _NV_GET_ADAPTIVE_SYNC_DATA_V1 +{ +    NvU32  version ;                  //!< [in]    structure version +    NvU32  maxFrameInterval;          //!< [out]   maximum frame interval in micro seconds as set previously using NvAPI_DISP_SetAdaptiveSyncData function. +                                      //!<         If default values from EDID are used, this parameter returns 0. +    NvU32  bDisableAdaptiveSync : 1;  //!< [out]   Indicates if adaptive sync is disabled on the display. +    NvU32  bDisableFrameSplitting : 1; //!< [out]  Indicates if frame splitting is disabled on the display. +    NvU32  reserved : 30;             //!<         reserved for future use. +    NvU32  lastFlipRefreshCount;      //!< [out]   Number of times the last flip was shown on the screen +    NvU64  lastFlipTimeStamp;         //!< [out]   Timestamp for the latest flip on the screen +    NvU32  reservedEx[4];             //!<         reserved for future use.
+} NV_GET_ADAPTIVE_SYNC_DATA_V1; + +#define NV_GET_ADAPTIVE_SYNC_DATA_VER1 MAKE_NVAPI_VERSION(NV_GET_ADAPTIVE_SYNC_DATA_V1,1) +#define NV_GET_ADAPTIVE_SYNC_DATA_VER NV_GET_ADAPTIVE_SYNC_DATA_VER1 + +typedef NV_GET_ADAPTIVE_SYNC_DATA_V1 NV_GET_ADAPTIVE_SYNC_DATA; + +/////////////////////////////////////////////////////////////////////////////// +// +//! \fn NvAPI_DISP_GetAdaptiveSyncData(__in NvU32 displayId, __inout NV_GET_ADAPTIVE_SYNC_DATA *pAdaptiveSyncData) +//! \code +//! DESCRIPTION: This function is used to get data for the Adaptive Sync Display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 415 +//! +//! \param [in] displayId - display id of the display +//! \param [inout] pAdaptiveSyncData - A pointer to NV_GET_ADAPTIVE_SYNC_DATA, containing the information about the values of parameters that are to be retrieved on given display. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +/////////////////////////////////////////////////////////////////////////////// +//! \ingroup dispcontrol +NVAPI_INTERFACE NvAPI_DISP_GetAdaptiveSyncData(__in NvU32 displayId, __inout NV_GET_ADAPTIVE_SYNC_DATA *pAdaptiveSyncData); + +typedef struct _NV_SET_ADAPTIVE_SYNC_DATA_V1 +{ + NvU32 version ; //!< [in] structure version + NvU32 maxFrameInterval; //!< [in] maximum frame interval in micro seconds. + //!< If maxFrameInterval is sent as 0, default values from EDID will be used. + NvU32 bDisableAdaptiveSync : 1; //!< [in] Indicates if adaptive sync is disabled on the display. + NvU32 bDisableFrameSplitting : 1; //!< [in] Indicates if Frame Splitting should be disabled. + NvU32 reserved : 30; //!< reserved for future use. + NvU32 reserved1; //!< Ensure the alignment is 8 bytes and reserve it for future use. + NvU64 maxFrameIntervalNs; //!< [in] maximum frame interval in nano seconds. 
+ //!< If maxFrameIntervalNs is sent as 0, default values from EDID will be used. + NvU32 reservedEx[4]; //!< reserved for future use. +} NV_SET_ADAPTIVE_SYNC_DATA_V1; + +#define NV_SET_ADAPTIVE_SYNC_DATA_VER1 MAKE_NVAPI_VERSION(NV_SET_ADAPTIVE_SYNC_DATA_V1,1) +#define NV_SET_ADAPTIVE_SYNC_DATA_VER NV_SET_ADAPTIVE_SYNC_DATA_VER1 + +typedef NV_SET_ADAPTIVE_SYNC_DATA_V1 NV_SET_ADAPTIVE_SYNC_DATA; + +/////////////////////////////////////////////////////////////////////////////// +// +//! \fn NvAPI_DISP_SetAdaptiveSyncData(__in NvU32 displayId, __in NV_SET_ADAPTIVE_SYNC_DATA *pAdaptiveSyncData) +//! \code +//! DESCRIPTION: This function is used to set data for Adaptive Sync Display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 415 +//! +//! \param [in] displayId - display id of the display +//! \param [in] pAdaptiveSyncData - A pointer to NV_SET_ADAPTIVE_SYNC_DATA, containing the information about the values of parameters that are to be set on given display. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +/////////////////////////////////////////////////////////////////////////////// +//! \ingroup dispcontrol +NVAPI_INTERFACE NvAPI_DISP_SetAdaptiveSyncData(__in NvU32 displayId, __in NV_SET_ADAPTIVE_SYNC_DATA *pAdaptiveSyncData); + + +typedef struct _NV_GET_VIRTUAL_REFRESH_RATE_DATA_V1 +{ + NvU32 version; //!< [in] structure version + NvU32 frameIntervalUs; //!< [out] frame interval in micro seconds if Virtual RR is currently applied + NvU32 rrx1k; //!< [out] Refresh Rate * 1000 + NvU32 bIsGamingVrr; //!< [out] If the content is Gaming + NvU32 reservedEx[6]; //!< reserved for future use. 
+} NV_GET_VIRTUAL_REFRESH_RATE_DATA_V1; + +typedef struct _NV_GET_VIRTUAL_REFRESH_RATE_DATA_V2 +{ + NvU32 version; //!< [in] structure version + NvU32 frameIntervalUs; //!< [out] frame interval in micro seconds if Virtual RR is currently applied. This variable is deprecated in V2 and should not be used. + NvU32 rrx1k; //!< [out] Refresh Rate * 1000 + NvU32 bIsGamingVrr; //!< [out] If the content is Gaming + NvU64 frameIntervalNs; //!< [out] frame interval in nano seconds if Virtual RR is currently applied + NvU32 reservedEx[4]; //!< reserved for future use. +} NV_GET_VIRTUAL_REFRESH_RATE_DATA_V2; + +#define NV_GET_VIRTUAL_REFRESH_RATE_DATA_VER1 MAKE_NVAPI_VERSION(_NV_GET_VIRTUAL_REFRESH_RATE_DATA_V1,1) +#define NV_GET_VIRTUAL_REFRESH_RATE_DATA_VER2 MAKE_NVAPI_VERSION(_NV_GET_VIRTUAL_REFRESH_RATE_DATA_V2,2) +#define NV_GET_VIRTUAL_REFRESH_RATE_DATA_VER NV_GET_VIRTUAL_REFRESH_RATE_DATA_VER2 + +typedef NV_GET_VIRTUAL_REFRESH_RATE_DATA_V2 NV_GET_VIRTUAL_REFRESH_RATE_DATA; + +/////////////////////////////////////////////////////////////////////////////// +// +//! \fn NvAPI_DISP_GetVirtualRefreshRateData(__in NvU32 displayId, __inout NV_GET_VIRTUAL_REFRESH_RATE_DATA *pVirtualRefreshRateData) +//! \code +//! DESCRIPTION: This function is used to get Virtual Refresh Rate data for a VRR Capable Display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 510 +//! +//! \param [in] displayId - display id of the display +//! \param [inout] pVirtualRefreshRateData - A pointer to NV_GET_VIRTUAL_REFRESH_RATE_DATA, containing the information about the values of parameters that are to be retrieved on given display. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +/////////////////////////////////////////////////////////////////////////////// +//! 
\ingroup dispcontrol +NVAPI_INTERFACE NvAPI_DISP_GetVirtualRefreshRateData(__in NvU32 displayId, __inout NV_GET_VIRTUAL_REFRESH_RATE_DATA *pVirtualRefreshRateData); + +typedef struct _NV_SET_VIRTUAL_REFRESH_RATE_DATA_V1 +{ + NvU32 version; //!< [in] structure version + NvU32 frameIntervalUs; //!< [in] frame interval in micro seconds if Virtual RR is currently applied + NvU32 rrx1k; //!< [in] Refresh Rate * 1000 + NvU32 bIsGamingVrr; //!< [in] If the content is Gaming + NvU32 reservedEx[6]; //!< reserved for future use. +} NV_SET_VIRTUAL_REFRESH_RATE_DATA_V1; + +typedef struct _NV_SET_VIRTUAL_REFRESH_RATE_DATA_V2 +{ + NvU32 version; //!< [in] structure version + NvU32 frameIntervalUs; //!< [in] frame interval in micro seconds if Virtual RR is currently applied. This variable is deprecated in V2 and should not be used. + NvU32 rrx1k; //!< [in] Refresh Rate * 1000 + NvU32 bIsGamingVrr; //!< [in] If the content is Gaming + NvU64 frameIntervalNs; //!< [in] frame interval in nano seconds if Virtual RR is currently applied + NvU32 reservedEx[4]; //!< reserved for future use. +} NV_SET_VIRTUAL_REFRESH_RATE_DATA_V2; + +#define NV_SET_VIRTUAL_REFRESH_RATE_DATA_VER1 MAKE_NVAPI_VERSION(_NV_SET_VIRTUAL_REFRESH_RATE_DATA_V1,1) +#define NV_SET_VIRTUAL_REFRESH_RATE_DATA_VER2 MAKE_NVAPI_VERSION(_NV_SET_VIRTUAL_REFRESH_RATE_DATA_V2,2) +#define NV_SET_VIRTUAL_REFRESH_RATE_DATA_VER NV_SET_VIRTUAL_REFRESH_RATE_DATA_VER2 + +typedef NV_SET_VIRTUAL_REFRESH_RATE_DATA_V2 NV_SET_VIRTUAL_REFRESH_RATE_DATA; + +/////////////////////////////////////////////////////////////////////////////// +// +//! \fn NvAPI_DISP_SetVirtualRefreshRateData(__in NvU32 displayId, __in NV_SET_VIRTUAL_REFRESH_RATE_DATA *pVirtualRefreshRateData) +//! \code +//! DESCRIPTION: This function is used to set Virtual Refresh Rate data for a VRR Capable Display. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 510 +//! +//! \param [in] displayId - display id of the display +//! 
\param [in]       pVirtualRefreshRateData  - A pointer to NV_SET_VIRTUAL_REFRESH_RATE_DATA, containing the information about the values of parameters that are to be set on given display. +//! +//! \return    This API can return any of the error codes enumerated in +//!            #NvAPI_Status. If there are return error codes with specific +//!            meaning for this API, they are listed below. +//! +//! \endcode +/////////////////////////////////////////////////////////////////////////////// +//! \ingroup dispcontrol +NVAPI_INTERFACE NvAPI_DISP_SetVirtualRefreshRateData(__in NvU32 displayId, __in NV_SET_VIRTUAL_REFRESH_RATE_DATA *pVirtualRefreshRateData); + +//! \ingroup dispcontrol +typedef struct +{ +    NvU32  version;    //!< [in] Structure version +    NvU32  displayId;  //!< [in] Monitor Identifier to be set +    NvU32  reserved;   //!< Reserved for future use without adding versioning +} NV_SET_PREFERRED_STEREO_DISPLAY_V1; + +//! \ingroup dispcontrol +typedef NV_SET_PREFERRED_STEREO_DISPLAY_V1 NV_SET_PREFERRED_STEREO_DISPLAY; + +//! \ingroup dispcontrol +#define NV_SET_PREFERRED_STEREO_DISPLAY_VER1  MAKE_NVAPI_VERSION(NV_SET_PREFERRED_STEREO_DISPLAY_V1,1) + +//! \ingroup dispcontrol +#define NV_SET_PREFERRED_STEREO_DISPLAY_VER   NV_SET_PREFERRED_STEREO_DISPLAY_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME:   NvAPI_DISP_SetPreferredStereoDisplay +// +//! DESCRIPTION:    Specifies a display output that drives the 3pin DIN output signal +//!                 in a workstation stereo system environment. +//!                 If display output is specified as a displayId of 0, the preferred +//!                 stereo display target is reset to the driver default selection. +//! +//! SUPPORTED OS:  Windows 10 and higher +//! +//! \since Release: 470 +//! +//! \param [in]  pPreferredStereoDisplay  Pointer to a NV_SET_PREFERRED_STEREO_DISPLAY structure +//! +//! \retval ::NVAPI_OK                          completed request +//! \retval ::NVAPI_ERROR                       miscellaneous error occurred +//!
\retval ::NVAPI_INVALID_ARGUMENT Invalid input parameter. +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE - The application will require Administrator privileges to access this API. +//! The application can be elevated to a higher permission level by selecting "Run as Administrator". +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_SetPreferredStereoDisplay(__in NV_SET_PREFERRED_STEREO_DISPLAY *pPreferredStereoDisplay); + +//! \ingroup dispcontrol +typedef struct +{ + NvU32 version; //!< [in] Structure version + NvU32 displayId; //!< [out] The queried stereo display + NvU32 reserved; //!< Reserved for future use without adding versioning +} NV_GET_PREFERRED_STEREO_DISPLAY_V1; + +//! \ingroup dispcontrol +typedef NV_GET_PREFERRED_STEREO_DISPLAY_V1 NV_GET_PREFERRED_STEREO_DISPLAY; + +//! \ingroup dispcontrol +#define NV_GET_PREFERRED_STEREO_DISPLAY_VER1 MAKE_NVAPI_VERSION(NV_GET_PREFERRED_STEREO_DISPLAY_V1,1) + +//! \ingroup dispcontrol +#define NV_GET_PREFERRED_STEREO_DISPLAY_VER NV_GET_PREFERRED_STEREO_DISPLAY_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_DISP_GetPreferredStereoDisplay +// +//! DESCRIPTION: Queries the displayId of the display output driving the 3pin +//! DIN stereo signal, if any. +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 470 +//! +//! \param [inout] pPreferredStereoDisplay Pointer to a NV_GET_PREFERRED_STEREO_DISPLAY structure +//! +//! \retval ::NVAPI_OK completed request +//! \retval ::NVAPI_ERROR miscellaneous error occurred +//! \retval ::NVAPI_INVALID_ARGUMENT Invalid input parameter. +//! \retval ::NVAPI_INVALID_POINTER An invalid pointer was passed as an argument (probably NULL). +//! 
\ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetPreferredStereoDisplay(__inout NV_GET_PREFERRED_STEREO_DISPLAY *pPreferredStereoDisplay); + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) + +typedef struct _NV_MANAGED_DEDICATED_DISPLAY_INFO +{ + NvU32 version; //!< [in] Version of this structure. + NvU32 displayId; //!< [out] DisplayId. + NvU32 isAcquired : 1; //!< [out] If bit is set, this display has been acquired by another process through NvAPI_DISP_AcquireDedicatedDisplay(). + NvU32 isMosaic : 1; //!< [out] If bit is set, this display represents a Mosaic grid. + NvU32 reserved : 30; //!< [out] Reserved for future use without adding versioning. +} NV_MANAGED_DEDICATED_DISPLAY_INFO_V1; + +#define NV_MANAGED_DEDICATED_DISPLAY_INFO_VER1 MAKE_NVAPI_VERSION(NV_MANAGED_DEDICATED_DISPLAY_INFO_V1,1) +#define NV_MANAGED_DEDICATED_DISPLAY_INFO_VER NV_MANAGED_DEDICATED_DISPLAY_INFO_VER1 + +typedef NV_MANAGED_DEDICATED_DISPLAY_INFO_V1 NV_MANAGED_DEDICATED_DISPLAY_INFO; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_GetNvManagedDedicatedDisplays +// +//! DESCRIPTION: This API returns the count of dedicated displays managed by +//! the Nvidia driver, and specific driver info of each display. +//! This API does not return VR displays. +//! +//! HOW TO USE: 1) First make a call NvAPI_DISP_GetNvManagedDedicatedDisplays to get the number of dedicated displays managed by the Nvidia driver +//! by passing pDedicatedDisplays as NULL. +//! On call success: +//! 2) Allocate memory based on pDisplayCount, then call NvAPI_DISP_GetNvManagedDedicatedDisplays to populate pDedicatedDisplays. +//! +//! \since Release: 495 +//! +//! \param [in/out] pDedicatedDisplayCount The count of dedicated displays managed by the Nvidia driver. +//! 
\param [in/out] pDedicatedDisplays Pointer to an array of NV_MANAGED_DEDICATED_DISPLAY_INFO data structure. +//! +//! \retval ::NVAPI_OK The call succeeded. +//! \retval ::NVAPI_ERROR The call failed. +//! \retval ::NVAPI_NO_IMPLEMENTATION The API is not implemented in current driver. +//! \retval ::NVAPI_OUT_OF_MEMORY Could not allocate sufficient memory to complete the call. +//! \retval ::NVAPI_INVALID_POINTER An invalid pointer was passed as an argument. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not initialized. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of NV_MANAGED_DEDICATED_DISPLAY_INFO structure is invalid. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetNvManagedDedicatedDisplays(__inout NvU32* pDedicatedDisplayCount, __inout NV_MANAGED_DEDICATED_DISPLAY_INFO* pDedicatedDisplays); +#endif // defined(__cplusplus) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_AcquireDedicatedDisplay +// +//! DESCRIPTION: This API requests exclusive access to the specified dedicated +//! display managed by the Nvidia driver. If the call succeeds, a handle +//! is returned which could be opened as a DisplaySource. +//! The call fails if the display is not managed by the Nvidia driver. +//! The call fails if the display has already been acquired by another process. +//! +//! \since Release: 495 +//! +//! \param [in] displayId Display ID of the dedicated display to be acquired. +//! \param [in/out] pDisplaySourceHandle Pointer to memory that receives the shared handle to a DisplaySource. +//! +//! \retval ::NVAPI_OK The call succeeded. +//! \retval ::NVAPI_ERROR The call failed. +//! \retval ::NVAPI_NO_IMPLEMENTATION The API not implemented. +//! \retval ::NVAPI_INVALID_POINTER An invalid pointer was passed as an argument. 
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized. +//! \retval ::NVAPI_INVALID_DISPLAY_ID An invalid displayId was passed as an argument. +//! \retval ::NVAPI_UNREGISTERED_RESOURCE The display is not managed by the Nvidia driver. +//! \retval ::NVAPI_RESOURCE_IN_USE The display has already been acquired by another process. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_AcquireDedicatedDisplay(__in NvU32 displayId, __inout NvU64* pDisplaySourceHandle); +#endif // defined(__cplusplus) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_ReleaseDedicatedDisplay +// +//! DESCRIPTION: This API releases the exclusive ownership of a specific dedicated +//! display previously acquired by the process. +//! +//! \since Release: 495 +//! +//! \param [in] displayId Display ID of the dedicated display to be released. +//! +//! \retval ::NVAPI_OK The call succeeded. +//! \retval ::NVAPI_ERROR The call failed. +//! \retval ::NVAPI_NO_IMPLEMENTATION The API not implemented. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized. +//! \retval ::NVAPI_UNREGISTERED_RESOURCE The display is not managed by the Nvidia driver. +//! \retval ::NVAPI_DDISPLAY_NOT_ACQUIRED The display is not acquired by the process. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_ReleaseDedicatedDisplay(__in NvU32 displayId); +#endif // defined(__cplusplus) + +//! SUPPORTED OS: Windows 11 and higher +//! +#if defined(__cplusplus) + +typedef struct _NV_MANAGED_DEDICATED_DISPLAY_METADATA +{ + NvU32 version; //!< [in] Version of this structure. + NvU32 displayId; //!< [in] DisplayId to identify the display connector the metadata operation is requested for. 
+ NvU32 bSetPosition : 1; //!< [in] Set call: 1 in case the information in variables/fields "positionX" and "positionY" should be stored as metadata. 0 otherwise. + NvU32 bRemovePosition : 1; //!< [in] Set call: 1 in case the stored positionX and positionY metadata should be set to 'not defined', N/A. 0 otherwise. + NvU32 bPositionIsAvailable : 1; //!< [out] Query call: 1 in case the information in variables/fields "positionX" and "positionY" is valid (has been set before). 0 otherwise. + NvU32 bSetName : 1; //!< [in] Set call: 1 in case the information in variable/field "name" should be stored as metadata. 0 otherwise. + NvU32 bRemoveName : 1; //!< [in] Set call: 1 in case the stored name metadata should be set to 'not defined',N/A. 0 otherwise. + NvU32 bNameIsAvailable : 1; //!< [out] Query call: 1 in case the information in variable/field "name" is valid (has been set before). 0 otherwise. + NvU32 reserved : 26; //!< [in][out] Reserved for future use without adding versioning. + NvS32 positionX; //!< [in][out] Metadata for the virtual horizontal position for the display connector specified by displayId. + NvS32 positionY; //!< [in][out] Metadata for the virtual vertical position for the display connector specified by displayId. + NvAPI_ShortString name; //!< [in][out] Metadata for the virtual name of for the display connector specified by displayId. + //!< Valid characters are in the range of 32 ' ' (space) to 126 '~' (both included). +} NV_MANAGED_DEDICATED_DISPLAY_METADATA_V1; + +#define NV_MANAGED_DEDICATED_DISPLAY_METADATA_VER1 MAKE_NVAPI_VERSION(NV_MANAGED_DEDICATED_DISPLAY_METADATA_V1,1) +#define NV_MANAGED_DEDICATED_DISPLAY_METADATA_VER NV_MANAGED_DEDICATED_DISPLAY_METADATA_VER1 + +typedef NV_MANAGED_DEDICATED_DISPLAY_METADATA_V1 NV_MANAGED_DEDICATED_DISPLAY_METADATA; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_GetNvManagedDedicatedDisplayMetadata +// +//! 
DESCRIPTION: This API returns metadata which has been set for the display connector in question. +//! Main use case would be to query the data for an Nvidia managed dedicated display. +//! The function will work for any valid displayId though. +//! +//! \since Release: 550 +//! +//! \param [in/out] pDedicatedDisplayMetadata Data structure containing input and output data. +//! +//! \retval ::NVAPI_OK The call succeeded. +//! \retval ::NVAPI_ERROR The call failed. +//! \retval ::NVAPI_NO_IMPLEMENTATION The API is not implemented in current driver. +//! \retval ::NVAPI_NOT_SUPPORTED The API is not supported on the current operating system or gpu. +//! \retval ::NVAPI_OUT_OF_MEMORY There wasn't sufficient memory to complete the call. +//! \retval ::NVAPI_INVALID_POINTER An invalid pointer was passed as an argument. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not initialized. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the NV_MANAGED_DEDICATED_DISPLAY_METADATA structure is invalid. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetNvManagedDedicatedDisplayMetadata(__inout NV_MANAGED_DEDICATED_DISPLAY_METADATA* pDedicatedDisplayMetadata); +#endif // defined(__cplusplus) + +//! SUPPORTED OS: Windows 11 and higher +//! +#if defined(__cplusplus) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_SetNvManagedDedicatedDisplayMetadata +// +//! DESCRIPTION: This API allows to set metadata for the display connector in question. +//! Main use case would be to set the data for an Nvidia managed dedicated display. +//! The function will work for any valid displayId though. +//! +//! \since Release: 550 +//! +//! \param [in/out] pDedicatedDisplayMetadata Data structure containing input and output data. +//! +//! \retval ::NVAPI_OK The call succeeded. +//! \retval ::NVAPI_ERROR The call failed. +//! 
\retval ::NVAPI_NO_IMPLEMENTATION The API is not implemented in current driver. +//! \retval ::NVAPI_NOT_SUPPORTED The API is not supported on the current operating system or gpu. +//! \retval ::NVAPI_OUT_OF_MEMORY There wasn't sufficient memory to complete the call. +//! \retval ::NVAPI_INVALID_POINTER An invalid pointer was passed as an argument. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not initialized. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the NV_MANAGED_DEDICATED_DISPLAY_METADATA structure is invalid. +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE The caller doesn't have the required administrator privileges to access this API. +//! \retval ::NVAPI_INVALID_ARGUMENT Characters in pDedicatedDisplayMetadata->name are out of the allowed range. +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_SetNvManagedDedicatedDisplayMetadata(__inout NV_MANAGED_DEDICATED_DISPLAY_METADATA* pDedicatedDisplayMetadata); +#endif // defined(__cplusplus) + + +#if defined (_WINNT_) + +//! \ingroup dispcontrol + +typedef struct _NV_DISPLAY_ID_INFO_DATA_V1 +{ + NvU32 version; //!< [in] Structure version + LUID adapterId; //!< [out] Locally unique ID (LUID) of the display adapter on which the given display is present. + NvU32 targetId; //!< [out] The target identifier of the given display. This is also called AdapterRelativeId. + NvU32 reserved[4]; //!< Reserved for future use. +} NV_DISPLAY_ID_INFO_DATA_V1; + +#define NV_DISPLAY_ID_INFO_DATA_VER1 MAKE_NVAPI_VERSION(NV_DISPLAY_ID_INFO_DATA_V1,1) +#define NV_DISPLAY_ID_INFO_DATA_VER NV_DISPLAY_ID_INFO_DATA_VER1 + +typedef NV_DISPLAY_ID_INFO_DATA_V1 NV_DISPLAY_ID_INFO_DATA; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Disp_GetDisplayIdInfo +// +//! DESCRIPTION: This API returns information related to the given displayId. +//! 
It returns adapterId and targetId (AdapterRelativeId) corresponding to the given displayId. +//! If the displayId is part of a display grid (Mosaic/Surround), then every displayId that is part of the same display grid +//! outputs the same (adapterId, targetId) pair, and no other displayId outputs this pair. +//! Otherwise, the (adapterId, targetId) pair is unique to this displayId. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 530 +//! +//! \param [in] displayId DisplayId of the display. +//! \param [inout] pDisplayIdInfoData Pointer to the NV_DISPLAY_ID_INFO_DATA structure. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_GetDisplayIdInfo(__in NvU32 displayId, __inout NV_DISPLAY_ID_INFO_DATA* pDisplayIdInfoData); + +#endif + + +//! \ingroup dispcontrol + +#if defined (_WINNT_) + +typedef struct _NV_TARGET_INFO_DATA_V1 +{ + NvU32 version; //!< [in] Structure version + LUID adapterId; //!< [in] Locally unique ID (LUID) of the display adapter on which the target is present. + NvU32 targetId; //!< [in] The target identifier. This is also called AdapterRelativeId. + NvU32 displayId[NVAPI_MAX_DISPLAYS]; //!< [out] An array of displayIds corresponding to the input adapterId and targetId. + //!< If the input (targetId, adapterId) pair is a display grid (Mosaic/Surround), + //!< then the output contains the displayId of every display that is part of the display grid. + //!< Otherwise, it contains exactly one displayId. + //!< These displayId values are unique to this (targetId, adapterId) pair. + NvU32 displayIdCount; //!< [out] The number of displays returned in displayId array. + NvU32 reserved[4]; //!< Reserved for future use. 
+} NV_TARGET_INFO_DATA_V1; + +#define NV_TARGET_INFO_DATA_VER1 MAKE_NVAPI_VERSION(NV_TARGET_INFO_DATA_V1,1) +#define NV_TARGET_INFO_DATA_VER NV_TARGET_INFO_DATA_VER1 + +typedef NV_TARGET_INFO_DATA_V1 NV_TARGET_INFO_DATA; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Disp_GetDisplayIdsFromTarget +// +//! DESCRIPTION: This API returns displayId(s) corresponding to the given target. +//! If the input (targetId, adapterId) pair is a display grid (Mosaic/Surround), then the output contains the displayId of every display +//! that is part of the display grid. Otherwise, it contains exactly one displayId. +//! These displayId values are unique to this (targetId, adapterId) pair. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 530 +//! +//! \param [inout] pTargetInfoData Pointer to the NV_TARGET_INFO_DATA structure. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_GetDisplayIdsFromTarget(__inout NV_TARGET_INFO_DATA* pTargetInfoData); + +#endif + + +//! \ingroup dispcontrol + +typedef struct _NV_GET_VRR_INFO_V1 +{ + NvU32 version; //!< [in] Structure version + NvU32 bIsVRREnabled : 1; //!< [out] Set if VRR Mode is currently active on given display. + NvU32 bIsVRRPossible : 1; //!< [out] Set to true if VRR Mode is possible on the given display. + NvU32 bIsVRRRequested : 1; //!< [out] Set to true if VRR Mode is requested for the given display. + NvU32 bIsVRRIndicatorEnabled : 1; //!< [out] Set to true if the VRR indicator is enabled. This can be done through the NVIDIA Control Panel. + NvU32 bIsDisplayInVRRMode : 1; //!< [out] Set to true if the display is in VRR Mode. + NvU32 reserved : 27; //!< Reserved for future use. 
+ NvU32 reservedEx[4]; //!< Reserved for future use +} NV_GET_VRR_INFO_V1; + +#define NV_GET_VRR_INFO_VER1 MAKE_NVAPI_VERSION(NV_GET_VRR_INFO_V1,1) +#define NV_GET_VRR_INFO_VER NV_GET_VRR_INFO_VER1 + +typedef NV_GET_VRR_INFO_V1 NV_GET_VRR_INFO; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Disp_GetVRRInfo +// +//! DESCRIPTION: This API returns Variable Refresh Rate(VRR) information for the given display ID. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 525 +//! +//! \param [inout] pVrrInfo Pointer to the NV_GET_VRR_INFO structure. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Disp_GetVRRInfo(__in NvU32 displayId, __inout NV_GET_VRR_INFO *pVrrInfo); + + + +//////////////////////////////////////////////////////////////////////////////////////// +// +// MOSAIC allows a multi display target output scanout on a single source. +// +// SAMPLE of MOSAIC 1x4 topo with 8 pixel horizontal overlap +// +//+-------------------------++-------------------------++-------------------------++-------------------------+ +//| || || || | +//| || || || | +//| || || || | +//| DVI1 || DVI2 || DVI3 || DVI4 | +//| || || || | +//| || || || | +//| || || || | +//| || || || | +//+-------------------------++-------------------------++-------------------------++-------------------------+ + + +//! \addtogroup mosaicapi +//! @{ + +#define NVAPI_MAX_MOSAIC_DISPLAY_ROWS 8 +#define NVAPI_MAX_MOSAIC_DISPLAY_COLUMNS 8 +// +// These bits are used to describe the validity of a topo. +// +#define NV_MOSAIC_TOPO_VALIDITY_VALID 0x00000000 //!< The topology is valid +#define NV_MOSAIC_TOPO_VALIDITY_MISSING_GPU 0x00000001 //!< Not enough SLI GPUs were found to fill the entire + //! topology. hPhysicalGPU will be 0 for these. 
+#define NV_MOSAIC_TOPO_VALIDITY_MISSING_DISPLAY     0x00000002  //!< Not enough displays were found to fill the entire
+                                                                //! topology. displayOutputId will be 0 for these.
+#define NV_MOSAIC_TOPO_VALIDITY_MIXED_DISPLAY_TYPES 0x00000004  //!< The topology is only possible with displays of the same
+                                                                //! NV_GPU_OUTPUT_TYPE. Check displayOutputIds to make
+                                                                //! sure they are all CRTs, or all DFPs.
+
+
+//
+//! This structure defines the topology details.
+typedef struct
+{
+    NvU32                version;              //!< Version of this structure
+    NvLogicalGpuHandle   hLogicalGPU;          //!< Logical GPU for this topology
+    NvU32                validityMask;         //!< 0 means topology is valid with the current hardware.
+                                               //! If not 0, inspect bits against NV_MOSAIC_TOPO_VALIDITY_*.
+    NvU32                rowCount;             //!< Number of displays in a row
+    NvU32                colCount;             //!< Number of displays in a column
+
+    struct
+    {
+        NvPhysicalGpuHandle hPhysicalGPU;      //!< Physical GPU to be used in the topology (0 if GPU missing)
+        NvU32               displayOutputId;   //!< Connected display target (0 if no display connected)
+        NvS32               overlapX;          //!< Pixels of overlap on left of target: (+overlap, -gap)
+        NvS32               overlapY;          //!< Pixels of overlap on top of target: (+overlap, -gap)
+
+    } gpuLayout[NVAPI_MAX_MOSAIC_DISPLAY_ROWS][NVAPI_MAX_MOSAIC_DISPLAY_COLUMNS];
+
+} NV_MOSAIC_TOPO_DETAILS;
+
+//! Macro for constructing the version field of NV_MOSAIC_TOPO_DETAILS
+#define NVAPI_MOSAIC_TOPO_DETAILS_VER         MAKE_NVAPI_VERSION(NV_MOSAIC_TOPO_DETAILS,1)
+
+
+//
+//! These values refer to the different types of Mosaic topologies that are possible. When
+//! getting the supported Mosaic topologies, you can specify one of these types to narrow down
+//! the returned list to only those that match the given type.
+typedef enum +{ + NV_MOSAIC_TOPO_TYPE_ALL, //!< All mosaic topologies + NV_MOSAIC_TOPO_TYPE_BASIC, //!< Basic Mosaic topologies + NV_MOSAIC_TOPO_TYPE_PASSIVE_STEREO, //!< Passive Stereo topologies + NV_MOSAIC_TOPO_TYPE_SCALED_CLONE, //!< Not supported at this time + NV_MOSAIC_TOPO_TYPE_PASSIVE_STEREO_SCALED_CLONE, //!< Not supported at this time + NV_MOSAIC_TOPO_TYPE_MAX, //!< Always leave this at end of the enum +} NV_MOSAIC_TOPO_TYPE; + + +// +//! This is a complete list of supported Mosaic topologies. +//! +//! Using a "Basic" topology combines multiple monitors to create a single desktop. +//! +//! Using a "Passive" topology combines multiples monitors to create a passive stereo desktop. +//! In passive stereo, two identical topologies combine - one topology is used for the right eye and the other identical //! topology (targeting different displays) is used for the left eye. \n +//! NOTE: common\inc\nvEscDef.h shadows a couple PASSIVE_STEREO enums. If this +//! enum list changes and effects the value of NV_MOSAIC_TOPO_BEGIN_PASSIVE_STEREO +//! please update the corresponding value in nvEscDef.h +typedef enum +{ + NV_MOSAIC_TOPO_NONE, + + // 'BASIC' topos start here + // + // The result of using one of these Mosaic topos is that multiple monitors + // will combine to create a single desktop. + // + NV_MOSAIC_TOPO_BEGIN_BASIC, + NV_MOSAIC_TOPO_1x2_BASIC = NV_MOSAIC_TOPO_BEGIN_BASIC, + NV_MOSAIC_TOPO_2x1_BASIC, + NV_MOSAIC_TOPO_1x3_BASIC, + NV_MOSAIC_TOPO_3x1_BASIC, + NV_MOSAIC_TOPO_1x4_BASIC, + NV_MOSAIC_TOPO_4x1_BASIC, + NV_MOSAIC_TOPO_2x2_BASIC, + NV_MOSAIC_TOPO_2x3_BASIC, + NV_MOSAIC_TOPO_2x4_BASIC, + NV_MOSAIC_TOPO_3x2_BASIC, + NV_MOSAIC_TOPO_4x2_BASIC, + NV_MOSAIC_TOPO_1x5_BASIC, + NV_MOSAIC_TOPO_1x6_BASIC, + NV_MOSAIC_TOPO_7x1_BASIC, + + // Add padding for 10 more entries. 6 will be enough room to specify every + // possible topology with 8 or fewer displays, so this gives us a little + // extra should we need it. 
+ NV_MOSAIC_TOPO_END_BASIC = NV_MOSAIC_TOPO_7x1_BASIC + 9, + + // 'PASSIVE_STEREO' topos start here + // + // The result of using one of these Mosaic topos is that multiple monitors + // will combine to create a single PASSIVE STEREO desktop. What this means is + // that there will be two topos that combine to create the overall desktop. + // One topo will be used for the left eye, and the other topo (of the + // same rows x cols), will be used for the right eye. The difference between + // the two topos is that different GPUs and displays will be used. + // + NV_MOSAIC_TOPO_BEGIN_PASSIVE_STEREO, // value shadowed in nvEscDef.h + NV_MOSAIC_TOPO_1x2_PASSIVE_STEREO = NV_MOSAIC_TOPO_BEGIN_PASSIVE_STEREO, + NV_MOSAIC_TOPO_2x1_PASSIVE_STEREO, + NV_MOSAIC_TOPO_1x3_PASSIVE_STEREO, + NV_MOSAIC_TOPO_3x1_PASSIVE_STEREO, + NV_MOSAIC_TOPO_1x4_PASSIVE_STEREO, + NV_MOSAIC_TOPO_4x1_PASSIVE_STEREO, + NV_MOSAIC_TOPO_2x2_PASSIVE_STEREO, + NV_MOSAIC_TOPO_END_PASSIVE_STEREO = NV_MOSAIC_TOPO_2x2_PASSIVE_STEREO + 4, + + + // + // Total number of topos. Always leave this at the end of the enumeration. + // + NV_MOSAIC_TOPO_MAX //! Total number of topologies. + +} NV_MOSAIC_TOPO; + + +// +//! This is a "topology brief" structure. It tells you what you need to know about +//! a topology at a high level. A list of these is returned when you query for the +//! supported Mosaic information. +//! +//! If you need more detailed information about the topology, call +//! NvAPI_Mosaic_GetTopoGroup() with the topology value from this structure. +typedef struct +{ + NvU32 version; //!< Version of this structure + NV_MOSAIC_TOPO topo; //!< The topology + NvU32 enabled; //!< 1 if topo is enabled, else 0 + NvU32 isPossible; //!< 1 if topo *can* be enabled, else 0 + +} NV_MOSAIC_TOPO_BRIEF; + +//! Macro for constructing the version field of NV_MOSAIC_TOPO_BRIEF +#define NVAPI_MOSAIC_TOPO_BRIEF_VER MAKE_NVAPI_VERSION(NV_MOSAIC_TOPO_BRIEF,1) + + +// +//! 
Basic per-display settings that are used in setting/getting the Mosaic mode +typedef struct _NV_MOSAIC_DISPLAY_SETTING_V1 +{ + NvU32 version; //!< Version of this structure + NvU32 width; //!< Per-display width + NvU32 height; //!< Per-display height + NvU32 bpp; //!< Bits per pixel + NvU32 freq; //!< Display frequency +} NV_MOSAIC_DISPLAY_SETTING_V1; + +typedef struct NV_MOSAIC_DISPLAY_SETTING_V2 +{ + NvU32 version; //!< Version of this structure + NvU32 width; //!< Per-display width + NvU32 height; //!< Per-display height + NvU32 bpp; //!< Bits per pixel + NvU32 freq; //!< Display frequency + NvU32 rrx1k; //!< Display frequency in x1k +} NV_MOSAIC_DISPLAY_SETTING_V2; + +typedef NV_MOSAIC_DISPLAY_SETTING_V2 NV_MOSAIC_DISPLAY_SETTING; + +//! Macro for constructing the version field of NV_MOSAIC_DISPLAY_SETTING +#define NVAPI_MOSAIC_DISPLAY_SETTING_VER1 MAKE_NVAPI_VERSION(NV_MOSAIC_DISPLAY_SETTING_V1,1) +#define NVAPI_MOSAIC_DISPLAY_SETTING_VER2 MAKE_NVAPI_VERSION(NV_MOSAIC_DISPLAY_SETTING_V2,2) +#define NVAPI_MOSAIC_DISPLAY_SETTING_VER NVAPI_MOSAIC_DISPLAY_SETTING_VER2 + + +// +// Set a reasonable max number of display settings to support +// so arrays are bound. +// +#define NV_MOSAIC_DISPLAY_SETTINGS_MAX 40 //!< Set a reasonable maximum number of display settings to support + //! so arrays are bound. + + +// +//! This structure is used to contain a list of supported Mosaic topologies +//! along with the display settings that can be used. 
+typedef struct _NV_MOSAIC_SUPPORTED_TOPO_INFO_V1 +{ + NvU32 version; //!< Version of this structure + NvU32 topoBriefsCount; //!< Number of topologies in below array + NV_MOSAIC_TOPO_BRIEF topoBriefs[NV_MOSAIC_TOPO_MAX]; //!< List of supported topologies with only brief details + NvU32 displaySettingsCount; //!< Number of display settings in below array + NV_MOSAIC_DISPLAY_SETTING_V1 displaySettings[NV_MOSAIC_DISPLAY_SETTINGS_MAX]; //!< List of per display settings possible + +} NV_MOSAIC_SUPPORTED_TOPO_INFO_V1; + +typedef struct _NV_MOSAIC_SUPPORTED_TOPO_INFO_V2 +{ + NvU32 version; //!< Version of this structure + NvU32 topoBriefsCount; //!< Number of topologies in below array + NV_MOSAIC_TOPO_BRIEF topoBriefs[NV_MOSAIC_TOPO_MAX]; //!< List of supported topologies with only brief details + NvU32 displaySettingsCount; //!< Number of display settings in below array + NV_MOSAIC_DISPLAY_SETTING_V2 displaySettings[NV_MOSAIC_DISPLAY_SETTINGS_MAX]; //!< List of per display settings possible + +} NV_MOSAIC_SUPPORTED_TOPO_INFO_V2; + +typedef NV_MOSAIC_SUPPORTED_TOPO_INFO_V2 NV_MOSAIC_SUPPORTED_TOPO_INFO; + +//! Macro forconstructing the version field of NV_MOSAIC_SUPPORTED_TOPO_INFO +#define NVAPI_MOSAIC_SUPPORTED_TOPO_INFO_VER1 MAKE_NVAPI_VERSION(NV_MOSAIC_SUPPORTED_TOPO_INFO_V1,1) +#define NVAPI_MOSAIC_SUPPORTED_TOPO_INFO_VER2 MAKE_NVAPI_VERSION(NV_MOSAIC_SUPPORTED_TOPO_INFO_V2,2) +#define NVAPI_MOSAIC_SUPPORTED_TOPO_INFO_VER NVAPI_MOSAIC_SUPPORTED_TOPO_INFO_VER2 + + +// +// Indices to use to access the topos array within the mosaic topology +#define NV_MOSAIC_TOPO_IDX_DEFAULT 0 + +#define NV_MOSAIC_TOPO_IDX_LEFT_EYE 0 +#define NV_MOSAIC_TOPO_IDX_RIGHT_EYE 1 +#define NV_MOSAIC_TOPO_NUM_EYES 2 + + +// +//! This defines the maximum number of topos that can be in a topo group. +//! At this time, it is set to 2 because our largest topo group (passive +//! stereo) only needs 2 topos (left eye and right eye). +//! +//! 
If a new topo group with more than 2 topos is added above, then this +//! number will also have to be incremented. +#define NV_MOSAIC_MAX_TOPO_PER_TOPO_GROUP 2 + + +// +//! This structure defines a group of topologies that work together to create one +//! overall layout. All of the supported topologies are represented with this +//! structure. +//! +//! For example, a 'Passive Stereo' topology would be represented with this +//! structure, and would have separate topology details for the left and right eyes. +//! The count would be 2. A 'Basic' topology is also represented by this structure, +//! with a count of 1. +//! +//! The structure is primarily used internally, but is exposed to applications in a +//! read-only fashion because there are some details in it that might be useful +//! (like the number of rows/cols, or connected display information). A user can +//! get the filled-in structure by calling NvAPI_Mosaic_GetTopoGroup(). +//! +//! You can then look at the detailed values within the structure. There are no +//! entrypoints which take this structure as input (effectively making it read-only). +typedef struct +{ + NvU32 version; //!< Version of this structure + NV_MOSAIC_TOPO_BRIEF brief; //!< The brief details of this topo + NvU32 count; //!< Number of topos in array below + NV_MOSAIC_TOPO_DETAILS topos[NV_MOSAIC_MAX_TOPO_PER_TOPO_GROUP]; + +} NV_MOSAIC_TOPO_GROUP; + +//! Macro for constructing the version field of NV_MOSAIC_TOPO_GROUP +#define NVAPI_MOSAIC_TOPO_GROUP_VER MAKE_NVAPI_VERSION(NV_MOSAIC_TOPO_GROUP,1) + +//! @} + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_GetSupportedTopoInfo +// +//! DESCRIPTION: This API returns information on the topologies and display resolutions +//! supported by Mosaic mode. +//! +//! NOTE: Not all topologies returned can be set immediately. +//! See 'OUT' Notes below. +//! +//! Once you get the list of supported topologies, you can call +//! 
NvAPI_Mosaic_GetTopoGroup() with one of the Mosaic topologies if you need +//! more information about it. +//! +//! 'IN' Notes: pSupportedTopoInfo->version must be set before calling this function. +//! If the specified version is not supported by this implementation, +//! an error will be returned (NVAPI_INCOMPATIBLE_STRUCT_VERSION). +//! +//! 'OUT' Notes: Some of the topologies returned might not be valid for one reason or +//! another. It could be due to mismatched or missing displays. It +//! could also be because the required number of GPUs is not found. +//! At a high level, you can see if the topology is valid and can be enabled +//! by looking at the pSupportedTopoInfo->topoBriefs[xxx].isPossible flag. +//! If this is true, the topology can be enabled. If it +//! is false, you can find out why it cannot be enabled by getting the +//! details of the topology via NvAPI_Mosaic_GetTopoGroup(). From there, +//! look at the validityMask of the individual topologies. The bits can +//! be tested against the NV_MOSAIC_TOPO_VALIDITY_* bits. +//! +//! It is possible for this function to return NVAPI_OK with no topologies +//! listed in the return structure. If this is the case, it means that +//! the current hardware DOES support Mosaic, but with the given configuration +//! no valid topologies were found. This most likely means that SLI was not +//! enabled for the hardware. Once enabled, you should see valid topologies +//! returned from this function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! +//! \param [in,out] pSupportedTopoInfo Information about what topologies and display resolutions +//! are supported for Mosaic. +//! \param [in] type The type of topologies the caller is interested in +//! getting. See NV_MOSAIC_TOPO_TYPE for possible values. +//! +//! \retval ::NVAPI_OK No errors in returning supported topologies. +//! \retval ::NVAPI_NOT_SUPPORTED Mosaic is not supported with the existing hardware. +//! 
\retval ::NVAPI_INVALID_ARGUMENT One or more arguments passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first. +//! \retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the structure passed in is not +// compatible with this entry point. +//! \retval ::NVAPI_ERROR: Miscellaneous error occurred. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_GetSupportedTopoInfo(NV_MOSAIC_SUPPORTED_TOPO_INFO *pSupportedTopoInfo, NV_MOSAIC_TOPO_TYPE type); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_GetTopoGroup +// +//! DESCRIPTION: This API returns a structure filled with the details +//! of the specified Mosaic topology. +//! +//! If the pTopoBrief passed in matches the current topology, +//! then information in the brief and group structures +//! will reflect what is current. Thus the brief would have +//! the current 'enable' status, and the group would have the +//! current overlap values. If there is no match, then the +//! returned brief has an 'enable' status of FALSE (since it +//! is obviously not enabled), and the overlap values will be 0. +//! +//! 'IN' Notes: pTopoGroup->version must be set before calling this function. +//! If the specified version is not supported by this implementation, +//! an error will be returned (NVAPI_INCOMPATIBLE_STRUCT_VERSION). +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pTopoBrief The topology for getting the details +//! This must be one of the topology briefs +//! returned from NvAPI_Mosaic_GetSupportedTopoInfo(). +//! \param [in,out] pTopoGroup The topology details matching the brief +//! +//! \retval ::NVAPI_OK Details were retrieved successfully. +//! 
\retval ::NVAPI_NOT_SUPPORTED Mosaic is not supported with the existing hardware. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more argumentss passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first. +//! \retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the structure passed in is not +// compatible with this entry point. +//! \retval ::NVAPI_ERROR: Miscellaneous error occurred. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_GetTopoGroup(NV_MOSAIC_TOPO_BRIEF *pTopoBrief, NV_MOSAIC_TOPO_GROUP *pTopoGroup); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_GetOverlapLimits +// +//! DESCRIPTION: This API returns the X and Y overlap limits required if +//! the given Mosaic topology and display settings are to be used. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pTopoBrief The topology for getting limits +//! This must be one of the topo briefs +//! returned from NvAPI_Mosaic_GetSupportedTopoInfo(). +//! \param [in] pDisplaySetting The display settings for getting the limits. +//! This must be one of the settings +//! returned from NvAPI_Mosaic_GetSupportedTopoInfo(). +//! \param [out] pMinOverlapX X overlap minimum +//! \param [out] pMaxOverlapX X overlap maximum +//! \param [out] pMinOverlapY Y overlap minimum +//! \param [out] pMaxOverlapY Y overlap maximum +//! +//! \retval ::NVAPI_OK Details were retrieved successfully. +//! \retval ::NVAPI_NOT_SUPPORTED Mosaic is not supported with the existing hardware. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more argumentss passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first. +//! 
\retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the structure passed in is not +//! compatible with this entry point. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_GetOverlapLimits(NV_MOSAIC_TOPO_BRIEF *pTopoBrief, NV_MOSAIC_DISPLAY_SETTING *pDisplaySetting, NvS32 *pMinOverlapX, NvS32 *pMaxOverlapX, NvS32 *pMinOverlapY, NvS32 *pMaxOverlapY); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_SetCurrentTopo +// +//! DESCRIPTION: This API sets the Mosaic topology and performs a mode switch +//! using the given display settings. +//! +//! If NVAPI_OK is returned, the current Mosaic topology was set +//! correctly. Any other status returned means the +//! topology was not set, and remains what it was before this +//! function was called. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pTopoBrief The topology to set. This must be one of the topologies returned from +//! NvAPI_Mosaic_GetSupportedTopoInfo(), and it must have an isPossible value of 1. +//! \param [in] pDisplaySetting The per display settings to be used in the Mosaic mode. This must be one of the +//! settings returned from NvAPI_Mosaic_GetSupportedTopoInfo(). +//! \param [in] overlapX The pixel overlap to use between horizontal displays (use positive a number for +//! overlap, or a negative number to create a gap.) If the overlap is out of bounds +//! for what is possible given the topo and display setting, the overlap will be clamped. +//! \param [in] overlapY The pixel overlap to use between vertical displays (use positive a number for +//! overlap, or a negative number to create a gap.) If the overlap is out of bounds for +//! 
what is possible given the topo and display setting, the overlap will be clamped. +//! \param [in] enable If 1, the topology being set will also be enabled, meaning that the mode set will +//! occur. \n +//! If 0, you don't want to be in Mosaic mode right now, but want to set the current +//! Mosaic topology so you can enable it later with NvAPI_Mosaic_EnableCurrentTopo(). +//! +//! \retval ::NVAPI_OK The Mosaic topology was set. +//! \retval ::NVAPI_NOT_SUPPORTED Mosaic is not supported with the existing hardware. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more argumentss passed in are invalid. +//! \retval ::NVAPI_TOPO_NOT_POSSIBLE The topology passed in is not currently possible. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first. +//! \retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available. +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION The version of the structure passed in is not +//! compatible with this entrypoint. +//! \retval ::NVAPI_MODE_CHANGE_FAILED There was an error changing the display mode. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_SetCurrentTopo(NV_MOSAIC_TOPO_BRIEF *pTopoBrief, NV_MOSAIC_DISPLAY_SETTING *pDisplaySetting, NvS32 overlapX, NvS32 overlapY, NvU32 enable); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_GetCurrentTopo +// +//! DESCRIPTION: This API returns information for the current Mosaic topology. +//! This includes topology, display settings, and overlap values. +//! +//! You can call NvAPI_Mosaic_GetTopoGroup() with the topology +//! if you require more information. +//! +//! If there isn't a current topology, then pTopoBrief->topo will +//! be NV_MOSAIC_TOPO_NONE. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! 
\param [out] pTopoBrief The current Mosaic topology +//! \param [out] pDisplaySetting The current per-display settings +//! \param [out] pOverlapX The pixel overlap between horizontal displays +//! \param [out] pOverlapY The pixel overlap between vertical displays +//! +//! \retval ::NVAPI_OK Success getting current info. +//! \retval ::NVAPI_NOT_SUPPORTED Mosaic is not supported with the existing hardware. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more argumentss passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first. +//! \retval ::NVAPI_NO_IMPLEMENTATION This entry point not available. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_GetCurrentTopo(NV_MOSAIC_TOPO_BRIEF *pTopoBrief, NV_MOSAIC_DISPLAY_SETTING *pDisplaySetting, NvS32 *pOverlapX, NvS32 *pOverlapY); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_EnableCurrentTopo +// +//! DESCRIPTION: This API enables or disables the current Mosaic topology +//! based on the setting of the incoming 'enable' parameter. +//! +//! An "enable" setting enables the current (previously set) Mosaic topology. +//! Note that when the current Mosaic topology is retrieved, it must have an isPossible value of 1 or +//! an error will occur. +//! +//! A "disable" setting disables the current Mosaic topology. +//! The topology information will persist, even across reboots. +//! To re-enable the Mosaic topology, call this function +//! again with the enable parameter set to 1. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] enable 1 to enable the current Mosaic topo, 0 to disable it. +//! +//! \retval ::NVAPI_OK The Mosaic topo was enabled/disabled. +//! 
\retval ::NVAPI_NOT_SUPPORTED Mosaic is not supported with the existing hardware. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more arguments passed in are invalid. +//! \retval ::NVAPI_TOPO_NOT_POSSIBLE The current topology is not currently possible. +//! \retval ::NVAPI_MODE_CHANGE_FAILED There was an error changing the display mode. +//! \retval ::NVAPI_ERROR: Miscellaneous error occurred. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_EnableCurrentTopo(NvU32 enable); + +//! \ingroup mosaicapi +//! @{ +typedef struct _NV_MOSAIC_GRID_TOPO_DISPLAY_V1 +{ + NvU32 displayId; //!< DisplayID of the display + NvS32 overlapX; //!< (+overlap, -gap) + NvS32 overlapY; //!< (+overlap, -gap) + NV_ROTATE rotation; //!< Rotation of display + NvU32 cloneGroup; //!< Reserved, must be 0 +} NV_MOSAIC_GRID_TOPO_DISPLAY_V1; + +typedef enum _NV_PIXEL_SHIFT_TYPE +{ + NV_PIXEL_SHIFT_TYPE_NO_PIXEL_SHIFT = 0, //!< No pixel shift will be applied to this display. 
+ NV_PIXEL_SHIFT_TYPE_2x2_TOP_LEFT_PIXELS = 1, //!< This display will be used to scanout top left pixels in 2x2 PixelShift configuration + NV_PIXEL_SHIFT_TYPE_2x2_BOTTOM_RIGHT_PIXELS = 2, //!< This display will be used to scanout bottom right pixels in 2x2 PixelShift configuration + NV_PIXEL_SHIFT_TYPE_2x2_TOP_RIGHT_PIXELS = 4, //!< This display will be used to scanout top right pixels in 2x2 PixelShift configuration + NV_PIXEL_SHIFT_TYPE_2x2_BOTTOM_LEFT_PIXELS = 8, //!< This display will be used to scanout bottom left pixels in 2x2 PixelShift configuration +} NV_PIXEL_SHIFT_TYPE; + +typedef struct _NV_MOSAIC_GRID_TOPO_DISPLAY_V2 +{ + NvU32 version; //!< Version of this structure + + NvU32 displayId; //!< DisplayID of the display + NvS32 overlapX; //!< (+overlap, -gap) + NvS32 overlapY; //!< (+overlap, -gap) + NV_ROTATE rotation; //!< Rotation of display + NvU32 cloneGroup; //!< Reserved, must be 0 + NV_PIXEL_SHIFT_TYPE pixelShiftType; //!< Type of the pixel shift enabled display +} NV_MOSAIC_GRID_TOPO_DISPLAY_V2; + +#ifndef NV_MOSAIC_GRID_TOPO_DISPLAY_VER + +typedef NV_MOSAIC_GRID_TOPO_DISPLAY_V1 NV_MOSAIC_GRID_TOPO_DISPLAY; + +#endif + +typedef struct _NV_MOSAIC_GRID_TOPO_V1 +{ + NvU32 version; //!< Version of this structure + NvU32 rows; //!< Number of rows + NvU32 columns; //!< Number of columns + NvU32 displayCount; //!< Number of display details + NvU32 applyWithBezelCorrect : 1; //!< When enabling and doing the modeset, do we switch to the bezel-corrected resolution + NvU32 immersiveGaming : 1; //!< Enable as immersive gaming instead of Mosaic SLI (for Quadro-boards only) + NvU32 baseMosaic : 1; //!< Enable as Base Mosaic (Panoramic) instead of Mosaic SLI (for NVS and Quadro-boards only) + NvU32 driverReloadAllowed : 1; //!< If necessary, reloading the driver is permitted (for Vista and above only). Will not be persisted. Value undefined on get. 
+ NvU32 acceleratePrimaryDisplay : 1; //!< Enable SLI acceleration on the primary display while in single-wide mode (For Immersive Gaming only). Will not be persisted. Value undefined on get. + NvU32 reserved : 27; //!< Reserved, must be 0 + NV_MOSAIC_GRID_TOPO_DISPLAY_V1 displays[NV_MOSAIC_MAX_DISPLAYS]; //!< Displays are done as [(row * columns) + column] + NV_MOSAIC_DISPLAY_SETTING_V1 displaySettings; //!< Display settings +} NV_MOSAIC_GRID_TOPO_V1; + +typedef struct _NV_MOSAIC_GRID_TOPO_V2 +{ + NvU32 version; //!< Version of this structure + NvU32 rows; //!< Number of rows + NvU32 columns; //!< Number of columns + NvU32 displayCount; //!< Number of display details + NvU32 applyWithBezelCorrect : 1; //!< When enabling and doing the modeset, do we switch to the bezel-corrected resolution + NvU32 immersiveGaming : 1; //!< Enable as immersive gaming instead of Mosaic SLI (for Quadro-boards only) + NvU32 baseMosaic : 1; //!< Enable as Base Mosaic (Panoramic) instead of Mosaic SLI (for NVS and Quadro-boards only) + NvU32 driverReloadAllowed : 1; //!< If necessary, reloading the driver is permitted (for Vista and above only). Will not be persisted. Value undefined on get. + NvU32 acceleratePrimaryDisplay : 1; //!< Enable SLI acceleration on the primary display while in single-wide mode (For Immersive Gaming only). Will not be persisted. Value undefined on get. + NvU32 pixelShift : 1; //!< Enable Pixel shift + NvU32 reserved : 26; //!< Reserved, must be 0 + NV_MOSAIC_GRID_TOPO_DISPLAY_V2 displays[NV_MOSAIC_MAX_DISPLAYS]; //!< Displays are done as [(row * columns) + column] + NV_MOSAIC_DISPLAY_SETTING_V1 displaySettings; //!< Display settings +} NV_MOSAIC_GRID_TOPO_V2; + +//! 
Macro for constructing the version field of ::NV_MOSAIC_GRID_TOPO +#define NV_MOSAIC_GRID_TOPO_VER1 MAKE_NVAPI_VERSION(NV_MOSAIC_GRID_TOPO_V1,1) +#define NV_MOSAIC_GRID_TOPO_VER2 MAKE_NVAPI_VERSION(NV_MOSAIC_GRID_TOPO_V2,2) +#ifndef NV_MOSAIC_GRID_TOPO_VER + +typedef NV_MOSAIC_GRID_TOPO_V2 NV_MOSAIC_GRID_TOPO; + +//! Macro for constructing the version field of ::NV_MOSAIC_GRID_TOPO +#define NV_MOSAIC_GRID_TOPO_VER NV_MOSAIC_GRID_TOPO_VER2 + +#endif + +//! @} + +//! since Release R290 + +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_DISPLAY_ON_INVALID_GPU NV_BIT(0) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_DISPLAY_ON_WRONG_CONNECTOR NV_BIT(1) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_NO_COMMON_TIMINGS NV_BIT(2) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_NO_EDID_AVAILABLE NV_BIT(3) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_MISMATCHED_OUTPUT_TYPE NV_BIT(4) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_NO_DISPLAY_CONNECTED NV_BIT(5) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_NO_GPU_TOPOLOGY NV_BIT(6) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_NOT_SUPPORTED NV_BIT(7) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_NO_SLI_BRIDGE NV_BIT(8) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_ECC_ENABLED NV_BIT(9) +#define NV_MOSAIC_DISPLAYCAPS_PROBLEM_GPU_TOPOLOGY_NOT_SUPPORTED NV_BIT(10) + + + + + + + +//! Do not change the current GPU topology. If the NO_DRIVER_RELOAD bit is not +//! specified, then it may still require a driver reload. +#define NV_MOSAIC_SETDISPLAYTOPO_FLAG_CURRENT_GPU_TOPOLOGY NV_BIT(0) + +//! Do not allow a driver reload. That is, stick with the same master GPU as well as the +//! same SLI configuration. +#define NV_MOSAIC_SETDISPLAYTOPO_FLAG_NO_DRIVER_RELOAD NV_BIT(1) + +//! When choosing a GPU topology, choose the topology with the best performance. +//! Without this flag, it will choose the topology that uses the smallest number +//! of GPU's. +#define NV_MOSAIC_SETDISPLAYTOPO_FLAG_MAXIMIZE_PERFORMANCE NV_BIT(2) + +//! Do not return an error if no configuration will work with all of the grids. 
+#define NV_MOSAIC_SETDISPLAYTOPO_FLAG_ALLOW_INVALID NV_BIT(3) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_SetDisplayGrids +// +//! DESCRIPTION: Sets a new display topology, replacing any existing topologies +//! that use the same displays. +//! +//! This function will look for an SLI configuration that will +//! allow the display topology to work. +//! +//! To revert to a single display, specify that display as a 1x1 +//! grid. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pGridTopologies The topology details to set. +//! \param [in] gridCount The number of elements in the pGridTopologies array. +//! \param [in] setTopoFlags Zero or more of the NVAPI_MOSAIC_SETDISPLAYTOPO_FLAG_* +//! flags. +//! +//! +//! \retval ::NVAPI_OK Capabilities have been returned. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first +//! \retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available +//! \retval ::NVAPI_NO_ACTIVE_SLI_TOPOLOGY No matching GPU topologies could be found. +//! \retval ::NVAPI_TOPO_NOT_POSSIBLE One or more of the display grids are not valid. +//! \retval ::NVAPI_ERROR Miscellaneous error occurred +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_SetDisplayGrids(__in_ecount(gridCount) NV_MOSAIC_GRID_TOPO *pGridTopologies, __in NvU32 gridCount, __in NvU32 setTopoFlags); + + +//! \ingroup mosaicapi +//! Indicates that a display's position in the grid is sub-optimal. +#define NV_MOSAIC_DISPLAYTOPO_WARNING_DISPLAY_POSITION NV_BIT(0) + +//! \ingroup mosaicapi +//! Indicates that SetDisplaySettings would need to perform a driver reload. +#define NV_MOSAIC_DISPLAYTOPO_WARNING_DRIVER_RELOAD_REQUIRED NV_BIT(1) + +//! 
\ingroup mosaicapi +typedef struct +{ + NvU32 version; + NvU32 errorFlags; //!< (OUT) Any of the NV_MOSAIC_DISPLAYTOPO_ERROR_* flags. + NvU32 warningFlags; //!< (OUT) Any of the NV_MOSAIC_DISPLAYTOPO_WARNING_* flags. + + NvU32 displayCount; //!< (OUT) The number of valid entries in the displays array. + struct + { + NvU32 displayId; //!< (OUT) The DisplayID of this display. + NvU32 errorFlags; //!< (OUT) Any of the NV_MOSAIC_DISPLAYCAPS_PROBLEM_* flags. + NvU32 warningFlags; //!< (OUT) Any of the NV_MOSAIC_DISPLAYTOPO_WARNING_* flags. + + NvU32 supportsRotation : 1; //!< (OUT) This display can be rotated + NvU32 reserved : 31; //!< (OUT) reserved + } displays[NVAPI_MAX_DISPLAYS]; +} NV_MOSAIC_DISPLAY_TOPO_STATUS; + +//! \ingroup mosaicapi +#define NV_MOSAIC_DISPLAY_TOPO_STATUS_VER MAKE_NVAPI_VERSION(NV_MOSAIC_DISPLAY_TOPO_STATUS,1) + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_ValidateDisplayGrids +// +//! DESCRIPTION: Determines if a list of grid topologies is valid. It will choose an SLI +//! configuration in the same way that NvAPI_Mosaic_SetDisplayGrids() does. +//! +//! On return, each element in the pTopoStatus array will contain any errors or +//! warnings about each grid topology. If any error flags are set, then the topology +//! is not valid. If any warning flags are set, then the topology is valid, but +//! sub-optimal. +//! +//! If the ALLOW_INVALID flag is set, then it will continue to validate the grids +//! even if no SLI configuration will allow all of the grids. In this case, a grid +//! grid with no matching GPU topology will have the error +//! flags NO_GPU_TOPOLOGY or NOT_SUPPORTED set. +//! +//! If the ALLOW_INVALID flag is not set and no matching SLI configuration is +//! found, then it will skip the rest of the validation and return +//! NVAPI_NO_ACTIVE_SLI_TOPOLOGY. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! 
\param [in] setTopoFlags Zero or more of the NVAPI_MOSAIC_SETDISPLAYTOPO_FLAG_* +//! flags. +//! \param [in] pGridTopologies The array of grid topologies to verify. +//! \param [in,out] pTopoStatus The array of problems and warnings with each grid topology. +//! \param [in] gridCount The number of elements in the pGridTopologies and +//! pTopoStatus arrays. +//! +//! +//! \retval ::NVAPI_OK: Capabilities have been returned. +//! \retval ::NVAPI_INVALID_ARGUMENT: One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED: The NvAPI API needs to be initialized first +//! \retval ::NVAPI_NO_IMPLEMENTATION: This entrypoint not available +//! \retval ::NVAPI_NO_ACTIVE_SLI_TOPOLOGY: No matching GPU topologies could be found. +//! \retval ::NVAPI_ERROR: Miscellaneous error occurred +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_ValidateDisplayGrids(__in NvU32 setTopoFlags, + __in_ecount(gridCount) NV_MOSAIC_GRID_TOPO *pGridTopologies, + __inout_ecount_full(gridCount) NV_MOSAIC_DISPLAY_TOPO_STATUS *pTopoStatus, + __in NvU32 gridCount); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_EnumDisplayModes +// +//! DESCRIPTION: Determines the set of available display modes for a given grid topology. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pGridTopology The grid topology to use. +//! \param [in,out] pDisplaySettings A pointer to an array of display settings to populate, +//! or NULL to find out the total number of available modes. +//! \param [in,out] pDisplayCount If pDisplaySettings is not NULL, then pDisplayCount +//! should point to the number of elements in the +//! pDisplaySettings array. On return, it will contain the +//! number of modes that were actually returned. If +//! pDisplaySettings is NULL, then pDisplayCount will receive +//! 
the total number of modes that are available. +//! +//! +//! \retval ::NVAPI_OK Capabilities have been returned. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first +//! \retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available +//! \retval ::NVAPI_ERROR Miscellaneous error occurred +//! +//! \ingroup mosaciapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_EnumDisplayModes(__in NV_MOSAIC_GRID_TOPO *pGridTopology, + __inout_ecount_part_opt(*pDisplayCount, *pDisplayCount) NV_MOSAIC_DISPLAY_SETTING *pDisplaySettings, + __inout NvU32 *pDisplayCount); + + +//! SUPPORTED OS: Windows 10 and higher +//! +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_EnumDisplayGrids +// +//! DESCRIPTION: Enumerates the current active grid topologies. This includes Mosaic, IG, and +//! Panoramic topologies, as well as single displays. +//! +//! If pGridTopologies is NULL, then pGridCount will be set to the number of active +//! grid topologies. +//! +//! If pGridTopologies is not NULL, then pGridCount contains the maximum number of +//! grid topologies to return. On return, pGridCount will be set to the number of +//! grid topologies that were returned. +//! +//! \param [out] pGridTopologies The list of active grid topologies. +//! \param [in,out] pGridCount A pointer to the number of grid topologies returned. +//! +//! \retval ::NVAPI_OK Capabilties have been returned. +//! \retval ::NVAPI_END_ENUMERATION There are no more topologies to return. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first +//! \retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available +//! \retval ::NVAPI_ERROR Miscellaneous error occurred +//! +//! 
\ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_EnumDisplayGrids(__inout_ecount_part_opt(*pGridCount, *pGridCount) NV_MOSAIC_GRID_TOPO *pGridTopologies, + __inout NvU32 *pGridCount); + + +//////////////////////////////////////////////////////////////////////////////////////// +// +// ########################################################################### +// DELME_RUSS - DELME_RUSS - DELME_RUSS - DELME_RUSS - DELME_RUSS - DELME_RUSS +// +// Below is the Phase 1 Mosaic stuff, the Phase 2 stuff above is what will remain +// once Phase 2 is complete. For a small amount of time, the two will co-exist. As +// soon as apps (nvapichk, NvAPITestMosaic, and CPL) are updated to use the Phase 2 +// entrypoints, the code below will be deleted. +// +// DELME_RUSS - DELME_RUSS - DELME_RUSS - DELME_RUSS - DELME_RUSS - DELME_RUSS +// ########################################################################### +// +// Supported topos 1x4, 4x1 and 2x2 to start with. +// +// Selected scan out targets can be one per GPU or more than one on the same GPU. +// +// SAMPLE of MOSAIC 1x4 SCAN OUT TOPO with 8 pixel horizontal overlap +// +//+-------------------------++-------------------------++-------------------------++-------------------------+ +//| || || || | +//| || || || | +//| || || || | +//| DVI1 || DVI2 || DVI3 || DVI4 | +//| || || || | +//| || || || | +//| || || || | +//| || || || | +//+-------------------------++-------------------------++-------------------------++-------------------------+ + + +//! \addtogroup mosaicapi +//! @{ + +//! Used in NV_MOSAIC_TOPOLOGY. +#define NVAPI_MAX_MOSAIC_DISPLAY_ROWS 8 + +//! Used in NV_MOSAIC_TOPOLOGY. +#define NVAPI_MAX_MOSAIC_DISPLAY_COLUMNS 8 + +//! Used in NV_MOSAIC_TOPOLOGY. +#define NVAPI_MAX_MOSAIC_TOPOS 16 + +//! Used in NvAPI_GetCurrentMosaicTopology() and NvAPI_SetCurrentMosaicTopology(). 
+typedef struct +{ + NvU32 version; //!< Version number of the mosaic topology + NvU32 rowCount; //!< Horizontal display count + NvU32 colCount; //!< Vertical display count + + struct + { + NvPhysicalGpuHandle hPhysicalGPU; //!< Physical GPU to be used in the topology + NvU32 displayOutputId; //!< Connected display target + NvS32 overlapX; //!< Pixels of overlap on the left of target: (+overlap, -gap) + NvS32 overlapY; //!< Pixels of overlap on the top of target: (+overlap, -gap) + + } gpuLayout[NVAPI_MAX_MOSAIC_DISPLAY_ROWS][NVAPI_MAX_MOSAIC_DISPLAY_COLUMNS]; + +} NV_MOSAIC_TOPOLOGY; + +//! Used in NV_MOSAIC_TOPOLOGY. +#define NVAPI_MOSAIC_TOPOLOGY_VER MAKE_NVAPI_VERSION(NV_MOSAIC_TOPOLOGY,1) + +//! Used in NvAPI_GetSupportedMosaicTopologies(). +typedef struct +{ + NvU32 version; + NvU32 totalCount; //!< Count of valid topologies + NV_MOSAIC_TOPOLOGY topos[NVAPI_MAX_MOSAIC_TOPOS]; //!< Maximum number of topologies + +} NV_MOSAIC_SUPPORTED_TOPOLOGIES; + +//! Used in NV_MOSAIC_SUPPORTED_TOPOLOGIES. +#define NVAPI_MOSAIC_SUPPORTED_TOPOLOGIES_VER MAKE_NVAPI_VERSION(NV_MOSAIC_SUPPORTED_TOPOLOGIES,1) + +//!@} + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetSupportedMosaicTopologies +// +//! DESCRIPTION: This API returns all valid Mosaic topologies. +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \since Release: 177 +//! +//! \param [out] pMosaicTopos An array of valid Mosaic topologies. +//! +//! \retval NVAPI_OK Call succeeded; 1 or more topologies were returned +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid +//! \retval NVAPI_MIXED_TARGET_TYPES Mosaic topology is only possible with all targets of the same NV_GPU_OUTPUT_TYPE. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_NOT_SUPPORTED Mosaic is not supported with GPUs on this system. +//! 
\retval NVAPI_NO_ACTIVE_SLI_TOPOLOGY SLI is not enabled, yet needs to be, in order for this function to succeed. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetSupportedMosaicTopologies(NV_MOSAIC_SUPPORTED_TOPOLOGIES *pMosaicTopos); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetCurrentMosaicTopology +// +//! DESCRIPTION: This API gets the current Mosaic topology. +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \since Release: 177 +//! +//! \param [out] pMosaicTopo The current Mosaic topology +//! \param [out] pEnabled TRUE if returned topology is currently enabled, else FALSE +//! +//! \retval NVAPI_OK Call succeeded +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_NOT_SUPPORTED Mosaic is not supported with GPUs on this system. +//! \retval NVAPI_NO_ACTIVE_SLI_TOPOLOGY SLI is not enabled, yet needs to be, in order for this function to succeed. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GetCurrentMosaicTopology(NV_MOSAIC_TOPOLOGY *pMosaicTopo, NvU32 *pEnabled); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_SetCurrentMosaicTopology +// +//! DESCRIPTION: This API sets the Mosaic topology, and enables it so that the +//! Mosaic display settings are enumerated upon request. +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \since Release: 177 +//! +//! \param [in] pMosaicTopo A valid Mosaic topology +//! +//! \retval NVAPI_OK Call succeeded +//! 
\retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_NOT_SUPPORTED Mosaic is not supported with GPUs on this system. +//! \retval NVAPI_NO_ACTIVE_SLI_TOPOLOGY SLI is not enabled, yet needs to be, in order for this function to succeed. +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SetCurrentMosaicTopology(NV_MOSAIC_TOPOLOGY *pMosaicTopo); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_EnableCurrentMosaicTopology +// +//! DESCRIPTION: This API enables or disables the current Mosaic topology. +//! When enabling, the last Mosaic topology will be set. +//! +//! - If enabled, enumeration of display settings will include valid Mosaic resolutions. +//! - If disabled, enumeration of display settings will not include Mosaic resolutions. +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! \since Release: 177 +//! +//! \param [in] enable TRUE to enable the Mosaic Topology, FALSE to disable it. +//! +//! \retval NVAPI_OK Call succeeded +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! \retval NVAPI_NOT_SUPPORTED Mosaic is not supported with GPUs on this system. +//! \retval NVAPI_NO_ACTIVE_SLI_TOPOLOGY SLI is not enabled, yet needs to be, in order for this function to succeed. +//! +//! 
\ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_EnableCurrentMosaicTopology(NvU32 enable); + + +#define NVAPI_MAX_GSYNC_DEVICES 4 + + +// Sync Display APIs + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_EnumSyncDevices +// +//! DESCRIPTION: This API returns an array of Sync device handles. A Sync device handle represents a +//! single Sync device on the system. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [out] nvGSyncHandles- The caller provides an array of handles, which must contain at least +//! NVAPI_MAX_GSYNC_DEVICES elements. The API will zero out the entire array and then fill in one +//! or more handles. If an error occurs, the array is invalid. +//! \param [out] *gsyncCount- The caller provides the storage space. NvAPI_GSync_EnumSyncDevices +//! sets *gsyncCount to indicate how many of the elements in the nvGSyncHandles[] array are valid. +//! If an error occurs, *gsyncCount will be set to zero. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_INVALID_ARGUMENT nvGSyncHandles or gsyncCount is NULL. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any Sync Device. +//! +//! \ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_EnumSyncDevices(__out NvGSyncDeviceHandle nvGSyncHandles[NVAPI_MAX_GSYNC_DEVICES], __out NvU32 *gsyncCount); + + +// GSync boardId values +#define NVAPI_GSYNC_BOARD_ID_P358 856 //!< GSync board ID 0x358, see NV_GSYNC_CAPABILITIES +#define NVAPI_GSYNC_BOARD_ID_P2060 8288 //!< GSync board ID 0x2060, see NV_GSYNC_CAPABILITIES + +//! 
\since Release: 375 +#define NVAPI_GSYNC_BOARD_ID_P2061 8289 //!< GSync board ID 0x2061, see NV_GSYNC_CAPABILITIES + + +//! Used in NvAPI_GSync_QueryCapabilities(). +typedef struct _NV_GSYNC_CAPABILITIES_V1 +{ + NvU32 version; //!< Version of the structure + NvU32 boardId; //!< Board ID + NvU32 revision; //!< FPGA Revision + NvU32 capFlags; //!< Capabilities of the Sync board. Reserved for future use +} NV_GSYNC_CAPABILITIES_V1; + +typedef struct _NV_GSYNC_CAPABILITIES_V2 +{ + NvU32 version; //!< Version of the structure + NvU32 boardId; //!< Board ID + NvU32 revision; //!< FPGA major revision + NvU32 capFlags; //!< Capabilities of the Sync board. Reserved for future use + NvU32 extendedRevision; //!< FPGA minor revision +} NV_GSYNC_CAPABILITIES_V2; + +typedef struct _NV_GSYNC_CAPABILITIES_V3 +{ + NvU32 version; //!< Version of the structure + NvU32 boardId; //!< Board ID + NvU32 revision; //!< FPGA major revision + NvU32 capFlags; //!< Capabilities of the Sync board. Reserved for future use + NvU32 extendedRevision; //!< FPGA minor revision + NvU32 bIsMulDivSupported : 1; //!< Indicates if multiplication/division of the frequency of house sync signal is supported. + NvU32 reserved : 31; //!< Reserved for future use + NvU32 maxMulDivValue; //!< This parameter returns the maximum possible value that can be programmed + //!< for multiplying / dividing house sync. Only valid if bIsMulDivSupported is set to 1. +} NV_GSYNC_CAPABILITIES_V3; + +typedef NV_GSYNC_CAPABILITIES_V3 NV_GSYNC_CAPABILITIES; + + +//! \ingroup gsyncapi +//! Macro for constructing the version field of NV_GSYNC_CAPABILITIES. 
+#define NV_GSYNC_CAPABILITIES_VER1 MAKE_NVAPI_VERSION(NV_GSYNC_CAPABILITIES_V1,1) +#define NV_GSYNC_CAPABILITIES_VER2 MAKE_NVAPI_VERSION(NV_GSYNC_CAPABILITIES_V2,2) +#define NV_GSYNC_CAPABILITIES_VER3 MAKE_NVAPI_VERSION(NV_GSYNC_CAPABILITIES_V3,3) +#define NV_GSYNC_CAPABILITIES_VER NV_GSYNC_CAPABILITIES_VER3 + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_QueryCapabilities +// +//! DESCRIPTION: This API returns the capabilities of the Sync device. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] hNvGSyncDevice- The handle for a Sync device for which the capabilities will be queried. +//! \param [inout] *pNvGSyncCapabilities- The caller provides the storage space. NvAPI_GSync_QueryCapabilities() sets +//! *pNvGSyncCapabilities to the version and capabilities details of the Sync device +//! If an error occurs, *pNvGSyncCapabilities will be set to NULL. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_INVALID_ARGUMENT hNvGSyncDevice is NULL. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any Sync Device. +//! +//! \ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_QueryCapabilities(__in NvGSyncDeviceHandle hNvGSyncDevice, __inout NV_GSYNC_CAPABILITIES *pNvGSyncCapabilities); + + + +//! Connector values for a GPU. Used in NV_GSYNC_GPU. 
+typedef enum _NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR +{ + NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR_NONE = 0, + NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR_PRIMARY = 1, + NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR_SECONDARY = 2, + NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR_TERTIARY = 3, + NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR_QUARTERNARY = 4, +} NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR; + +//! Display sync states. Used in NV_GSYNC_DISPLAY. +typedef enum _NVAPI_GSYNC_DISPLAY_SYNC_STATE +{ + NVAPI_GSYNC_DISPLAY_SYNC_STATE_UNSYNCED = 0, + NVAPI_GSYNC_DISPLAY_SYNC_STATE_SLAVE = 1, + NVAPI_GSYNC_DISPLAY_SYNC_STATE_MASTER = 2, +} NVAPI_GSYNC_DISPLAY_SYNC_STATE; + +typedef struct _NV_GSYNC_GPU +{ + NvU32 version; //!< Version of the structure + NvPhysicalGpuHandle hPhysicalGpu; //!< GPU handle + NVAPI_GSYNC_GPU_TOPOLOGY_CONNECTOR connector; //!< Indicates which connector on the device the GPU is connected to. + NvPhysicalGpuHandle hProxyPhysicalGpu; //!< GPU through which hPhysicalGpu is connected to the Sync device (if not directly connected) + //!< - this is NULL otherwise + NvU32 isSynced : 1; //!< Whether this GPU is sync'd or not. + NvU32 reserved : 31; //!< Should be set to ZERO +} NV_GSYNC_GPU; + +typedef struct _NV_GSYNC_DISPLAY +{ + NvU32 version; //!< Version of the structure + NvU32 displayId; //!< display identifier for displays.The GPU to which it is connected, can be retireved from NvAPI_SYS_GetPhysicalGpuFromDisplayId + NvU32 isMasterable : 1; //!< Can this display be the master? (Read only) + NvU32 reserved : 31; //!< Should be set to ZERO + NVAPI_GSYNC_DISPLAY_SYNC_STATE syncState; //!< Is this display slave/master + //!< (Retrieved with topology or set by caller for enable/disable sync) +} NV_GSYNC_DISPLAY; + +#define NV_GSYNC_DISPLAY_VER MAKE_NVAPI_VERSION(NV_GSYNC_DISPLAY,1) +#define NV_GSYNC_GPU_VER MAKE_NVAPI_VERSION(NV_GSYNC_GPU,1) + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_GetTopology +// +//! 
DESCRIPTION: This API returns the topology for the specified Sync device. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] hNvGSyncDevice- The caller provides the handle for a Sync device for which the topology will be queried. +//! \param [in, out] gsyncGpuCount- It returns number of GPUs connected to Sync device +//! \param [in, out] gsyncGPUs- It returns info about GPUs connected to Sync device +//! \param [in, out] gsyncDisplayCount- It returns number of active displays that belongs to Sync device +//! \param [in, out] gsyncDisplays- It returns info about all active displays that belongs to Sync device +//! +//! HOW TO USE: 1) make a call to get the number of GPUs connected OR displays synced through Sync device +//! by passing the gsyncGPUs OR gsyncDisplays as NULL respectively. Both gsyncGpuCount and gsyncDisplayCount can be retrieved in same call by passing +//! both gsyncGPUs and gsyncDisplays as NULL +//! On call success: +//! 2) Allocate memory based on gsyncGpuCount(for gsyncGPUs) and/or gsyncDisplayCount(for gsyncDisplays) then make a call to populate gsyncGPUs and/or gsyncDisplays respectively. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_INVALID_ARGUMENT hNvGSyncDevice is NULL. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any Sync Device. +//! \retval ::NVAPI_INSUFFICIENT_BUFFER When the actual number of GPUs/displays in the topology exceed the number of elements allocated for SyncGPUs/SyncDisplays respectively. +//! +//! 
\ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_GetTopology(__in NvGSyncDeviceHandle hNvGSyncDevice, __inout_opt NvU32 *gsyncGpuCount, __inout_ecount_part_opt(*gsyncGpuCount, *gsyncGpuCount) NV_GSYNC_GPU *gsyncGPUs, + __inout_opt NvU32 *gsyncDisplayCount, __inout_ecount_part_opt(*gsyncDisplayCount, *gsyncDisplayCount) NV_GSYNC_DISPLAY *gsyncDisplays); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_SetSyncStateSettings +// +//! DESCRIPTION: Sets a new sync state for the displays in system. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] gsyncDisplayCount- The number of displays in gsyncDisplays. +//! \param [in] pGsyncDisplays- The caller provides the structure containing all displays that need to be synchronized in the system. +//! The displays that are not part of pGsyncDisplays, will be un-synchronized. +//! \param [in] flags- Reserved for future use. +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT If the display topology or count not valid. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any Sync Device. +//! \retval ::NVAPI_INVALID_SYNC_TOPOLOGY 1.If any mosaic grid is partial. +//! 2.If timing(HVisible/VVisible/refreshRate) applied of any display is different. +//! 3.If There is a across GPU mosaic grid in system and that is not a part of pGsyncDisplays. +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE The application will require Administrator privileges to access this API. +//! The application can be elevated to a higher permission level by selecting "Run as Administrator". +//! +//! 
\ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_SetSyncStateSettings(__in NvU32 gsyncDisplayCount, __in_ecount(gsyncDisplayCount) NV_GSYNC_DISPLAY *pGsyncDisplays, __in NvU32 flags); + + +//! \ingroup gsyncapi + +//! Source signal edge to be used for output pulse. See NV_GSYNC_CONTROL_PARAMS. +typedef enum _NVAPI_GSYNC_POLARITY +{ + NVAPI_GSYNC_POLARITY_RISING_EDGE = 0, + NVAPI_GSYNC_POLARITY_FALLING_EDGE = 1, + NVAPI_GSYNC_POLARITY_BOTH_EDGES = 2, +} NVAPI_GSYNC_POLARITY; + +//! Used in NV_GSYNC_CONTROL_PARAMS. +typedef enum _NVAPI_GSYNC_VIDEO_MODE +{ + NVAPI_GSYNC_VIDEO_MODE_NONE = 0, + NVAPI_GSYNC_VIDEO_MODE_TTL = 1, + NVAPI_GSYNC_VIDEO_MODE_NTSCPALSECAM = 2, + NVAPI_GSYNC_VIDEO_MODE_HDTV = 3, + NVAPI_GSYNC_VIDEO_MODE_COMPOSITE = 4, +} NVAPI_GSYNC_VIDEO_MODE; + +//! Used in NV_GSYNC_CONTROL_PARAMS. +typedef enum _NVAPI_GSYNC_SYNC_SOURCE +{ + NVAPI_GSYNC_SYNC_SOURCE_VSYNC = 0, + NVAPI_GSYNC_SYNC_SOURCE_HOUSESYNC = 1, +} NVAPI_GSYNC_SYNC_SOURCE; + +//! Used in NV_GSYNC_CONTROL_PARAMS. +typedef struct _NV_GSYNC_DELAY +{ + NvU32 version; //!< Version of the structure + NvU32 numLines; //!< delay to be induced in number of horizontal lines. + NvU32 numPixels; //!< delay to be induced in number of pixels. + NvU32 maxLines; //!< maximum number of lines supported at current display mode to induce delay. Updated by NvAPI_GSync_GetControlParameters(). Read only. + NvU32 minPixels; //!< minimum number of pixels required at current display mode to induce delay. Updated by NvAPI_GSync_GetControlParameters(). Read only. +} NV_GSYNC_DELAY; + +#define NV_GSYNC_DELAY_VER MAKE_NVAPI_VERSION(NV_GSYNC_DELAY,1) + +//! Used in NvAPI_GSync_GetControlParameters() and NvAPI_GSync_SetControlParameters(). 
+typedef struct _NV_GSYNC_CONTROL_PARAMS_V1 +{ + NvU32 version; //!< Version of the structure + NVAPI_GSYNC_POLARITY polarity; //!< Leading edge / Falling edge / both + NVAPI_GSYNC_VIDEO_MODE vmode; //!< None, TTL, NTSCPALSECAM, HDTV + NvU32 interval; //!< Number of pulses to wait between framelock signal generation + NVAPI_GSYNC_SYNC_SOURCE source; //!< VSync/House sync + NvU32 interlaceMode:1; //!< interlace mode for a Sync device + NvU32 syncSourceIsOutput:1; //!< Set this to make house sync as an output; valid only when NV_GSYNC_CONTROL_PARAMS::source is NVAPI_GSYNC_SYNC_SOURCE_VSYNC on P2061 boards. + //!< syncSourceIsOutput should always be NVAPI_GSYNC_SYNC_SOURCE_HOUSESYNC i.e. 0 on P2060 boards or when NV_GSYNC_CONTROL_PARAMS::source is set to NVAPI_GSYNC_SYNC_SOURCE_HOUSESYNC. + NvU32 reserved:30; //!< should be set zero + NV_GSYNC_DELAY syncSkew; //!< The time delay between the frame sync signal and the GPUs signal. + NV_GSYNC_DELAY startupDelay; //!< Sync start delay for master. +} NV_GSYNC_CONTROL_PARAMS_V1; + +//! Used in NV_GSYNC_CONTROL_PARAMS. +typedef enum _NVAPI_GSYNC_MULTIPLY_DIVIDE_MODE +{ + NVAPI_GSYNC_UNDEFINED_MODE = 0, + NVAPI_GSYNC_MULTIPLY_MODE = 1, + NVAPI_GSYNC_DIVIDE_MODE = 2, +} NVAPI_GSYNC_MULTIPLY_DIVIDE_MODE; + +typedef struct _NV_GSYNC_CONTROL_PARAMS_V2 +{ + NvU32 version; //!< Version of the structure + NVAPI_GSYNC_POLARITY polarity; //!< Leading edge / Falling edge / both + NVAPI_GSYNC_VIDEO_MODE vmode; //!< None, TTL, NTSCPALSECAM, HDTV + NvU32 interval; //!< Number of pulses to wait between framelock signal generation + NVAPI_GSYNC_SYNC_SOURCE source; //!< VSync/House sync + NvU32 interlaceMode:1; //!< interlace mode for a Sync device + NvU32 syncSourceIsOutput:1; //!< Set this to make house sync as an output; valid only when NV_GSYNC_CONTROL_PARAMS::source is NVAPI_GSYNC_SYNC_SOURCE_VSYNC on P2061 boards. + //!< syncSourceIsOutput should always be NVAPI_GSYNC_SYNC_SOURCE_HOUSESYNC i.e. 
0 on P2060 boards or when NV_GSYNC_CONTROL_PARAMS::source is set to NVAPI_GSYNC_SYNC_SOURCE_HOUSESYNC. + NvU32 reserved:30; //!< should be set zero + NV_GSYNC_DELAY syncSkew; //!< The time delay between the frame sync signal and the GPUs signal. + NV_GSYNC_DELAY startupDelay; //!< Sync start delay for master. + NVAPI_GSYNC_MULTIPLY_DIVIDE_MODE multiplyDivideMode; //!< Indicates multiplier/divider mode for the housesync signal. + //!< While setting multiplyDivideMode, source needs to be set as NVAPI_GSYNC_SYNC_SOURCE_HOUSESYNC. + NvU8 multiplyDivideValue; //!< Indicates the multiplier/divider value for the housesync signal. Only supported if bIsMulDivSupported field of the structure NV_GSYNC_CAPABILITIES is set to 1. + //!< The maximum supported value for this field can be obtained from maxMulDivValue field of the structure NV_GSYNC_CAPABILITIES. +} NV_GSYNC_CONTROL_PARAMS_V2; + +typedef NV_GSYNC_CONTROL_PARAMS_V2 NV_GSYNC_CONTROL_PARAMS; +#define NV_GSYNC_CONTROL_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_GSYNC_CONTROL_PARAMS_V1,1) +#define NV_GSYNC_CONTROL_PARAMS_VER2 MAKE_NVAPI_VERSION(NV_GSYNC_CONTROL_PARAMS_V2,2) +#define NV_GSYNC_CONTROL_PARAMS_VER NV_GSYNC_CONTROL_PARAMS_VER2 + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_GetControlParameters +// +//! DESCRIPTION: This API queries for sync control parameters as defined in NV_GSYNC_CONTROL_PARAMS. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] hNvGSyncDevice- The caller provides the handle of the Sync device for which to get parameters +//! \param [inout] *pGsyncControls- The caller provides the storage space. NvAPI_GSync_GetControlParameters() populates *pGsyncControls with values. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! 
\retval ::NVAPI_INVALID_ARGUMENT hNvGSyncDevice is NULL. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any Sync Device. +//! +//! \ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_GetControlParameters(__in NvGSyncDeviceHandle hNvGSyncDevice, __inout NV_GSYNC_CONTROL_PARAMS *pGsyncControls); + + + +////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_SetControlParameters +// +//! DESCRIPTION: This API sets control parameters as defined in NV_SYNC_CONTROL_PARAMS. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] hNvGSyncDevice- The caller provides the handle of the Sync device for which to get parameters +//! \param [inout] *pGsyncControls- The caller provides NV_GSYNC_CONTROL_PARAMS. skew and startDelay will be updated to the applied values. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_INVALID_ARGUMENT hNvGSyncDevice is NULL. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any Sync Device. +//! \retval ::NVAPI_INVALID_USER_PRIVILEGE The application will require Administrator privileges to access this API. +//! The application can be elevated to a higher permission level by selecting "Run as Administrator". +//! +//! \ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_SetControlParameters(__in NvGSyncDeviceHandle hNvGSyncDevice, __inout NV_GSYNC_CONTROL_PARAMS *pGsyncControls); + + + + +//! 
Used in NvAPI_GSync_AdjustSyncDelay() +typedef enum _NVAPI_GSYNC_DELAY_TYPE +{ + NVAPI_GSYNC_DELAY_TYPE_UNKNOWN = 0, + NVAPI_GSYNC_DELAY_TYPE_SYNC_SKEW = 1, + NVAPI_GSYNC_DELAY_TYPE_STARTUP = 2 +} NVAPI_GSYNC_DELAY_TYPE; + +////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_AdjustSyncDelay +// +//! DESCRIPTION: This API adjusts the skew and startDelay to the closest possible values. Use this API before calling NvAPI_GSync_SetControlParameters for skew or startDelay. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 319 +//! +//! \param [in] hNvGSyncDevice- The caller provides the handle of the Sync device for which to get parameters +//! \param [in] delayType- Specifies whether the delay is syncSkew or startupDelay. +//! \param [inout] *pGsyncDelay- The caller provides NV_GSYNC_DELAY. skew and startDelay will be adjusted and updated to the closest values. +//! \param [out] *syncSteps- This parameter is optional. It returns the sync delay in unit steps. If 0, it means either the NV_GSYNC_DELAY::numPixels is less than NV_GSYNC_DELAY::minPixels or NV_GSYNC_DELAY::numOfLines exceeds the NV_GSYNC_DELAY::maxLines. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_AdjustSyncDelay(__in NvGSyncDeviceHandle hNvGSyncDevice, __in NVAPI_GSYNC_DELAY_TYPE delayType, __inout NV_GSYNC_DELAY *pGsyncDelay, __out_opt NvU32* syncSteps); + + + +//! Used in NvAPI_GSync_GetSyncStatus(). +typedef struct _NV_GSYNC_STATUS +{ + NvU32 version; //!< Version of the structure + NvU32 bIsSynced; //!< Is timing in sync? + NvU32 bIsStereoSynced; //!< Does the phase of the timing signal from the GPU = the phase of the master sync signal? 
+ NvU32 bIsSyncSignalAvailable; //!< Is the sync signal available? +} NV_GSYNC_STATUS; + +//! Macro for constructing the version field for NV_GSYNC_STATUS. +#define NV_GSYNC_STATUS_VER MAKE_NVAPI_VERSION(NV_GSYNC_STATUS,1) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_GetSyncStatus +// +//! DESCRIPTION: This API queries the sync status of a GPU - timing, stereosync and sync signal availability. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] hNvGSyncDevice- Handle of the Sync device +//! \param [in] hPhysicalGpu- GPU to be queried for sync status. +//! \param [out] *status- The caller provides the storage space. NvAPI_GSync_GetSyncStatus() populates *status with +//! values - timing, stereosync and signal availability. On error, *status is set to NULL. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_INVALID_ARGUMENT hNvGSyncDevice is NULL / SyncTarget is NULL. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any G-Sync Device. +//! +//! \ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_GetSyncStatus(__in NvGSyncDeviceHandle hNvGSyncDevice, __in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GSYNC_STATUS *status); + + +//! \ingroup gsyncapi + +#define NVAPI_MAX_RJ45_PER_GSYNC 2 + +//! Used in NV_GSYNC_STATUS_PARAMS. +typedef enum _NVAPI_GSYNC_RJ45_IO +{ + NVAPI_GSYNC_RJ45_OUTPUT = 0, + NVAPI_GSYNC_RJ45_INPUT = 1, + NVAPI_GSYNC_RJ45_UNUSED = 2 //!< This field is used to notify that the framelock is not actually present. + +} NVAPI_GSYNC_RJ45_IO; + +//! \ingroup gsyncapi +//! Used in NvAPI_GSync_GetStatusParameters(). 
+typedef struct _NV_GSYNC_STATUS_PARAMS_V1 +{ + NvU32 version; + NvU32 refreshRate; //!< The refresh rate + NVAPI_GSYNC_RJ45_IO RJ45_IO[NVAPI_MAX_RJ45_PER_GSYNC]; //!< Configured as input / output + NvU32 RJ45_Ethernet[NVAPI_MAX_RJ45_PER_GSYNC]; //!< Connected to ethernet hub? [ERRONEOUSLY CONNECTED!] + NvU32 houseSyncIncoming; //!< Incoming house sync frequency in Hz + NvU32 bHouseSync; //!< Is house sync connected? +} NV_GSYNC_STATUS_PARAMS_V1; + +typedef struct _NV_GSYNC_STATUS_PARAMS_V2 +{ + NvU32 version; + NvU32 refreshRate; //!< The refresh rate + NVAPI_GSYNC_RJ45_IO RJ45_IO[NVAPI_MAX_RJ45_PER_GSYNC]; //!< Configured as input / output + NvU32 RJ45_Ethernet[NVAPI_MAX_RJ45_PER_GSYNC]; //!< Connected to ethernet hub? [ERRONEOUSLY CONNECTED!] + NvU32 houseSyncIncoming; //!< Incoming house sync frequency in Hz + NvU32 bHouseSync; //!< Is house sync connected? + NvU32 bInternalSlave : 1; //!< Valid only for P2061 board. + //!< If set to 1, it means that this P2061 board receives input from another P2061 board. + NvU32 reserved : 31; //!< Reserved for future use. +} NV_GSYNC_STATUS_PARAMS_V2; + + +typedef NV_GSYNC_STATUS_PARAMS_V2 NV_GSYNC_STATUS_PARAMS; + +//! \ingroup gsyncapi +//! Macro for constructing the version field of NV_GSYNC_STATUS_PARAMS +#define NV_GSYNC_STATUS_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_GSYNC_STATUS_PARAMS_V1,1) +#define NV_GSYNC_STATUS_PARAMS_VER2 MAKE_NVAPI_VERSION(NV_GSYNC_STATUS_PARAMS_V2,2) +#define NV_GSYNC_STATUS_PARAMS_VER NV_GSYNC_STATUS_PARAMS_VER2 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GSync_GetStatusParameters +// +//! DESCRIPTION: This API queries for sync status parameters as defined in NV_GSYNC_STATUS_PARAMS. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 313 +//! +//! \param [in] hNvGSyncDevice The caller provides the handle of the GSync device for which to get parameters +//! 
\param [out] *pStatusParams The caller provides the storage space. NvAPI_GSync_GetStatusParameters populates *pStatusParams with +//! values. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_INVALID_ARGUMENT hNvGSyncDevice is NULL / pStatusParams is NULL. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND The queried Graphics system does not have any GSync Device. +//! +//! \ingroup gsyncapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GSync_GetStatusParameters(NvGSyncDeviceHandle hNvGSyncDevice, NV_GSYNC_STATUS_PARAMS *pStatusParams); + +//! @} + + + + + + + + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_RegisterResource +// +//! DESCRIPTION: This API binds a resource (surface/texture) so that it can be retrieved +//! internally by NVAPI. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \param [in] pResource surface/texture +//! +//! \return ::NVAPI_OK, ::NVAPI_ERROR +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_RegisterResource(IDirect3DResource9* pResource); +#endif //defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_UnregisterResource +// +//! DESCRIPTION: This API unbinds a resource (surface/texture) after use. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pResource surface/texture +//! +//! \return ::NVAPI_OK, ::NVAPI_ERROR +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_UnregisterResource(IDirect3DResource9* pResource); + +#endif //defined(_D3D9_H_) + + + + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_AliasSurfaceAsTexture +// +//! \fn NvAPI_D3D9_AliasSurfaceAsTexture(IDirect3DDevice9* pDev, +//! IDirect3DSurface9* pSurface, +//! IDirect3DTexture9 **ppTexture, +//! DWORD dwFlag); +//! DESCRIPTION: Create a texture that is an alias of a surface registered with NvAPI. The +//! new texture can be bound with IDirect3DDevice9::SetTexture(). Note that the texture must +//! be unbound before drawing to the surface again. +//! Unless the USE_SUPER flag is passed, MSAA surfaces will be resolved before +//! being used as a texture. MSAA depth buffers are resolved with a point filter, +//! and non-depth MSAA surfaces are resolved with a linear filter. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDev The D3D device that owns the objects +//! \param [in] pSurface Pointer to a surface that has been registered with NvAPI +//! to which a texture alias is to be provided +//! \param [out] ppTexture Fill with the texture created +//! \param [in] dwFlag NVAPI_ALIAS_SURFACE_FLAG to describe how to handle the texture +//! +//! \retval ::NVAPI_OK completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as an argument +//! \retval ::NVAPI_INVALID_ARGUMENT One of the arguments was invalid, probably dwFlag. +//! \retval ::NVAPI_UNREGISTERED_RESOURCE pSurface has not been registered with NvAPI +//! \retval ::NVAPI_ERROR error occurred +// +/////////////////////////////////////////////////////////////////////////////// + + +//! \ingroup dx +//! See NvAPI_D3D9_AliasSurfaceAsTexture(). 
+typedef enum { + NVAPI_ALIAS_SURFACE_FLAG_NONE = 0x00000000, + NVAPI_ALIAS_SURFACE_FLAG_USE_SUPER = 0x00000001, //!< Use the surface's msaa buffer directly as a texture, rather than resolving. (This is much slower, but potentially has higher quality.) + NVAPI_ALIAS_SURFACE_FLAG_MASK = 0x00000001 +} NVAPI_ALIAS_SURFACE_FLAG; + + +//! \ingroup dx +NVAPI_INTERFACE NvAPI_D3D9_AliasSurfaceAsTexture(IDirect3DDevice9* pDev, + IDirect3DSurface9* pSurface, + IDirect3DTexture9 **ppTexture, + DWORD dwFlag); +#endif //defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_StretchRectEx +// +//! DESCRIPTION: This API copies the contents of the source resource to the destination +//! resource. This function can convert +//! between a wider range of surfaces than +//! IDirect3DDevice9::StretchRect. For example, it can copy +//! from a depth/stencil surface to a texture. +//! +//! The source and destination resources *must* be registered +//! with NvAPI before being used with NvAPI_D3D9_StretchRectEx(). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevice The D3D device that owns the objects. +//! \param [in] pSourceResource Pointer to the source resource. +//! \param [in] pSrcRect Defines the rectangle on the source to copy from. If NULL, copy from the entire resource. +//! \param [in] pDestResource Pointer to the destination resource. +//! \param [in] pDstRect Defines the rectangle on the destination to copy to. If NULL, copy to the entire resource. +//! \param [in] Filter Choose a filtering method: D3DTEXF_NONE, D3DTEXF_POINT, D3DTEXF_LINEAR. +//! +//! \retval ::NVAPI_OK completed request +//! \retval ::NVAPI_INVALID_POINTER An invalid pointer was passed as an argument (probably NULL) +//! \retval ::NVAPI_INVALID_ARGUMENT One of the arguments was invalid +//! 
\retval ::NVAPI_UNREGISTERED_RESOURCE a resource was passed in without being registered +//! \retval ::NVAPI_ERROR error occurred +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_StretchRectEx(IDirect3DDevice9 * pDevice, + IDirect3DResource9 * pSourceResource, + CONST RECT * pSourceRect, + IDirect3DResource9 * pDestResource, + CONST RECT * pDestRect, + D3DTEXTUREFILTERTYPE Filter); + +#endif //defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_ClearRT +// +//! DESCRIPTION: This API Clears the currently bound render target(s) with the +//! given color +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevice The D3D device that owns the objects. +//! \param [in] dwNumRects The no of rectangles to clear. If 0, clear the entire surface (clipped to viewport) +//! \param [in] pRects Defines the rectangles to clear. Should be NULL if dwNumRects == 0 +//! \param [in] r red component of the clear color +//! \param [in] g green component of the clear color +//! \param [in] b blue component of the clear color +//! \param [in] a alpha component of the clear color +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_ClearRT(IDirect3DDevice9 * pDevice, + NvU32 dwNumRects, + CONST RECT * pRects, + float r, float g, float b, float a); +#endif //if defined(_D3D9_H_) + + + + + + + + + + +#if defined(_D3D9_H_) && defined(__cplusplus) +//! SUPPORTED OS: Windows 10 and higher +//! + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_GetSurfaceHandle +// +//! This function gets the handle of a given surface. This handle uniquely +//! 
identifies the surface through all NvAPI entries. +//! +//! +//! \since Release: 313 +//! +//! \param [in] pSurface Surface to be identified +//! \param [out] pHandle Will be filled by the return handle +//! +//! \return An int which could be an NvAPI status or DX HRESULT code +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_GetSurfaceHandle(IDirect3DSurface9 *pSurface, + NVDX_ObjectHandle *pHandle); + +#endif //defined(_D3D9_H_) && defined(__cplusplus) + +#if defined(_D3D9_H_) && defined(__cplusplus) +//! SUPPORTED OS: Windows 10 and higher +//! +//! \addtogroup dxvidcontrol +//! @{ + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION_NAME: NvAPI_D3D9_VideoSetStereoInfo +// +//! \fn NvAPI_D3D9_VideoSetStereoInfo(IDirect3DDevice9 *pDev, +//! NV_DX_VIDEO_STEREO_INFO *pStereoInfo); +//! \code +//! DESCRIPTION: This api specifies the stereo format of a surface, so that the +//! surface could be used for stereo video processing or compositing. +//! In particular, this api could be used to link the left and right +//! views of a decoded picture. +//! +//! \since Release: 313 +//! +//! INPUT: pDev - The device on which the stereo surface will be used +//! pStereoInfo - The stereo format of the surface +//! +//! RETURN STATUS: an int which could be an NvAPI status or DX HRESULT code +//! 
\endcode +/////////////////////////////////////////////////////////////////////////////// + +#ifndef NV_STEREO_VIDEO_FORMAT_DEFINE +#define NV_STEREO_VIDEO_FORMAT_DEFINE + + +typedef enum _NV_STEREO_VIDEO_FORMAT +{ + NV_STEREO_VIDEO_FORMAT_NOT_STEREO = 0, + + NV_STEREO_VIDEO_FORMAT_SIDE_BY_SIDE_LR = 1, + NV_STEREO_VIDEO_FORMAT_SIDE_BY_SIDE_RL = 2, + NV_STEREO_VIDEO_FORMAT_TOP_BOTTOM_LR = 3, + NV_STEREO_VIDEO_FORMAT_TOP_BOTTOM_RL = 4, + NV_STEREO_VIDEO_FORMAT_ROW_INTERLEAVE_LR = 5, + NV_STEREO_VIDEO_FORMAT_ROW_INTERLEAVE_RL = 6, + NV_STEREO_VIDEO_FORMAT_TWO_FRAMES_LR = 7, + NV_STEREO_VIDEO_FORMAT_MONO_PLUS_OFFSET = 8, + + NV_STEREO_VIDEO_FORMAT_LAST = 9, +} NV_STEREO_VIDEO_FORMAT; + +#endif // NV_STEREO_VIDEO_FORMAT_DEFINE + + +typedef struct _NV_DX_VIDEO_STEREO_INFO { + NvU32 dwVersion; //!< Must be NV_DX_VIDEO_STEREO_INFO_VER + NVDX_ObjectHandle hSurface; //!< The surface whose stereo format is to be set + NVDX_ObjectHandle hLinkedSurface; //!< The linked surface (must be valid when eFormat==NV_STEREO_VIDEO_FORMAT_TWO_FRAMES_LR) + NV_STEREO_VIDEO_FORMAT eFormat; //!< Stereo format of the surface + NvS32 sViewOffset; //!< Signed offset of each view (positive offset indicating left view is shifted left) + BOOL bStereoEnable; //!< Whether stereo rendering should be enabled (if FALSE, only left view will be used) +} NV_DX_VIDEO_STEREO_INFO; + +//! Macro for constructing the version field of ::NV_DX_VIDEO_STEREO_INFO +#define NV_DX_VIDEO_STEREO_INFO_VER MAKE_NVAPI_VERSION(NV_DX_VIDEO_STEREO_INFO,1) + +NVAPI_INTERFACE NvAPI_D3D9_VideoSetStereoInfo(IDirect3DDevice9 *pDev, + NV_DX_VIDEO_STEREO_INFO *pStereoInfo); + +//! @} +#endif //defined(_D3D9_H_) && defined(__cplusplus) + + +#if defined(__cplusplus) && defined(__d3d10_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D10_SetDepthBoundsTest +// +//! DESCRIPTION: This function enables/disables the depth bounds test. +//! +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDev The device to set the depth bounds test +//! \param [in] bEnable Enable(non-zero)/disable(zero) the depth bounds test +//! \param [in] fMinDepth The minimum depth for the depth bounds test +//! \param [in] fMaxDepth The maximum depth for the depth bounds test \n +//! The valid values for fMinDepth and fMaxDepth +//! are such that 0 <= fMinDepth <= fMaxDepth <= 1 +//! +//! \return NVAPI_OK if the depth bounds test was correctly enabled or disabled +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D10_SetDepthBoundsTest(ID3D10Device *pDev, + NvU32 bEnable, + float fMinDepth, + float fMaxDepth); + +#endif //defined(__cplusplus) && defined(__d3d10_h__) + + + + + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_IsNvShaderExtnOpCodeSupported +// +//! DESCRIPTION: This function checks if a nv HLSL shader extension opcode is +//! supported on current hardware. List of opcodes is in nvShaderExtnEnums.h +//! To use Nvidia HLSL extensions the application must include nvHLSLExtns.h +//! in the hlsl shader code. See nvHLSLExtns.h for more details on supported opcodes. +//! +//! This function can be called from a different thread than the one calling immediate device setstate functions. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDev The device on which to query for support, +//! should be a ID3D11Device+ device +//! \param [in] opCode the opcode to check +//! \param [out] pSupported true if supported, false otherwise +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval :: NVAPI_OK if the call succeeded +//! 
+//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_IsNvShaderExtnOpCodeSupported(__in IUnknown *pDev, + __in NvU32 opCode, + __out bool *pSupported); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_SetNvShaderExtnSlot +// +//! DESCRIPTION: This function sets the fake UAV slot that is used by Nvidia HLSL +//! shader extensions globally. All createShader calls made to the driver after +//! setting this slot would treat writes/reads to this UAV in a +//! different way. Applications are expected to bind null UAV to this slot. +//! The same slot is used for all shader stages. +//! To disable shader extensions the app need to set this uav slot to 0xFFFFFFFF. +//! To use Nvidia HLSL extensions the application must include nvHLSLExtns.h +//! in the hlsl shader code. See nvHLSLExtns.h for more details. +//! +//! This function can be called from a different thread than the one calling immediate device setstate functions. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDev The device for which to set the extension slot +//! should be a ID3D11Device+ device +//! \param [in] uavSlot the uav slot to use +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval :: NVAPI_OK : success, the uavSlot was set successfully +//! +//!
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_SetNvShaderExtnSlot(__in IUnknown *pDev, + __in NvU32 uavSlot); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +#if defined (__cplusplus) && defined (__d3d12_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetNvShaderExtnSlotSpace +// +//! DESCRIPTION: This function is specifically created for ray tracing since we do not +//! currently support PSOs with DXR. +//! This function sets the device's fake UAV slot and space that is used by Nvidia HLSL +//! shader extensions globally. All state objects created by the driver after +//! setting this slot would treat writes/reads to this UAV in a +//! different way. Applications are expected to bind null UAV to this slot. +//! The same slot is used for all shader stages. +//! To disable shader extensions the app need to set this uav slot to 0xFFFFFFFF. +//! To use Nvidia HLSL extensions the application must include nvHLSLExtns.h +//! in the hlsl shader code. See nvHLSLExtns.h for more details. +//! +//! This function can be called from a different thread than the one calling immediate device setstate functions. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDev The device for which to set the extension slot +//! should be a ID3D12Device+ device +//! \param [in] uavSlot The uav slot to use +//! \param [in] uavSpace The uav space to use +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval :: NVAPI_OK : success, the uavSlot and uavSpace were set successfully +//! +//!
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_SetNvShaderExtnSlotSpace(__in IUnknown *pDev, + __in NvU32 uavSlot, + __in NvU32 uavSpace); + + /////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetNvShaderExtnSlotSpaceLocalThread +// +//! DESCRIPTION: This function is specifically created for ray tracing shaders since we do not +//! currently support PSOs with DXR. +//! This function sets the device's fake UAV slot that is used by Nvidia HLSL +//! shader extensions on local thread. All state objects created by the driver +//! on the same thread that call this function after setting this slot would treat writes/reads +//! to this UAV in a different way. +//! Applications are expected to bind null UAV to this slot. +//! The same slot is used for all shader stages for the device. +//! To disable shader extensions the app may set this uav slot to 0xFFFFFFFF. +//! To use Nvidia HLSL extensions the application must include nvHLSLExtns.h +//! in the hlsl shader code. See nvHLSLExtns.h for more details. +//! +//! This function can be called from a different thread than the one calling immediate device setstate functions. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! +//! \since Release: 387 +//! +//! \param [in] pDev The device for which to set the extension slot +//! should be a ID3D12Device+ device +//! \param [in] uavSlot the uav slot to use +//! \param [in] uavSpace the uav space to use +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval :: NVAPI_OK : success, the uavSlot and uavSpace were set successfully +//! +//!
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_SetNvShaderExtnSlotSpaceLocalThread(__in IUnknown *pDev, + __in NvU32 uavSlot, + __in NvU32 uavSpace); + +#endif //defined (__cplusplus) && defined (__d3d12_h__) + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_SetNvShaderExtnSlotLocalThread +// +//! DESCRIPTION: This function sets the fake UAV slot that is used by Nvidia HLSL +//! shader extensions on local thread. All createShader calls on the same thread +//! that calls this function after setting this slot would treat writes/reads +//! to this UAV in a different way. +//! Applications are expected to bind null UAV to this slot. +//! The same slot is used for all shader stages. +//! To disable shader extensions the app may set this uav slot to 0xFFFFFFFF. +//! To use Nvidia HLSL extensions the application must include nvHLSLExtns.h +//! in the hlsl shader code. See nvHLSLExtns.h for more details. +//! +//! This function can be called from a different thread than the one calling immediate device setstate functions. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 387 +//! +//! \param [in] pDev The device for which to set the extension slot +//! should be a ID3D11Device+ device +//! \param [in] uavSlot the uav slot to use +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval :: NVAPI_OK : success, the uavSlot was set successfully +//! +//!
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_SetNvShaderExtnSlotLocalThread(__in IUnknown *pDev, + __in NvU32 uavSlot); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_BeginUAVOverlapEx +// +//! DESCRIPTION: Causes the driver to skip synchronization that is normally needed when accessing UAVs. +//! Applications must use this with caution otherwise this might cause data hazards when +//! multiple draw calls/compute shader launches are accessing same memory locations +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] *pDeviceOrContext pointer to D3D11 device, or D3D11 device context +//! \param [in] insertWFIFlags bit fields to indicate which WFI would be inserted (gfx / compute / both). +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +typedef enum _NVAPI_D3D11_INSERTWFI_FLAG +{ + NVAPI_D3D_BEGIN_UAV_OVERLAP_NO_WFI = 0x00000000, //!< no WFI + NVAPI_D3D_BEGIN_UAV_OVERLAP_GFX_WFI = 0x00000001, //!< (bit 0) force graphics WFI + NVAPI_D3D_BEGIN_UAV_OVERLAP_COMP_WFI = 0x00000002, //!< (bit 1) force compute WFI +} NVAPI_D3D11_INSERTWFI_FLAG; + +NVAPI_INTERFACE NvAPI_D3D11_BeginUAVOverlapEx(__in IUnknown *pDeviceOrContext, __in NvU32 insertWFIFlags); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_BeginUAVOverlap +// +//! DESCRIPTION: Causes the driver to skip synchronization that is normally needed when accessing UAVs. +//! Applications must use this with caution otherwise this might cause data hazards when +//! multiple draw calls/compute shader launches are accessing same memory locations +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] *pDeviceOrContext pointer to D3D11 device, or D3D11 device context +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_BeginUAVOverlap(__in IUnknown *pDeviceOrContext); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_EndUAVOverlap +// +//! 
DESCRIPTION: Re-enables driver synchronization between calls that access same UAVs +//! See NvAPI_D3D_BeginUAVOverlap for more details. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] *pDeviceOrContext pointer to D3D11 device, or D3D11 device context +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_EndUAVOverlap(__in IUnknown *pDeviceOrContext); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +#if defined(__cplusplus) && defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_GetResourceHandle +// +//! \code +//! DESCRIPTION: This function retrieves a driver handle to a DX10 resource +//! +//! INPUT: pDev The device on which the resource was created +//! pResource The resource for which we want to retrieve a +//! driver handle. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! OUTPUT: phObject Pointer to an NvAPI handle to be populated +//! on success +//! +//! RETURN STATUS: NVAPI_OK if and only if phObject was populated with a valid +//! driver handle +//! \endcode +//! \ingroup nsightapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_GetResourceHandle(ID3D11Device *pDev, + ID3D11Resource* pResource, + NVDX_ObjectHandle* phObject); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetFPSIndicatorState +// +//! 
DESCRIPTION: Display an overlay that tracks the number of times the app presents per second, or, +//! the number of frames-per-second (FPS) +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] bool Whether or not to enable the fps indicator. +//! +//! \return ::NVAPI_OK, +//! ::NVAPI_ERROR +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_SetFPSIndicatorState(IUnknown *pDev, NvU8 doEnable); + +#endif //if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_Present +// +//! DESCRIPTION: This API presents the contents of the next buffer in the sequence of back buffers +//! owned by a IDirect3DDevice9 device. +//! This Present operation supports using a SwapGroup and SwapBarrier on the SwapChain +//! that owns the back buffer to be presented. +//! +//! NOTE: NvAPI_D3D9_Present is a wrapper of the method IDirect3DDevice9::Present which +//! additionally notifies the D3D driver of the SwapChain used by the runtime for +//! presentation, thus allowing the D3D driver to apply SwapGroup and SwapBarrier +//! functionality to that SwapChain. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevice The IDirect3DDevice9 interface that is used to issue the Present, +//! using the following IDirect3DDevice9::Present input parameters +//! \param [in] pSwapChain Optional pointer to a IDirect3DSwapChain9 interface. If provided, the presentation is executed +//! using this interface (i.e. pSwapChain->Present()) for the given swapchain only. +//! If NULL, the presentation is executed on the device for all swapchains as in pDevice->Present() +//! \param [in] pSourceRect A pointer to a RECT structure containing the source rectangle. +//! If NULL, the entire source surface is presented. 
+//! \param [in] pDestRect A pointer to a RECT structure containing the destination rectangle, in window client coordinates. +//! If NULL, the entire client area is filled. +//! \param [in] hDestWindowOverride A pointer to a destination window whose client area is taken as the target for this presentation. +//! If this value is NULL, then the hWndDeviceWindow member of D3DPRESENT_PARAMETERS is taken. +//! \param [in] pDirtyRegion (IN) A pointer to a region to be presented. It must be NULL unless the swap chain was created with +//! D3DSWAPEFFECT_COPY. If this value is non-NULL, the contained region is expressed in back buffer coordinates. +//! +//! \retval ::NVAPI_OK the Present operation was successfully executed +//! \retval ::NVAPI_D3D_DEVICE_LOST D3D device status is D3DERR_DEVICELOST or D3DERR_DEVICENOTRESET, the caller has to reset device +//! \retval ::NVAPI_DEVICE_BUSY the Present operation failed with an error other than D3DERR_DEVICELOST or D3DERR_DEVICENOTRESET +//! \retval ::NVAPI_ERROR the communication with the D3D driver failed, SwapGroup/SwapBarrier may not be possible. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//!\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_Present(IDirect3DDevice9 *pDevice, + IDirect3DSwapChain9 *pSwapChain, + const RECT *pSourceRect, + const RECT *pDestRect, + HWND hDestWindowOverride, + const RGNDATA *pDirtyRegion); +#endif //if defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_QueryFrameCount +// +//! DESCRIPTION: This API queries the universal framecounter of the Quadro-Sync master device. +//! +//! \param [in] pDevice The caller provides the DX9 device that has access to the Quadro-Sync device +//! \param [out] pFrameCount The caller provides the storage space where the framecount is stored. +//! +//!
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK *pFrameCount populated with framecount value. +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_QueryFrameCount(IDirect3DDevice9 *pDevice, + NvU32 *pFrameCount); +#endif //if defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_ResetFrameCount +// +//! DESCRIPTION: This API resets the universal framecounter on the Quadro-Sync master device. +//! +//! \param [in] pDevice The caller provides the DX9 device that has access to the Quadro-Sync device +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK framecounter has been reset +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_ResetFrameCount(IDirect3DDevice9 *pDevice); +#endif //if defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_QueryMaxSwapGroup +// +//! DESCRIPTION: This API queries the number of supported SwapGroups and SwapBarriers in the graphics system. +//! +//! \param [in] pDevice The caller provides the DirectX 9 device that is used as a swapgroup client +//! \param [out] pMaxGroups The caller provides the storage space where the number of available SwapGroups is stored. +//! 
\param [out] pMaxBarriers The caller provides the storage space where the number of available SwapBarriers is stored. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the number of SwapGroups and SwapBarriers has been stored +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_QueryMaxSwapGroup(IDirect3DDevice9 *pDevice, + NvU32 *pMaxGroups, + NvU32 *pMaxBarriers); +#endif //if defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_QuerySwapGroup +// +//! DESCRIPTION: This API queries the current SwapGroup and SwapBarrier that a SwapChain of a specific client device is bound to. +//! +//! \param [in] pDevice The caller provides the DirectX 9 device that is used as a swapgroup client +//! \param [in] pSwapChain The caller provides the IDirect3DSwapChain9 interface as a handle to the SwapChain +//! that belongs to the swapgroup client device +//! \param [out] pSwapGroup The caller provides the storage space where the current SwapGroup is stored. +//! \param [out] pSwapBarrier The caller provides the storage space where the current SwapBarrier is stored. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the current SwapGroup and SwapBarrier has been stored +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_QuerySwapGroup(IDirect3DDevice9 *pDevice, + IDirect3DSwapChain9 *pSwapChain, + NvU32 *pSwapGroup, + NvU32 *pSwapBarrier); +#endif //if defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_JoinSwapGroup +// +//! DESCRIPTION: This API causes the SwapChain of a SwapGroup client to join or leave the specified SwapGroup. +//! +//! \param [in] pDevice The caller provides the DirectX 9 device that is used as a swapgroup client +//! \param [in] pSwapChain The caller provides the IDirect3DSwapChain9 interface as a handle to the SwapChain +//! that belongs to the swapgroup client device +//! \param [in] group The caller specifies the SwapGroup which the SwapChain should join. +//! - If the value of group is zero, the SwapChain leaves the SwapGroup. +//! - The SwapChain joins a SwapGroup if the SwapGroup number is a positive integer less than or +//! equal to the maximum number of SwapGroups queried by NvAPI_SwapGroup_QueryMaxSwapGroup. +//! \param [in] blocking The caller specifies that a presentation of this SwapChain should return immediately or block +//! until all members of the SwapGroup are ready and the presentation was actually executed. +//! A boolean value of false means the Present operation returns immediately and a value of true +//! means the Present operation is blocking. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the SwapChain joined/left the SwapGroup accordingly +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_JoinSwapGroup(IDirect3DDevice9 *pDevice, + IDirect3DSwapChain9 *pSwapChain, + NvU32 group, + BOOL blocking); +#endif //if defined(_D3D9_H_) + +#if defined(_D3D9_H_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_BindSwapBarrier +// +//! DESCRIPTION: This API causes a SwapGroup to be bound to or released from the specified SwapBarrier. +//! +//! \param [in] pDevice The caller provides the DirectX 9 device that is used as a swapgroup client +//! \param [in] group The caller specifies the SwapGroup to be bound to the SwapBarrier. +//! \param [in] barrier The caller specifies the SwapBarrier that the SwapGroup should be bound to. +//! - If the value of barrier is zero, the SwapGroup will be released from the SwapBarrier. +//! - The SwapGroup will be bound to the SwapBarrier if the value of barrier is a positive +//! integer less than or equal to the maximum number of SwapBarriers queried by NvAPI_SwapGroup_QueryMaxSwapGroup. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the SwapGroup is bound to or released from the specified SwapBarrier +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_BindSwapBarrier(IDirect3DDevice9 *pDevice, + NvU32 group, + NvU32 barrier); +#endif //if defined(_D3D9_H_) + +//! 
\ingroup dx +typedef enum +{ + NVAPI_VSYNC_DEFAULT, //!< Fall back to the default settings + NVAPI_VSYNC_OFF, //!< Force vertical sync off when performance is more important than image quality and for benchmarking" + NVAPI_VSYNC_ON, //!< Force vertical sync on when image quality is more important than performance + NVAPI_VSYNC_ADAPTIVE, //!< Select adaptive to turn vertical sync on or off based on the frame rate. + //! Vertical sync will only be on for frame rates above the monitor refresh rate. + NVAPI_VSYNC_ADAPTIVE_HALF_REFRESH_RATE //!< + +} NVAPI_VSYNC_MODE; + + +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetVerticalSyncMode +// +//! DESCRIPTION: This API set the vertical sync mode for the given device context. +//! +//! \param [in] pDevice The caller provides the device and can be either IDirect3DDevice9 or ID3D10Device or ID3D10Device1 or ID3D11Device. +//! \param [in] vsyncMode The caller specifies the NVAPI_VSYNC_MODE to be set. +//! +//! SUPPORTED OS: Do not use this function. It is not supported on Windows 10 and higher OS versions. +//! +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_SetVerticalSyncMode(__in IUnknown *pDevice, __in NVAPI_VSYNC_MODE vsyncMode); + +#endif //if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_Present +// +//! DESCRIPTION: Presents the contents of the next buffer in the sequence of back buffers +//! owned by a D3D device. +//! This Present operation supports using a SwapGroup and SwapBarrier on the SwapChain +//! that owns the back buffer to be presented. +//! +//! NOTE: NvAPI_D3D1x_Present is a wrapper of the method IDXGISwapChain::Present which +//! additionally notifies the D3D driver of the SwapChain used by the runtime for +//! presentation, thus allowing the D3D driver to apply SwapGroup and SwapBarrier +//! functionality to that SwapChain. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevice The D3D device interface that is used to issue the Present operation, +//! using the following IDirect3DDevice9::Present input parameters. +//! pDevice can be either ID3D10Device or ID3D10Device1 or ID3D11Device or ID3D12Device. +//! \param [in] pSwapChain The IDXGISwapChain interface that is intended to present +//! \param [in] SyncInterval An integer that specifies the how to synchronize presentation of a frame with the vertical blank. +//! Values are: +//! - 0: The presentation occurs immediately, there is no synchronization. +//! - 1,2,3,4 : Synchronize presentation after the n'th vertical blank. +//! \param [in] Flags An integer value that contains swap-chain presentation options as defined in DXGI_PRESENT. +//! +//! \retval ::NVAPI_OK the Present operation was successfully executed +//! 
\retval ::NVAPI_DEVICE_BUSY        the Present operation failed with an error DXGI_ERROR_DEVICE_RESET or DXGI_ERROR_DEVICE_REMOVED,
+//!                                     DXGI_STATUS_OCCLUDED, or D3DDDIERR_DEVICEREMOVED.
+//! \retval ::NVAPI_ERROR              the communication with the D3D driver failed, SwapGroup/SwapBarrier may not be possible.
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D1x_Present(IUnknown *pDevice,
+                                    IDXGISwapChain *pSwapChain,
+                                    UINT SyncInterval,
+                                    UINT Flags);
+#endif // defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)
+
+#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME:   NvAPI_D3D1x_QueryFrameCount
+//
+//!   DESCRIPTION: This API queries the universal framecounter of the Quadro-Sync master device.
+//!
+//! \param [in]    pDevice        The caller provides the D3D device that has access to the Quadro-Sync device,
+//!                               pDevice can be either ID3D10Device or ID3D10Device1 or ID3D11Device or ID3D12Device.
+//! \param [out]   pFrameCount    The caller provides the storage space where the framecount is stored.
+//!
+//! SUPPORTED OS:  Windows 10 and higher
+//!
+//!
+//! \retval ::NVAPI_OK                  *pFrameCount populated with framecount value.
+//! \retval ::NVAPI_ERROR               The operation failed.
+//! \retval ::NVAPI_INVALID_ARGUMENT    One or more args passed in are invalid.
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized.
+//!
+//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D1x_QueryFrameCount(IUnknown *pDevice, + NvU32 *pFrameCount); +#endif // defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_ResetFrameCount +// +//! DESCRIPTION: This API resets the universal framecounter on the Quadro-Sync master device. +//! +//! \param [in] pDevice The caller provides the D3D device that has access to the Quadro-Sync device, +//! pDevice can be either ID3D10Device or ID3D10Device1 or ID3D11Device or ID3D12Device. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK framecounter has been reset +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT pDevice arg passed in is invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D1x_ResetFrameCount(IUnknown *pDevice); +#endif // defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_QueryMaxSwapGroup +// +//! DESCRIPTION: This API queries the number of supported SwapGroups and SwapBarriers in the graphics system. +//! +//! \param [in] pDevice The caller provides the D3D device that is intended to use SwapGroup functionality. +//! pDevice can be either ID3D10Device or ID3D10Device1 or ID3D11Device or ID3D12Device. +//! 
\param [out] pMaxGroups The caller provides the storage space where the number of available SwapGroups is stored. +//! \param [out] pMaxBarriers The caller provides the storage space where the number of available SwapBarriers is stored. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the number of SwapGroups and SwapBarriers has been stored +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D1x_QueryMaxSwapGroup(IUnknown *pDevice, + NvU32 *pMaxGroups, + NvU32 *pMaxBarriers); +#endif // defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_QuerySwapGroup +// +//! DESCRIPTION: This API queries the current SwapGroup and SwapBarrier that a SwapChain of a specific client device is bound to. +//! +//! \param [in] pDevice The caller provides the D3D device that owns the SwapChain used as a SwapGroup client. +//! pDevice can be either ID3D10Device or ID3D10Device1 or ID3D11Device or ID3D12Device. +//! \param [in] pSwapChain The IDXGISwapChain interface that is used as the SwapGroup client. +//! +//! \param [out] pSwapGroup The caller provides the storage space where the current SwapGroup is stored. +//! \param [out] pSwapBarrier The caller provides the storage space where the current SwapBarrier is stored. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the current SwapGroup and SwapBarrier has been stored +//! \retval ::NVAPI_ERROR The operation failed. +//! 
\retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D1x_QuerySwapGroup(IUnknown *pDevice, + IDXGISwapChain *pSwapChain, + NvU32 *pSwapGroup, + NvU32 *pSwapBarrier); +#endif // defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_JoinSwapGroup +// +//! DESCRIPTION: This API causes the SwapChain of a SwapGroup client to join or leave the specified SwapGroup. +//! +//! \param [in] pDevice The caller provides the D3D device that owns the SwapChain used as a SwapGroup client. +//! pDevice can be either ID3D10Device or ID3D10Device1 or ID3D11Device or ID3D12Device. +//! \param [in] pSwapChain The IDXGISwapChain interface that is used as the SwapGroup client. +//! \param [in] group The caller specifies the SwapGroup which the SwapChain should join. +//! - If the value of group is zero, the SwapChain leaves the SwapGroup. +//! - The SwapChain joins a SwapGroup if the SwapGroup number is a positive integer less than or +//! equal to the maximum number of SwapGroups queried by NvAPI_SwapGroup_QueryMaxSwapGroup. +//! \param [in] blocking The caller specifies that a presentation of this SwapChain should return immediately or block +//! until all members of the SwapGroup are ready and the presentation was actually executed. +//! A boolean value of false means the Present operation returns immediately and a value of true +//! means the Present operation is blocking. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the SwapChain joined/left the SwapGroup accordingly +//! 
\retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D1x_JoinSwapGroup(IUnknown *pDevice, + IDXGISwapChain *pSwapChain, + NvU32 group, + BOOL blocking); +#endif // defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_BindSwapBarrier +// +//! DESCRIPTION: This API causes a SwapGroup to be bound to or released from the specified SwapBarrier. +//! +//! \param [in] pDevice The caller provides the D3D device that owns the SwapChain used as a SwapGroup client. +//! pDevice can be either ID3D10Device or ID3D10Device1 or ID3D11Device or ID3D12Device. +//! \param [in] group The caller specifies the SwapGroup to be bound to the SwapBarrier. +//! \param [in] barrier The caller specifies the SwapBarrier that the SwapGroup should be bound to. +//! - If the value of barrier is zero, the SwapGroup releases the SwapBarrier. +//! - The SwapGroup will be bound to the SwapBarrier if the value of barrier is a positive +//! integer less than or equal to the maximum number of SwapBarriers queried by NvAPI_D3D1x_QueryMaxSwapGroup. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \retval ::NVAPI_OK the SwapGroup is bound to the specified SwapBarrier +//! \retval ::NVAPI_ERROR The operation failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D1x_BindSwapBarrier(IUnknown *pDevice, + NvU32 group, + NvU32 barrier); +#endif // defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_QueryPresentBarrierSupport +// +//! DESCRIPTION: This API returns if presentBarrier feature is supported on the specified device. +//! +//! \since Release: 470 +//! +//! \param [in] pDevice The ID3D12Device device which owns the SwapChain as a PresentBarrier client. +//! \param [out] pSupported Pointer to a boolean returning true if supported, false otherwise. +//! +//! \return ::NVAPI_OK the call succeeded +//! \return ::NVAPI_ERROR the call failed +//! \return ::NVAPI_NO_IMPLEMENTATION the API is not implemented +//! \return ::NVAPI_INVALID_POINTER an invalid pointer was passed as an argument +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_QueryPresentBarrierSupport(__in ID3D12Device *pDevice, __out bool *pSupported); +#endif // defined(__cplusplus) && defined(__d3d12_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_CreatePresentBarrierClient +// +//! DESCRIPTION: This API returns an NvPresentBarrierClientHandle handle, which +//! owns the swapchain to be synchronized through PresentBarrier. +//! This handle is used in other PresentBarrier functions. +//! +//! \since Release: 470 +//! +//! 
\param [in] pDevice The ID3D12Device device which owns the SwapChain as a PresentBarrier client. +//! \param [in] pSwapChain The IDXGISwapChain interface that presentBarrier is operated on. +//! \param [OUT] pPresentBarrierClient Pointer to an NvPresentBarrierClientHandle handle created by the driver +//! on success. +//! +//! \return ::NVAPI_OK the call succeeded +//! \return ::NVAPI_ERROR the call failed +//! \return ::NVAPI_INVALID_POINTER an invalid pointer was passed as an argument +//! \return ::NVAPI_NO_IMPLEMENTATION the API is not implemented +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_CreatePresentBarrierClient(__in ID3D12Device *pDevice, + __in IDXGISwapChain *pSwapChain, + __out NvPresentBarrierClientHandle *pPresentBarrierClient); +#endif // defined(__cplusplus) && defined(__d3d12_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_RegisterPresentBarrierResources +// +//! DESCRIPTION: This API registers scanout resources of a presentBarrier client +//! to the presentBarrier, and a fence object which is used for +//! presentBarrier synchronization. Once the registration has completed +//! successfully, it is not allowed to add additional resources, i.e. the +//! number of back buffers and fence object are not allowed to be +//! changed. However, application must call this function whenever the +//! back buffers are changed, e.g. ResizeBuffers() is called. +//! +//! \since Release: 470 +//! +//! \param [in] presentBarrierClient The NvPresentBarrierClientHandle client handle that owns the resources. +//! \param [in] pFence An ID3D12Fence object created by the application and used for present +//! synchronization through presentBarrier. 
Application must wait on this
+//!                                     fence to ensure the scanout resources are ready for use in the next
+//!                                     rendering loop. The fence is only signaled by the driver and must not
+//!                                     be signaled through any other queue command. The fence value must be
+//!                                     monotonically increasing on every present call, and tracked by the
+//!                                     application.
+//! \param [in]  ppResources            An array of ID3D12Resource to be synchronized through presentBarrier, and
+//!                                     the size is specified by numResources.
+//! \param [in]  numResources           The number of ID3D12Resource elements in ppResources.
+//!
+//! \return ::NVAPI_OK                 the call succeeded
+//! \return ::NVAPI_ERROR              the call failed
+//! \return ::NVAPI_NO_IMPLEMENTATION  the API is not implemented
+//! \return ::NVAPI_INVALID_POINTER    an invalid pointer was passed as an argument
+//! \return ::NVAPI_INVALID_HANDLE     an invalid NvPresentBarrierClientHandle was passed as an argument
+//! \return ::NVAPI_INVALID_ARGUMENT   an invalid number of resources was passed as an argument
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_RegisterPresentBarrierResources(__in NvPresentBarrierClientHandle presentBarrierClient,
+                                                            __in ID3D12Fence *pFence,
+                                                            __in ID3D12Resource **ppResources,
+                                                            __in NvU32 numResources);
+#endif // defined(__cplusplus) && defined(__d3d12_h__)
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+#if defined(__cplusplus)
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME:   NvAPI_DestroyPresentBarrierClient
+//
+//! DESCRIPTION:     This API destroys a presentBarrier client, and must be called
+//!                  after client leaves presentBarrier to avoid memory leak.
+//!
+//! \since Release: 470
+//!
+//! \param [in]  presentBarrierClient   An NvPresentBarrierClientHandle handle created by NvAPI_xxxx_CreatePresentBarrierClient
+//!
+//! 
\return ::NVAPI_OK                 the call succeeded
+//! \return ::NVAPI_INVALID_HANDLE     an invalid NvPresentBarrierClientHandle was passed as an argument
+//! \return ::NVAPI_NO_IMPLEMENTATION  the API is not implemented
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized
+//!
+//! \ingroup gpu
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DestroyPresentBarrierClient(__in NvPresentBarrierClientHandle presentBarrierClient);
+#endif // defined(__cplusplus)
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+#if defined(__cplusplus)
+typedef struct _NV_JOIN_PRESENT_BARRIER_PARAMS
+{
+    NvU32 dwVersion;                    //!< Must be NV_JOIN_PRESENT_BARRIER_PARAMS_VER1
+} NV_JOIN_PRESENT_BARRIER_PARAMS;
+
+//! Macro for constructing the version field of ::NV_JOIN_PRESENT_BARRIER_PARAMS
+#define NV_JOIN_PRESENT_BARRIER_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_JOIN_PRESENT_BARRIER_PARAMS, 1)
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME:   NvAPI_JoinPresentBarrier
+//
+//! DESCRIPTION:     This API adds a registered PresentBarrier client to the presentBarrier.
+//!                  If the call succeeds, image present of the registered scanout resources
+//!                  from this client is under the synchronization of presentBarrier.
+//!
+//! \since Release: 470
+//!
+//! \param [in]  presentBarrierClient   An NvPresentBarrierClientHandle handle created by NvAPI_xxxx_CreatePresentBarrierClient
+//! \param [in]  pParams                Parameters for joining presentBarrier.
+//!
+//! \retval ::NVAPI_OK                  the call succeeded
+//! \retval ::NVAPI_ERROR               the call failed
+//! \retval ::NVAPI_NO_IMPLEMENTATION   the interface is not implemented
+//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION the version of data structure is not correct
+//! \retval ::NVAPI_INVALID_HANDLE      an invalid NvPresentBarrierClientHandle was passed as an argument
+//! \retval ::NVAPI_INVALID_POINTER     an invalid pointer was passed as an argument (probably NULL)
+//! 
\retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_JoinPresentBarrier(__in NvPresentBarrierClientHandle presentBarrierClient, __in NV_JOIN_PRESENT_BARRIER_PARAMS *pParams); +#endif // defined(__cplusplus) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_LeavePresentBarrier +// +//! DESCRIPTION: This API removes a registered client from presentBarrier. If this +//! client does not join presentBarrier, this function does nothing. +//! +//! \since Release: 470 +//! +//! \param [in] presentBarrierClient An NvPresentBarrierClientHandle handle created by NvAPI_xxxxx_CreatePresentBarrierClient. +//! +//! \retval ::NVAPI_OK the call succeeded +//! \retval ::NVAPI_ERROR the call failed +//! \retval ::NVAPI_NO_IMPLEMENTATION the interface is not implemented +//! \retval ::NVAPI_INVALID_HANDLE an invalid NvPresentBarrierClientHandle was passed as an argument +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_LeavePresentBarrier(__in NvPresentBarrierClientHandle presentBarrierClient); +#endif // defined(__cplusplus) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) + +#define NV_PRESENT_BARRIER_FRAME_STATICS_VER1 MAKE_NVAPI_VERSION(NV_PRESENT_BARRIER_FRAME_STATISTICS,1) + +typedef enum _NV_PRESENT_BARRIER_SYNC_MODE +{ + PRESENT_BARRIER_NOT_JOINED = 0x00000000, //!< The client hasn't joined presentBarrier + PRESENT_BARRIER_SYNC_CLIENT = 0x00000001, //!< The client joined the presentBarrier, but is not synchronized with + //! any other presentBarrier clients. This happens if the back buffers + //! 
of this client are composited instead of being flipped out to screen + PRESENT_BARRIER_SYNC_SYSTEM = 0x00000002, //!< The client joined the presentBarrier, and is synchronized with other + //! presentBarrier clients within the system + PRESENT_BARRIER_SYNC_CLUSTER = 0x00000003, //!< The client joined the presentBarrier, and is synchronized with other + //! clients within the system and across systems through QSync devices +} NV_PRESENT_BARRIER_SYNC_MODE; + +typedef struct _NV_PRESENT_BARRIER_FRAME_STATISTICS +{ + NvU32 dwVersion; //!< Must be NV_PRESENT_BARRIER_FRAME_STATICS_VER1 + NV_PRESENT_BARRIER_SYNC_MODE SyncMode; //!< The presentBarrier mode of this client from last present call + NvU32 PresentCount; //!< The total count of times that a frame has been presented from this + //! client after it joined presentBarrier successfully. + NvU32 PresentInSyncCount; //!< The total count of times that a frame has been presented from this + //! client and that has happened since the returned SyncMode is + //! PRESENT_BARRIER_SYNC_SYSTEM or PRESENT_BARRIER_SYNC_CLUSTER. + //! If the returned SyncMode is any other mode, this value is 0. + //! This count is set back to 0 in case the SyncMode switches away from + //! PRESENT_BARRIER_SYNC_SYSTEM or PRESENT_BARRIER_SYNC_CLUSTER. + NvU32 FlipInSyncCount; //!< The total count of flips from this client since the returned SyncMode + //! is PRESENT_BARRIER_SYNC_SYSTEM or PRESENT_BARRIER_SYNC_CLUSTER. + //! If the returned SyncMode is any other mode, this value is 0. + //! This count is set back to 0 in case the SyncMode switches away from + //! PRESENT_BARRIER_SYNC_SYSTEM or PRESENT_BARRIER_SYNC_CLUSTER. + NvU32 RefreshCount; //!< The total count of v-blanks since the returned SyncMode of this client + //! is PRESENT_BARRIER_SYNC_SYSTEM or PRESENT_BARRIER_SYNC_CLUSTER. + //! If the returned SyncMode is any other mode, this value is 0. + //! This count is set back to 0 in case the SyncMode switches away from + //! 
PRESENT_BARRIER_SYNC_SYSTEM or PRESENT_BARRIER_SYNC_CLUSTER. +} NV_PRESENT_BARRIER_FRAME_STATISTICS; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_QueryPresentBarrierFrameStatistics +// +//! DESCRIPTION: This API returns the presentBarrier frame statistics of last +//! present call from this client. If the client did not join +//! presentBarrier, the SyncMode is returned as PRESENT_BARRIER_NOT_JOINED, +//! and all other fields are reset. Driver does not retain any +//! presentBarrier info of the client once it leaves presentBarrier. +//! +//! \since Release: 470 +//! +//! \param [in] presentBarrierClient An NvPresentBarrierClientHandle handle created by NvAPI_xxxxx_CreatePresentBarrierClient. +//! \param [out] pFrameStats Pointer to NV_PRESENT_BARRIER_FRAME_STATISTICS structure about presentBarrier statistics. +//! +//! \retval ::NVAPI_OK the call succeeded +//! \retval ::NVAPI_ERROR the call failed +//! \retval ::NVAPI_NO_IMPLEMENTATION the interface is not implemented +//! \retval ::NVAPI_INVALID_HANDLE an invalid NvPresentBarrierClientHandle was passed as an argument +//! \retval ::NVAPI_INVALID_POINTER an invalid pointer was passed as an argument (probably NULL) +//! \retval ::NVAPI_INCOMPATIBLE_STRUCT_VERSION invalid version of frameStatistics params +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized +//! +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_QueryPresentBarrierFrameStatistics(__in NvPresentBarrierClientHandle presentBarrierClient, + __out NV_PRESENT_BARRIER_FRAME_STATISTICS *pFrameStats); +#endif // defined(__cplusplus) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_CreateDDisplayPresentBarrierClient +// +//! 
DESCRIPTION: This API returns an NvPresentBarrierClientHandle handle.
+//!
+//! \since Release: 510
+//!
+//! \param [in] pDevice The ID3D12Device device which executes the rendering commands of this PresentBarrier
+//! client. It must be created on the same adapter as DisplayDevice.
+//! \param [in] sourceId The adapter-relative identifier for the DisplaySource obtained from DisplaySource.SourceId().
+//! \param [out] pPresentBarrierClient Pointer to an NvPresentBarrierClientHandle handle created by the driver on success.
+//!
+//! \return ::NVAPI_OK the call succeeded
+//! \return ::NVAPI_ERROR the call failed
+//! \return ::NVAPI_INVALID_POINTER an invalid pointer was passed as an argument
+//! \return ::NVAPI_INVALID_HANDLE the input displaySource handle is not owned by the process
+//! \return ::NVAPI_NOT_SUPPORTED PresentBarrier feature is not supported on this configuration
+//! \return ::NVAPI_NO_IMPLEMENTATION the API is not implemented
+//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_CreateDDisplayPresentBarrierClient(__in ID3D12Device *pDevice, __in NvU32 sourceId, __out NvPresentBarrierClientHandle *pPresentBarrierClient);
+#endif // defined(__cplusplus) && defined(__d3d12_h__)
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__) || defined(__d3d12_h__))
+
+enum NVAPI_QUAD_FILLMODE
+{
+ NVAPI_QUAD_FILLMODE_DISABLED = 0,
+ NVAPI_QUAD_FILLMODE_BBOX = 1,
+ NVAPI_QUAD_FILLMODE_FULL_VIEWPORT = 2,
+};
+
+#endif //defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__) || defined(__d3d12_h__))
+
+//! SUPPORTED OS: Windows 10 and higher
+//! 
+#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +typedef struct NvAPI_D3D11_RASTERIZER_DESC_EX +{ + // D3D11_RASTERIZER_DESC member variables + D3D11_FILL_MODE FillMode; + D3D11_CULL_MODE CullMode; + BOOL FrontCounterClockwise; + INT DepthBias; + FLOAT DepthBiasClamp; + FLOAT SlopeScaledDepthBias; + BOOL DepthClipEnable; + BOOL ScissorEnable; + BOOL MultisampleEnable; + BOOL AntialiasedLineEnable; + + // NvAPI_D3D11_RASTERIZER_DESC_EX specific member variables + NvU32 ForcedSampleCount; //1 it needs to match N, in non-TIR it needs to match RT sample count. Ignored if ForcePerSampleInterlock is set + NvU8 SamplePositionsX[16]; // 1 && (pDesc->MiscFlags&D3D11_RESOURCE_MISC_TILED) +//! \param [in] pInitialData A pointer to an array of D3D11_SUBRESOURCE_DATA structures that describe subresources for the 2D texture resource. +//! \param [out] ppTexture2D A pointer to a buffer that receives a pointer to a ID3D11Texture2D interface for the created texture. + +//! +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_CreateTiledTexture2DArray(__in ID3D11Device *pDevice, + __in const D3D11_TEXTURE2D_DESC *pDesc, + __in const D3D11_SUBRESOURCE_DATA *pInitialData, + __out ID3D11Texture2D **ppTexture2D); + +#endif //defined(__cplusplus) && defined(__d3d11_2_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! 
+ +#if defined (__cplusplus) && defined(__d3d11_h__) + +typedef enum _NV_D3D11_FEATURE +{ + NV_D3D11_FEATURE_RASTERIZER, +} NV_D3D11_FEATURE; + +typedef struct _NV_D3D11_FEATURE_DATA_RASTERIZER_SUPPORT +{ + BOOL TargetIndependentRasterWithDepth; + BOOL ProgrammableSamplePositions; + BOOL InterleavedSampling; + BOOL ConservativeRaster; + BOOL PostZCoverage; + BOOL CoverageToColor; +} NV_D3D11_FEATURE_DATA_RASTERIZER_SUPPORT; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CheckFeatureSupport +// +//! DESCRIPTION: This function gets information about the features that are supported by the current graphics driver. +//! +//! +//! +//! \param [in] pDevice The device on which to query for support. +//! \param [in] Feature A member of the NvAPI_D3D11_FEATURE enumerated type that describes which feature to query for suppor. +//! \param [in] pFeatureSupportData Upon completion of the method, the passed structure is filled with data that describes the feature support. +//! \param [out] FeatureSupportDataSize The size of the structure passed to the pFeatureSupportData parameter. +//! +//! \since Release: 410 +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! \retval :: Returns NVAPI_OK if successful; returns NVAPI_INVALID_ARGUMENT if an unsupported data type is passed to the pFeatureSupportData parameter +//! or a size mismatch is detected for the FeatureSupportDataSize parameter; +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_CheckFeatureSupport(__in ID3D11Device *pDevice, + __in NV_D3D11_FEATURE Feature, + __out void *pFeatureSupportData, + __in UINT FeatureSupportDataSize); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! 
+ +#if defined (__cplusplus) && defined(__d3d11_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateImplicitMSAATexture2D +// +//! \since Release: 410 +// +//! \code +//! DESCRIPTION: NvAPI_D3D11_CreateImplicitMSAATexture2D is a simple wrapper of ID3D11Device::CreateTexture2D +//! which allows to create multisampled 2D texture that is exposed to DX runtime as non-multisampled texture. +//! +//! \param [in] pDevice Current d3d device +//! \param [in] pDesc A pointer to a D3D11_TEXTURE2D_DESC structure that describes a 2D texture resource. +//! To create a typeless resource that can be interpreted at runtime into different, +//! compatible formats, specify a typeless format in the texture description. +//! To generatemipmap levels automatically, set the number of mipmap levels to 0. +//! SampleDesc.SampleCount specifies actual resource sample count, while D3D runtime object +//! sees resource as non-multisampled. +//! +//! \param [out] ppTexture2D A pointer to a buffer that receives a pointer to a ID3D11Texture2D interface for the +//! created texture. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. See MSDN for the API specific error codes. +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_CreateImplicitMSAATexture2D(__in ID3D11Device *pDevice, + __in const D3D11_TEXTURE2D_DESC *pDesc, + __out ID3D11Texture2D **ppTexture2D); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d12_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_CreateImplicitMSAATexture2D +// +//! \since Release: 410 +// +//! \code +//! 
DESCRIPTION: NvAPI_D3D12_CreateCommittedImplicitMSAATexture2D is a simple wrapper of ID3D12Device::CreateCommittedResource
+//! which allows creating a multisampled 2D texture that is exposed to DX runtime as non-multisampled texture.
+//!
+//! \param [in] pDevice Current d3d device
+//! \param [in] pDesc A pointer to a D3D12_RESOURCE_DESC structure that describes a 2D texture resource.
+//! To create a typeless resource that can be interpreted at runtime into different,
+//! compatible formats, specify a typeless format in the texture description.
+//! To generate mipmap levels automatically, set the number of mipmap levels to 0.
+//! SampleDesc.SampleCount specifies actual resource sample count, while D3D runtime object
+//! sees resource as non-multisampled.
+//! \param [in] pHeapProperties, HeapFlags, InitialResourceState, pOptimizedClearValue, riidResource See D3D12 docs
+//!
+//! \param [out] ppResource Same as ID3D12Device::CreateCommittedResource
+//!
+//! \return This API can return any of the error codes enumerated in
+//! #NvAPI_Status. See MSDN for the API specific error codes.
+//!
+//! \endcode
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_CreateCommittedImplicitMSAATexture2D(
+ __in ID3D12Device* pDevice,
+ __in const D3D12_HEAP_PROPERTIES *pHeapProperties,
+ D3D12_HEAP_FLAGS HeapFlags,
+ __in const D3D12_RESOURCE_DESC *pDesc,
+ D3D12_RESOURCE_STATES InitialResourceState,
+ __in_opt const D3D12_CLEAR_VALUE *pOptimizedClearValue,
+ REFIID riidResource,
+ __out void **ppvResource);
+
+#endif //defined(__cplusplus) && defined(__d3d12_h__)
+
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+
+//! \ingroup dx
+//! 
Valid modes for NvAPI_D3D11_ResolveSubresourceRegion() and NvAPI_D3D12_ResolveSubresourceRegion
+typedef enum _NV_RESOLVE_MODE {
+ NV_RESOLVE_MODE_SAMPLE_0,
+} NV_RESOLVE_MODE;
+
+#if defined (__cplusplus) && defined(__d3d11_h__)
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D11_ResolveSubresourceRegion
+//
+//! \since Release: 410
+//
+//! \code
+//! DESCRIPTION: NvAPI_D3D11_ResolveSubresourceRegion is a D3D11 analog of the D3D12 ResolveSubresourceRegion.
+//!
+//! \param [in] pDstResource Destination resource. Must be created with the D3D11_USAGE_DEFAULT flag and be single-sampled.
+//! \param [in] DstSubresource A zero-based index, that identifies the destination subresource. Use D3D11CalcSubresource to calculate the index.
+//! \param [in] DstX The X coordinate of the left-most edge of the destination region.
+//! The width of the destination region is the same as the width of the source rect.
+//!
+//! \param [in] DstY The Y coordinate of the top-most edge of the destination region.
+//! The height of the destination region is the same as the height of the source rect.
+//!
+//! \param [in] pSrcResource Source resource. Must be multisampled.
+//! \param [in] SrcSubresource The source subresource of the source resource.
+//! \param [in] pSrcRect Specifies the rectangular region of the source resource to be resolved.
+//! Passing NULL for pSrcRect specifies that the entire subresource is to be resolved.
+//!
+//! \param [in] Format A DXGI_FORMAT that indicates how the multisampled resource will be resolved to a single-sampled resource.
+//! \param [in] ResolveMode Specifies the operation used to resolve the source samples. NV_RESOLVE_MODE_SAMPLE_0 is the only supported mode.
+//! NV_RESOLVE_MODE_SAMPLE_0 outputs sample 0 and discards all other samples.
+//!
+//! \return This API can return any of the error codes enumerated in
+//! #NvAPI_Status. See MSDN for the API specific error codes.
+//! 
+//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_ResolveSubresourceRegion( + __in ID3D11Device *pDevice, + __in ID3D11Texture2D *pDstResource, + __in UINT DstSubresource, + __in UINT DstX, + __in UINT DstY, + __in ID3D11Texture2D *pSrcResource, + __in UINT SrcSubresource, + __in_opt const RECT *pSrcRect, + __in DXGI_FORMAT Format, + __in NV_RESOLVE_MODE ResolveMode); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d12_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_ResolveSubresourceRegion +// +//! \since Release: 410 +// +//! \code +//! DESCRIPTION: NvAPI_D3D12_ResolveSubresourceRegion is D3D11 an analog of D3D12 ResolveSubresourceRegion. +//! +//! \param [in] pDstResource Destination resource. Must be a created with the D3D11_USAGE_DEFAULT flag and be single-sampled. +//! \param [in] DstSubresource A zero-based index, that identifies the destination subresource. Use D3D11CalcSubresource to calculate the index. +//! \param [in] DstX The X coordinate of the left-most edge of the destination region. +//! The width of the destination region is the same as the width of the source rect. +//! +//! \param [in] DstY The Y coordinate of the top-most edge of the destination region. +//! The height of the destination region is the same as the height of the source rect. +//! +//! \param [in] pSrcResource Source resource. Must be multisampled. +//! \param [in] SrcSubresource The source subresource of the source resource. +//! \param [in] pSrcRect Specifies the rectangular region of the source resource to be resolved. +//! Passing NULL for pSrcRect specifies that the entire subresource is to be resolved. +//! +//! 
\param [in] Format A DXGI_FORMAT that indicates how the multisampled resource will be resolved to a single-sampled resource.
+//! \param [in] ResolveMode Specifies the operation used to resolve the source samples. NV_RESOLVE_MODE_SAMPLE_0 is the only supported mode.
+//! NV_RESOLVE_MODE_SAMPLE_0 outputs sample 0 and discards all other samples.
+//!
+//! \return This API can return any of the error codes enumerated in
+//! #NvAPI_Status. See MSDN for the API specific error codes.
+//!
+//! \endcode
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_ResolveSubresourceRegion(
+ __in ID3D12GraphicsCommandList1*pCommandList,
+ __in ID3D12Resource *pDstResource,
+ __in UINT DstSubresource,
+ __in UINT DstX,
+ __in UINT DstY,
+ __in ID3D12Resource *pSrcResource,
+ __in UINT SrcSubresource,
+ __in_opt RECT *pSrcRect,
+ __in DXGI_FORMAT Format,
+ __in NV_RESOLVE_MODE ResolveMode);
+
+#endif //defined(__cplusplus) && defined(__d3d12_h__)
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+
+#if defined (__cplusplus) && defined(__d3d11_2_h__)
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D11_TiledTexture2DArrayGetDesc
+//
+//! \since Release: 375
+//
+//! \code
+//! DESCRIPTION: NvAPI_D3D11_TiledTexture2DArrayGetDesc is a simple wrapper of ID3D11Texture2D::GetDesc
+//! when pTiledTexture2DArray is created with NvAPI_D3D11_CreateTiledTexture2DArray.
+//! Runtime doesn't know the created resource is actually a tiled resource.
+//! So calling ID3D11Texture2D::GetDesc will get a desc without D3D11_RESOURCE_MISC_TILED in MiscFlags.
+//! This wrapper API just adds D3D11_RESOURCE_MISC_TILED back.
+//!
+//! \param [in] pTiledTexture2DArray Pointer of tiled texture2D array to get resource desc from.
+//! \param [out] pDesc Pointer to a resource description.
+
+//!
+//!
+//! \return This API can return any of the error codes enumerated in
+//! 
#NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_TiledTexture2DArrayGetDesc(__in ID3D11Texture2D *pTiledTexture2DArray, + __out D3D11_TEXTURE2D_DESC *pDesc); + +#endif //defined(__cplusplus) && defined(__d3d11_2_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_2_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_UpdateTileMappings +// +//! \since Release: 375 +// +//! \code +//! DESCRIPTION: NvAPI_D3D11_UpdateTileMappings is an extension of ID3D11DeviceContext2::UpdateTileMappings. +//! It allows pTiledResource to be a resource created with NvAPI_D3D11_CreateTiledTexture2DArray, and should be used only in such case. +//! +//! \param [in] pDeviceContext Must be Immediate DeviceContext. +//! \param [in] pTiledResource A pointer to the tiled texture 2D array resource created by NvAPI_D3D11_CreateTiledTexture2DArray. +//! \param [in] NumTiledResourceRegions The number of tiled resource regions. +//! \param [in] pTiledResourceRegionStartCoordinates An array of D3D11_TILED_RESOURCE_COORDINATE structures that describe the starting coordinates of the tiled resource regions. Cannot be NULL. +//! \param [in] pTiledResourceRegionSizes An array of D3D11_TILE_REGION_SIZE structures that describe the sizes of the tiled resource regions. Cannot be NULL. +//! \param [in] pTilePool A pointer to the tile pool. This resource should be created by standard API. +//! \param [in] NumRanges The number of tile-pool ranges. +//! \param [in] pRangeFlags An array of D3D11_TILE_RANGE_FLAG values that describe each tile-pool range. +//! \param [in] pTilePoolStartOffsets An array of offsets into the tile pool. 
These are 0-based tile offsets, counting in tiles (not bytes). +//! \param [in] pRangeTileCounts An array of values that specify the number of tiles in each tile-pool range. +//! \param [in] Flags A combination of D3D11_TILE_MAPPING_FLAGS values that are combined by using a bitwise OR operation. + +//! +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_UpdateTileMappings( + __in ID3D11DeviceContext2 *pDeviceContext, + __in ID3D11Resource *pTiledResource, + __in UINT NumTiledResourceRegions, + __in const D3D11_TILED_RESOURCE_COORDINATE *pTiledResourceRegionStartCoordinates, + __in const D3D11_TILE_REGION_SIZE *pTiledResourceRegionSizes, + __in ID3D11Buffer *pTilePool, + __in UINT NumRanges, + __in const UINT *pRangeFlags, + __in const UINT *pTilePoolStartOffsets, + __in const UINT *pRangeTileCounts, + __in UINT Flags); + +#endif //defined(__cplusplus) && defined(__d3d11_2_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_2_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CopyTileMappings +// +//! \since Release: 375 +// +//! \code +//! DESCRIPTION: NvAPI_D3D11_CopyTileMappings is an extension of ID3D11DeviceContext2::CopyTileMappings +//! It allows pDestTiledResource or pSourceTiledResource or both to be created with NvAPI_D3D11_CreateTiledTexture2DArray. +//! It should be used only in such case. +//! +//! \param [in] pDeviceContext Must be Immediate DeviceContext. +//! \param [in] pDestTiledResource Tiled resource created by NvAPI_D3D11_CreateTiledTexture2DArray to copy tile mappings into. +//! 
\param [in] pDestRegionStartCoordinate A pointer to a D3D11_TILED_RESOURCE_COORDINATE structure that describes the starting coordinates of the destination tiled resource. +//! \param [in] pSourceTiledResource Tiled resource created by NvAPI_D3D11_CreateTiledTexture2DArray to copy tile mappings from. +//! \param [in] pSourceRegionStartCoordinate A pointer to a D3D11_TILED_RESOURCE_COORDINATE structure that describes the starting coordinates of the source tiled resource. +//! \param [in] pTileRegionSize A pointer to a D3D11_TILE_REGION_SIZE structure that describes the size of the tiled region. +//! \param [in] Flags A combination of D3D11_TILE_MAPPING_FLAGS values that are combined by using a bitwise OR operation. + +//! +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_CopyTileMappings( + __in ID3D11DeviceContext *pDeviceContext, + __in ID3D11Resource *pDestTiledResource, + __in const D3D11_TILED_RESOURCE_COORDINATE *pDestRegionStartCoordinate, + __in ID3D11Resource *pSourceTiledResource, + __in const D3D11_TILED_RESOURCE_COORDINATE *pSourceRegionStartCoordinate, + __in const D3D11_TILE_REGION_SIZE *pTileRegionSize, + __in UINT Flags); + +#endif //defined(__cplusplus) && defined(__d3d11_2_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_2_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_TiledResourceBarrier +// +//! \since Release: 375 +// +//! \code +//! DESCRIPTION: NvAPI_D3D11_TiledResourceBarrier is an extension of ID3D11DeviceContext2::TiledResourceBarrier, but only works on ID3D11Resource(no support for ID3D11View). +//! 
If pTiledResourceAccessBeforeBarrier or pTiledResourceAccessAfterBarrier or both are created by NvAPI_D3D11_CreateTiledTexture2DArray, +//! NvAPI_D3D11_TiledResourceBarrier must be used instead of ID3D11DeviceContext2::TiledResourceBarrier. +//! +//! \param [in] pDeviceContext Must be Immediate DeviceContext. +//! \param [in] pTiledResourceAccessBeforeBarrier Access operations on this resource must complete before the access operations on the object that pTiledResourceAccessAfterBarrier specifies. +//! \param [in] pTiledResourceAccessAfterBarrier Access operations on this resource must begin after the access operations on the object that pTiledResourceAccessBeforeBarrier specifies. + +//! +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_TiledResourceBarrier( + __in ID3D11DeviceContext *pDeviceContext, + __in ID3D11Resource *pTiledResourceAccessBeforeBarrier, + __in ID3D11Resource *pTiledResourceAccessAfterBarrier); + +#endif //defined(__cplusplus) && defined(__d3d11_2_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_AliasMSAATexture2DAsNonMSAA +// +//! \code +//! DESCRIPTION: This function allows creating (aliasing) a non-MSAA Texture2D object using the same memory as the given multi-sampled +//! texture (pInputTex). The surface created would be bloated in width and height but it will have SampleCount = 1 +//! For 2X MSAA: OutTex.Width = InputTex.Width * 2, outTex.Height = InputTex.Height +//! For 4X MSAA: OutTex.Width = InputTex.Width * 2, outTex.Height = InputTex.Height * 2 +//! 
For 8X MSAA: OutTex.Width = InputTex.Width * 4, outTex.Height = InputTex.Height * 2 +//! Only textures SampleQuality = 0 can be aliased as Non MSAA +//! The app should ensure that original texture is released only after the aliased copy is released. +//! +//! This function is free-threaded create compatible i.e. it can be called from a different thread +//! than the one calling immediate device setstate functions. +//! +//! \param [in] pDevice current d3d device +//! \param [in] pInputTex The MultiSampled Texture2D resource that is being aliased +//! \param [out] ppOutTex The aliased non AA copy MultiSampled Texture2D resource +//! +//! +//! \return :: NVAPI_OK if the call succeeds. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_AliasMSAATexture2DAsNonMSAA(__in ID3D11Device *pDevice, + __in ID3D11Texture2D *pInputTex, + __out ID3D11Texture2D **ppOutTex); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) && (!defined(CINTERFACE)) +typedef UINT NvAPI_D3D11_SWIZZLE_MODE; + +typedef enum _NV_SWIZZLE_MODE +{ + NV_SWIZZLE_POS_X = 0, + NV_SWIZZLE_NEG_X = 1, + NV_SWIZZLE_POS_Y = 2, + NV_SWIZZLE_NEG_Y = 3, + NV_SWIZZLE_POS_Z = 4, + NV_SWIZZLE_NEG_Z = 5, + NV_SWIZZLE_POS_W = 6, + NV_SWIZZLE_NEG_W = 7 +}NV_SWIZZLE_MODE; + +typedef enum _NV_SWIZZLE_OFFSET +{ + NV_SWIZZLE_OFFSET_X = 0, + NV_SWIZZLE_OFFSET_Y = 4, + NV_SWIZZLE_OFFSET_Z = 8, + NV_SWIZZLE_OFFSET_W = 12 +}NV_SWIZZLE_OFFSET; + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! 
+ +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) && (!defined(CINTERFACE)) +#define NV_CUSTOM_SEMANTIC_MAX_LIMIT 32 + +typedef enum NV_CUSTOM_SEMANTIC_TYPE +{ + NV_NONE_SEMANTIC = 0, + NV_X_RIGHT_SEMANTIC = 1, + NV_VIEWPORT_MASK_SEMANTIC = 2, + NV_XYZW_RIGHT_SEMANTIC = 3, + NV_VIEWPORT_MASK_2_SEMANTIC = 4, + + NV_POSITION_SEMANTIC = 5, + NV_CLIP_DISTANCE_0_SEMANTIC = 6, // MultiView can accept upto two vec4 values. So the application should not use + NV_CLIP_DISTANCE_1_SEMANTIC = 7, // more than 2 of the below Clip / Cull semantics in a single shader. + NV_CULL_DISTANCE_0_SEMANTIC = 8, + NV_CULL_DISTANCE_1_SEMANTIC = 9, + NV_GENERIC_ATTRIBUTE_SEMANTIC = 10, + + NV_PACKED_EYE_INDEX_SEMANTIC = 17, + NV_CUSTOM_SEMANTIC_MAX = NV_CUSTOM_SEMANTIC_MAX_LIMIT, +} NV_CUSTOM_SEMANTIC_TYPE; + +typedef struct _NV_CUSTOM_SEMANTIC +{ + UINT version; // NV_CUSTOM_SEMANTIC_VERSION + + NV_CUSTOM_SEMANTIC_TYPE NVCustomSemanticType; // type of custom semantic (NV_CUSTOM_SEMANTIC_TYPE) + NvAPI_LongString NVCustomSemanticNameString; // name of custom semantic e.g. "NV_X_RIGHT", "NV_VIEWPORT_MASK" + BOOL RegisterSpecified; // (optional) set to TRUE to explicitly provide register number and mask as below + NvU32 RegisterNum; // (optional) output register which has the custom semantic. 
+ NvU32 RegisterMask; // (optional) output register component mask which has the custom semantic (X:1, Y:2, Z:4) + NvU32 Reserved; // reserved +} NV_CUSTOM_SEMANTIC; + +#define NV_CUSTOM_SEMANTIC_VERSION MAKE_NVAPI_VERSION(NV_CUSTOM_SEMANTIC, 1) + +#endif //defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) && (!defined(CINTERFACE)) + +#if defined (__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +typedef struct NvAPI_D3D11_CREATE_GEOMETRY_SHADER_EX_V5 +{ + UINT version; + + BOOL UseViewportMask; + BOOL OffsetRtIndexByVpIndex; + BOOL ForceFastGS; + BOOL DontUseViewportOrder; + BOOL UseAttributeSkipMask; + BOOL UseCoordinateSwizzle; + NvAPI_D3D11_SWIZZLE_MODE *pCoordinateSwizzling; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + BOOL ConvertToFastGS; // reserved + BOOL UseSpecificShaderExt; // TRUE if creating minimal specific shaders with nvapi shader extensions +} NvAPI_D3D11_CREATE_GEOMETRY_SHADER_EX_V5; + +typedef NvAPI_D3D11_CREATE_GEOMETRY_SHADER_EX_V5 NvAPI_D3D11_CREATE_GEOMETRY_SHADER_EX; +#define NVAPI_D3D11_CREATEGEOMETRYSHADEREX_2_VER_5 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_GEOMETRY_SHADER_EX_V5, 5) +#define NVAPI_D3D11_CREATEGEOMETRYSHADEREX_2_VERSION NVAPI_D3D11_CREATEGEOMETRYSHADEREX_2_VER_5 + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateGeometryShaderEx +// +//! \fn NvAPI_D3D11_CreateGeometryShaderEx +//! +//! DESCRIPTION: This function allows us to extend the creation of geometry shaders with extra bits +//! of functionality. +//! +//! The first parameters are identical to ID3D11Device::CreateGeometryShader() +//! so please refer to its documentation for their usage. +//! +//! 
The new parameter is UseViewportMask which is to tell the driver to create a shader +//! that outputs a viewport mask instead when a viewport index is indicated. +//! Outputting a viewport mask allows a single primitive to land on many different viewports +//! as specified by the bits set in the mask, rather than to rely on a single number that tells it +//! which unique viewport it would be drawn on. +//! This can be used for example in conjunction with the setting of coordinates swizzling (see XXX_NVAPI function) +//! to generates multiple adjacent views of the same primitive in a more efficient fashion +//! (outputting the primitive only once). +//! +//! This function is free-threaded create compatible i.e. it can be called from a different +//! thread than the one calling immediate device setstate functions. +//! +//! \since Release: +//! +//! \param [in] pDevice The device pointer +//! \param [in] pShaderBytecode A pointer to the compiled shader. +//! \param [in] BytecodeLength Size of the compiled geometry shader. +//! \param [in] pClassLinkage A pointer to a class linkage interface. Can be NULL. +//! \param [in] UseViewportMask Set to FALSE for custom semantic shaders. Tell the driver to create a shader that outputs the viewport mask in lieu of the viewport index. See above description. +//! \param [in] OffsetRtIndexByVpIndex Set to FALSE for custom semantic shaders. The Rendertarget index is offset by the viewport index +//! \param [in] ForceFastGS If TRUE, GS must be written with maxvertexcount(1) and must pass-through input vertex 0 to the output without modification +//! \param [in] DontUseViewportOrder Default FALSE for Primitives batched per viewport to improve performance. Set TRUE for API order (slow). +//! \param [in] UseAttributeSkipMask reserved +//! \param [in] UseCoordinateSwizzle reserved +//! \param [in] pCoordinateSwizzling reserved +//! 
\param [in] NumCustomSemantics Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics +//! \param [in] pCustomSemantics pointer to array of NV_CUSTOM_SEMANTIC +//! \param [in] ConvertToFastGS reserved +//! \param [in] UseSpecificShaderExt TRUE if creating minimal specific shaders with nvapi shader extensions +//! \param [out] ppGeometryShader Address of a pointer to a ID3D11GeometryShader interface. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreateGeometryShaderEx_2(__in ID3D11Device *pDevice, __in const void *pShaderBytecode, + __in SIZE_T BytecodeLength, __in_opt ID3D11ClassLinkage *pClassLinkage, + __in const NvAPI_D3D11_CREATE_GEOMETRY_SHADER_EX *pCreateGeometryShaderExArgs, + __out ID3D11GeometryShader **ppGeometryShader); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! 
+ +#if defined (__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE) ) + +typedef struct NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V1 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC +} NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V1; + +typedef struct NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V2 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + BOOL UseWithFastGS; // reserved +} NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V2; + +typedef struct NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V3 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + BOOL UseWithFastGS; // reserved + BOOL UseSpecificShaderExt; // TRUE if creating minimal specific shaders with nvapi shader extensions +} NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V3; + +typedef NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V3 NvAPI_D3D11_CREATE_VERTEX_SHADER_EX; +#define NVAPI_D3D11_CREATEVERTEXSHADEREX_VER_1 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V1, 1) +#define NVAPI_D3D11_CREATEVERTEXSHADEREX_VER_2 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V2, 2) +#define NVAPI_D3D11_CREATEVERTEXSHADEREX_VER_3 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_VERTEX_SHADER_EX_V2, 3) +#define NVAPI_D3D11_CREATEVERTEXSHADEREX_VERSION NVAPI_D3D11_CREATEVERTEXSHADEREX_VER_3 + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateVertexShaderEx +// +//! 
\fn NvAPI_D3D11_CreateVertexShaderEx +//! +//! DESCRIPTION: This function allows us to extend the creation of vertex shaders with extra bits +//! of functionality. +//! +//! The first parameters are identical to ID3D11Device::CreateVertexShader() +//! so please refer to its documentation for their usage. +//! +//! The new parameter are custom semantics which allow setting of custom semantic variables +//! in the shader +//! +//! This function is free-threaded create compatible i.e. it can be called from a different thread +//! than the one calling immediate device setstate functions. +//! +//! \since Release: +//! +//! \param [in] pDevice The device pointer +//! \param [in] pShaderBytecode A pointer to the compiled shader. +//! \param [in] BytecodeLength Size of the compiled vertex shader. +//! \param [in] pClassLinkage A pointer to a class linkage interface. Can be NULL. +//! \param [in] NumCustomSemantics Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics +//! \param [in] pCustomSemantics pointer to array of NV_CUSTOM_SEMANTIC +//! \param [in] UseWithFastGS reserved +//! \param [in] UseSpecificShaderExt TRUE if creating minimal specific shaders with nvapi shader extensions +//! \param [out] ppVertexShader Address of a pointer to a ID3D11VertexShader interface. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. 
+// +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreateVertexShaderEx(__in ID3D11Device *pDevice, __in const void *pShaderBytecode, + __in SIZE_T BytecodeLength, __in_opt ID3D11ClassLinkage *pClassLinkage, + __in const NvAPI_D3D11_CREATE_VERTEX_SHADER_EX *pCreateVertexShaderExArgs, + __out ID3D11VertexShader **ppVertexShader); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE) ) + +typedef struct NvAPI_D3D11_CREATE_HULL_SHADER_EX_V1 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + BOOL UseWithFastGS; // reserved +} NvAPI_D3D11_CREATE_HULL_SHADER_EX_V1; + +typedef struct NvAPI_D3D11_CREATE_HULL_SHADER_EX_V2 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + BOOL UseWithFastGS; // reserved + BOOL UseSpecificShaderExt; // TRUE if creating minimal specific shaders with nvapi shader extensions +} NvAPI_D3D11_CREATE_HULL_SHADER_EX_V2; + +typedef NvAPI_D3D11_CREATE_HULL_SHADER_EX_V2 NvAPI_D3D11_CREATE_HULL_SHADER_EX; +#define NVAPI_D3D11_CREATEHULLSHADEREX_VER_1 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_HULL_SHADER_EX_V1, 1) +#define NVAPI_D3D11_CREATEHULLSHADEREX_VER_2 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_HULL_SHADER_EX_V1, 2) +#define NVAPI_D3D11_CREATEHULLSHADEREX_VERSION NVAPI_D3D11_CREATEHULLSHADEREX_VER_2 + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: 
NvAPI_D3D11_CreateHullShaderEx +// +//! \fn NvAPI_D3D11_CreateHullShaderEx +//! +//! DESCRIPTION: This function allows us to extend the creation of hull shaders with extra bits +//! of functionality. +//! +//! The first parameters are identical to ID3D11Device::CreateHullShader() +//! so please refer to its documentation for their usage. +//! +//! The new parameter are custom semantics which allow setting of custom semantic variables +//! in the shader +//! +//! This function is free-threaded create compatible i.e. it can be called from a different thread +//! than the one calling immediate device setstate functions. +//! +//! \since Release: +//! +//! \param [in] pDevice The device pointer +//! \param [in] pShaderBytecode A pointer to the compiled shader. +//! \param [in] BytecodeLength Size of the compiled hull shader. +//! \param [in] pClassLinkage A pointer to a class linkage interface. Can be NULL. +//! \param [in] NumCustomSemantics Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics +//! \param [in] pCustomSemantics pointer to array of NV_CUSTOM_SEMANTIC +//! \param [in] UseWithFastGS reserved +//! \param [in] UseSpecificShaderExt TRUE if creating minimal specific shaders with nvapi shader extensions +//! \param [out] ppHullShader Address of a pointer to a ID3D11HullShader interface. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. 
+// +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreateHullShaderEx(__in ID3D11Device *pDevice, __in const void *pShaderBytecode, + __in SIZE_T BytecodeLength, __in_opt ID3D11ClassLinkage *pClassLinkage, + __in const NvAPI_D3D11_CREATE_HULL_SHADER_EX *pCreateHullShaderExArgs, + __out ID3D11HullShader **ppHullShader); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE) ) + +typedef struct NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V1 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC +} NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V1; + +typedef struct NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V2 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + BOOL UseWithFastGS; // reserved +} NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V2; + +typedef struct NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V3 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + BOOL UseWithFastGS; // reserved + BOOL UseSpecificShaderExt; // TRUE if creating minimal specific shaders with nvapi shader extensions +} NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V3; + +typedef NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V3 NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX; +#define NVAPI_D3D11_CREATEDOMAINSHADEREX_VER_1 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V1, 1) 
+#define NVAPI_D3D11_CREATEDOMAINSHADEREX_VER_2 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V2, 2) +#define NVAPI_D3D11_CREATEDOMAINSHADEREX_VER_3 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX_V3, 3) +#define NVAPI_D3D11_CREATEDOMAINSHADEREX_VERSION NVAPI_D3D11_CREATEDOMAINSHADEREX_VER_3 + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateDomainShaderEx +// +//! \fn NvAPI_D3D11_CreateDomainShaderEx +//! +//! DESCRIPTION: This function allows us to extend the creation of domain shaders with extra bits +//! of functionality. +//! +//! The first parameters are identical to ID3D11Device::CreateDomainShader() +//! so please refer to its documentation for their usage. +//! +//! The new parameter are custom semantics which allow setting of custom semantic variables +//! in the shader +//! +//! This function is free-threaded create compatible i.e. it can be called from a different thread +//! than the one calling immediate device setstate functions. +//! +//! \since Release: +//! +//! \param [in] pDevice The device pointer +//! \param [in] pShaderBytecode A pointer to the compiled shader. +//! \param [in] BytecodeLength Size of the compiled domain shader. +//! \param [in] pClassLinkage A pointer to a class linkage interface. Can be NULL. +//! \param [in] NumCustomSemantics Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics +//! \param [in] pCustomSemantics pointer to array of NV_CUSTOM_SEMANTIC +//! \param [in] UseWithFastGS reserved +//! \param [in] UseSpecificShaderExt TRUE if creating minimal specific shaders with nvapi shader extensions +//! \param [out] ppDomainShader Address of a pointer to a ID3D11DomainShader interface. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! 
meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreateDomainShaderEx(__in ID3D11Device *pDevice, __in const void *pShaderBytecode, + __in SIZE_T BytecodeLength, __in_opt ID3D11ClassLinkage *pClassLinkage, + __in const NvAPI_D3D11_CREATE_DOMAIN_SHADER_EX *pCreateDomainShaderExArgs, + __out ID3D11DomainShader **ppDomainShader); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE) ) + +typedef struct NvAPI_D3D11_CREATE_PIXEL_SHADER_EX_V1 +{ + UINT version; + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC +} NvAPI_D3D11_CREATE_PIXEL_SHADER_EX_V1; + +#define NVAPI_D3D11_CREATEPIXELSHADEREX_VER_1 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_PIXEL_SHADER_EX_V1, 1) + +typedef struct NvAPI_D3D11_CREATE_PIXEL_SHADER_EX_V2 +{ + UINT version; // Always use NVAPI_D3D11_CREATEPIXELSHADEREX_VERSION + + NvU32 NumCustomSemantics; // Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics + NV_CUSTOM_SEMANTIC *pCustomSemantics; // pointer to array of NV_CUSTOM_SEMANTIC + NvU32 bEnableSuperSamplingPredicationForVRS : 1; // This enables sampling within a pixel for SuperSampling mode of Variable Rate Shading for relevant attributes tagged with "sample" modifier + NvU32 bEnableSuperSamplingPredicationForVRSAllAttributes : 1; // This enables sampling within a pixel for SuperSampling mode of Variable Rate Shading for all relevant attributes + NvU32 reserved : 30; // Reserved for further use +} NvAPI_D3D11_CREATE_PIXEL_SHADER_EX_V2; + +typedef NvAPI_D3D11_CREATE_PIXEL_SHADER_EX_V2 
NvAPI_D3D11_CREATE_PIXEL_SHADER_EX; +#define NVAPI_D3D11_CREATEPIXELSHADEREX_VER_2 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_PIXEL_SHADER_EX_V2, 2) +#define NVAPI_D3D11_CREATEPIXELSHADEREX_VERSION NVAPI_D3D11_CREATEPIXELSHADEREX_VER_2 + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreatePixelShaderEx_2 +// +//! \fn NvAPI_D3D11_CreatePixelShaderEx_2 +//! +//! DESCRIPTION: This function allows us to extend the creation of pixel shaders with extra bits +//! of functionality. +//! +//! The first parameters are identical to ID3D11Device::CreatePixelShader() +//! so please refer to its documentation for their usage. +//! +//! The new parameter are custom semantics which allow setting of custom semantic variables +//! in the shader +//! +//! This function is free-threaded create compatible i.e. it can be called from a different thread +//! than the one calling immediate device setstate functions. +//! +//! \since Release: 410 +//! +//! \param [in] pDevice The device pointer +//! \param [in] pShaderBytecode A pointer to the compiled shader. +//! \param [in] BytecodeLength Size of the compiled domain shader. +//! \param [in] pClassLinkage A pointer to a class linkage interface. Can be NULL. +//! \param [in] NumCustomSemantics Number of custom semantics elements (upto NV_CUSTOM_SEMANTIC_MAX) provided in array pointer pCustomSemantics +//! \param [in] pCustomSemantics pointer to array of NV_CUSTOM_SEMANTIC +//! \param [out] ppPixelShader Address of a pointer to a ID3D11PixelShader interface. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. 
+// +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreatePixelShaderEx_2(__in ID3D11Device *pDevice, __in const void *pShaderBytecode, + __in SIZE_T BytecodeLength, __in_opt ID3D11ClassLinkage *pClassLinkage, + __in const NvAPI_D3D11_CREATE_PIXEL_SHADER_EX *pCreatePixelShaderExArgs, + __out ID3D11PixelShader **ppPixelShader); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) && (!defined(CINTERFACE)) + +typedef enum _NV_FASTGS_FLAGS +{ + NV_FASTGS_USE_VIEWPORT_MASK = 0x01, // Causes SV_ViewportArrayIndex value to be interpreted as a bitmask of viewports to broadcast to. + NV_FASTGS_OFFSET_RT_INDEX_BY_VP_INDEX = 0x02, // Causes SV_RenderTargetArrayIndex value to be offset by the viewport index when broadcasting. + NV_FASTGS_STRICT_API_ORDER = 0x04, // Causes broadcast primitives to be rendered strictly in API order (slow). + // By default, primitives may be batched per viewport to improve performance. +} NV_FASTGS_FLAGS; + +#endif //defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) && (!defined(CINTERFACE)) + +#if defined (__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +struct NvAPI_D3D11_CREATE_FASTGS_EXPLICIT_DESC_V1 +{ + NvU32 version; // ALWAYS == NVAPI_D3D11_CREATEFASTGSEXPLICIT_VER + NvU32 flags; // A combination of flags from NV_FASTGS_FLAGS + NvAPI_D3D11_SWIZZLE_MODE *pCoordinateSwizzling; // [optional] Array of 16 coordinate swizzle modes, one per viewport. NULL if not used. + // The output x, y, z, and w coordinates of all vertices can be set to any of the coordinates or their + // negated versions i.e. {x, y, z, w, -x, -y, -z, -w}. Coordinates are swizzled before any viewport + // operation occurs i.e. before frustum clipping, scaling, and viewport clipping. 
And after + // last of vertex/tesselation/geometry shader stage, stream-out and viewport broadcast expansion (see NV_FASTGS_USE_VIEWPORT_MASK) + // pCoordinateSwizzling[i] sets the swizzle-mode of each component for viewport i. + // See NV_SWIZZLE_MODE for values of allowed swizzle modes. + // See NV_SWIZZLE_OFFSET for bit offset from where NV_SWIZZLE_MODE to be set for each component. + // For example : + // 1. To set swizzle for viewport 0 such that - w and z are unchanged and values of x and y are swapped : + // pCoordinateSwizzling[0] = (NV_SWIZZLE_POS_W << NV_SWIZZLE_OFFSET_W) | + // (NV_SWIZZLE_POS_Z << NV_SWIZZLE_OFFSET_Z) | + // (NV_SWIZZLE_POS_X << NV_SWIZZLE_OFFSET_Y) | + // (NV_SWIZZLE_POS_Y << NV_SWIZZLE_OFFSET_X); + // 2. To set swizzle for viewport 0 such that - w, z and y are unchanged and value of x is negated : + // pCoordinateSwizzling[0] = (NV_SWIZZLE_POS_W << NV_SWIZZLE_OFFSET_W) | + // (NV_SWIZZLE_POS_Z << NV_SWIZZLE_OFFSET_Z) | + // (NV_SWIZZLE_POS_Y << NV_SWIZZLE_OFFSET_Y) | + // (NV_SWIZZLE_NEG_X << NV_SWIZZLE_OFFSET_X); + // Need to set some valid combination of swizzle-modes for all viewports, irrespective of whether that viewport is set. + // Invalid swizzle-mode for any viewport (even if that viewport is not set) may result in removal of device. +}; + +#define NVAPI_D3D11_CREATEFASTGSEXPLICIT_VER1 MAKE_NVAPI_VERSION(NvAPI_D3D11_CREATE_FASTGS_EXPLICIT_DESC_V1, 1) +#define NVAPI_D3D11_CREATEFASTGSEXPLICIT_VER NVAPI_D3D11_CREATEFASTGSEXPLICIT_VER1 + +typedef NvAPI_D3D11_CREATE_FASTGS_EXPLICIT_DESC_V1 NvAPI_D3D11_CREATE_FASTGS_EXPLICIT_DESC; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateFastGeometryShaderExplicit +// +//! \fn NvAPI_D3D11_CreateFastGeometryShaderExplicit +//! +//! DESCRIPTION: This function will create a fast geometry shader written using an "explicit" +//! coding style, rather than converting a standard GS. 
For the explicit coding +//! style, the GS must be written with maxvertexcount(1), and must pass-through +//! input vertex 0 to the output without modification. +//! +//! Additional per-primitive outputs may also be computed and written to the single +//! output vertex. If these outputs are read by the pixel shader, they must be +//! declared with the "nointerpolation" attribute in the PS input signature; +//! otherwise, visual corruption may occur. Also, unlike D3D API, there is no guarantee +//! that pixel shader will get the default value of an attribute if that attribute is not written +//! by the earlier shader stage in the pipeline. +//! +//! The first four parameters are identical to ID3D11Device::CreateGeometryShader(), +//! so please refer to its documentation for their usage. +//! +//! This function is free-threaded create compatible i.e. it can be called from a different thread +//! than the one calling immediate device setstate functions. +//! +//! \since Release: +//! +//! \param [in] pDevice The device pointer +//! \param [in] pShaderBytecode A pointer to the compiled shader. +//! \param [in] BytecodeLength Size of the compiled geometry shader. +//! \param [in] pClassLinkage A pointer to a class linkage interface. Can be NULL. +//! \param [in] pCreateFastGSArgs A pointer to a NvAPI_D3D11_CREATE_FASTGS_EXPLICIT struct. +//! \param [out] ppGeometryShader Address of a pointer to a ID3D11GeometryShader interface. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. 
+// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreateFastGeometryShaderExplicit(__in ID3D11Device *pDevice, __in const void *pShaderBytecode, + __in SIZE_T BytecodeLength, __in_opt ID3D11ClassLinkage *pClassLinkage, + __in const NvAPI_D3D11_CREATE_FASTGS_EXPLICIT_DESC *pCreateFastGSArgs, + __out ID3D11GeometryShader **ppGeometryShader); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE) ) +//////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateFastGeometryShader +// +//! \fn NvAPI_D3D11_CreateFastGeometryShader +//! +//! DESCRIPTION: This function will convert a regular geometry shader into a fast GS variant if possible. +//! It will not do any validation regarding the compatibility of the resulting fast GS with any +//! Pixel shader. The validation has to be done by the application manually. +//! +//! The parameters are identical to ID3D11Device::CreateGeometryShader() +//! so please refer to its documentation for their usage. +//! +//! If the shader is too complex or is not in adequate form to be converted to fast GS +//! this function will simply fail. You should then call ID3D11Device::CreateGeometryShader() +//! to create the regular geometry shader. +//! +//! This function is free-threaded create compatible i.e. it can be called from a different thread +//! than the one calling immediate device setstate functions. +//! +//! \since Release: +//! +//! \param [in] pDevice The device pointer +//! \param [in] pShaderBytecode A pointer to the compiled shader. +//! \param [in] BytecodeLength Size of the compiled geometry shader. +//! \param [in] pClassLinkage A pointer to a class linkage interface. Can be NULL. +//! 
\param [out] ppGeometryShader Address of a pointer to a ID3D11GeometryShader interface. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +// +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreateFastGeometryShader(__in ID3D11Device *pDevice, __in const void *pShaderBytecode, + __in SIZE_T BytecodeLength, __in_opt ID3D11ClassLinkage *pClassLinkage, + __out ID3D11GeometryShader **ppGeometryShader); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) && (!defined(CINTERFACE)) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined (__cplusplus) && defined(__d3d11_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_DecompressView +// +//! \code +//! DESCRIPTION: This function is used to decompress a surface using the currently bound programmable sample positions. +//! +//! This is needed: +//! - When writing to a surface in a region previously rendered by different sample positions and no clear was done. +//! - When reading a surface in a shader that was rendered using non-standard sample positions. +//! - When copying from a surface that was rendered using non-standard sample positions. +//! +//! \param [in] pDevice Current d3d11 device +//! \param [in] pDeviceContext Current d3d11 device context +//! \param [in] pView Current view to decompress +//! +//! +//! \return ::NVAPI_OK if the call succeeds. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_DecompressView(__in ID3D11Device* pDevice, __in ID3D11DeviceContext *pDeviceContext, __in ID3D11View* pView); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + + +#if defined (__cplusplus) && defined(__d3d12_h__) + +//! 
Enum for CreatePSO extensions. +//! \ingroup dx +//! constant 5 is assigned to two members of this enum becuase the first member name contains a typo: EXTNENSION. Please use the correctly-spelled enumerator. +typedef enum _NV_PSO_EXTENSION +{ + NV_PSO_RASTER_EXTENSION = 0, + NV_PSO_REQUEST_FASTGS_EXTENSION = 1, + NV_PSO_GEOMETRY_SHADER_EXTENSION = 2, + NV_PSO_ENABLE_DEPTH_BOUND_TEST_EXTENSION = 3, + NV_PSO_EXPLICIT_FASTGS_EXTENSION = 4, + NV_PSO_SET_SHADER_EXTNENSION_SLOT_AND_SPACE = 5, + NV_PSO_SET_SHADER_EXTENSION_SLOT_AND_SPACE = 5, + NV_PSO_VERTEX_SHADER_EXTENSION = 6, + NV_PSO_DOMAIN_SHADER_EXTENSION = 7, + NV_PSO_HULL_SHADER_EXTENSION = 9, +}NV_PSO_EXTENSION; + +struct NVAPI_D3D12_PSO_EXTENSION_DESC_V1 +{ + NvU32 baseVersion; //1 it needs to match N, in non-TIR it needs to match RT sample count. Ignored if ForcePerSampleInterlock is set + NvU8 SamplePositionsX[16]; //= 201103L + +#define compile_time_assert(b) static_assert((b), "Compile time assertion failed: "#b) + +enum NV_META_COMMAND_TENSOR_DATA_TYPE : NvU64 +{ + NV_META_COMMAND_TENSOR_DATA_TYPE_FLOAT32, + NV_META_COMMAND_TENSOR_DATA_TYPE_FLOAT16, + NV_META_COMMAND_TENSOR_DATA_TYPE_UINT32, + + NV_META_COMMAND_TENSOR_DATA_TYPE_COUNT, +}; + + +enum NV_META_COMMAND_TENSOR_LAYOUT : NvU64 +{ + NV_META_COMMAND_TENSOR_LAYOUT_UNKNOWN, // opaque HW-native layout + NV_META_COMMAND_TENSOR_LAYOUT_STANDARD, // NCDHW - planar / row major layout (width is inner-most dimension, batch-size N is the outermost) + NV_META_COMMAND_TENSOR_LAYOUT_COUNT, +}; + +enum NV_META_COMMAND_TENSOR_FLAGS : NvU64 +{ + NV_META_COMMAND_TENSOR_FLAG_NONE = 0, + NV_META_COMMAND_TENSOR_FLAG_DATA_STATIC = 0x1, // data pointed by the tensor is static (i.e, won't be modified after command list recording) +}; + +enum NV_META_COMMAND_PRECISION : NvU64 +{ + NV_META_COMMAND_PRECISION_FLOAT32, + NV_META_COMMAND_PRECISION_FLOAT16, + NV_META_COMMAND_PRECISION_MUL_FLOAT16_ADD_FLOAT32, + + NV_META_COMMAND_PRECISION_COUNT, +}; + +struct 
NV_META_COMMAND_TENSOR_DESC +{ + NV_META_COMMAND_TENSOR_DATA_TYPE DataType; + NV_META_COMMAND_TENSOR_LAYOUT Layout; + NV_META_COMMAND_TENSOR_FLAGS Flags; + NvU64 DimensionCount; // 4 or 5 + NvU64 Size[NV_META_COMMAND_MAX_TENSOR_DIM]; + NvU64 Stride[NV_META_COMMAND_MAX_TENSOR_DIM]; // only used with NV_META_COMMAND_TENSOR_LAYOUT_STANDARD +}; + +enum NV_META_COMMAND_ACTIVATION_FUNCTION : NvU64 +{ + NV_META_COMMAND_ACTIVATION_FUNCTION_ELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_HARDMAX, + NV_META_COMMAND_ACTIVATION_FUNCTION_HARD_SIGMOID, + NV_META_COMMAND_ACTIVATION_FUNCTION_IDENTITY, + NV_META_COMMAND_ACTIVATION_FUNCTION_LEAKY_RELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_LINEAR, + NV_META_COMMAND_ACTIVATION_FUNCTION_LOG_SOFTMAX, + NV_META_COMMAND_ACTIVATION_FUNCTION_PARAMETERIZED_RELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_PARAMETRIC_SOFTPLUS, + NV_META_COMMAND_ACTIVATION_FUNCTION_RELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_SCALED_ELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_SCALED_TANH, + NV_META_COMMAND_ACTIVATION_FUNCTION_SIGMOID, + NV_META_COMMAND_ACTIVATION_FUNCTION_SOFTMAX, + NV_META_COMMAND_ACTIVATION_FUNCTION_SOFTPLUS, + NV_META_COMMAND_ACTIVATION_FUNCTION_SOFTSIGN, + NV_META_COMMAND_ACTIVATION_FUNCTION_TANH, + NV_META_COMMAND_ACTIVATION_FUNCTION_THRESHOLDED_RELU, + + NV_META_COMMAND_ACTIVATION_FUNCTION_COUNT, +}; + +struct NV_META_COMMAND_ACTIVATION_DESC +{ + NV_META_COMMAND_ACTIVATION_FUNCTION Function; + float Params[NV_META_COMMAND_ACTIVATION_MAX_PARAMS]; +}; + +#else + +#define compile_time_assert(b) typedef char compile_time_assertion_failed_in_line_##__LINE__[(b)?1:-1] + +enum NV_META_COMMAND_TENSOR_DATA_TYPE +{ + NV_META_COMMAND_TENSOR_DATA_TYPE_FLOAT32, + NV_META_COMMAND_TENSOR_DATA_TYPE_FLOAT16, + NV_META_COMMAND_TENSOR_DATA_TYPE_UINT32, + + NV_META_COMMAND_TENSOR_DATA_TYPE_COUNT, +}; + + +enum NV_META_COMMAND_TENSOR_LAYOUT +{ + NV_META_COMMAND_TENSOR_LAYOUT_UNKNOWN, // opaque HW-native layout + NV_META_COMMAND_TENSOR_LAYOUT_STANDARD, // NCDHW - planar 
/ row major layout (width is inner-most dimension, batch-size N is the outermost) + NV_META_COMMAND_TENSOR_LAYOUT_COUNT, +}; + +enum NV_META_COMMAND_TENSOR_FLAGS +{ + NV_META_COMMAND_TENSOR_FLAG_NONE = 0, + NV_META_COMMAND_TENSOR_FLAG_DATA_STATIC = 0x1, // data pointed by the tensor is static (i.e, won't be modified after command list recording) +}; + +enum NV_META_COMMAND_PRECISION +{ + NV_META_COMMAND_PRECISION_FLOAT32, + NV_META_COMMAND_PRECISION_FLOAT16, + NV_META_COMMAND_PRECISION_MUL_FLOAT16_ADD_FLOAT32, + + NV_META_COMMAND_PRECISION_COUNT, +}; + +struct NV_META_COMMAND_TENSOR_DESC +{ + NvU64 DataType; // NV_META_COMMAND_TENSOR_DATA_TYPE + NvU64 Layout; // NV_META_COMMAND_TENSOR_LAYOUT + NvU64 Flags; // NV_META_COMMAND_TENSOR_FLAGS + NvU64 DimensionCount; // 4 or 5 + NvU64 Size[NV_META_COMMAND_MAX_TENSOR_DIM]; + NvU64 Stride[NV_META_COMMAND_MAX_TENSOR_DIM]; // only used with NV_META_COMMAND_TENSOR_LAYOUT_STANDARD +}; + +enum NV_META_COMMAND_ACTIVATION_FUNCTION +{ + NV_META_COMMAND_ACTIVATION_FUNCTION_ELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_HARDMAX, + NV_META_COMMAND_ACTIVATION_FUNCTION_HARD_SIGMOID, + NV_META_COMMAND_ACTIVATION_FUNCTION_IDENTITY, + NV_META_COMMAND_ACTIVATION_FUNCTION_LEAKY_RELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_LINEAR, + NV_META_COMMAND_ACTIVATION_FUNCTION_LOG_SOFTMAX, + NV_META_COMMAND_ACTIVATION_FUNCTION_PARAMETERIZED_RELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_PARAMETRIC_SOFTPLUS, + NV_META_COMMAND_ACTIVATION_FUNCTION_RELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_SCALED_ELU, + NV_META_COMMAND_ACTIVATION_FUNCTION_SCALED_TANH, + NV_META_COMMAND_ACTIVATION_FUNCTION_SIGMOID, + NV_META_COMMAND_ACTIVATION_FUNCTION_SOFTMAX, + NV_META_COMMAND_ACTIVATION_FUNCTION_SOFTPLUS, + NV_META_COMMAND_ACTIVATION_FUNCTION_SOFTSIGN, + NV_META_COMMAND_ACTIVATION_FUNCTION_TANH, + NV_META_COMMAND_ACTIVATION_FUNCTION_THRESHOLDED_RELU, + + NV_META_COMMAND_ACTIVATION_FUNCTION_COUNT, +}; + +struct NV_META_COMMAND_ACTIVATION_DESC +{ + NvU64 Function; // 
NV_META_COMMAND_ACTIVATION_FUNCTION + float Params[NV_META_COMMAND_ACTIVATION_MAX_PARAMS]; +}; + +#endif + +struct NV_META_COMMAND_OPTIONAL_TENSOR_DESC : NV_META_COMMAND_TENSOR_DESC +{ + // true when the tensor isn't needed (e.g, bias is optional) + NV_META_COMMAND_BOOL IsNull; +}; + + +struct NV_META_COMMAND_OPTIONAL_ACTIVATION_DESC : NV_META_COMMAND_ACTIVATION_DESC +{ + // true when activation isn't needed + NV_META_COMMAND_BOOL IsNull; +}; + + +enum NV_META_COMMAND_PADDING_MODE +{ + NV_META_COMMAND_PADDING_ZEROS, + NV_META_COMMAND_PADDING_MIRROR, + NV_META_COMMAND_PADDING_CLAMP, + NV_META_COMMAND_PADDING_CONSTANT, + + NV_META_COMMAND_PADDING_COUNT, +}; + +struct NV_META_COMMAND_PADDING_DESC +{ + NV_META_COMMAND_PADDING_MODE Mode; + + // used with NV_META_COMMAND_PADDING_CONSTANT + float ConstantPadVal; +}; + +// use this enum to query resource sizes using GetRequiredParameterResourceSize() call +enum NV_META_COMMAND_RESOURCE_TYPE +{ + NV_META_COMMAND_RESOURCE_TYPE_INPUT = 0, + NV_META_COMMAND_RESOURCE_TYPE_OUTPUT = 1, + + NV_META_COMMAND_RESOURCE_TYPE_FILTER = 2, + NV_META_COMMAND_RESOURCE_TYPE_WEIGHT = 2, + NV_META_COMMAND_RESOURCE_TYPE_BIAS = 3, + + NV_META_COMMAND_RESOURCE_TYPE_MATRIX_A = 0, + NV_META_COMMAND_RESOURCE_TYPE_MATRIX_B = 2, + NV_META_COMMAND_RESOURCE_TYPE_MATRIX_C = 3, + + NV_META_COMMAND_RESOURCE_TYPE_PERSISTENT = 4, + NV_META_COMMAND_RESOURCE_TYPE_TEMPORARY = 5, +}; + + +// Extended version of convolution operation that performs: +// +// y = act ( alpha1 * conv(x) + alpha2 * z + bias ) +// +// alpha1 and alpha2 are either scalars or if PerChannelScaling is TRUE, they are vectors of +// same dimension as the bias tensor (vector of size equal to number of output channels) +// +// z (SkipConnectionResource) has same dimension as output tensor y (OutputResource). 
+ +static const GUID MetaCommand_ConvolutionEx = +{ 0xa7666f1e, 0x9c55, 0x47ee, { 0x9e, 0xb3, 0xe1, 0x62, 0x0, 0x92, 0xd1, 0xe9 } }; + +#define NV_META_COMMAND_NUM_SPATIAL_DIM 3 +// D, H, W when DimensionCount is 3 +// H, W when DimensionCount is 2 + + +#if __cplusplus >= 201103L +enum NV_META_COMMAND_CONVOLUTION_DIRECTION : NvU64 +{ + NV_META_COMMAND_CONVOLUTION_DIRECTION_FORWARD, // Corresponds to regular Convolution + NV_META_COMMAND_CONVOLUTION_DIRECTION_BACKWARD, // Corresponds to ConvolutionTranspose + + NV_META_COMMAND_CONVOLUTION_DIRECTION_COUNT, +}; + +enum NV_META_COMMAND_CONVOLUTION_MODE : NvU64 +{ + NV_META_COMMAND_CONVOLUTION_MODE_CONVOLUTION, + NV_META_COMMAND_CONVOLUTION_MODE_CROSS_CORRELATION, + + NV_META_COMMAND_CONVOLUTION_MODE_COUNT, +}; + +struct NV_META_COMMAND_CREATE_CONVOLUTION_EX_DESC +{ + // Descriptor of the input tensor + NV_META_COMMAND_TENSOR_DESC DescIn; + + // Descriptor of the tensor acting as the filter kernel + NV_META_COMMAND_TENSOR_DESC DescFilter; + + // Descriptor of the optional bias tensor + NV_META_COMMAND_OPTIONAL_TENSOR_DESC DescBias; + + // Descriptor of the output tensor + NV_META_COMMAND_TENSOR_DESC DescOut; + + // Convolution mode (CROSS_CORRELATION or CONVOLUTION) + NV_META_COMMAND_CONVOLUTION_MODE Mode; + + // Convolution direction (FORWARD or BACKWARD) + NV_META_COMMAND_CONVOLUTION_DIRECTION Direction; + + // Precision at which convolution is done + NV_META_COMMAND_PRECISION Precision; + + // Optional activation function + NV_META_COMMAND_OPTIONAL_ACTIVATION_DESC Activation; + + // Padding mode (only used when output tensor dimensions are different from input tensor dimensions) + NV_META_COMMAND_PADDING_DESC Padding; + + // enables per channel scaling i.e, use Alpha1Resource and Alpha2Resource + // instead of Alpha1 and Alpha2 below + NV_META_COMMAND_BOOL PerChannelScaling; + + // scaling factors used when PerChannelScaling is FALSE + // set Alpha1 = 1.0f, Alpha2 = 0.0f for simple convolutions + // that don't need 
scaling or skip connection + float Alpha1; + float Alpha2; + + // Strides for the filter kernel position + NvU64 Stride[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // The distance per dimension between elements that are multiplied + NvU64 Dilation[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // Padding at the start of each dimension + NvU64 StartPadding[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // Padding at the end of each dimension + NvU64 EndPadding[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // Number of dimensions to which convolution occurs (2 or 3) + NvU64 DimensionCount; + + // Number of channel groups convolved independently + NvU64 GroupCount; +}; + +#else + +enum NV_META_COMMAND_CONVOLUTION_DIRECTION +{ + NV_META_COMMAND_CONVOLUTION_DIRECTION_FORWARD, // Corresponds to regular Convolution + NV_META_COMMAND_CONVOLUTION_DIRECTION_BACKWARD, // Corresponds to ConvolutionTranspose + + NV_META_COMMAND_CONVOLUTION_DIRECTION_COUNT, +}; + +enum NV_META_COMMAND_CONVOLUTION_MODE +{ + NV_META_COMMAND_CONVOLUTION_MODE_CONVOLUTION, + NV_META_COMMAND_CONVOLUTION_MODE_CROSS_CORRELATION, + + NV_META_COMMAND_CONVOLUTION_MODE_COUNT, +}; + +struct NV_META_COMMAND_CREATE_CONVOLUTION_EX_DESC +{ + // Descriptor of the input tensor + NV_META_COMMAND_TENSOR_DESC DescIn; + + // Descriptor of the tensor acting as the filter kernel + NV_META_COMMAND_TENSOR_DESC DescFilter; + + // Descriptor of the optional bias tensor + NV_META_COMMAND_OPTIONAL_TENSOR_DESC DescBias; + + // Descriptor of the output tensor + NV_META_COMMAND_TENSOR_DESC DescOut; + + // Convolution mode (CROSS_CORRELATION or CONVOLUTION) + NvU64 Mode; // NV_META_COMMAND_CONVOLUTION_MODE + + // Convolution direction (FORWARD or BACKWARD) + NvU64 Direction; // NV_META_COMMAND_CONVOLUTION_DIRECTION + + // Precision at which convolution is done + NvU64 Precision; // NV_META_COMMAND_PRECISION + + // Optional activation function + NV_META_COMMAND_OPTIONAL_ACTIVATION_DESC Activation; + + // Padding mode (only used when output tensor dimensions are 
different from input tensor dimensions) + NV_META_COMMAND_PADDING_DESC Padding; + + // enables per channel scaling i.e, use Alpha1Resource and Alpha2Resource + // instead of Alpha1 and Alpha2 below + NV_META_COMMAND_BOOL PerChannelScaling; + + // scaling factors used when PerChannelScaling is FALSE + // set Alpha1 = 1.0f, Alpha2 = 0.0f for simple convolutions + // that don't need scaling or skip connection + float Alpha1; + float Alpha2; + + // Strides for the filter kernel position + NvU64 Stride[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // The distance per dimension between elements that are multiplied + NvU64 Dilation[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // Padding at the start of each dimension + NvU64 StartPadding[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // Padding at the end of each dimension + NvU64 EndPadding[NV_META_COMMAND_NUM_SPATIAL_DIM]; + + // Number of dimensions to which convolution occurs (2 or 3) + NvU64 DimensionCount; + + // Number of channel groups convolved independently + NvU64 GroupCount; +}; +#endif + + +// Fused Convolution variants + +// supported combinations right now are: +// - Convolution + Max Pooling (also optionally outputs pre-pool data) +// - 2x2 upsample + (optional) residual add + Convolution +// +// other combinations may be exposed in future + +static const GUID MetaCommand_ConvolutionExFused = +{ 0xe1b112eb, 0xdecd, 0x4ff6,{ 0x85, 0xbb, 0x1f, 0xe, 0x3a, 0xb0, 0x4, 0x14 } }; + + +enum NV_META_COMMAND_CONVOLUTION_POOL_MODE +{ + NV_META_COMMAND_CONVOLUTION_POOL_MODE_NONE, + NV_META_COMMAND_CONVOLUTION_POOL_MODE_REDUCTION_MAX, + NV_META_COMMAND_CONVOLUTION_POOL_MODE_REDUCTION_AVG, + NV_META_COMMAND_CONVOLUTION_POOL_MODE_REDUCTION_MIN, + + NV_META_COMMAND_CONVOLUTION_POOL_MODE_COUNT, +}; + +enum NV_META_COMMAND_CONVOLUTION_UPSAMPLE_MODE +{ + NV_META_COMMAND_CONVOLUTION_UPSAMPLE_MODE_NONE, + NV_META_COMMAND_CONVOLUTION_UPSAMPLE_MODE_REPLICATE, + NV_META_COMMAND_CONVOLUTION_UPSAMPLE_MODE_BILINEAR, + + 
NV_META_COMMAND_CONVOLUTION_UPSAMPLE_MODE_COUNT, +}; + +enum NV_META_COMMAND_CONVOLUTION_SKIP_MODE +{ + NV_META_COMMAND_CONVOLUTION_SKIP_MODE_NONE, + NV_META_COMMAND_CONVOLUTION_SKIP_MODE_ADD, + NV_META_COMMAND_CONVOLUTION_SKIP_MODE_CONCAT, + + NV_META_COMMAND_CONVOLUTION_SKIP_MODE_COUNT, +}; + +struct NV_META_COMMAND_CONVOLUTION_FUSE_DESC +{ + NV_META_COMMAND_CONVOLUTION_POOL_MODE PoolMode; + NV_META_COMMAND_CONVOLUTION_UPSAMPLE_MODE UpsampleMode; + NV_META_COMMAND_CONVOLUTION_SKIP_MODE SkipMode; + + NV_META_COMMAND_BOOL OutputPrepool; // used with NV_META_COMMAND_CONVOLUTION_POOL_MODE +}; + +// uses same structures for init and execute descriptors +// SkipConnectionResource is used to specify the resource for pre-pool data or residual add +struct NV_META_COMMAND_CREATE_CONVOLUTION_EX_FUSED_DESC : NV_META_COMMAND_CREATE_CONVOLUTION_EX_DESC +{ + NV_META_COMMAND_CONVOLUTION_FUSE_DESC FuseDesc; +}; + +// make sure structure sizes match what the driver assumes +compile_time_assert(sizeof(NV_META_COMMAND_TENSOR_DESC) == 112); +compile_time_assert(sizeof(NV_META_COMMAND_CREATE_CONVOLUTION_EX_DESC) == 640); +compile_time_assert(sizeof(NV_META_COMMAND_CONVOLUTION_FUSE_DESC) == 20); +compile_time_assert(sizeof(NV_META_COMMAND_CREATE_CONVOLUTION_EX_FUSED_DESC) == 660); + + +// GEMM (General matrix multiply) +// +// Y = alpha * t(A) * t(B) + beta * C, +// +// where t is a matrix transform option +// +// If C is null, and beta is non-zero, the output +// matrix is used as C matrix. 
i.e, the operation performed is: +// Y = alpha * t(A) * t(B) + beta * Y +// +static const GUID MetaCommand_Gemm = + { 0x8f9ff059, 0xfe72, 0x488e, { 0xa0, 0x66, 0xb1, 0x4e, 0x79, 0x48, 0xec, 0x8 } }; + +#if __cplusplus >= 201103L + +enum NV_META_COMMAND_MATRIX_TRANSFORM : NvU64 +{ + NV_META_COMMAND_MATRIX_TRANSFORM_NONE, + NV_META_COMMAND_MATRIX_TRANSFORM_TRANSPOSE, + + NV_META_COMMAND_MATRIX_TRANSFORM_COUNT, +}; + +struct NV_META_COMMAND_CREATE_GEMM_DESC +{ + NV_META_COMMAND_TENSOR_DESC DescA; + NV_META_COMMAND_TENSOR_DESC DescB; + NV_META_COMMAND_OPTIONAL_TENSOR_DESC DescC; + NV_META_COMMAND_TENSOR_DESC DescOut; + NV_META_COMMAND_PRECISION Precision; + + NV_META_COMMAND_MATRIX_TRANSFORM TransA; + NV_META_COMMAND_MATRIX_TRANSFORM TransB; + float Alpha; + float Beta; + + NV_META_COMMAND_OPTIONAL_ACTIVATION_DESC Activation; +}; + +#else + +enum NV_META_COMMAND_MATRIX_TRANSFORM +{ + NV_META_COMMAND_MATRIX_TRANSFORM_NONE, + NV_META_COMMAND_MATRIX_TRANSFORM_TRANSPOSE, + + NV_META_COMMAND_MATRIX_TRANSFORM_COUNT, +}; + +struct NV_META_COMMAND_CREATE_GEMM_DESC +{ + NV_META_COMMAND_TENSOR_DESC DescA; + NV_META_COMMAND_TENSOR_DESC DescB; + NV_META_COMMAND_OPTIONAL_TENSOR_DESC DescC; + NV_META_COMMAND_TENSOR_DESC DescOut; + NvU64 Precision; // NV_META_COMMAND_PRECISION + + NvU64 TransA; // NV_META_COMMAND_MATRIX_TRANSFORM + NvU64 TransB; // NV_META_COMMAND_MATRIX_TRANSFORM + float Alpha; + float Beta; + + NV_META_COMMAND_OPTIONAL_ACTIVATION_DESC Activation; +}; + +#endif + + +#pragma pack(pop) + +#endif // #if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) + + +#if defined (__cplusplus) && defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_EnumerateMetaCommands +// +//! \since Release: 400 +// +//! \code +//! DESCRIPTION: Enumerates MetaCommands supported on the system +//! +//! \param [in] pDevice A pointer to D3D11 device. +//! 
\param [in/out] pNumMetaCommands Should be non-null. When the value pointed by pNumMetaCommands is 0 (or when pDescs is NULL), the function returns number of metacommands supported. +//! When the value pointed is non-zero, the value indicates number of Metacommand descriptions to be populated in pDescs array. +//! \param [out] pDescs Pointer to array where Metacommand descriptions will be returned. Can be null to indicate that the app is querying the number of supported metacommands. +//! Otherwise should have enough space to hold *pNumMetaCommands descriptors +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_EnumerateMetaCommands(__in ID3D11Device *pDevice, + __inout NvU32 *pNumMetaCommands, + __out_ecount_opt(*pNumMetaCommands) NVAPI_META_COMMAND_DESC *pDescs); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + + + +#if defined (__cplusplus) && defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateMetaCommand +// +//! \since Release: 400 +// +//! \code +//! DESCRIPTION: Creates a MetaCommand object which can be used to execute optimized operations exposed by driver like convolutions. +//! +//! \param [in] pDevice A pointer to D3D11 device. +//! \param [in] CommandId GUID of the operations to perform +//! \param [in] pCreationParametersData structure containing all creation parameters for the requested Metacommand +//! \param [in] CreationParametersDataSize size of parameter data structure +//! \param [out] ppMetaCommand A pointer to memory that receives the pointer to the created MetaCommand object. +//! SUPPORTED OS: Windows 10 +//! +//! 
\return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! NVAPI_NOT_SUPPORTED - The requested Metacommand is not supported. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +#pragma pack(push, 4) +struct NV_D3D11_META_COMMAND_RESOURCE +{ + union + { + NVDX_ObjectHandle ResourceHandle; // NVAPI handle of a buffer resource (use NvAPI_D3D11_GetResourceHandle to get this handle) + NvU64 unused; // to get correct sturcutre size on 32 bit builds + }; + NvU64 Offset; // offset within the resource in bytes +}; + +struct NV_D3D11_META_COMMAND_INITIALIZE_CONVOLUTION_EX_DESC +{ + // Persistent resource used as scratch space by driver + // it's written at time of init, and read at time of execute + // use GetRequiredParameterResourceSize to query its size + NV_D3D11_META_COMMAND_RESOURCE PersistentResource; +}; + +struct NV_D3D11_META_COMMAND_EXECUTE_CONVOLUTION_EX_DESC +{ + NV_D3D11_META_COMMAND_RESOURCE InputResource; + NV_D3D11_META_COMMAND_RESOURCE FilterResource; + NV_D3D11_META_COMMAND_RESOURCE BiasResource; // optional + NV_D3D11_META_COMMAND_RESOURCE OutputResource; + + // Alpha1Resource and Alpha2Resource are used only when + // PerChannelScaling is set. 
Otherwise the scalars Alpha1/Alpha2 are used + // should have same dimension as bias + NV_D3D11_META_COMMAND_RESOURCE Alpha1Resource; + NV_D3D11_META_COMMAND_RESOURCE Alpha2Resource; + + // optional, same dimension/descriptor as output + NV_D3D11_META_COMMAND_RESOURCE SkipConnectionResource; + + + // should point to same memory that was specified at time of init + NV_D3D11_META_COMMAND_RESOURCE PersistentResource; + + // temporary resource used as scratch space by driver + // used for both read and write at the time of execute + // use GetRequiredParameterResourceSize to query its size + NV_D3D11_META_COMMAND_RESOURCE TemporaryResource; +}; + +// make sure structure sizes match what the driver assumes +compile_time_assert(sizeof(NV_D3D11_META_COMMAND_INITIALIZE_CONVOLUTION_EX_DESC) == 16); +compile_time_assert(sizeof(NV_D3D11_META_COMMAND_EXECUTE_CONVOLUTION_EX_DESC) == 144); + +struct NV_D3D11_META_COMMAND_INITIALIZE_GEMM_DESC +{ + NV_D3D11_META_COMMAND_RESOURCE PersistentResource; +}; + +struct NV_D3D11_META_COMMAND_EXECUTE_GEMM_DESC +{ + NV_D3D11_META_COMMAND_RESOURCE AResource; + NV_D3D11_META_COMMAND_RESOURCE BResource; + NV_D3D11_META_COMMAND_RESOURCE CResource; + NV_D3D11_META_COMMAND_RESOURCE OutputResource; + + NV_D3D11_META_COMMAND_RESOURCE PersistentResource; + NV_D3D11_META_COMMAND_RESOURCE TemporaryResource; +}; + +// make sure structure sizes match what the driver assumes +compile_time_assert(sizeof(NV_D3D11_META_COMMAND_INITIALIZE_GEMM_DESC) == 16); +compile_time_assert(sizeof(NV_D3D11_META_COMMAND_EXECUTE_GEMM_DESC) == 96); + + +#pragma pack(pop) + +DECLARE_INTERFACE_(__declspec(uuid("00BF193A-117B-42BC-BBCD-E964A0EA4F2B"))ID3D11NvMetaCommand_V1, IUnknown) +{ + BEGIN_INTERFACE + + // *** IUnknown methods *** + STDMETHOD(QueryInterface)(THIS_ REFIID riid, void **ppv) PURE; + STDMETHOD_(ULONG,AddRef)(THIS) PURE; + STDMETHOD_(ULONG,Release)(THIS) PURE; + + // ** ID3D11NvMetaCommand methods *** + // Return size of parameter + 
STDMETHOD(GetRequiredParameterResourceSize)(THIS_ NV_META_COMMAND_RESOURCE_TYPE ResourceType, NvU64 *SizeInBytes) const PURE; + + END_INTERFACE +}; + +typedef ID3D11NvMetaCommand_V1 ID3D11NvMetaCommand; +#define ID3D11NvMetaCommand_VER1 MAKE_NVAPI_VERSION(IID3D11NvMetaCommand_V1, 1) +#define ID3D11NvMetaCommand_VER ID3D11NvMetaCommand_VER1 + +NVAPI_INTERFACE NvAPI_D3D11_CreateMetaCommand(__in ID3D11Device *pDevice, + __in REFGUID CommandId, + __in_bcount(CreationParametersDataSize) const void *pCreationParametersData, + __in NvU32 CreationParametersDataSize, + __out ID3D11NvMetaCommand **ppMetaCommand); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + + +#if defined (__cplusplus) && defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_InitializeMetaCommand +// +//! \since Release: 400 +// +//! \code +//! DESCRIPTION: Initializes the given MetaCommand with the parameters passed in +//! +//! \param [in] pDeviceContext A pointer to the d3d11 device context +//! \param [in] pMetaCommand the MetaCommand to initialize +//! \param [in] pInitializationParametersData Structure containing parameters +//! \param [in] InitializationParametersDataSize Size of the parameter structure in bytes +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! \endcode +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_InitializeMetaCommand(__in ID3D11DeviceContext *pDeviceContext, + __in ID3D11NvMetaCommand *pMetaCommand, + __in_bcount(InitializationParametersDataSize) const void *pInitializationParametersData, + __in NvU32 InitializationParametersDataSize); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + + +#if defined (__cplusplus) && defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_ExecuteMetaCommand +// +//! \since Release: 400 +// +//! \code +//! DESCRIPTION: Executes the given MetaCommand with the parameters passed in +//! +//! \param [in] pDeviceContext A pointer to the d3d11 device context +//! \param [in] pMetaCommand the MetaCommand to execute +//! \param [in] pExecutionParametersData Structure containing parameters +//! \param [in] ExecutionParametersDataSize Size of the parameter structure in bytes +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_ExecuteMetaCommand(__in ID3D11DeviceContext *pDeviceContext, + __in ID3D11NvMetaCommand *pMetaCommand, + __in_bcount(ExecutionParametersDataSize) const void *pExecutionParametersData, + __in NvU32 ExecutionParametersDataSize); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + + + + +#if defined (__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_EnumerateMetaCommands +// +//! \since Release: 400 +// +//! \code +//! DESCRIPTION: Enumerates MetaCommands supported on the system +//! +//! 
\param [in] pDevice A pointer to D3D12 device. +//! \param [in/out] pNumMetaCommands Should be non-null. When the value pointed by pNumMetaCommands is 0 or when pDescs is NULL, the function returns number of metacommands supported. +//! When the value pointed is non-zero, the value indicates number of Metacommand descriptions to be populated in pDescs array. +//! \param [out] pDescs Pointer to array where Metacommand descriptions will be returned. Can be null to indicate that the app is querying the number of supported metacommands. +//! Otherwise should have enough space to hold *pNumMetaCommands descriptors +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_EnumerateMetaCommands(__in ID3D12Device *pDevice, + __inout NvU32 *pNumMetaCommands, + __out_ecount_opt(*pNumMetaCommands) NVAPI_META_COMMAND_DESC *pDescs); + +#endif //defined(__cplusplus) && defined(__d3d12_h__) + + + +#if defined (__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_CreateMetaCommand +// +//! \since Release: 400 +// +//! \code +//! DESCRIPTION: Creates a MetaCommand object which can be used to execute optimized operations exposed by driver like convolutions. +//! +//! \param [in] pDevice A pointer to D3D12 device. +//! \param [in] CommandId GUID of the operations to perform +//! \param [in] NodeMask GPU mask for which metacommand is to be created. Set it to 0 for single GPU systems +//! \param [in] pCreationParametersData structure containing all creation parameters for the requested Metacommand +//! \param [in] CreationParametersDataSize size of parameter data structure +//! 
\param [out] ppMetaCommand A pointer to memory that receives the pointer to the created MetaCommand object. +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! NVAPI_NOT_SUPPORTED - The requested Metacommand is not supported. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +#pragma pack(push, 4) +struct NV_D3D12_META_COMMAND_INITIALIZE_CONVOLUTION_EX_DESC +{ + // Persistent resource used as scratch space by driver + // it's written at time of init, and read at time of execute + // use GetRequiredParameterResourceSize to query its size + D3D12_GPU_VIRTUAL_ADDRESS PersistentResource; +}; + +struct NV_D3D12_META_COMMAND_EXECUTE_CONVOLUTION_EX_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS InputResource; + D3D12_GPU_VIRTUAL_ADDRESS FilterResource; + D3D12_GPU_VIRTUAL_ADDRESS BiasResource; // optional + D3D12_GPU_VIRTUAL_ADDRESS OutputResource; + + // Alpha1Resource and Alpha2Resource are used only when + // PerChannelScaling is set. 
Otherwise the scalars Alpha1/Alpha2 are used + // should have same dimension as bias + D3D12_GPU_VIRTUAL_ADDRESS Alpha1Resource; + D3D12_GPU_VIRTUAL_ADDRESS Alpha2Resource; + + // optional, same dimension/descriptor as output + D3D12_GPU_VIRTUAL_ADDRESS SkipConnectionResource; + + // should point to same memory that was specified at time of init + D3D12_GPU_VIRTUAL_ADDRESS PersistentResource; + + // temporary resource used as scratch space by driver + // both written and read at time of execute + // use GetRequiredParameterResourceSize to query its size + D3D12_GPU_VIRTUAL_ADDRESS TemporaryResource; +}; + +// make sure structure sizes match what the driver assumes +compile_time_assert(sizeof(NV_D3D12_META_COMMAND_INITIALIZE_CONVOLUTION_EX_DESC) == 8); +compile_time_assert(sizeof(NV_D3D12_META_COMMAND_EXECUTE_CONVOLUTION_EX_DESC) == 72); + +struct NV_D3D12_META_COMMAND_INITIALIZE_GEMM_DESC +{ + NvU64 PersistentResource; +}; + +struct NV_D3D12_META_COMMAND_EXECUTE_GEMM_DESC +{ + NvU64 AResource; + NvU64 BResource; + NvU64 CResource; + NvU64 OutputResource; + + NvU64 PersistentResource; + NvU64 TemporaryResource; +}; + +// make sure structure sizes match what the driver assumes +compile_time_assert(sizeof(NV_D3D12_META_COMMAND_INITIALIZE_GEMM_DESC) == 8); +compile_time_assert(sizeof(NV_D3D12_META_COMMAND_EXECUTE_GEMM_DESC) == 48); + + +#pragma pack(pop) + +DECLARE_INTERFACE_(__declspec(uuid("00BF193A-117B-42BC-BBCD-E964A0EA4F2B"))ID3D12NvMetaCommand_V1, IUnknown) +{ + BEGIN_INTERFACE + + // *** IUnknown methods *** + STDMETHOD(QueryInterface)(THIS_ REFIID riid, void **ppv) PURE; + STDMETHOD_(ULONG,AddRef)(THIS) PURE; + STDMETHOD_(ULONG,Release)(THIS) PURE; + + // ** ID3D12NvMetaCommand methods *** + // Return size of parameter + STDMETHOD(GetRequiredParameterResourceSize)(THIS_ NV_META_COMMAND_RESOURCE_TYPE ResourceType, NvU64 *SizeInBytes) const PURE; + + END_INTERFACE +}; + +typedef ID3D12NvMetaCommand_V1 ID3D12NvMetaCommand; +#define ID3D12NvMetaCommand_VER1 
MAKE_NVAPI_VERSION(IID3D12NvMetaCommand_V1, 1) +#define ID3D12NvMetaCommand_VER ID3D12NvMetaCommand_VER1 + +NVAPI_INTERFACE NvAPI_D3D12_CreateMetaCommand(__in ID3D12Device *pDevice, + __in REFGUID CommandId, + __in NvU32 NodeMask, + __in_bcount(CreationParametersDataSize) const void *pCreationParametersData, + __in NvU32 CreationParametersDataSize, + __out ID3D12NvMetaCommand **ppMetaCommand); + +#endif //defined(__cplusplus) && defined(__d3d12_h__) + + +#if defined (__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_InitializeMetaCommand +// +//! \since Release: 400 +// +//! \code +//! DESCRIPTION: Initializes the given MetaCommand with the parameters passed in +//! +//! \param [in] pCommandList A pointer to D3D12 command list. +//! \param [in] pMetaCommand the MetaCommand to initialize +//! \param [in] pInitializationParametersData Structure containing parameters +//! \param [in] InitializationParametersDataSize Size of the parameter structure in bytes +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_InitializeMetaCommand(__in ID3D12GraphicsCommandList *pCommandlist, + __in ID3D12NvMetaCommand *pMetaCommand, + __in_bcount(InitializationParametersDataSize) const void *pInitializationParametersData, + __in NvU32 InitializationParametersDataSize); + +#endif //defined(__cplusplus) && defined(__d3d12_h__) + + +#if defined (__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_ExecuteMetaCommand +// +//! \since Release: 400 +// +//! \code +//! 
DESCRIPTION: Executes the given MetaCommand with the parameters passed in +//! +//! \param [in] pCommandList A pointer to D3D12 command list. +//! \param [in] pMetaCommand the MetaCommand to execute +//! \param [in] pExecutionParametersData Structure containing parameters +//! \param [in] ExecutionParametersDataSize Size of the parameter structure in bytes +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_ExecuteMetaCommand(__in ID3D12GraphicsCommandList *pCommandlist, + __in ID3D12NvMetaCommand *pMetaCommand, + __in_bcount(ExecutionParametersDataSize) const void *pExecutionParametersData, + __in NvU32 ExecutionParametersDataSize); + +#endif //defined(__cplusplus) && defined(__d3d12_h__) + + +#if defined (__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_CreateCommittedResource +// +//! \since Release: 384 +// +//! \code +//! DESCRIPTION: Wrapper around ID3D12Device::CreateCommittedResource to allow creation of resources according to params provided. +//! HTEX resource is created when NV_D3D12_RESOURCE_FLAG_HTEX is set in the nvResourceFlags parameter. +//! NV_D3D12_RESOURCE_FLAG_CPUVISIBLE_VIDMEM gives driver hint to create the resource on cpu visible vidmem +//! only upload resources use this flag currently, others behave exactly as ID3D12Device::CreateCommittedResource +//! Otherwise the function behaves exactly same as regular ID3D12Device::CreateCommittedResource. +//! When NV_D3D12_RESOURCE_FLAG_HTEX is set, the texels are centered on integer coordinates and filtering +//! 
and LOD are calculated based on the size minus one, which then allows the edges to filter to the exact texels on the edge, +//! eliminating the border/edge filtering issue. Dimension of next mip level is CEIL(currentMipDimension/2), and size of smallest mip is 2x2. +//! Note that NV_D3D12_RESOURCE_FLAG_HTEX can't be used for shared resources. +//! Best practice: Query available space in cpu visible vidmem using NvAPI_D3D12_QueryCpuVisibleVidmem +//! before using NV_D3D12_RESOURCE_FLAG_CPUVISIBLE_VIDMEM +//! +//! \param [in] pDevice A pointer to D3D12 device. +//! \param [in] pHeapProperties A pointer to a D3D12_HEAP_PROPERTIES structure that provides properties for the resource's heap. +//! \param [in] HeapFlags Heap options, as a bitwise-OR'd combination of D3D12_HEAP_FLAGS enumeration constants. +//! \param [in] pDesc A pointer to a D3D12_RESOURCE_DESC structure that describes the resource. +//! \param [in] InitialState The initial state of the resource, as a bitwise-OR'd combination of D3D12_RESOURCE_STATES enumeration constants. +//! \param [in] pOptimizedClearValue Specifies a D3D12_CLEAR_VALUE that describes the default value for a clear color. +//! \param [in] pNVResourceParams A pointer to a structure containing additional NV specific resource creation information (see NV_D3D12_RESOURCE_FLAGS below for more info on flags) +//! \param [in] riid The globally unique identifier (GUID) for the resource interface. +//! \param [out] ppvResource A pointer to memory that receives the requested interface pointer to the created resource object. +//! ppvResource can be NULL, to enable capability testing. When ppvResource is NULL, no object will be created and pSupported +//! will be set to true when pResourceDesc is valid. +//! \param [out] pSupported optional, needed only for capability testing when ppvResource is NULL +//! SUPPORTED OS: Windows 10 +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. 
If there are return error codes with specific +//! meaning for this API, they are listed below. +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +typedef enum { + NV_D3D12_RESOURCE_FLAG_NONE = 0, + NV_D3D12_RESOURCE_FLAG_HTEX = 1, //!< Create HTEX texture + NV_D3D12_RESOURCE_FLAG_CPUVISIBLE_VIDMEM= 2, //!< Hint to create resource in cpuvisible vidmem +} NV_D3D12_RESOURCE_FLAGS; + +typedef struct _NV_RESOURCE_PARAMS_V1 +{ + NvU32 version; //!SetFence(dstGpu, hFence, Value); \ + pMultiGPUDevice->WaitForFence(1 << (srcGpu), hFence, Value); \ + Value++; + +#define FENCE_SYNCHRONIZATION_END(pMultiGPUDevice, hFence, Value, srcGpu, dstGpu) \ + pMultiGPUDevice->SetFence(srcGpu, hFence, Value); \ + pMultiGPUDevice->WaitForFence(1 << (dstGpu), hFence, Value); \ + Value++; + +//! PresentCompositingConfig method flags. +#define NVAPI_PRESENT_COMPOSITING_CONFIG_FLAG_USE_VIDEO_BRIDGE 0x01 +#define NVAPI_PRESENT_COMPOSITING_CONFIG_FLAG_CLEAR_OUTBANDS 0x02 +#define NVAPI_PRESENT_COMPOSITING_CONFIG_FLAG_GET_VIDEO_BRIDGE_STATUS 0x80000000 + +#define NVAPI_VIDEO_BRIDGE_STATUS_AVAILABLE 0 +#define NVAPI_VIDEO_BRIDGE_STATUS_NOT_AVAILABLE 1 +#define NVAPI_VIDEO_BRIDGE_STATUS_FAILED_ACCESS 2 +#define NVAPI_VIDEO_BRIDGE_STATUS_UNKNOWN 3 + +#define NVAPI_ALL_GPUS 0 +typedef ID3D11MultiGPUDevice_V1 ID3D11MultiGPUDevice; + +#define ID3D11MultiGPUDevice_VER1 MAKE_NVAPI_VERSION(ID3D11MultiGPUDevice_V1, 1) +#define ID3D11MultiGPUDevice_VER2 MAKE_NVAPI_VERSION(ID3D11MultiGPUDevice_V1, 2) +#define ID3D11MultiGPUDevice_VER3 MAKE_NVAPI_VERSION(ID3D11MultiGPUDevice_V1, 3) +#define ID3D11MultiGPUDevice_VER ID3D11MultiGPUDevice_VER3 + +#define ALL_GPUS 0 + +//! 
\ingroup dx +NVAPI_INTERFACE NvAPI_D3D11_CreateMultiGPUDevice(__in ID3D11Device *pDevice, __in ULONG version, __out ULONG *currentVersion, __out ID3D11MultiGPUDevice **ppD3D11MultiGPUDevice, __in UINT maxGpus=ALL_GPUS); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used to query the support of Single Pass Stereo HW feature +//! \ingroup dx +typedef struct _NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V1 +{ + NvU32 version; // parameter struct version + NvU32 bSinglePassStereoSupported; // Single Pass Stereo supported +} NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V1; + +typedef struct _NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V2 +{ + NvU32 version; // _IN_ parameter struct version + NvU32 bSinglePassStereoSupported : 1; // _OUT_ Single Pass Stereo supported + NvU32 bSinglePassStereoXYZWSupported : 1; // _OUT_ Single Pass Stereo XYZW supported + NvU32 reserved : 30; // _INOUT_ bits reserved for future use +} NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V2; + +typedef NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V2 NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS; +#define NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V1, 1) +#define NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER2 MAKE_NVAPI_VERSION(NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V2, 2) +#define NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER2 + +#ifndef NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER +typedef NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V1 NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS; +#define NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_V1, 1) +#define NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS_VER1 +#endif + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || 
defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_QuerySinglePassStereoSupport +// +//! DESCRIPTION: Queries the support of Single Pass Stereo feature on current setup and returns appropriate boolean value. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevice The ID3D11Device to use. +//! \param [inout] pSinglePassStereoSupportedParams Stores value of whether Single Pass Stereo is supported on current setup or not. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_QuerySinglePassStereoSupport(__in IUnknown *pDevice, + __inout NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS *pQuerySinglePassStereoSupportedParams); + +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) + +#if defined(__cplusplus) && defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetSinglePassStereoMode +// +//! DESCRIPTION: Set the Single Pass Stereo state +//! +//! \note Note that this is an asynchronous function and returns NVAPI_OK if all arguments are valid. +//! Returned value NVAPI_OK does not reflect that Single Pass Stereo is supported or is set in hardware. +//! One must call NvAPI_D3D_QuerySinglePassStereoSupport() to confirm that the current setup +//! supports Single Pass Stereo before calling this set-function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevOrContext The ID3D11Device or ID3D11DeviceContext to use. +//! \param [in] numViews Number of views to render. +//! 
\param [in] renderTargetIndexOffset Offset between render targets of the different views. +//! \param [in] independentViewportMaskEnable Is the independent viewport mask enabled. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_SetSinglePassStereoMode(__in IUnknown *pDevOrContext, __in NvU32 numViews, __in NvU32 renderTargetIndexOffset, __in NvU8 independentViewportMaskEnable); + +#endif //defined(__cplusplus) && defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) + +#if defined(__cplusplus) && ( defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_QuerySinglePassStereoSupport +// +//! DESCRIPTION: Queries the support of Single Pass Stereo feature on current setup and returns appropriate boolean value. +//! +//! SUPPORTED OS: Windows 10 +//! +//! +//! \param [in] pDevice The IDirect3DDevice12 to use. +//! \param [inout] pQuerySinglePassStereoSupportedParams Stores value of whether Single Pass Stereo is supported on current setup or not. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_QuerySinglePassStereoSupport(__in ID3D12Device *pDevice, + __inout NV_QUERY_SINGLE_PASS_STEREO_SUPPORT_PARAMS *pQuerySinglePassStereoSupportedParams); + +#endif // defined(__cplusplus) && ( defined(__d3d12_h__)) + +#if defined(__cplusplus) && ( defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetSinglePassStereoMode +// +//! DESCRIPTION: Set the Single Pass Stereo state. +//! +//! \note Note that Single Pass Stereo state persists on a particular CommandList till it is closed. +//! The state is reset to default (disabled) for every newly created CommandList. +//! One must call NvAPI_D3D12_QuerySinglePassStereoSupport() to confirm that the current setup +//! supports Single Pass Stereo before calling this set-function. +//! +//! SUPPORTED OS: Windows 10 +//! +//! +//! \param [in] pCommandList The command list in which we will add push buffer commmands for enabling Single Pass Stereo feature +//! Note: Command list of type D3D12_COMMAND_LIST_TYPE_BUNDLE is not allowed for setting the state of this feature. +//! \param [in] numViews Number of views to render. +//! \param [in] RenderTargetIndexOffset Offset between render targets of the different views. +//! \param [in] IndependentViewportMaskEnable Is the independent viewport mask enabled. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_SetSinglePassStereoMode(__in ID3D12GraphicsCommandList* pCommandList, + __in NvU32 numViews, + __in NvU32 renderTargetIndexOffset, + __in NvU8 independentViewportMaskEnable); + +#endif // defined(__cplusplus) && ( defined(__d3d12_h__)) + +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! Used to query the support of MultiView HW feature +//! \ingroup dx + +typedef struct _NV_QUERY_MULTIVIEW_SUPPORT_PARAMS_V1 +{ + NvU32 version; // _IN_ parameter struct version + NvU32 bMultiViewSupported : 1; // _OUT_ MultiView supported (Render 4 views in a single pass) + NvU32 bSinglePassStereoSupported : 1; // _OUT_ StereoX supported (Render 2 views in a single pass) + NvU32 bSinglePassStereoXYZWSupported : 1; // _OUT_ StereoXYZW supported (Render 2 views in a single pass) + NvU32 reserved : 29; // _INOUT_ bits reserved for future use +} NV_QUERY_MULTIVIEW_SUPPORT_PARAMS_V1; + +typedef NV_QUERY_MULTIVIEW_SUPPORT_PARAMS_V1 NV_QUERY_MULTIVIEW_SUPPORT_PARAMS; +#define NV_QUERY_MULTIVIEW_SUPPORT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_QUERY_MULTIVIEW_SUPPORT_PARAMS_V1, 1) +#define NV_QUERY_MULTIVIEW_SUPPORT_PARAMS_VER NV_QUERY_MULTIVIEW_SUPPORT_PARAMS_VER1 +#define NV_MULTIVIEW_MAX_SUPPORTED_VIEWS 4 + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_QueryMultiViewSupport +// +//! DESCRIPTION: Queries the support of MultiView feature on current setup and returns appropriate boolean value. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pDevice The ID3D11Device to use. +//! \param [inout] pMultiViewSupportedParams Stores value of whether MultiView is supported on current setup or not. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_QueryMultiViewSupport(__in IUnknown *pDevice, + __inout NV_QUERY_MULTIVIEW_SUPPORT_PARAMS *pQueryMultiViewSupportedParams); + +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used for setting the Mode for MultiView HW Feature. +//! \ingroup dx +typedef struct _NV_MULTIVIEW_PARAMS_V1 +{ + NvU32 version; // _IN_ parameter struct version + NvU32 numViews; // _IN_ Number of views to render. + NvU32 renderTargetIndexOffset[NV_MULTIVIEW_MAX_SUPPORTED_VIEWS]; // _IN_ Offset between render targets for each of the per views. + NvU8 independentViewportMaskEnable; // _IN_ Is the independent viewport mask enabled. +} NV_MULTIVIEW_PARAMS_V1; + +typedef NV_MULTIVIEW_PARAMS_V1 NV_MULTIVIEW_PARAMS; +#define NV_MULTIVIEW_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_MULTIVIEW_PARAMS_V1, 1) +#define NV_MULTIVIEW_PARAMS_VER NV_MULTIVIEW_PARAMS_VER1 + +#if defined(__cplusplus) && defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetMultiViewMode +// +//! DESCRIPTION: Set the MultiView state +//! +//! \note Note that this is an asynchronous function and returns NVAPI_OK if all arguments are valid. +//! Returned value NVAPI_OK does not reflect that MultiView is supported or is set in hardware. +//! One must call NvAPI_D3D_QueryMultiViewSupport() to confirm that the current setup +//! supports MultiView before calling this set-function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pDevOrContext The ID3D11Device or ID3D11DeviceContext to use. +//! \param [in] pMultiViewParams MultiView Params +//! +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_SetMultiViewMode(__in IUnknown *pDevOrContext, __in NV_MULTIVIEW_PARAMS *pMultiViewParams); + +#endif //defined(__cplusplus) && defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used to query the support of Lens Matched Shading HW feature +//! \ingroup dx +typedef struct _NV_QUERY_MODIFIED_W_SUPPORT_PARAMS +{ + NvU32 version; // parameter struct version + NvU32 bModifiedWSupported; // Modified W supported +} NV_QUERY_MODIFIED_W_SUPPORT_PARAMS_V1; + +typedef NV_QUERY_MODIFIED_W_SUPPORT_PARAMS_V1 NV_QUERY_MODIFIED_W_SUPPORT_PARAMS; +#define NV_QUERY_MODIFIED_W_SUPPORT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_QUERY_MODIFIED_W_SUPPORT_PARAMS_V1, 1) +#define NV_QUERY_MODIFIED_W_SUPPORT_PARAMS_VER NV_QUERY_MODIFIED_W_SUPPORT_PARAMS_VER1 + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_QueryModifiedWSupport +// +//! DESCRIPTION: Queries the support of Modified W feature on current setup and returns appropriate boolean value. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevice The ID3D11Device to use. +//! \param [inout] pQueryModifiedWSupportedParams Stores value of whether Modified W is supported on current setup or not. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_QueryModifiedWSupport(__in IUnknown *pDev, + __inout NV_QUERY_MODIFIED_W_SUPPORT_PARAMS *pQueryModifiedWSupportedParams); +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +#define NV_MODIFIED_W_MAX_VIEWPORTS 16 + +typedef struct _NV_MODIFIED_W_COEFFICIENTS +{ + float fA; // A coefficient in w' = w + Ax + By + float fB; // B coefficient in w' = w + Ax + By + float fAReserved; // reserved + float fBReserved; // reserved + + float fReserved[2]; // reserved +} NV_MODIFIED_W_COEFFICIENTS; + +typedef struct _NV_MODIFIED_W_PARAMS +{ + NvU32 version; // parameter struct version + NvU32 numEntries; // number of valid NV_MODIFIED_W_COEFFICIENTS structs in array + NV_MODIFIED_W_COEFFICIENTS modifiedWCoefficients[NV_MODIFIED_W_MAX_VIEWPORTS]; // coefficients + + NvU32 id; // reserved + NvU32 reserved[NV_MODIFIED_W_MAX_VIEWPORTS]; // reserved +} NV_MODIFIED_W_PARAMS_V1; + +typedef NV_MODIFIED_W_PARAMS_V1 NV_MODIFIED_W_PARAMS; +#define NV_MODIFIED_W_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_MODIFIED_W_PARAMS_V1, 1) +#define NV_MODIFIED_W_PARAMS_VER NV_MODIFIED_W_PARAMS_VER1 + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetModifiedWMode +// +//! DESCRIPTION: Set the Modified W state and A,B coefficients for HW support +//! +//! \note Note that this is an asynchronous function and returns NVAPI_OK if all arguments are valid. +//! Returned value NVAPI_OK does not reflect that Modified-W is supported or is set in hardware. +//! One must call NvAPI_D3D_QueryModifiedWSupport() to confirm that the current setup +//! 
supports Modified-W before calling this set-function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDevOrContext The ID3D11Device or ID3D11DeviceContext to use. +//! \param [in] psModifiedWParams Modified W parameters. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_SetModifiedWMode(__in IUnknown *pDevOrContext, __in NV_MODIFIED_W_PARAMS *psModifiedWParams); + +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) + +#if defined(__cplusplus) && ( defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_QueryModifiedWSupport +// +//! DESCRIPTION: Queries the support of Modified-W feature on current setup and returns appropriate boolean value. +//! +//! SUPPORTED OS: Windows 10 +//! +//! +//! \param [in] pDevice The ID3D12Device Device created by application +//! \param [inout] pQueryModifiedWSupportedParams Stores value of whether Modified-W is supported on current setup or not. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_QueryModifiedWSupport(__in ID3D12Device *pDevice, + __inout NV_QUERY_MODIFIED_W_SUPPORT_PARAMS *pQueryModifiedWSupportedParams); + +#endif // defined(__cplusplus) && ( defined(__d3d12_h__)) + +#if defined(__cplusplus) && ( defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetModifiedWMode +// +//! 
DESCRIPTION: Set the Modified-W state and A, B coefficients for HW support +//! +//! \note Note that Modified-W state persists on a particular CommandList till it is closed. +//! The state is reset to default (disabled) for every newly created CommandList. +//! One must call NvAPI_D3D12_QueryModifiedWSupport() to confirm that the current setup +//! supports Modified-W before calling this set-function. +//! +//! SUPPORTED OS: Windows 10 +//! +//! +//! \param [in] pCommandList The command list in which we will add push buffer commmands for enabling Modified-W feature +//! Note: Command list of type D3D12_COMMAND_LIST_TYPE_BUNDLE is not allowed for setting the state of this feature. +//! \param [in] pModifiedWParams Modified-W parameters. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_SetModifiedWMode(__in ID3D12GraphicsCommandList* pCommandList, + __in NV_MODIFIED_W_PARAMS *pModifiedWParams); + +#endif // defined(__cplusplus) && ( defined(__d3d12_h__)) + + +#if defined(__cplusplus) && (defined(__d3d11_h__)) + +//! \ingroup dx +//! See NvAPI_D3D_CreateLateLatchObject +DECLARE_INTERFACE(ID3DLateLatchObject_V1) +{ + STDMETHOD_(UINT,Release) (THIS) PURE; //! Release the created LateLatch object and associated buffers. + STDMETHOD_(NvAPI_Status,Latch) (THIS_ __in IUnknown* pContext = NULL) PURE; //! Request to queue the latch operation to the GPU. + STDMETHOD_(ID3D11Buffer*,GetD3D11Buffer) (THIS_ __in UINT index = 0) PURE; //! Get ID3D11Buffer* available at the given 'index' + STDMETHOD_(UINT,GetBufferCount) (THIS) PURE; //! Returns the number of late latch buffers created for this LateLatchObject. + STDMETHOD_(NvAPI_Status,UpdateData) (THIS_ __in void **ppData) PURE; //! Fully update all LateLatch buffers with new data. 
+ STDMETHOD_(NvAPI_Status,UpdateData) (THIS_ __in void *pData, __in size_t offset, + __in size_t size, __in UINT index = 0) PURE; //! Partially update one of the LateLatch buffers with new data. +}; + +//! \ingroup dx +//! See NvAPI_D3D_CreateLateLatchObject +typedef ID3DLateLatchObject_V1 ID3DLateLatchObject; +#define ID3DLateLatchObject_VER1 MAKE_NVAPI_VERSION(ID3DLateLatchObject_V1, 1) +#define ID3DLateLatchObject_VER ID3DLateLatchObject_VER1 + +typedef struct _NV_D3D_LATELATCH_OBJECT_DESC_V1 +{ + NvU32 version; + NvU32 numBuffers; // _IN_ Number of LateLatch buffers that the app wants to create. + D3D11_BUFFER_DESC **ppBufferDesc; // _IN_ Description of buffers + ID3DLateLatchObject **ppD3DLateLatchObject; // _Out_ Pointer to created interface +} NV_D3D_LATELATCH_OBJECT_DESC_V1; + +typedef NV_D3D_LATELATCH_OBJECT_DESC_V1 NV_D3D_LATELATCH_OBJECT_DESC; +#define NV_D3D_LATELATCH_OBJECT_DESC_VER1 MAKE_NVAPI_VERSION(NV_D3D_LATELATCH_OBJECT_DESC_V1, 1) +#define NV_D3D_LATELATCH_OBJECT_DESC_VER NV_D3D_LATELATCH_OBJECT_DESC_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_CreateLateLatchObject +// +//! DESCRIPTION: Creates a Late Latch Object interface +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 384 +//! +//! \param [in] pDevice Current ID3D11Device. +//! \param [inout] pLateLatchObjectDesc Pointer to in/out structure for late latch object creation +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D_CreateLateLatchObject(__in IUnknown *pDevice, __inout NV_D3D_LATELATCH_OBJECT_DESC* pLateLatchObjectDesc); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + + + +#if defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) +//! \ingroup dx +//! See NvAPI_D3D_QueryLateLatchSupport +typedef struct _NV_QUERY_LATELATCH_SUPPORT_PARAMS +{ + NvU32 version; //!< (IN) Parameter structure version + NvU32 bLateLatchSupported; //!< (OUT) LateLatch supported +} NV_QUERY_LATELATCH_SUPPORT_PARAMS_V1; + +typedef NV_QUERY_LATELATCH_SUPPORT_PARAMS_V1 NV_QUERY_LATELATCH_SUPPORT_PARAMS; +#define NV_QUERY_LATELATCH_SUPPORT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_QUERY_LATELATCH_SUPPORT_PARAMS_V1, 1) +#define NV_QUERY_LATELATCH_SUPPORT_PARAMS_VER NV_QUERY_LATELATCH_SUPPORT_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_QueryLateLatchSupport +// +//! DESCRIPTION: Queries the support of DX11 Late Latch feature on current setup. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 384 +//! +//! \param [in] pDevice Current ID3D11Device. +//! \param [inout] pQueryLateLatchSupportParams Stores value of whether Late Latch is supported on current setup or not. +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_QueryLateLatchSupport(__in IUnknown *pDevice, + __inout NV_QUERY_LATELATCH_SUPPORT_PARAMS *pQueryLateLatchSupportParams); +#endif // defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) + + + +#if defined (__cplusplus) && (defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_RegisterDevice +// +//! DESCRIPTION: Tells NvAPI about a D3D device. This must be called prior to using any DX1x +//! deferred-context calls. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDev The ID3D10Device or ID3D11Device to use. +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_RegisterDevice(__in IUnknown *pDev); + +#endif //if defined(__cplusplus) && (defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) + + + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_MultiDrawInstancedIndirect +// +//! DESCRIPTION: Extension of DrawInstancedIndirect that takes a draw count in. The effect of this function is to loop over +//! that draw count and perform the DrawInstancedIndirect operation each time, incrementing the buffer offset +//! by the supplied stride each time. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] *pDevContext11 Pointer to D3D11 device context (IC or DC) +//! 
\param [in] drawCount Do DrawInstancedIndirect operation this many times +//! \param [in] *pBuffer ID3D11Buffer that contains the command parameters +//! \param [in] alignedByteOffsetForArgs Start in pBuffer of the command parameters +//! \param [in] alignedByteStrideForArgs Stride of the command parameters - must be >= 4 * sizeof(NvU32) +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval NVAPI_D3D_DEVICE_NOT_REGISTERED When MultiDraw is called on a deferred context, and the device has not yet +//! been registered (NvAPI_D3D_RegisterDevice), this error is returned. +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_MultiDrawInstancedIndirect(__in ID3D11DeviceContext *pDevContext11, + __in NvU32 drawCount, + __in ID3D11Buffer *pBuffer, + __in NvU32 alignedByteOffsetForArgs, + __in NvU32 alignedByteStrideForArgs); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + + +#if defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_MultiDrawIndexedInstancedIndirect +// +//! DESCRIPTION: Extension of DrawIndexedInstancedIndirect that takes a draw count in. The effect of this function is to loop over +//! that draw count and perform the DrawIndexedInstancedIndirect operation each time, incrementing the buffer offset +//! by the supplied stride each time. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] *pDevContext11 Pointer to D3D11 device context (IC or DC) +//! \param [in] drawCount Do DrawIndexedInstancedIndirect operation this many times +//! \param [in] *pBuffer ID3D11Buffer that contains the command parameters +//! 
\param [in] alignedByteOffsetForArgs Start in pBuffer of the command parameters +//! \param [in] alignedByteStrideForArgs Stride of the command parameters - must be >= 5 * sizeof(NvU32) +//! +//! RETURN STATUS: This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval NVAPI_D3D_DEVICE_NOT_REGISTERED When MultiDraw is called on a deferred context, and the device has not yet +//! been registered (NvAPI_D3D_RegisterDevice), this error is returned. +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_MultiDrawIndexedInstancedIndirect(__in ID3D11DeviceContext *pDevContext11, + __in NvU32 drawCount, + __in ID3D11Buffer *pBuffer, + __in NvU32 alignedByteOffsetForArgs, + __in NvU32 alignedByteStrideForArgs); + +#endif //defined (__cplusplus) && (defined(__d3d11_h__) || defined(__d3d11_1_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined (__cplusplus) && ( defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) ||defined(__d3d11_h__) ) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_ImplicitSLIControl +// +//! This function enables/disables the SLI rendering mode. It has to be called prior to D3D device creation. Once this function is called with DISABLE_IMPLICIT_SLI +//! parameter all subsequently created devices will be forced to run in a single gpu mode until the same function is called with ENABLE_IMPLICIT_SLI parameter. The enable +//! call will force all subsequently created devices to run in default implicit SLI mode being determined by an application profile or a global control panel SLI setting. +//! This NvAPI call is supported in all DX10+ versions of the driver. It is supported on all Windows versions. +//! +//! \retval NVAPI_OK Completed request +//! 
\retval NVAPI_ERROR Error occurred +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup dx +typedef enum _IMPLICIT_SLI_CONTROL +{ + DISABLE_IMPLICIT_SLI = 0, + ENABLE_IMPLICIT_SLI = 1, +} IMPLICIT_SLI_CONTROL; + +//! \ingroup dx +NVAPI_INTERFACE NvAPI_D3D_ImplicitSLIControl(__in IMPLICIT_SLI_CONTROL implicitSLIControl); + +#endif //defined (__cplusplus) && ( defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) ||defined(__d3d11_h__) ) + + +#if defined (__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetNeedsAppFPBlendClamping +// +//! \code +//! DESCRIPTION: This function returns whether the application needs to do FP blend clamping itself +//! +//! \param [in] pDevice Current d3d device +//! \param [out] pAppClampNeeded If true, app needs to clamp. If false, HW does the clamping +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \since Release: 375 +//! +//! SUPPORTED OS: Windows 10 +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_GetNeedsAppFPBlendClamping(__in ID3D12Device *pDevice, + __out bool *pAppClampNeeded); + +#endif //defined(__cplusplus) && defined(__d3d12_h__) + +//! SUPPORTED OS: Windows 10 +//! + +#if defined (__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_UseDriverHeapPriorities +// +//! \code +//! DESCRIPTION: Sets the driver to override Microsoft's heap allocation priority values with Nvidia driver priority values. Use this once per process before allocating resources. +//! +//! 
\param [in] pDevice The IDirect3DDevice12 to use. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! \since Release: 381 +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_UseDriverHeapPriorities(__in ID3D12Device *pDevice); + +#endif // defined (__cplusplus) && defined(__d3d12_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && ( defined(__d3d12_h__)) + + +typedef struct _NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS +{ + __in NvU32 version; //!< Structure version + __in ID3D12Device *pDevice; //!< The ID3D12Device created by application. + __in ID3D12Resource *pSwapChainBuffer; //!< The ID3D12Resource part of the application swap chain that has companion allocations. + __in NvU32 companionBufferCount; //!< The number of ID3D12Resource pointers requested to be returned in the ppComanionResources array, which should match ID3D12Device::GetNodeCount for the complete set of companion allocations. + __inout ID3D12Resource **ppCompanionResources; //!< An array of ID3D12Resource pointers sized to match companionBufferCount, which will receive the companion allocations. +} NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS_V1; + +typedef NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS_V1 NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS; +#define NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS_VER1 MAKE_NVAPI_VERSION(NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS_V1, 1) +#define NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS_VER NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_Mosaic_GetCompanionAllocations +// +//! 
DESCRIPTION: Queries the driver for internally created allocations that accompany a swap chain buffer for present-related operations. +//! Surfaces returned by this interface must be destroied at the same time that the original swap chain buffer is destroyed. +//! In general this occurs prior to a ResizeBuffers call, or when the swap chain is released. +//! Note that this function only works in Landscape orientation due to Windows behavior, and attempts to utilize it with any type of display +//! rotation will result in failure. +//! +//! \param [inout] companionBufferCount The parameters for this function. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! \retval NVAPI_INVALID_CALL System configuration does not support this interface (eg, display is rotated, mosaic not enabled, etc) +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_Mosaic_GetCompanionAllocations(__inout NV_D3D12_MOSAIC_GETCOMPANIONALLOCATIONS *params); + +#endif // defined(__cplusplus) && ( defined(__d3d12_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && ( defined(__d3d12_h__)) + +typedef struct _NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS +{ + __in NvU32 version; //!< Structure version + __in ID3D12Device *pDevice; //!< The ID3D12Device created by application. + __in ID3D12Resource *pSwapChainBuffer; //!< The ID3D12Resource part of the application swap chain. + __inout NvU32 *pPartitionCount; //!< A variable to receive the number of NV_MGPU_MOSAIC_DISPLAY_SURFACE_PARTITION elements returned or that holds the size of pPartitions when it is non-NULL. + __inout RECT *pViewport; //!< An optional array to hold the viewport information per partition. When this is valid pNodeMask must also be valid. 
+ __inout NvU32 *pNodeMask; //!< An optional array to hold the GPU mask where this viewport must be valid per partition. When this is valid pViewport must also be valid. +} NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS_V1; + +typedef NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS_V1 NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS; +#define NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS_VER1 MAKE_NVAPI_VERSION(NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS_V1, 1) +#define NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS_VER NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_Mosaic_GetViewportAndGpuPartitions +// +//! DESCRIPTION: Queries the driver for how a swap chain display surface is subdivided across devices in relation to display connectivity. +//! Call this interface with NULL pPartitions in order to know how many subdivisions exist and allocate the proper size to hold all data. +//! Call it a second time with a properly sized partitions array to receive all subdivisions along with GPU node masks of each rectangle. +//! Note that this function only works in Landscape orientation due to Windows behavior, and attempts to utilize it with any type of display +//! rotation will result in failure. +//! +//! \param [inout] params The parameters for this function. +//! +//! \retval NVAPI_OK Call succeeded. +//! \retval NVAPI_ERROR Call failed. +//! \retval NVAPI_INVALID_ARGUMENT One or more arguments are invalid. +//! \retval NVAPI_INVALID_CALL System configuration does not support this interface (eg, display is rotated, mosaic not enabled, etc) +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_Mosaic_GetViewportAndGpuPartitions(__inout NV_D3D12_MOSAIC_GETVIEWPORTANDGPUPARTITIONS *params); + +#endif // defined(__cplusplus) && ( defined(__d3d12_h__)) + + +#if defined(__cplusplus) && (defined(__d3d11_h__)) +//! \ingroup dx +//! See NvAPI_D3D1x_GetGraphicsCapabilities + +typedef struct _NV_D3D1x_GRAPHICS_CAPS_V1 +{ + NvU32 bExclusiveScissorRectsSupported : 1; //!< (OUT) Outputs whether Exclusive Scissor Rects are supported or not + NvU32 bVariablePixelRateShadingSupported : 1; //!< (OUT) Outputs whether Variable Pixel Shading Rates are supported or not + NvU32 reservedBits : 30; // Reserved bits for future expansion + NvU32 reserved[7]; // Reserved for future expansion +} NV_D3D1x_GRAPHICS_CAPS_V1; + +#define NV_D3D1x_GRAPHICS_CAPS_VER1 MAKE_NVAPI_VERSION(NV_D3D1x_GRAPHICS_CAPS_V1, 1) + +typedef struct _NV_D3D1x_GRAPHICS_CAPS_V2 +{ + NvU32 bExclusiveScissorRectsSupported : 1; //!< (OUT) Outputs whether Exclusive Scissor Rects are supported or not + NvU32 bVariablePixelRateShadingSupported : 1; //!< (OUT) Outputs whether Variable Pixel Shading Rates are supported or not + NvU32 bFastUAVClearSupported : 1; //!< (OUT) Outputs whether UAVClear is implemented using ZBC rather than compute shader + NvU32 reservedBits : 29; // Reserved bits for future expansion + NvU16 majorSMVersion; //!< (OUT) Major SM version of the device + NvU16 minorSMVersion; //!< (OUT) Minor SM version of the device + NvU32 reserved[14]; // Reserved for future expansion +} NV_D3D1x_GRAPHICS_CAPS_V2; + +typedef NV_D3D1x_GRAPHICS_CAPS_V2 NV_D3D1x_GRAPHICS_CAPS; +#define NV_D3D1x_GRAPHICS_CAPS_VER2 MAKE_NVAPI_VERSION(NV_D3D1x_GRAPHICS_CAPS_V2, 2) +#define NV_D3D1x_GRAPHICS_CAPS_VER NV_D3D1x_GRAPHICS_CAPS_VER2 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_GetGraphicsCapabilities +// +//! 
DESCRIPTION: Get the graphics capabilities for current hardware/software setup +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pDevice The ID3D11Device device to be used for getting the graphics capabilities. +//! \param [in] structVersion Version of the caps struct. Should be set to NV_D3D1x_GRAPHICS_CAPS_VER. +//! \param [inout] pGraphicsCaps Pointer to a NV_D3D1x_GRAPHICS_CAPS struct created by app. +//! Graphics capabilities will be filled in this struct by the driver. +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D1x_GetGraphicsCapabilities(__in IUnknown *pDevice, + __in NvU32 structVersion, + __inout NV_D3D1x_GRAPHICS_CAPS *pGraphicsCaps); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +#if defined(__cplusplus) && (defined(__d3d12_h__)) +//! \ingroup dx +//! 
See NvAPI_D3D12_GetGraphicsCapabilities + +typedef struct _NV_D3D12_GRAPHICS_CAPS_V1 +{ + NvU32 bExclusiveScissorRectsSupported : 1; //!< (OUT) Outputs whether Exclusive Scissor Rects are supported or not + NvU32 bVariablePixelRateShadingSupported : 1; //!< (OUT) Outputs whether Variable Pixel Shading Rates are supported or not + NvU32 bFastUAVClearSupported : 1; //!< (OUT) Outputs whether UAVClear is implemented using ZBC rather than compute shader + NvU32 reservedBits : 29; // Reserved bits for future expansion + NvU16 majorSMVersion; //!< (OUT) Major SM version of the device + NvU16 minorSMVersion; //!< (OUT) Minor SM version of the device + NvU32 reserved[6]; // Reserved for future expansion +} NV_D3D12_GRAPHICS_CAPS_V1; + +typedef NV_D3D12_GRAPHICS_CAPS_V1 NV_D3D12_GRAPHICS_CAPS; +#define NV_D3D12_GRAPHICS_CAPS_VER1 MAKE_NVAPI_VERSION(NV_D3D12_GRAPHICS_CAPS_V1, 1) +#define NV_D3D12_GRAPHICS_CAPS_VER NV_D3D12_GRAPHICS_CAPS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetGraphicsCapabilities +// +//! DESCRIPTION: Get the graphics capabilities for current hardware/software setup +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pDevice The ID3D12Device device to be used for getting the graphics capabilities. +//! \param [in] structVersion Version of the caps struct. Should be set to NV_D3D12_GRAPHICS_CAPS_VER. +//! \param [inout] pGraphicsCaps Pointer to a NV_D3D12_GRAPHICS_CAPS struct created by app. +//! Graphics capabilities will be filled in this struct by the driver. +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D12_GetGraphicsCapabilities(__in IUnknown *pDevice, + __in NvU32 structVersion, + __inout NV_D3D12_GRAPHICS_CAPS *pGraphicsCaps); + +#endif // defined(__cplusplus) && (defined(__d3d12_h__)) + + +#if defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) +#define NV_MAX_NUM_EXCLUSIVE_SCISSOR_RECTS 16 +#endif // defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__)) +//! \ingroup dx +//! See NvAPI_D3D11_RSSetExclusiveScissorRects + +typedef struct _NV_D3D11_EXCLUSIVE_SCISSOR_RECT_DESC_V1 +{ + bool enableExclusiveScissorRect; //!< (IN) Control of enabling Exclusive ScissorRect per rect + D3D11_RECT scissorRect; //!< (IN) Single rect dimensions +} NV_D3D11_EXCLUSIVE_SCISSOR_RECT_DESC_V1; + +typedef struct _NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC_V1 +{ + NvU32 version; //!< (IN) Parameter struct version + NvU32 numRects; //!< (IN) Number of Exclusive Scissor Rects to be set. + // \note Passing zero will globally disable Exclusive Scissor Rects + // \note Max value can be equal to NV_MAX_NUM_EXCLUSIVE_SCISSOR_RECTS + NV_D3D11_EXCLUSIVE_SCISSOR_RECT_DESC_V1 *pRects; //!< (IN) Array of NV_D3D11_EXCLUSIVE_SCISSOR_RECT_DESC with number of elements equal to Exclusive Scissor Rects +} NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC_V1; + +typedef NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC_V1 NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC; +typedef NV_D3D11_EXCLUSIVE_SCISSOR_RECT_DESC_V1 NV_D3D11_EXCLUSIVE_SCISSOR_RECT_DESC; +#define NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC_VER1 MAKE_NVAPI_VERSION(NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC_V1, 1) +#define NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC_VER NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_RSSetExclusiveScissorRects +// +//! 
DESCRIPTION: Sets Exclusive Scissor Rects. The content bounded within the Scissor Rects +//! will be excluded from rendering unlike regular Scissor Rects. These are +//! orthogonal with Regular Scissor Rects. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pContext The device context (ID3D11DeviceContext) to be used for setting the Exclusive Scissor Rects. +//! \param [in] pExclusiveScissorRectsDesc Description of the Exclusive Scissor Rects duly filled with their dimensions +//! and control over enablement of individual ScissorRect +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_RSSetExclusiveScissorRects(__in IUnknown *pContext, + __in NV_D3D11_EXCLUSIVE_SCISSOR_RECTS_DESC *pExclusiveScissorRectsDesc); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) +//! \ingroup dx +//! 
See NvAPI_D3D11_RSSetViewportsPixelShadingRates + +#define NV_MAX_PIXEL_SHADING_RATES 16 // Currently only 12 Shading Rates are available +#define NV_MAX_NUM_VIEWPORTS 16 + +// Every element in Shading Rate Resource represents the shading rate for all pixels in the corresponding tile +// The Shading Rate Resource dimensions must be the bound render target size divided by the tile dimensions (width/height) + +#define NV_VARIABLE_PIXEL_SHADING_TILE_WIDTH 16 // Width of the tile, in pixels +#define NV_VARIABLE_PIXEL_SHADING_TILE_HEIGHT 16 // Height of the tile, in pixels + +typedef enum +{ + NV_PIXEL_X0_CULL_RASTER_PIXELS, // No shading, tiles are culled + NV_PIXEL_X16_PER_RASTER_PIXEL, // 16 shading passes per 1 raster pixel + NV_PIXEL_X8_PER_RASTER_PIXEL, // 8 shading passes per 1 raster pixel + NV_PIXEL_X4_PER_RASTER_PIXEL, // 4 shading passes per 1 raster pixel + NV_PIXEL_X2_PER_RASTER_PIXEL, // 2 shading passes per 1 raster pixel + NV_PIXEL_X1_PER_RASTER_PIXEL, // Per-pixel shading + NV_PIXEL_X1_PER_2X1_RASTER_PIXELS, // 1 shading pass per 2 raster pixels + NV_PIXEL_X1_PER_1X2_RASTER_PIXELS, // 1 shading pass per 2 raster pixels + NV_PIXEL_X1_PER_2X2_RASTER_PIXELS, // 1 shading pass per 4 raster pixels + NV_PIXEL_X1_PER_4X2_RASTER_PIXELS, // 1 shading pass per 8 raster pixels + NV_PIXEL_X1_PER_2X4_RASTER_PIXELS, // 1 shading pass per 8 raster pixels + NV_PIXEL_X1_PER_4X4_RASTER_PIXELS // 1 shading pass per 16 raster pixels +} NV_PIXEL_SHADING_RATE; +#endif // defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__)) +typedef struct _NV_D3D11_VIEWPORT_SHADING_RATE_DESC_V1 +{ + bool enableVariablePixelShadingRate; //!< (IN) Control of enabling Variable Pixel Shading Rate per viewport + NV_PIXEL_SHADING_RATE shadingRateTable[NV_MAX_PIXEL_SHADING_RATES]; //!< (IN) Lookup table of converting Shading Rate Index to NV_PIXEL_SHADING_RATE + // \note Shading Rate Resource View would be populated by 
application with indices of this table +} NV_D3D11_VIEWPORT_SHADING_RATE_DESC_V1; + +typedef struct _NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_V1 +{ + NvU32 version; //!< (IN) Struct version + NvU32 numViewports; //!< (IN) Number of viewports with shading rate set. + // \note Passing zero will globally disable Variable Pixel Rate Shading for all viewports immaterial of values in pViewports + // \note Max value can be equal to NV_MAX_NUM_VIEWPORTS + NV_D3D11_VIEWPORT_SHADING_RATE_DESC_V1 *pViewports; //!< (IN) Array of NV_D3D11_VIEWPORT_SHADING_RATE_DESC with number of elements equal to NumViewports +} NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_V1; + +typedef NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_V1 NV_D3D11_VIEWPORTS_SHADING_RATE_DESC; +typedef NV_D3D11_VIEWPORT_SHADING_RATE_DESC_V1 NV_D3D11_VIEWPORT_SHADING_RATE_DESC; +#define NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_VER1 MAKE_NVAPI_VERSION(NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_V1, 1) +#define NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_VER NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_RSSetViewportsPixelShadingRates +// +//! DESCRIPTION: Sets Pixel Shading Rates and Enables/Disables per-viewport Variable Pixel Shading Rate feature +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pContext The device context (ID3D11DeviceContext) to be used for setting the Viewports Shading Rates +//! \param [in] pShadingRateDesc Shading rate descriptor +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_RSSetViewportsPixelShadingRates(__in IUnknown *pContext, + __in NV_D3D11_VIEWPORTS_SHADING_RATE_DESC *pShadingRateDesc); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) + +typedef enum _NV_SRRV_DIMENSION +{ + NV_SRRV_DIMENSION_TEXTURE2D = 4, + NV_SRRV_DIMENSION_TEXTURE2DARRAY = 5, +} NV_SRRV_DIMENSION; + +typedef struct _NV_TEX2D_SRRV +{ + UINT MipSlice; +} NV_TEX2D_SRRV; + +typedef struct _NV_TEX2D_ARRAY_SRRV +{ + UINT MipSlice; + UINT FirstArraySlice; + UINT ArraySize; +} NV_TEX2D_ARRAY_SRRV; +#endif // defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__)) +typedef struct _NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_V1 +{ + NvU32 version; //!< (IN) Parameter struct version + DXGI_FORMAT Format; //!< (IN) Format of the resource used as Shading Rate Surface. 
Should be either DXGI_FORMAT_R8_UINT or DXGI_FORMAT_R8_TYPELESS + NV_SRRV_DIMENSION ViewDimension; //!< (IN) This declares whether the Shading Rate Surface is a simple 2D Texture or Array of 2D Textures + union + { + NV_TEX2D_SRRV Texture2D; + NV_TEX2D_ARRAY_SRRV Texture2DArray; + }; +} NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_V1; + +typedef NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_V1 NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC; +#define NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_VER1 MAKE_NVAPI_VERSION(NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_V1, 1) +#define NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_VER NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_VER1 + +DECLARE_INTERFACE_(__declspec(uuid("E14BE7F6-8FF5-4F5E-B63A-AD016EB8FBE5"))ID3D11NvShadingRateResourceView_V1, ID3D11View) +{ + BEGIN_INTERFACE + + // *** IUnknown methods *** + STDMETHOD(QueryInterface)(THIS_ REFIID riid, void **ppv) PURE; + STDMETHOD_(ULONG,AddRef)(THIS) PURE; + STDMETHOD_(ULONG,Release)(THIS) PURE; + + // **** ID3D11View method **/ + // Get Shading Rate Resource used while creating the Shading Rate Resource View + STDMETHOD_(void,GetResource)(THIS_ _Outptr_ ID3D11Resource **ppResource) PURE; + + // ** ID3D11NvShadingRateResourceView methods *** + // The descriptor used while creating the Shading Rate Resource View + STDMETHOD(GetDesc)(THIS_ NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC* pDesc) PURE; + + END_INTERFACE +}; + +typedef ID3D11NvShadingRateResourceView_V1 ID3D11NvShadingRateResourceView; +#define ID3D11NvShadingRateResourceView_VER1 MAKE_NVAPI_VERSION(ID3D11NvShadingRateResourceView_V1, 1) +#define ID3D11NvShadingRateResourceView_VER ID3D11NvShadingRateResourceView_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateShadingRateResourceView +// +//! DESCRIPTION: Creates Shading Rate Resource View by taking ID3D11Resource as an input Shading Rate Surface. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! 
\since Release: 410 +//! +//! \param [in] pDevice The device to be used for creating the Shading Rate Resource View +//! \param [in] pShadingRateResource Shading Rate Resource on which the view is to be created. +//! \note This should be of format DXGI_FORMAT_R8_UINT or DXGI_FORMAT_R8_TYPELESS +//! \note This should be confined to size calculated using render target dimensions, +//! NV_VARIABLE_PIXEL_SHADING_TILE_WIDTH and NV_VARIABLE_PIXEL_SHADING_TILE_HEIGHT +//! \param [in] pShadingRateDesc Shading Rate Resource View descriptor +//! \param [out] ppShadingRateResourceView Address of a pointer to ID3D11NvShadingRateResourceView for returning the newly created Shading Rate Resource View +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_CreateShadingRateResourceView(__in ID3D11Device *pDevice, + __in ID3D11Resource *pShadingRateResource, + __in NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC *pShadingRateResourceViewDesc, + __out ID3D11NvShadingRateResourceView **ppShadingRateResourceView); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_RSSetShadingRateResourceView +// +//! DESCRIPTION: Sets Shading Rate Resource View +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pContext The device context (ID3D11DeviceContext) used for setting the Shading Rate Resource View +//! \param [out] pShadingRateResourceView Shading Rate Resource View to be set +//! \note See NvAPI_D3D11_CreateShadingRateResourceView +//! 
\note Passing this as null will reset Shading Rate Resource View to defaults +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_RSSetShadingRateResourceView(__in IUnknown *pContext, + __in ID3D11NvShadingRateResourceView *pShadingRateResourceView); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) +//! \ingroup dx +//! See NvAPI_D3D11_RSGetPixelShadingRateSampleOrder +//! See NvAPI_D3D11_RSSetPixelShadingRateSampleOrder + +// X, Y = sample position. S = sample number. +// The inner-most dimension is the sample number, followed by X and Y. + +typedef struct _NV_PIXEL_SRSO_1x2 +{ + struct NV_PIXEL_SRSO_1x2_X1 { NvU8 Y[2]; } X1; + struct NV_PIXEL_SRSO_1x2_X2 { NvU8 YS[2][2]; } X2; + struct NV_PIXEL_SRSO_1x2_X4 { NvU8 YS[2][4]; } X4; + struct NV_PIXEL_SRSO_1x2_X8 { NvU8 YS[2][8]; } X8; +} NV_PIXEL_SRSO_1x2; + +typedef struct _NV_PIXEL_SRSO_2x1 +{ + struct NV_PIXEL_SRSO_2x1_X1 { NvU8 X[2]; } X1; + struct NV_PIXEL_SRSO_2x1_X2 { NvU8 XS[2][2]; } X2; + struct NV_PIXEL_SRSO_2x1_X4 { NvU8 XS[2][4]; } X4; +} NV_PIXEL_SRSO_2x1; + +typedef struct _NV_PIXEL_SRSO_2x2 +{ + struct NV_PIXEL_SRSO_2x2_X1 { NvU8 YX[2][2]; } X1; + struct NV_PIXEL_SRSO_2x2_X2 { NvU8 YXS[2][2][2]; } X2; + struct NV_PIXEL_SRSO_2x2_X4 { NvU8 YXS[2][2][4]; } X4; +} NV_PIXEL_SRSO_2x2; + +typedef struct _NV_PIXEL_SRSO_2x4 +{ + struct NV_PIXEL_SRSO_2x4_X1 { NvU8 YX[4][2]; } X1; + struct NV_PIXEL_SRSO_2x4_X2 { NvU8 YXS[4][2][2]; } X2; +} NV_PIXEL_SRSO_2x4; + +typedef struct _NV_PIXEL_SRSO_4x2 +{ + struct NV_PIXEL_SRSO_4x2_X1 { NvU8 YX[2][4]; } X1; +} NV_PIXEL_SRSO_4x2; + +typedef struct _NV_PIXEL_SRSO_4x4 +{ + struct NV_PIXEL_SRSO_4x4_X1 { NvU8 
YX[4][4]; } X1; +} NV_PIXEL_SRSO_4x4; + +typedef struct _NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE_V1 +{ + NvU32 version; + NV_PIXEL_SRSO_1x2 Pixel_1x2; + NV_PIXEL_SRSO_2x1 Pixel_2x1; + NV_PIXEL_SRSO_2x2 Pixel_2x2; + NV_PIXEL_SRSO_2x4 Pixel_2x4; + NV_PIXEL_SRSO_4x2 Pixel_4x2; + NV_PIXEL_SRSO_4x4 Pixel_4x4; +} NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE_V1; + +typedef NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE_V1 NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE; +#define NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE_VER1 MAKE_NVAPI_VERSION(NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE_V1, 1) +#define NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE_VER NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE_VER1 + +#endif // defined(__cplusplus) && (defined(__d3d11_h__) || defined(__d3d12_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_RSGetPixelShadingRateSampleOrder +// +//! DESCRIPTION: Get the Sample Order for Variable Shading Rate +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pContext The device context (ID3D11DeviceContext) used for getting the Shading Rate Sample Order +//! \param [out] pSampleOrderTable A pointer to NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE where the current Sample Order for Variable Pixel Rate Shading that is returned +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_RSGetPixelShadingRateSampleOrder(__in IUnknown *pContext, + __out NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE* pSampleOrderTable); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__)) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_RSSetPixelShadingRateSampleOrder +// +//! DESCRIPTION: Set the Sample Order for Variable Shading Rate +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 410 +//! +//! \param [in] pContext The device context (ID3D11DeviceContext) used for setting the Shading Rate Sample Order +//! \param [out] pSampleOrderTable Sample Order for Variable Shading Rate to be set +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D11_RSSetPixelShadingRateSampleOrder(__in IUnknown *pContext, + __in NV_PIXEL_SHADING_RATE_SAMPLE_ORDER_TABLE* pSampleOrderTable); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! 
+ +#if defined(__cplusplus) && (defined(__d3d11_h__)) +typedef struct _NV_VRS_HELPER_LATCH_GAZE_PARAMS_V1 +{ + NvU32 version; //!< (IN) Struct version + NvU32 flags; //!< (IN) Reserved for future use +} NV_VRS_HELPER_LATCH_GAZE_PARAMS_V1; + +typedef NV_VRS_HELPER_LATCH_GAZE_PARAMS_V1 NV_VRS_HELPER_LATCH_GAZE_PARAMS; +#define NV_VRS_HELPER_LATCH_GAZE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VRS_HELPER_LATCH_GAZE_PARAMS_V1, 1) +#define NV_VRS_HELPER_LATCH_GAZE_PARAMS_VER NV_VRS_HELPER_LATCH_GAZE_PARAMS_VER1 + +typedef enum _NV_VRS_CONTENT_TYPE +{ + NV_VRS_CONTENT_TYPE_INVALID = 0x0, + NV_VRS_CONTENT_TYPE_FOVEATED_RENDERING = 0x1, + NV_VRS_CONTENT_TYPE_MAX = NV_VRS_CONTENT_TYPE_FOVEATED_RENDERING +} NV_VRS_CONTENT_TYPE; + +typedef enum _NV_FOVEATED_RENDERING_SHADING_RATE_PRESET +{ + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_INVALID = 0, + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_HIGHEST_PERFORMANCE = 1, + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_HIGH_PERFORMANCE = 2, + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_BALANCED = 3, + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_HIGH_QUALITY = 4, + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_HIGHEST_QUALITY = 5, + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_CUSTOM = 6, + NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_MAX = NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_CUSTOM +} NV_FOVEATED_RENDERING_SHADING_RATE_PRESET; + +typedef struct _NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_V1 +{ + NvU32 version; + + NV_PIXEL_SHADING_RATE InnerMostRegionShadingRate; //!< (IN) Shading Rate for the inner-most region of the foveated rendering pattern + NV_PIXEL_SHADING_RATE MiddleRegionShadingRate; //!< (IN) Shading Rate for the middle region of the foveated rendering pattern + NV_PIXEL_SHADING_RATE PeripheralRegionShadingRate; //!< (IN) Shading Rate for the peripheral region of the foveated rendering pattern +} NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_V1; + +typedef NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_V1 
NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC; +#define NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_VER1 MAKE_NVAPI_VERSION(NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_V1, 1) +#define NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_VER NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_VER1 + +typedef enum _NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET +{ + NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_INVALID = 0, + NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_WIDE = 1, + NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_BALANCED = 2, + NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_NARROW = 3, + NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_CUSTOM = 4, + NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_MAX = NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_CUSTOM +} NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET; + +typedef struct _NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_V1 +{ + NvU32 version; + + float fInnermostRadii[2]; //!< (IN) Horizontal and vertical radius for the inner-most region of the foveated rendering pattern + float fMiddleRadii[2]; //!< (IN) Horizontal and vertical radius for the middle region of the foveated rendering pattern + float fPeripheralRadii[2]; //!< (IN) Horizontal and vertical radius for the peripheral region of the foveated rendering pattern +} NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_V1; + +typedef NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_V1 NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC; +#define NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_VER1 MAKE_NVAPI_VERSION(NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_V1, 1) +#define NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_VER NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_VER1 + +typedef struct _NV_FOVEATED_RENDERING_DESC_V1 +{ + NvU32 version; //!< (IN) Struct version + NvU32 flags; //!< (IN) Reserved for future use + + 
NV_FOVEATED_RENDERING_SHADING_RATE_PRESET ShadingRatePreset; //!< (IN) Preset of the shading rate + NV_FOVEATED_RENDERING_CUSTOM_SHADING_RATE_PRESET_DESC_V1 ShadingRateCustomPresetDesc; //!< (IN) To be provided only if ShadingRatePreset is NV_FOVEATED_RENDERING_SHADING_RATE_PRESET_CUSTOM + + NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET FoveationPatternPreset; //!< (IN) Preset of the foveation pattern + NV_FOVEATED_RENDERING_CUSTOM_FOVEATION_PATTERN_PRESET_DESC_V1 FoveationPatternCustomPresetDesc; //!< (IN) To be provided only if FoveationPatternPreset is NV_FOVEATED_RENDERING_FOVEATION_PATTERN_PRESET_CUSTOM + + NvU32 GazeDataDeviceId; //!< (IN) ID of the gaze data provider. Needed only for supporting more than one device with eye tracking. + // Should be 0 if gaze data is provided only from a single device. Should be less than (MAX_NUMBER_OF_GAZE_DATA_PROVIDERS - 1) + +} NV_FOVEATED_RENDERING_DESC_V1; + +typedef NV_FOVEATED_RENDERING_DESC_V1 NV_FOVEATED_RENDERING_DESC; +#define NV_FOVEATED_RENDERING_DESC_VER1 MAKE_NVAPI_VERSION(NV_FOVEATED_RENDERING_DESC_V1, 1) +#define NV_FOVEATED_RENDERING_DESC_VER NV_FOVEATED_RENDERING_DESC_VER1 + +typedef enum _NV_VRS_RENDER_MODE +{ + NV_VRS_RENDER_MODE_INVALID = 0, + NV_VRS_RENDER_MODE_MONO = 1, // States mono rendering on the entire render target + NV_VRS_RENDER_MODE_LEFT_EYE = 2, // States Left eye rendering of a stereo pair on the entire render target + NV_VRS_RENDER_MODE_RIGHT_EYE = 3, // States Right eye rendering of a stereo pair on the entire render target + NV_VRS_RENDER_MODE_STEREO = 4, // States side-by-side stereo rendering on the render target + NV_VRS_RENDER_MODE_MAX = NV_VRS_RENDER_MODE_STEREO +} NV_VRS_RENDER_MODE; + +#define MAX_NUMBER_OF_GAZE_DATA_PROVIDERS 8 // Maximum number of gaze data providers / devices. 
+ +typedef struct _NV_VRS_HELPER_ENABLE_PARAMS_V1 +{ + NvU32 version; //!< (IN) Struct version + NvU32 flags; //!< (IN) Reserved for future use + + NV_VRS_RENDER_MODE RenderMode; //!< (IN) This defines whether subsequent render calls are for mono/stereo + NV_VRS_CONTENT_TYPE ContentType; //!< (IN) This defines the type of content with which the VRS pattern will be generated + + NV_FOVEATED_RENDERING_DESC_V1 sFoveatedRenderingDesc; //!< (IN) Provide this if ContentType has NV_VRS_CONTENT_TYPE_FOVEATED_RENDERING flag +} NV_VRS_HELPER_ENABLE_PARAMS_V1; + +typedef NV_VRS_HELPER_ENABLE_PARAMS_V1 NV_VRS_HELPER_ENABLE_PARAMS; +#define NV_VRS_HELPER_ENABLE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VRS_HELPER_ENABLE_PARAMS_V1, 1) +#define NV_VRS_HELPER_ENABLE_PARAMS_VER NV_VRS_HELPER_ENABLE_PARAMS_VER1 + +typedef struct _NV_VRS_HELPER_DISABLE_PARAMS_V1 +{ + NvU32 version; //!< (IN) Struct version + NvU32 reserved; //!< (IN) Reserved for future use +} NV_VRS_HELPER_DISABLE_PARAMS_V1; + +typedef NV_VRS_HELPER_DISABLE_PARAMS_V1 NV_VRS_HELPER_DISABLE_PARAMS; +#define NV_VRS_HELPER_DISABLE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VRS_HELPER_DISABLE_PARAMS_V1, 1) +#define NV_VRS_HELPER_DISABLE_PARAMS_VER NV_VRS_HELPER_DISABLE_PARAMS_VER1 + +typedef struct _NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS_V1 +{ + NvU32 version; //!< (IN) Struct version + IUnknown **ppShadingRateResource; //!< (OUT) Pointer to 2D Texture resource with currently applied shading rate pattern + NV_PIXEL_SHADING_RATE shadingRateTable[NV_MAX_PIXEL_SHADING_RATES]; //!< (OUT) Shading Rate Table filled by the driver +} NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS_V1; + +typedef NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS_V1 NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS; +#define NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS_V1, 1) +#define NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS_VER NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS_VER1 + 
+typedef struct _NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS_V1 +{ + NvU32 version; //!< (IN) Struct version + NvU32 reserved; //!< (IN) Reserved for future use +} NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS_V1; + +typedef NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS_V1 NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS; +#define NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS_V1, 1) +#define NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS_VER NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS_VER1 + +DECLARE_INTERFACE(ID3DNvVRSHelper_V1) +{ + BEGIN_INTERFACE + + STDMETHOD_(ULONG,AddRef)(THIS) PURE; + STDMETHOD_(ULONG,Release)(THIS) PURE; + + // Latches the latest gaze which will be used for subsequent foveated rendering. Recommended to be called once per frame before scene drawing begins. + STDMETHOD_(NvAPI_Status,LatchGaze)(THIS_ IUnknown* pContext, NV_VRS_HELPER_LATCH_GAZE_PARAMS* pLatchGazeParams) PURE; + + // Enables VRS with specified content type and preset. This can be called per draw call. + STDMETHOD_(NvAPI_Status,Enable)(THIS_ IUnknown* pContext, NV_VRS_HELPER_ENABLE_PARAMS* pEnableParams) PURE; + + // Disables VRS till re-enabled. + STDMETHOD_(NvAPI_Status,Disable)(THIS_ IUnknown* pContext, NV_VRS_HELPER_DISABLE_PARAMS* pDisableParams) PURE; + + // Creates a 2D texture, copies the current shading rate pattern on it and returns the pointer to this texture. + // It also returns an array that conveys which value in the shading rate resource corresponds to which exact pixel shading rate. + STDMETHOD_(NvAPI_Status,GetShadingRateResource)(THIS_ IUnknown* pContext, NV_VRS_HELPER_GET_SHADING_RATE_RESOURCE_PARAMS *pGetShadingRateResourceParams) PURE; + + // Destroys all internally created shading rate resources and views. 
+ STDMETHOD_(NvAPI_Status,PurgeInternalShadingRateResources)(THIS_ IUnknown* pContext, NV_VRS_HELPER_PURGE_INTERNAL_RESOURCES_PARAMS* pPurgeParams) PURE; + + END_INTERFACE +}; + +typedef ID3DNvVRSHelper_V1 ID3DNvVRSHelper; +#define ID3DNvVRSHelper_VER1 MAKE_NVAPI_VERSION(ID3DNvVRSHelper_V1, 1) +#define ID3DNvVRSHelper_VER ID3DNvVRSHelper_VER1 + +typedef struct _NV_VRS_HELPER_INIT_PARAMS_V1 +{ + NvU32 version; //!< (IN) Struct version + NvU32 flags; //!< (IN) Reserved for future use + + ID3DNvVRSHelper_V1 **ppVRSHelper; //!< (OUT) Interface for Shading Rate Pattern Tracker +} NV_VRS_HELPER_INIT_PARAMS_V1; + +typedef NV_VRS_HELPER_INIT_PARAMS_V1 NV_VRS_HELPER_INIT_PARAMS; +#define NV_VRS_HELPER_INIT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VRS_HELPER_INIT_PARAMS_V1, 1) +#define NV_VRS_HELPER_INIT_PARAMS_VER NV_VRS_HELPER_INIT_PARAMS_VER1 + +//! SUPPORTED OS: Windows 10 and higher +//! +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_InitializeVRSHelper +// +//! DESCRIPTION: Creates an interface for updating, enabling and disabling internally tracked shading rate pattern for Variable Rate Shading +//! +//! SUPPORTED OS: Windows 7 and higher +//! +//! +//! \since Release: 430 +//! +//! \param [in] pDevice The device to be used for creating the VRS Handler interface +//! \note This should be same the device used for Gaze Handler. See also: NvAPI_D3D_InitializeNvGazeHandler. +//! \param [in] pInitializeVRSHelperParams Descriptor for VRS Helper initialization +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D_InitializeVRSHelper(__in IUnknown *pDevice, + __inout NV_VRS_HELPER_INIT_PARAMS *pInitializeVRSHelperParams); + +typedef enum _NV_GAZE_DATA_VALIDITY_FLAGS +{ + NV_GAZE_ORIGIN_VALID = 0x1, + NV_GAZE_DIRECTION_VALID = 0x2, + NV_GAZE_LOCATION_VALID = 0x4, + NV_GAZE_VELOCITY_VALID = 0x8, + NV_GAZE_PUPIL_DIAMETER_VALID = 0x10, + NV_GAZE_EYE_OPENNESS_VALID = 0x20, + NV_GAZE_EYE_SACCADE_DATA_VALID = 0x40 +} NV_GAZE_DATA_VALIDITY_FLAGS; + +typedef struct _NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE +{ + NvU32 version; //!< (IN) Version of the structure + + NvU32 GazeDataValidityFlags; //!< (IN) To be populated with OR'ing flags from NV_GAZE_DATA_VALIDITY_FLAGS + + float fGazeOrigin_mm[3]; //!< (IN) Use flag NV_GAZE_ORIGIN_VALID. Origin of the eye in millimeters. Used mainly to detect whether Left Eye or Right Eye. + float fGazeDirection[3]; //!< (IN) Use flag NV_GAZE_DIRECTION_VALID. Normalized direction of the gaze of the eye. Used for calculating the gaze location using the FOV. + float fGazeNormalizedLocation[2]; //!< (IN) Use flag NV_GAZE_LOCATION_VALID. Precalculated normalized gaze location in limits (-1 to +1) for X and Y. Center of the screen denotes (0, 0). If this is valid, this will be given higher priority than direction. + float fGazeVelocity[2]; //!< (IN) Use flag NV_GAZE_VELOCITY_VALID. Optional: Velocity of the eye on the normalized space in each direction. Central foveated region would be skewed in the direction of the velocity. + float fPupilDiameter_mm; //!< (IN) Use flag NV_GAZE_PUPIL_DIAMETER_VALID. Unused at the moment. + float fEyeOpenness; //!< (IN) Use flag NV_GAZE_EYE_OPENNESS_VALID. Unused at the moment. + BOOL bInSaccade; //!< (IN) Use flag NV_GAZE_EYE_SACCADE_DATA_VALID. Denotes whether eye is currently in saccade movement or not. 
+} NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_V1; + +typedef NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_V1 NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE; +#define NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_VER1 MAKE_NVAPI_VERSION(NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_V1, 1) +#define NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_VER NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_VER1 + +typedef struct _NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS +{ + NvU32 version; //!< (IN) Struct version + NvU32 flags; //!< (IN) Reserved for future use + + NvU64 Timestamp; //!< (IN) Timestamp at which the gaze data has been captured. Should be larger than timestamp provided at previous update. + union + { + NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_V1 sMonoData; //!< (IN) Gaze data for Mono rendering mode + struct + { + NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_V1 sLeftEye; //!< (IN) Gaze data for Left Eye of Stereo rendering mode + NV_FOVEATED_RENDERING_GAZE_DATA_PER_EYE_V1 sRightEye; //!< (IN) Gaze data for Right Eye of Stereo rendering mode + } sStereoData; + }; +} NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS_V1; + +typedef NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS_V1 NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS; +#define NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS_V1, 1) +#define NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS_VER NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS_VER1 + + +DECLARE_INTERFACE(ID3DNvGazeHandler_V2) +{ + BEGIN_INTERFACE + + STDMETHOD_(ULONG,AddRef)(THIS) PURE; + STDMETHOD_(ULONG,Release)(THIS) PURE; + + // Updates the gaze data for foveated rendering + STDMETHOD_(NvAPI_Status,UpdateGazeData)(THIS_ IUnknown* pContext, NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS* pUpdateGazeDataParams) PURE; + + + END_INTERFACE +}; +typedef ID3DNvGazeHandler_V2 ID3DNvGazeHandler; +#define ID3DNvGazeHandler_VER2 MAKE_NVAPI_VERSION(ID3DNvGazeHandler_V2, 2) +#define ID3DNvGazeHandler_VER ID3DNvGazeHandler_VER2 + 
+DECLARE_INTERFACE(ID3DNvGazeHandler_V1) +{ + BEGIN_INTERFACE + + STDMETHOD_(ULONG,AddRef)(THIS) PURE; + STDMETHOD_(ULONG,Release)(THIS) PURE; + + // Updates the gaze data for foveated rendering + STDMETHOD_(NvAPI_Status,UpdateGazeData)(THIS_ IUnknown* pContext, NV_FOVEATED_RENDERING_UPDATE_GAZE_DATA_PARAMS* pUpdateGazeDataParams) PURE; + + END_INTERFACE +}; +#define ID3DNvGazeHandler_VER1 MAKE_NVAPI_VERSION(ID3DNvGazeHandler_V1, 1) +#ifndef ID3DNvGazeHandler_VER +typedef ID3DNvGazeHandler_V1 ID3DNvGazeHandler; +#define ID3DNvGazeHandler_VER ID3DNvGazeHandler_VER1 +#endif + +typedef enum _NV_GAZE_DATA_TYPE +{ + NV_GAZE_DATA_INVALID = 0, + NV_GAZE_DATA_MONO = 1, + NV_GAZE_DATA_STEREO = 2, + NV_GAZE_DATA_MAX = NV_GAZE_DATA_STEREO +} NV_GAZE_DATA_TYPE; + +typedef struct _NV_GAZE_HANDLER_INIT_PARAMS_V2 +{ + NvU32 version; //!< (IN) Struct version + + NvU32 GazeDataDeviceId; //!< (IN) ID of the gaze data provider. Needed only for supporting more than one device with eye tracking. + // Should be 0 if gaze data is provided only from a single device. Should be less than (MAX_NUMBER_OF_GAZE_DATA_PROVIDERS - 1) + + NV_GAZE_DATA_TYPE GazeDataType; //!< (IN) Describes whether gaze is Mono or Stereo + NvU32 flags; //!< (IN) Reserved for future use + float fHorizontalFOV; //!< (IN) Horizontal Field of View + float fVericalFOV; //!< (IN) Vertical Field of View + + ID3DNvGazeHandler_V2 **ppNvGazeHandler; //!< (OUT) Interface for Gaze Data Handler +} NV_GAZE_HANDLER_INIT_PARAMS_V2; + +typedef NV_GAZE_HANDLER_INIT_PARAMS_V2 NV_GAZE_HANDLER_INIT_PARAMS; +#define NV_GAZE_HANDLER_INIT_PARAMS_VER2 MAKE_NVAPI_VERSION(NV_GAZE_HANDLER_INIT_PARAMS_V2, 2) +#define NV_GAZE_HANDLER_INIT_PARAMS_VER NV_GAZE_HANDLER_INIT_PARAMS_VER2 + +typedef struct _NV_GAZE_HANDLER_INIT_PARAMS_V1 +{ + NvU32 version; //!< (IN) Struct version + + NvU32 GazeDataDeviceId; //!< (IN) ID of the gaze data provider. Needed only for supporting more than one device with eye tracking. 
+ // Should be 0 if gaze data is provided only from a single device. Should be less than (MAX_NUMBER_OF_GAZE_DATA_PROVIDERS - 1) + + NV_GAZE_DATA_TYPE GazeDataType; //!< (IN) Describes whether gaze is Mono or Stereo + NvU32 flags; //!< (IN) Reserved for future use + float fHorizontalFOV; //!< (IN) Horizontal Field of View + float fVericalFOV; //!< (IN) Vertical Field of View + + ID3DNvGazeHandler_V1 **ppNvGazeHandler; //!< (OUT) Interface for Gaze Data Handler +} NV_GAZE_HANDLER_INIT_PARAMS_V1; + +#define NV_GAZE_HANDLER_INIT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_GAZE_HANDLER_INIT_PARAMS_V1, 1) +#ifndef NV_GAZE_HANDLER_INIT_PARAMS_VER +typedef NV_GAZE_HANDLER_INIT_PARAMS_V1 NV_GAZE_HANDLER_INIT_PARAMS; +#define NV_GAZE_HANDLER_INIT_PARAMS_VER NV_GAZE_HANDLER_INIT_PARAMS_VER1 +#endif + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! + +#if defined(__cplusplus) && (defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_InitializeNvGazeHandler +// +//! DESCRIPTION: Creates an interface for updating and managing gaze data +//! +//! SUPPORTED OS: Windows 7 and higher +//! +//! +//! \since Release: 430 +//! +//! \param [in] pDevice The device to be used for creating the Gaze Handler interface +//! \note This should be same as the device used with VRS Handler. See also: NvAPI_D3D_InitializeVRSHelper. +//! \param [in] pInitializeNvGazeHandlerParams Descriptor for Gaze Data Handler initialization +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! (none) +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// + +NVAPI_INTERFACE NvAPI_D3D_InitializeNvGazeHandler(__in IUnknown *pDevice, + __inout NV_GAZE_HANDLER_INIT_PARAMS *pInitializeNvGazeHandlerParams); + +#endif // defined(__cplusplus) && (defined(__d3d11_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined (__cplusplus) && defined(__d3d11_h__) + +//! \ingroup dx + +typedef enum NV_SMP_ASSIST_TYPE +{ + NV_SMP_ASSIST_NONE = 0, + NV_SMP_ASSIST_MRS = 1, // SMP Assist required for Multi-Res Shading + NV_SMP_ASSIST_LMS = 2, // SMP Assist required for Lens Matched Shading + NV_SMP_ASSIST_NUM_TYPES +} NV_SMP_ASSIST_TYPE; + +typedef enum NV_SMP_ASSIST_LEVEL +{ + NV_SMP_ASSIST_LEVEL_FULL = 0, // Full assistance. App selects a pre-baked MRS/LMS config, driver handles correct setting of viewport, scissors and FastGS + NV_SMP_ASSIST_LEVEL_PARTIAL = 1, // Partial assistance. App provides a custom MRS/LMS config, driver handles correct setting of viewport, scissors and FastGS + NV_SMP_ASSIST_LEVEL_MINIMAL = 2, // Minimal assistance. App provides viewports and scissors. App sets FastGS as required. App sets LMS params as required (NvAPI_D3D_SetModifiedWMode). App provides SMPType as NONE. Driver handles correct setting of viewports and scissors. 
+ NV_SMP_ASSIST_NUM_LEVELS +} NV_SMP_ASSIST_LEVEL; + +typedef enum NV_MRS_CONFIG +{ + NV_MRS_CONFIG_BALANCED = 0, + NV_MRS_CONFIG_AGGRESSIVE = 1, + NV_MRS_CONFIG_OCULUSRIFT_CV1_CONSERVATIVE = 2, + NV_MRS_CONFIG_OCULUSRIFT_CV1_BALANCED = 3, + NV_MRS_CONFIG_OCULUSRIFT_CV1_AGGRESSIVE = 4, + NV_MRS_CONFIG_HTC_VIVE_CONSERVATIVE = 5, + NV_MRS_CONFIG_HTC_VIVE_BALANCED = 6, + NV_MRS_CONFIG_HTC_VIVE_AGGRESSIVE = 7, + NV_MRS_NUM_CONFIGS +} NV_MRS_CONFIG; + +typedef enum NV_LMS_CONFIG +{ + NV_LMS_CONFIG_OCULUSRIFT_CV1_CONSERVATIVE = 0, + NV_LMS_CONFIG_OCULUSRIFT_CV1_BALANCED = 1, + NV_LMS_CONFIG_OCULUSRIFT_CV1_AGGRESSIVE = 2, + NV_LMS_CONFIG_HTC_VIVE_CONSERVATIVE = 3, + NV_LMS_CONFIG_HTC_VIVE_BALANCED = 4, + NV_LMS_CONFIG_HTC_VIVE_AGGRESSIVE = 5, + NV_LMS_NUM_CONFIGS +} NV_LMS_CONFIG; + +#define NV_SMP_ASSIST_FLAGS_DEFAULT 0x00000000 + +#define NV_SMP_ASSIST_MAX_VIEWPORTS 16 + +typedef struct _NV_MRS_CUSTOM_CONFIG_V1 +{ + float centerWidth; //!< (IN) Size of the central viewport, ranging (0,1], where 1 is full original viewport size + float centerHeight; + float centerX; //!< (IN) Location of the central viewport, ranging 0..1, where 0.5 is the center of the screen + float centerY; + float densityScaleX[3]; //!< (IN) Pixel density scale factors: how much the linear pixel density is scaled within each row and column (1.0 = full density) + float densityScaleY[3]; +} NV_MRS_CUSTOM_CONFIG_V1; + +typedef NV_MRS_CUSTOM_CONFIG_V1 NV_MRS_CUSTOM_CONFIG; + +typedef struct _NV_MRS_INSTANCED_STEREO_CONFIG_V1 +{ //!< (OUT) MRS Instanced stereo config returned by the SMP Assist GetConstants API + float centerWidth[2]; //!< (OUT) Size of the central viewport, ranging (0,1], where 1 is full original viewport size + float centerHeight; + float centerX[2]; //!< (OUT) Location of the central viewport, ranging 0..1, where 0.5 is the center of the screen + float centerY; + float densityScaleX[5]; //!< (OUT) Pixel density scale factors: how much the linear pixel density is scaled within each row 
and column (1.0 = full density) + float densityScaleY[3]; +} NV_MRS_INSTANCED_STEREO_CONFIG_V1; + +typedef NV_MRS_INSTANCED_STEREO_CONFIG_V1 NV_MRS_INSTANCED_STEREO_CONFIG; + +typedef struct _NV_LMS_CUSTOM_CONFIG_V1 +{ + float warpLeft; //!< (IN) LMS params to control warping of the 2 left quadrants + float warpRight; //!< (IN) LMS params to control warping of the 2 right quadrants + float warpUp; //!< (IN) LMS params to control warping of the 2 upper quadrants + float warpDown; //!< (IN) LMS params to control warping of the 2 lower quadrants + + float relativeSizeLeft; //!< (IN) LMS params to control the width of the 2 left quandrants relative to the bounding box width + float relativeSizeRight; //!< (IN) LMS params to control the width of the 2 right quandrants relative to the bounding box width + float relativeSizeUp; //!< (IN) LMS params to control the height of the 2 upper quandrants relative to the bounding box height + float relativeSizeDown; //!< (IN) LMS params to control the height of the 2 lower quandrants relative to the bounding box height +} NV_LMS_CUSTOM_CONFIG_V1; + +typedef NV_LMS_CUSTOM_CONFIG_V1 NV_LMS_CUSTOM_CONFIG; + +typedef struct _NV_LMS_INSTANCED_STEREO_CONFIG_V1 +{ //!< (OUT) LMS Instanced stereo config returned by the SMP Assist GetConstants API + NV_LMS_CUSTOM_CONFIG_V1 sLeftConfig; //!< (OUT) LMS config for the Left eye view + NV_LMS_CUSTOM_CONFIG_V1 sRightConfig; //!< (OUT) LMS config for the Right eye view +} NV_LMS_INSTANCED_STEREO_CONFIG_V1; + +typedef NV_LMS_INSTANCED_STEREO_CONFIG_V1 NV_LMS_INSTANCED_STEREO_CONFIG; + +typedef enum _NV_SMP_ASSIST_EYE_INDEX +{ + NV_SMP_ASSIST_EYE_INDEX_MONO = 0, // Non-stereo rendering + NV_SMP_ASSIST_EYE_INDEX_LEFT_EYE = 1, // Stereo - Rendering left eye + NV_SMP_ASSIST_EYE_INDEX_RIGHT_EYE = 2, // Stereo - Rendering right eye + NV_SMP_ASSIST_EYE_INDEX_INSTANCED_STEREO = 3, // Stereo - Rendering both eyes +} NV_SMP_ASSIST_EYE_INDEX; + +#define NV_SMP_ASSIST_MINIMAL_LEVEL_NUM_EYE_INDICES 4 + +typedef 
struct _NV_CUSTOM_RECTS_V1 +{ + NvU32 numViewports[NV_SMP_ASSIST_MINIMAL_LEVEL_NUM_EYE_INDICES]; + D3D11_VIEWPORT *pViewports[NV_SMP_ASSIST_MINIMAL_LEVEL_NUM_EYE_INDICES]; //!< (IN) Viewports, for each eye index, that should be set when app calls Enable(eyeIndex) + D3D11_RECT *pScissors[NV_SMP_ASSIST_MINIMAL_LEVEL_NUM_EYE_INDICES]; //!< (IN) Scissors, for each eye index, that should be set when app calls Enable(eyeIndex) +} NV_CUSTOM_RECTS_V1; + +typedef NV_CUSTOM_RECTS_V1 NV_CUSTOM_RECTS; + +typedef struct _NV_SMP_ASSIST_ENABLE_PARAMS_V1 +{ + NvU32 version; //!< (IN) Structure version + NV_SMP_ASSIST_EYE_INDEX eEyeIndex; //!< (IN) Rendering mode for upcoming draw calls (Mono/Stereo-Left/Stereo-Right/Instanced Stereo) +} NV_SMP_ASSIST_ENABLE_PARAMS_V1; + +typedef NV_SMP_ASSIST_ENABLE_PARAMS_V1 NV_SMP_ASSIST_ENABLE_PARAMS; +#define NV_SMP_ASSIST_ENABLE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_SMP_ASSIST_ENABLE_PARAMS_V1, 1) +#define NV_SMP_ASSIST_ENABLE_PARAMS_VER NV_SMP_ASSIST_ENABLE_PARAMS_VER1 + +typedef struct _NV_SMP_ASSIST_DISABLE_PARAMS_V1 +{ + NvU32 version; //!< (IN) Structure version + NvU32 Reserved; //!< (IN) Unused. +} NV_SMP_ASSIST_DISABLE_PARAMS_V1; + +typedef NV_SMP_ASSIST_DISABLE_PARAMS_V1 NV_SMP_ASSIST_DISABLE_PARAMS; +#define NV_SMP_ASSIST_DISABLE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_SMP_ASSIST_DISABLE_PARAMS_V1, 1) +#define NV_SMP_ASSIST_DISABLE_PARAMS_VER NV_SMP_ASSIST_DISABLE_PARAMS_VER1 + +// FastGS constant buffer data returned by the GetConstants API. 
+// Refer VRWorks SDK's multiprojection_dx app (struct FastGSCBData) +typedef struct _NV_SMP_ASSIST_FASTGSCBDATA_V1 +{ + float NDCSplitsX[2]; //!< (OUT) FastGS constant buffer data for Multi-Res Shading/ Lens Matched Shading + float NDCSplitsY[2]; +} NV_SMP_ASSIST_FASTGSCBDATA_V1; + +typedef NV_SMP_ASSIST_FASTGSCBDATA_V1 NV_SMP_ASSIST_FASTGSCBDATA; + +typedef struct _NV_SMP_ASSIST_FASTGSCBDATA_MRS_INSTANCED_STEREO_V1 +{ + float NDCSplitsX[4]; //!< (OUT) FastGS constant buffer data for Multi-Res Shading (Instanced stereo). 2 splits for left eye followed by 2 splits for right eye + float NDCSplitsY[2]; +} NV_SMP_ASSIST_FASTGSCBDATA_MRS_INSTANCED_STEREO_V1; + +typedef NV_SMP_ASSIST_FASTGSCBDATA_MRS_INSTANCED_STEREO_V1 NV_SMP_ASSIST_FASTGSCBDATA_MRS_INSTANCED_STEREO; + +// Constant buffer data to supply the UV-remapping helper functions +// Refer VRWorks SDK's multiprojection_dx app (struct RemapCBData) +typedef struct _NV_SMP_ASSIST_REMAPCBDATA_V1 +{ + //!< (OUT) Constant buffer data to supply the UV-remapping helper functions + float ClipToWindowSplitsX[2]; + float ClipToWindowSplitsY[2]; + float ClipToWindowX[3][2]; // ClipToWindowX[i][0] is Scale and ClipToWindowX[i][1] is Bias + float ClipToWindowY[3][2]; // ClipToWindowY[i][0] is Scale and ClipToWindowY[i][1] is Bias + float ClipToWindowZ[2]; // ClipToWindowZ[0] is Scale and ClipToWindowZ[1] is Bias + + float WindowToClipSplitsX[2]; + float WindowToClipSplitsY[2]; + float WindowToClipX[3][2]; // WindowToClipX[i][0] is Scale and WindowToClipX[i][1] is Bias + float WindowToClipY[3][2]; // WindowToClipY[i][0] is Scale and WindowToClipY[i][1] is Bias + float WindowToClipZ[2]; // WindowToClipZ[0] is Scale and WindowToClipZ[1] is Bias + + float BoundingRectOriginX; + float BoundingRectOriginY; + float BoundingRectSizeWidth; + float BoundingRectSizeHeight; + float BoundingRectSizeInvWidth; + float BoundingRectSizeInvHeight; + + float Padding[2]; +}NV_SMP_ASSIST_REMAPCBDATA_V1; + +typedef NV_SMP_ASSIST_REMAPCBDATA_V1 
NV_SMP_ASSIST_REMAPCBDATA; + +//! SUPPORTED OS: Windows 10 and higher +//! + +typedef struct _NV_SMP_ASSIST_GET_CONSTANTS_V3 +{ + NvU32 version; //!< (IN) Structure version + NV_SMP_ASSIST_EYE_INDEX eEyeIndex; //!< (IN) Viewports/scissors/constant buffer data corresponding to the input eEyeIndex will be returned + NvU32 numViewports; //!< (OUT) Number of valid viewport entries + D3D11_VIEWPORT *pViewports; //!< (OUT) If not NULL, this will contain the viewports computed by driver based on init params + D3D11_RECT *pScissors; //!< (OUT) If not NULL, this will contain the scissors computed by the driver based on init params + + NV_SMP_ASSIST_TYPE eSMPAssistType; //!< (OUT) SMP type provided in Init call + NV_SMP_ASSIST_LEVEL eSMPAssistLevel; //!< (OUT) SMP Assist level provided in Init call + + union + { + NV_MRS_CUSTOM_CONFIG_V1 sMRSConfig; //!< (OUT) If eSMPAssistType is MRS, then MRS config will be populated + NV_LMS_CUSTOM_CONFIG_V1 sLMSConfig; //!< (OUT) If eSMPAssistType is LMS, then LMS config will be populated + }; + + float projectionSizeWidth; //!< (OUT) MRS/LMS projection size + float projectionSizeHeight; //!< (OUT) + + NV_SMP_ASSIST_FASTGSCBDATA_V1 *pFastGSCBData; //!< (OUT) If not NULL, this will contain constant buffer data to supply the FastGS for culling primitives per-viewport + NV_SMP_ASSIST_REMAPCBDATA_V1 *pRemapCBData; //!< (OUT) If not NULL, this will contain constant buffer data to supply the UV-remapping helper functions + + D3D11_VIEWPORT boundingViewport; //!< (OUT) If eSMPType is MRS or LMS then this will be a union of the individual viewports populated in pViewports + D3D11_RECT boundingScissor; //!< (OUT) If eSMPType is MRS or LMS then this will be a union of the individual scissor rects populated in pScissors + + union + { + NV_MRS_INSTANCED_STEREO_CONFIG_V1 sMRS_ISConfig; //!< (OUT) If eSMPAssistType is MRS and eEyeIndex is NV_SMP_ASSIST_EYE_INDEX_INSTANCED_STEREO then MRS Instanced stereo config will be populated + 
NV_LMS_INSTANCED_STEREO_CONFIG_V1 sLMS_ISConfig; //!< (OUT) If eSMPAssistType is LMS and eEyeIndex is NV_SMP_ASSIST_EYE_INDEX_INSTANCED_STEREO then LMS Instanced stereo config will be populated + }; + + NV_SMP_ASSIST_FASTGSCBDATA_MRS_INSTANCED_STEREO_V1 *pFastGSCBDataMRS_IS; //!< (OUT) If non-NULL and eSMPAssistType is MRS and eEyeIndex is NV_SMP_ASSIST_EYE_INDEX_INSTANCED_STEREO then MRS Instanced stereo FastGS constant buffer data will be populated +} NV_SMP_ASSIST_GET_CONSTANTS_V3; + +#define NV_SMP_ASSIST_GET_CONSTANTS_VER3 MAKE_NVAPI_VERSION(NV_SMP_ASSIST_GET_CONSTANTS_V3, 3) +typedef NV_SMP_ASSIST_GET_CONSTANTS_V3 NV_SMP_ASSIST_GET_CONSTANTS; +#define NV_SMP_ASSIST_GET_CONSTANTS_VER NV_SMP_ASSIST_GET_CONSTANTS_VER3 + +//! SUPPORTED OS: Windows 10 and higher +//! + +typedef struct _NV_SMP_ASSIST_SETUP_PARAMS_V1 +{ + NvU32 version; //!< (IN) Structure version + union + { + NV_MRS_CONFIG eMRSConfig; //!< (IN) If eSMPAssistType is MRS and SMP Assist Level is Full then provide MRS config enum + NV_LMS_CONFIG eLMSConfig; //!< (IN) If eSMPAssistType is LMS and SMP Assist Level is Full then provide LMS config enum + NV_MRS_CUSTOM_CONFIG_V1 sMRSCustomConfig; //!< (IN) If eSMPAssistType is MRS and SMP Assist Level is Partial, then provide MRS config + NV_LMS_CUSTOM_CONFIG_V1 sLMSCustomConfig; //!< (IN) If eSMPAssistType is LMS and SMP Assist Level is Partial, then provide LMS config + NV_CUSTOM_RECTS_V1 sCustomRects; //!< (IN) If SMP Assist Level is Minimal, provide custom viewports and scissor rects for each eye index. + }; + + float resolutionScale; //!< (IN) A resolution multiplier in the range [0.1, 3.0] if app wants to render at higher resolution + D3D11_VIEWPORT boundingBox; //!< (IN) Rect on the rendertarget, to place the projection + float vpOffsets[2]; //!< (IN) Default set to 0. If non-zero, MRS/LMS viewports' TopLeftX and TopLeftY will be + //!< offset by vpOffsets[0] and vpOffsets[1] respectively. 
+} NV_SMP_ASSIST_SETUP_PARAMS_V1; + +typedef NV_SMP_ASSIST_SETUP_PARAMS_V1 NV_SMP_ASSIST_SETUP_PARAMS; +#define NV_SMP_ASSIST_SETUP_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_SMP_ASSIST_SETUP_PARAMS_V1, 1) +#define NV_SMP_ASSIST_SETUP_PARAMS_VER NV_SMP_ASSIST_SETUP_PARAMS_VER1 + +typedef struct _NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS_V1 +{ + NvU32 version; //!< (IN) Structure version + NV_SMP_ASSIST_TYPE eSMPAssistType; // Patch instanced stereo shaders (created with packed eye index) with this SMPAssistType (NV_SMP_ASSIST_LMS only) + float leftCoeffs[4]; // Left eye: outpos.x = dotproduct(outputpos, leftCoeffs) + leftConst + float leftConst; + float rightCoeffs[4]; // Right eye: outpos.x = dotproduct(outputpos, rightCoeffs) + rightConst + float rightConst; +} NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS_V1; + +typedef NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS_V1 NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS; +#define NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS_V1, 1) +#define NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS_VER NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS_VER1 + +//! SUPPORTED OS: Windows 10 and higher +//! + +DECLARE_INTERFACE(ID3DNvSMPAssist_V1) +{ +////////////////////////////// VER1 methods ////////////////////////////////////////// + // Disable SMP Assist for further Draw calls + STDMETHOD_(NvAPI_Status,Disable)(THIS_ __in IUnknown *pDevContext, __in const NV_SMP_ASSIST_DISABLE_PARAMS *psSMPAssistDisableParams) PURE; + + // Enable SMP Assist for further Draw calls. 
App has to provide the type of rendering done in upcoming Draw calls - Mono/Left eye/Right eye/Instanced Stereo + STDMETHOD_(NvAPI_Status,Enable)(THIS_ __in IUnknown *pDevContext, __in const NV_SMP_ASSIST_ENABLE_PARAMS *psSMPAssistEnableParams) PURE; + + // Get the constants used by the drivers + STDMETHOD_(NvAPI_Status,GetConstants)(THIS_ __inout NV_SMP_ASSIST_GET_CONSTANTS *psSMPAssistGetConstants) PURE; + + // Setup the projections (rects, constant buffer data etc.) + STDMETHOD_(NvAPI_Status,SetupProjections)(THIS_ __in IUnknown *pDevice, __in const NV_SMP_ASSIST_SETUP_PARAMS *psSMPAssistSetupParams) PURE; + + // Update instanced stereo specific data + STDMETHOD_(NvAPI_Status,UpdateInstancedStereoData)(THIS_ __in IUnknown *pDevice, __in const NV_SMP_ASSIST_UPDATE_INSTANCEDSTEREO_DATA_PARAMS *psSMPAssistInstancedStereoParams) PURE; +////////////////////////////// end of VER1 methods ////////////////////////////////////////// +}; + +typedef ID3DNvSMPAssist_V1 ID3DNvSMPAssist; +#define ID3DNVSMPASSIST_VER1 MAKE_NVAPI_VERSION(ID3DNvSMPAssist_V1, 1) +#define ID3DNVSMPASSIST_VER ID3DNVSMPASSIST_VER1 + +//! SUPPORTED OS: Windows 10 and higher +//! + +typedef struct _NV_SMP_ASSIST_INITIALIZE_PARAMS_V1 +{ + NvU32 version; //!< (IN) Structure version + NV_SMP_ASSIST_TYPE eSMPAssistType; //!< (IN) None/MRS/LMS + NV_SMP_ASSIST_LEVEL eSMPAssistLevel; //!< (IN) Full/Partial/Minimal + NvU32 flags; //!< (IN) Flags, if any + + ID3DNvSMPAssist **ppD3DNvSMPAssist; //!< (OUT) Interface pointer returned by the Init call. Use for future Enable/Disable etc. 
calls + +} NV_SMP_ASSIST_INITIALIZE_PARAMS_V1; + +#define NV_SMP_ASSIST_INITIALIZE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_SMP_ASSIST_INITIALIZE_PARAMS_V1, 1) +#ifndef NV_SMP_ASSIST_INITIALIZE_PARAMS_VER +typedef NV_SMP_ASSIST_INITIALIZE_PARAMS_V1 NV_SMP_ASSIST_INITIALIZE_PARAMS; +#define NV_SMP_ASSIST_INITIALIZE_PARAMS_VER NV_SMP_ASSIST_INITIALIZE_PARAMS_VER1 +#endif + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_InitializeSMPAssist +// +//! DESCRIPTION: Initialize SMP Assist extension. Take the SMPAssist(MRS/LMS) params from the application. +//! Provide the application with a interface pointer for future use. +//! +//! \since Release: 396 +//! \param [in] pDevice Pointer to IUnknown (Currently supports ID3D11Device) +//! \param [inout] pSMPAssistInitParams SMP Assist initialization params +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_OK Call succeeded. +//! \retval ::NVAPI_ERROR Call failed. +//! \retval ::NVAPI_INVALID_ARGUMENT One of the required input arguments was NULL +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_InitializeSMPAssist(__in IUnknown *pDevice, __inout NV_SMP_ASSIST_INITIALIZE_PARAMS *pSMPAssistInitParams); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined (__cplusplus) && defined(__d3d11_h__) +//! 
\ingroup dx +typedef struct _NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS_V1 +{ + NvU32 version; //!< (IN) Structure version + NV_SMP_ASSIST_TYPE eSMPAssistType; //!< (IN) None/MRS/LMS + NV_SMP_ASSIST_LEVEL eSMPAssistLevel; //!< (IN) Full/Partial/Minimal + NvBool bSMPAssistSupported; //!< (OUT) SMP Assist supported or not +} NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS_V1; + +typedef NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS_V1 NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS; +#define NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS_V1, 1) +#define NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS_VER NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS_VER1 + +//! SUPPORTED OS: Windows 10 and higher +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_QuerySMPAssistSupport +// +//! DESCRIPTION: Query SMP assist extension support. +//! +//! \since Release: 396 +//! \param [in] pDev Pointer to IUnknown (Currently supports ID3D11Device) +//! \param [out] pQuerySMPAssistSupportParams Pointer to a structure returning requested SMP assist support +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! \retval ::NVAPI_NOT_SUPPORTED Requested SMP assist is not supported. +//! \retval ::NVAPI_OK Call succeeded. Check value of pQuerySMPAssistSupportParams->bSMPAssistSupported +//! \retval ::NVAPI_INVALID_POINTER pDev or pQuerySMPAssistSupportParams was a NULL pointer +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_QuerySMPAssistSupport(__in IUnknown *pDev, __inout NV_QUERY_SMP_ASSIST_SUPPORT_PARAMS *pQuerySMPAssistSupportParams); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + + + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used to get sleep status +//! 
\ingroup dx +typedef struct _NV_GET_SLEEP_STATUS_PARAMS +{ + NvU32 version; //!< (IN) Structure version + NvBool bLowLatencyMode; //!< (OUT) Is low latency mode enabled? + NvBool bFsVrr; //!< (OUT) Is fullscreen VRR enabled? + NvBool bCplVsyncOn; //!< (OUT) Is Control Panel overriding VSYNC ON? + NvU32 sleepIntervalUs; //!< (OUT) Latest sleep interval in microseconds. + NvBool bUseGameSleep; //!< (OUT) Is NvAPI_D3D_Sleep() being called? + NvU8 rsvd[121]; //!< (IN) Reserved. Must be set to 0s. +} NV_GET_SLEEP_STATUS_PARAMS_V1; + +typedef NV_GET_SLEEP_STATUS_PARAMS_V1 NV_GET_SLEEP_STATUS_PARAMS; +#define NV_GET_SLEEP_STATUS_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_GET_SLEEP_STATUS_PARAMS_V1, 1) +#define NV_GET_SLEEP_STATUS_PARAMS_VER NV_GET_SLEEP_STATUS_PARAMS_VER1 + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_GetSleepStatus +// +//! DESCRIPTION: This function can be used to get the latest sleep status: +//! bLowLatencyMode indicates whether low latency mode is currently +//! enabled in the driver. +//! Note that it may not always reflect the previously requested sleep mode, +//! as the feature may not be available on the platform, or the setting has +//! been overridden by the control panel, for example. +//! bFsVrr indicates fullscreen GSYNC or GSYNC Compatible mode. It is valid +//! only when the application is in the foreground. +//! bCplVsyncOn indicates Control Panel VSYNC ON override. +//! +//! \since Release: 455 +//! \param [in] pDev The target device. +//! \param [in] pGetSleepStatusParams Sleep status params. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_GetSleepStatus(__in IUnknown *pDev, __in NV_GET_SLEEP_STATUS_PARAMS *pGetSleepStatusParams); +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used to set sleep mode +//! \ingroup dx +typedef struct _NV_SET_SLEEP_MODE_PARAMS +{ + NvU32 version; //!< (IN) Structure version + NvBool bLowLatencyMode; //!< (IN) Low latency mode enable/disable. + NvBool bLowLatencyBoost; //!< (IN) Request maximum GPU clock frequency regardless of workload. + NvU32 minimumIntervalUs; //!< (IN) Minimum frame interval in microseconds. 0 = no frame rate limit. + NvBool bUseMarkersToOptimize; //!< (IN) Allow latency markers to be used for runtime optimizations. + NvU8 rsvd[31]; //!< (IN) Reserved. Must be set to 0s. +} NV_SET_SLEEP_MODE_PARAMS_V1; + +typedef NV_SET_SLEEP_MODE_PARAMS_V1 NV_SET_SLEEP_MODE_PARAMS; +#define NV_SET_SLEEP_MODE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_SET_SLEEP_MODE_PARAMS_V1, 1) +#define NV_SET_SLEEP_MODE_PARAMS_VER NV_SET_SLEEP_MODE_PARAMS_VER1 + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetSleepMode +// +//! DESCRIPTION: This function can be used to update sleep mode dynamically. +//! The settings are not dependent to each other, meaning low latency mode +//! can be enabled/disabled regardless of whether minimum interval is set or +//! not. The former is to intelligently lower latency without impacting frame +//! rate. The later is to limit frame rate (e.g. minimumIntervalUs = 10000 +//! limits frame rate to 100 FPS). They work well separately and/or together. +//! 
Note that minimumIntervalUs usage is not limited to lowering latency, so +//! feel free to use it to limit frame rate for menu, cut scenes, etc. +//! Note that low latency mode can be enabled, and/or minimum interval can +//! be set, even without using NvAPI_D3D_Sleep(). However, without it, the +//! sleep to achieve these features would happen at a less optimal point, +//! resulting in higher overall latency. +//! The bLowLatencyBoost will request the GPU run at max clocks even in +//! scenarios where it is idle most of the frame and would normally try +//! to save power. This can decrease latency in CPU-limited scenarios. +//! While this function can be called as often as needed, it is not +//! necessary nor recommended to call this too frequently (e.g. every frame), +//! as the settings persist for the target device. +//! +//! \since Release: 455 +//! \param [in] pDev The target device. +//! \param [in] pSetSleepModeParams Sleep mode params. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_SetSleepMode(__in IUnknown *pDev, __in NV_SET_SLEEP_MODE_PARAMS *pSetSleepModeParams); +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_Sleep +// +//! DESCRIPTION: It is recommended to call this function at the very start of +//! each frame (e.g. before input sampling). If there is a need to sleep, +//! 
due to frame rate limit and/or low latency features, for example, +//! this call provides an entry point for the driver to sleep at the most +//! optimal spot to achieve the lowest latency. +//! It is recommended to call this function even when low latency mode is +//! disabled and minimum interval is 0. Other features, such as Maximum Frame +//! Rate setting, could be enabled in the control panel to benefit from this. +//! It is OK to start (or stop) using this function at any time. However, +//! when using this function, it must be called exactly once on each frame. +//! If this function is not called, after several frames, the driver would +//! fallback to sleep at its less optimal spot. +//! +//! \since Release: 455 +//! \param [in] pDev The target device. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_Sleep(__in IUnknown *pDev); +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used to send Reflex Sync data to UMD +//! \ingroup dx +typedef struct _NV_SET_REFLEX_SYNC_PARAMS +{ + NvU32 version; //!< (IN) Structure version + NvU32 bEnable:1; //!< (IN) Enable Reflex Sync + NvU32 bDisable:1; //!< (IN) Disable Reflex Sync + NvU32 flagsRsvd:30; //!< (IN) Reserved flag bits. Must be set to 0s. + NvU32 vblankIntervalUs; //!< (IN) Interval between VBLANKs in microseconds. (0 means N/A) + NvS32 timeInQueueUs; //!< (IN) Amount of time in the completed frame queue. Can be negative. (0 means N/A) + NvU32 timeInQueueUsTarget; //!< (IN) Target amount of time in the completed frame queue. 
(0 means N/A) + NvU8 rsvd[28]; //!< (IN) Reserved. Must be set to 0s. +} NV_SET_REFLEX_SYNC_PARAMS_V1; + +typedef NV_SET_REFLEX_SYNC_PARAMS_V1 NV_SET_REFLEX_SYNC_PARAMS; +#define NV_SET_REFLEX_SYNC_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_SET_REFLEX_SYNC_PARAMS_V1, 1) +#define NV_SET_REFLEX_SYNC_PARAMS_VER NV_SET_REFLEX_SYNC_PARAMS_VER1 + +#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetReflexSync +// +//! DESCRIPTION: This function can be used to enable/disable Reflex Sync, +//! and to pass in essential data for the Reflex Sync operation. +//! +//! \since Release: 530 +//! \param [in] pDev The target device. +//! \param [in] pSetReflexSyncParams Reflex Sync params. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_SetReflexSync(__in IUnknown *pDev, __in NV_SET_REFLEX_SYNC_PARAMS *pSetReflexSyncParams); +#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used to get latency report. +//! 
\ingroup dx
+
+typedef struct _NV_LATENCY_RESULT_PARAMS
+{
+    NvU32 version; //!< (IN) Structure version
+    struct FrameReport {
+        NvU64 frameID;
+        NvU64 inputSampleTime;
+        NvU64 simStartTime;
+        NvU64 simEndTime;
+        NvU64 renderSubmitStartTime;
+        NvU64 renderSubmitEndTime;
+        NvU64 presentStartTime;
+        NvU64 presentEndTime;
+        NvU64 driverStartTime;
+        NvU64 driverEndTime;
+        NvU64 osRenderQueueStartTime;
+        NvU64 osRenderQueueEndTime;
+        NvU64 gpuRenderStartTime;
+        NvU64 gpuRenderEndTime;
+        NvU32 gpuActiveRenderTimeUs; //!< (OUT) Difference between gpuRenderStartTime and gpuRenderEndTime, excluding the idles in between, in microseconds.
+        NvU32 gpuFrameTimeUs; //!< (OUT) Difference between previous and current frame's gpuRenderEndTime, in microseconds.
+        NvU8 rsvd[120];
+    } frameReport[64];
+    NvU8 rsvd[32];
+} NV_LATENCY_RESULT_PARAMS_V1;
+
+typedef NV_LATENCY_RESULT_PARAMS_V1 NV_LATENCY_RESULT_PARAMS;
+#define NV_LATENCY_RESULT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_LATENCY_RESULT_PARAMS_V1, 1)
+#define NV_LATENCY_RESULT_PARAMS_VER NV_LATENCY_RESULT_PARAMS_VER1
+
+#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__))
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D_GetLatency
+//
+//! DESCRIPTION: Get a latency report including the timestamps of the
+//! application latency markers set with NvAPI_D3D_SetLatencyMarker as well
+//! as driver, OS queue and graphics hardware times. Requires calling
+//! NvAPI_D3D_SetLatencyMarker with incrementing frameID for valid results.
+//! Rendering for at least 90 frames is recommended to properly fill out the
+//! structure. The newest completed frame is at the end (element 63) and
+//! is preceded by older frames. If not enough frames are valid then all
+//! frames are returned with all zeroes.
+//!
+//! \since Release: 455
+//! \param [in] pDev The target device
+//! 
\param [in] pGetLatencyParams The latency result structure.
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D_GetLatency(__in IUnknown *pDev, __out NV_LATENCY_RESULT_PARAMS *pGetLatencyParams);
+#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__))
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//! Used to define latency marker type
+//! \ingroup dx
+typedef enum
+{
+    SIMULATION_START = 0,
+    SIMULATION_END = 1,
+    RENDERSUBMIT_START = 2,
+    RENDERSUBMIT_END = 3,
+    PRESENT_START = 4,
+    PRESENT_END = 5,
+    INPUT_SAMPLE = 6,
+    TRIGGER_FLASH = 7,
+    PC_LATENCY_PING = 8,
+    OUT_OF_BAND_RENDERSUBMIT_START = 9,
+    OUT_OF_BAND_RENDERSUBMIT_END = 10,
+    OUT_OF_BAND_PRESENT_START = 11,
+    OUT_OF_BAND_PRESENT_END = 12,
+} NV_LATENCY_MARKER_TYPE;
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//! Used to set latency markers
+//! \ingroup dx
+typedef struct _NV_LATENCY_MARKER_PARAMS
+{
+    NvU32 version; //!< (IN) Structure version
+    NvU64 frameID;
+    NV_LATENCY_MARKER_TYPE markerType;
+    NvU64 rsvd0;
+    NvU8 rsvd[56];
+} NV_LATENCY_MARKER_PARAMS_V1;
+
+typedef NV_LATENCY_MARKER_PARAMS_V1 NV_LATENCY_MARKER_PARAMS;
+#define NV_LATENCY_MARKER_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_LATENCY_MARKER_PARAMS_V1, 1)
+#define NV_LATENCY_MARKER_PARAMS_VER NV_LATENCY_MARKER_PARAMS_VER1
+
+#if defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__))
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D_SetLatencyMarker
+//
+//! 
DESCRIPTION: Set a latency marker to be tracked by the
+//! NvAPI_D3D_GetLatency function. SIMULATION_START must be the first marker
+//! sent in a frame, after the previous frame's Sleep call (if used).
+//! INPUT_SAMPLE may be sent to record the moment user input was sampled and
+//! should come between SIMULATION_START and SIMULATION_END.
+//! RENDERSUBMIT_START should come before any Direct3D calls are made for
+//! the given frame and RENDERSUBMIT_END should come before calling Present.
+//! PRESENT_START and END should wrap the Present call to inform the driver
+//! of a present block done by the OS before the driver receives the Present.
+//! TRIGGER_FLASH tells the driver to render its flash indicator for latency
+//! testing, typically driven by a mouse click.
+//! The frameID can start at an arbitrary moment in the application lifetime
+//! but must strictly increment from that point forward for consistent results.
+//!
+//! \since Release: 455
+//! \param [in] pDev Pointer to the target device
+//! \param [in] pSetLatencyMarkerParams Pointer to the latency marker structure
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D_SetLatencyMarker(__in IUnknown *pDev, __in NV_LATENCY_MARKER_PARAMS* pSetLatencyMarkerParams);
+#endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__))
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//! Used in NvAPI_D3D12_SetAsyncFrameMarker and NvAPI_D3D11_SetAsyncFrameMarker
+//! 
\ingroup dx +typedef struct _NV_ASYNC_FRAME_MARKER_PARAMS_V1 +{ + NvU32 version; //!< (IN) Structure version + NvU64 frameID; + NV_LATENCY_MARKER_TYPE markerType; + NvU64 presentFrameID; + NvBool vendorInternal; + NvU8 rsvd[55]; +} NV_ASYNC_FRAME_MARKER_PARAMS_V1; + +typedef NV_ASYNC_FRAME_MARKER_PARAMS_V1 NV_ASYNC_FRAME_MARKER_PARAMS; +#define NV_ASYNC_FRAME_MARKER_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_ASYNC_FRAME_MARKER_PARAMS_V1, 1) +#define NV_ASYNC_FRAME_MARKER_PARAMS_VER NV_ASYNC_FRAME_MARKER_PARAMS_VER1 + +#if defined(__cplusplus) && (defined(__d3d12_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetAsyncFrameMarker +// +//! DESCRIPTION: Set an async frame marker for present and out-of-band render tracking. +//! +//! \since Release: 520 +//! \param [in] pCommandQueue Pointer to the D3D12CommandQueue +//! \param [in] pSetAsyncFrameMarkerParams Pointer to the async frame marker structure +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_SetAsyncFrameMarker(__in ID3D12CommandQueue *pCommandQueue, __in NV_ASYNC_FRAME_MARKER_PARAMS* pSetAsyncFrameMarkerParams); +#endif //defined(__cplusplus) && (defined(__d3d12_h__)) + +#if defined(__cplusplus) && (defined(__d3d11_h__)) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_SetAsyncFrameMarker +// +//! DESCRIPTION: Set an async frame marker for present and out-of-band render tracking. +//! +//! \since Release: 565 +//! \param [in] pDevice Pointer to the target device +//! \param [in] pSetAsyncFrameMarkerParams Pointer to the async frame marker structure +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_SetAsyncFrameMarker(__in IUnknown *pDevice, __in NV_ASYNC_FRAME_MARKER_PARAMS* pSetAsyncFrameMarkerParams); +#endif //defined(__cplusplus) && (defined(__d3d11_h__)) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used in NvAPI_D3D12_NotifyOutOfBandCommandQueue +//! \ingroup dx +typedef enum +{ + OUT_OF_BAND_RENDER = 0, + OUT_OF_BAND_PRESENT = 1, + OUT_OF_BAND_IGNORE = 2, + OUT_OF_BAND_RENDER_PRESENT = 3, +} NV_OUT_OF_BAND_CQ_TYPE; + +#if defined(__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_NotifyOutOfBandCommandQueue +// +//! DESCRIPTION: Notifies the driver that this command queue runs out of band +//! from the application's frame cadence. +//! +//! \since Release: 520 +//! \param [in] pCommandQueue The D3D12CommandQueue +//! \param [in] cqType The type of out of band command queue +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_NotifyOutOfBandCommandQueue(__in ID3D12CommandQueue *pCommandQueue, __in NV_OUT_OF_BAND_CQ_TYPE cqType); +#endif //defined(__cplusplus) && defined(__d3d12_h__)) + + +#if defined(__cplusplus) && defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetCreateCommandQueueLowLatencyHint +// +//! DESCRIPTION: Reserved call. +//! +//! \since Release: 530 +//! \param [in] pDevice The creating device +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_SetCreateCommandQueueLowLatencyHint(__in ID3D12Device *pDevice); +#endif //defined(__cplusplus) && defined(__d3d12_h__)) + +#if defined (__cplusplus) && defined(__d3d12_h__) + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +typedef struct NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS +{ + size_t structSizeIn; + size_t structSizeOut; + + ID3D12Device* pDevice; + const void* pCubin; + NvU32 size; + NvU32 blockX; + NvU32 blockY; + NvU32 blockZ; + NvU32 dynSharedMemBytes; + const char* pShaderName; + NvU32 flags; + NVDX_ObjectHandle hShader; +} NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS; + +NVAPI_INTERFACE NvAPI_D3D12_CreateCubinComputeShaderExV2(__inout NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS* pParams); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! 
+NVAPI_INTERFACE NvAPI_D3D12_CreateCubinComputeShader(__in ID3D12Device* pDevice, + __in const void* pCubin, + __in NvU32 size, + __in NvU32 blockX, + __in NvU32 blockY, + __in NvU32 blockZ, + __out NVDX_ObjectHandle* phShader); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_CreateCubinComputeShaderEx(__in ID3D12Device* pDevice, + __in const void* pCubin, + __in NvU32 size, + __in NvU32 blockX, + __in NvU32 blockY, + __in NvU32 blockZ, + __in NvU32 dynSharedMemBytes, + __in const char* pShaderName, + __out NVDX_ObjectHandle* phShader); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_CreateCubinComputeShaderWithName(__in ID3D12Device* pDevice, + __in const void* pCubin, + __in NvU32 size, + __in NvU32 blockX, + __in NvU32 blockY, + __in NvU32 blockZ, + __in const char* pShaderName, + __out NVDX_ObjectHandle* phShader); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_LaunchCubinShader(__in ID3D12GraphicsCommandList* pCommandList, + __in NVDX_ObjectHandle hShader, + __in NvU32 gridX, + __in NvU32 gridY, + __in NvU32 gridZ, + __in const void* pParams, + __in NvU32 paramSize); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_DestroyCubinComputeShader(__in ID3D12Device* pDevice, + __in NVDX_ObjectHandle hShader); + +#endif //defined(__cplusplus) && defined(__d3d12_h__) + +#if defined(__d3d12_h__) +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! 
+ +typedef struct NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS +{ + size_t structSizeIn; + size_t structSizeOut; + + ID3D12Device* pDevice; + D3D12_CPU_DESCRIPTOR_HANDLE texDesc; + D3D12_CPU_DESCRIPTOR_HANDLE smpDesc; + NvU64 textureHandle; +} NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS; + +NVAPI_INTERFACE NvAPI_D3D12_GetCudaMergedTextureSamplerObject(__inout NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* pParams); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +typedef enum _NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE +{ + NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SURFACE = 0, + NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TEXTURE = 1, + NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SAMPLER = 2, +} NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE; + +typedef struct NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS +{ + size_t structSizeIn; + size_t structSizeOut; + + ID3D12Device* pDevice; + NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE type; + D3D12_CPU_DESCRIPTOR_HANDLE desc; + NvU64 handle; +} NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS; + +NVAPI_INTERFACE NvAPI_D3D12_GetCudaIndependentDescriptorObject(__inout NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* pParams); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_GetCudaTextureObject(__in ID3D12Device* pDevice, + __in D3D12_CPU_DESCRIPTOR_HANDLE texDesc, + __in D3D12_CPU_DESCRIPTOR_HANDLE smpDesc, + __out NvU32* pTextureHandle); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_GetCudaSurfaceObject(__in ID3D12Device* pDevice, + __in D3D12_CPU_DESCRIPTOR_HANDLE uavDesc, + __out NvU32* pSurfaceHandle); + +// Experimental API for internal use. DO NOT USE! +//! 
SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_IsFatbinPTXSupported(__in ID3D12Device *pDevice, + __out bool *pSupported); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_CreateCuModule(__in ID3D12Device* pDevice, + __in const void* pBlob, + __in NvU32 size, + __out NVDX_ObjectHandle* phModule); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_EnumFunctionsInModule(__in ID3D12Device* pDevice, + __in NVDX_ObjectHandle hModule, + __inout NvU32* pArraySize, + __out const char** const pFunctionNames); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_CreateCuFunction(__in ID3D12Device* pDevice, + __in NVDX_ObjectHandle hModule, + __in const char* pName, + __out NVDX_ObjectHandle* phFunction); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! + +typedef struct _NVAPI_DIM3 +{ + NvU32 x; + NvU32 y; + NvU32 z; +} NVAPI_DIM3; + +typedef struct _NVAPI_CU_KERNEL_LAUNCH_PARAMS +{ + NVDX_ObjectHandle hFunction; + NVAPI_DIM3 gridDim; + NVAPI_DIM3 blockDim; + NvU32 dynSharedMemBytes; + void const * pParams; + NvU32 paramSize; +} NVAPI_CU_KERNEL_LAUNCH_PARAMS; + +NVAPI_INTERFACE NvAPI_D3D12_LaunchCuKernelChain(__in ID3D12GraphicsCommandList* pCommandList, + __in const NVAPI_CU_KERNEL_LAUNCH_PARAMS* pKernels, + __in NvU32 numKernels); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! 
+ +typedef struct _NVAPI_CU_KERNEL_LAUNCH_PARAMS_EX +{ + NVDX_ObjectHandle hFunction; + NVAPI_DIM3 gridDim; + NVAPI_DIM3 blockDim; + NvU32 dynSharedMemBytes; + + // either pParams/paramsSize is used or kernelParams is used + void const * pParams; + NvU32 paramSize; + void **kernelParams; +} NVAPI_CU_KERNEL_LAUNCH_PARAMS_EX; + +NVAPI_INTERFACE NvAPI_D3D12_LaunchCuKernelChainEx(__in ID3D12GraphicsCommandList* pCommandList, + __in const NVAPI_CU_KERNEL_LAUNCH_PARAMS_EX* pKernels, + __in NvU32 numKernels); + + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_DestroyCuModule(__in ID3D12Device* pDevice, + __in NVDX_ObjectHandle hModule); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D12_DestroyCuFunction(__in ID3D12Device* pDevice, + __in NVDX_ObjectHandle hFunction); +#endif //if defined (__cplusplus) && defined(__d3d12_h__) + + +// Experimental API for internal use. DO NOT USE! +#if defined (__cplusplus) && defined(__d3d11_h__) +//! SUPPORTED OS: Windows 10 and higher +//! +typedef struct NVAPI_D3D11_CREATE_CUBIN_SHADER_PARAMS +{ + size_t structSizeIn; + size_t structSizeOut; + + ID3D11Device* pDevice; + const void* pCubin; + NvU32 size; + NvU32 blockX; + NvU32 blockY; + NvU32 blockZ; + NvU32 dynSharedMemBytes; + const char* pShaderName; + NvU32 flags; + NVDX_ObjectHandle hShader; +} NVAPI_D3D11_CREATE_CUBIN_SHADER_PARAMS; + +NVAPI_INTERFACE NvAPI_D3D11_CreateCubinComputeShaderExV2(__inout NVAPI_D3D11_CREATE_CUBIN_SHADER_PARAMS* pParams); + +#endif //if defined (__cplusplus) && defined(__d3d11_h__) + +#if defined (__cplusplus) && defined(__d3d11_h__) + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! 
+NVAPI_INTERFACE NvAPI_D3D11_CreateCubinComputeShader(__in ID3D11Device* pDevice, + __in const void* pCubin, + __in NvU32 size, + __in NvU32 blockX, + __in NvU32 blockY, + __in NvU32 blockZ, + __out NVDX_ObjectHandle* phShader); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_CreateCubinComputeShaderEx(__in ID3D11Device* pDevice, + __in const void* pCubin, + __in NvU32 size, + __in NvU32 blockX, + __in NvU32 blockY, + __in NvU32 blockZ, + __in NvU32 dynSharedMemBytes, + __in const char* pShaderName, + __out NVDX_ObjectHandle* phShader); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_CreateCubinComputeShaderWithName(__in ID3D11Device* pDevice, + __in const void* pCubin, + __in NvU32 size, + __in NvU32 blockX, + __in NvU32 blockY, + __in NvU32 blockZ, + __in const char* pShaderName, + __out NVDX_ObjectHandle* phShader); + + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_LaunchCubinShader(__in ID3D11DeviceContext *pDeviceContext, + __in NVDX_ObjectHandle hShader, + __in NvU32 gridX, + __in NvU32 gridY, + __in NvU32 gridZ, + __in const void* pParams, + __in NvU32 paramSize, + __in const NVDX_ObjectHandle* pReadResources, + __in NvU32 numReadResources, + __in const NVDX_ObjectHandle* pWriteResources, + __in NvU32 numWriteResources); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_DestroyCubinComputeShader(__in ID3D11Device* pDevice, + __in NVDX_ObjectHandle hShader); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_IsFatbinPTXSupported(__in ID3D11Device *pDevice, + __out bool *pSupported); + +// Experimental API for internal use. DO NOT USE! +//! 
SUPPORTED OS: Windows 10 and higher +//! +typedef enum _NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_TYPE +{ + NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_SURFACE = 0, + NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_TEXTURE = 1, + NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_SAMPLER = 2, + +} NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_TYPE; + +typedef struct _NVAPI_D3D11_GET_CUDA_VIEW_OBJECT +{ + union + { + struct + { + const D3D11_UNORDERED_ACCESS_VIEW_DESC* pUavDesc; + ID3D11UnorderedAccessView* pUAV; + } uav; + struct + { + const D3D11_SHADER_RESOURCE_VIEW_DESC* pSrvDesc; + ID3D11ShaderResourceView* pSRV; + } srv; + struct + { + const D3D11_SAMPLER_DESC* pSamplerDesc; + ID3D11SamplerState* pSampler; + } sampler; + }; +} NVAPI_D3D11_GET_CUDA_VIEW_OBJECT; + +typedef struct NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_PARAMS +{ + size_t structSizeIn; + size_t structSizeOut; + + ID3D11Device* pDevice; + ID3D11Resource* pResource; + NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_TYPE type; + NVAPI_D3D11_GET_CUDA_VIEW_OBJECT desc; + NvU64 handle; +} NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_PARAMS; + +NVAPI_INTERFACE NvAPI_D3D11_GetCudaIndependentViewObject(__inout NVAPI_D3D11_GET_CUDA_INDEPENDENT_VIEW_OBJECT_PARAMS* pParams); + + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! + +typedef struct NVAPI_D3D11_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS +{ + size_t structSizeIn; + size_t structSizeOut; + + ID3D11Device* pDevice; + ID3D11Resource* pResource; + NVAPI_D3D11_GET_CUDA_VIEW_OBJECT texDesc; + NVAPI_D3D11_GET_CUDA_VIEW_OBJECT samplerDesc; + NvU64 handle; +} NVAPI_D3D11_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS; + +NVAPI_INTERFACE NvAPI_D3D11_GetCudaMergedTextureSamplerObject(__inout NVAPI_D3D11_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* pParams); + + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! 
+NVAPI_INTERFACE NvAPI_D3D11_CreateUnorderedAccessView(__in ID3D11Device* pDevice, + __in ID3D11Resource* pResource, + __in const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, + __out ID3D11UnorderedAccessView** ppUAV, + __out NvU32* pDriverHandle); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_CreateShaderResourceView(__in ID3D11Device* pDevice, + __in ID3D11Resource* pResource, + __in const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, + __out ID3D11ShaderResourceView** ppSRV, + __out NvU32* pDriverHandle); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_CreateSamplerState(__in ID3D11Device* pDevice, + __in const D3D11_SAMPLER_DESC* pSamplerDesc, + __out ID3D11SamplerState** ppSamplerState, + __out NvU32* pDriverHandle); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_GetCudaTextureObject(__in ID3D11Device* pDevice, + __in NvU32 srvDriverHandle, + __in NvU32 samplerDriverHandle, + __out NvU32* pCudaTextureHandle); + +// Experimental API for internal use. DO NOT USE! +//! SUPPORTED OS: Windows 10 and higher +//! +NVAPI_INTERFACE NvAPI_D3D11_GetResourceGPUVirtualAddress(__in ID3D11Device* pDevice, + __in const NVDX_ObjectHandle hResource, + __out NvU64* pGpuVA); +#endif //defined(__cplusplus) && defined(__d3d11_h__) + + +#if defined(__cplusplus) && defined(__d3d12_h__) +//! Flags specifying raytracing thread reordering hardware support. +//! Additional flags will be added as support becomes available. +//! +//! 
\ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS +{ + NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_NONE = 0x0, //!< Thread reordering acts as a no-op + NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_STANDARD = NV_BIT(0) //!< Standard thread reordering is supported +} NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS; + +//! Flags specifying raytracing Opacity Micromap support. +//! Additional flags will be added as support becomes available. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE = 0x0, //!< Opacity Micromap support is not available. + //!< The application must not attempt to use any OMM entrypoints or flags. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD = NV_BIT(0) //!< Standard Opacity Micromap support is available +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS; + +//! Flags specifying raytracing Displacement Micromap support. +//! Additional flags will be added as support becomes available. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_CAPS +{ + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_CAP_NONE = 0x0, //!< Displacement Micromap support is not available. + //!< The application must not attempt to use any DMM entrypoints or flags. + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_CAP_STANDARD = NV_BIT(0) //!< Standard Displacement Micromap support is available +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_CAPS; + +//! Flags specifying raytracing Cluster Operation support. +//! Additional flags will be added as support becomes available. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_CLUSTER_OPERATIONS_CAPS +{ + NVAPI_D3D12_RAYTRACING_CLUSTER_OPERATIONS_CAP_NONE = 0x0, //!< Cluster Operations support is not available. + //!< The application must not attempt to use any Cluster Operations entrypoints or flags. 
+ NVAPI_D3D12_RAYTRACING_CLUSTER_OPERATIONS_CAP_STANDARD = NV_BIT(0) //!< Standard Cluster Operations support is available +} NVAPI_D3D12_RAYTRACING_CLUSTER_OPERATIONS_CAPS; + +//! Flags specifying raytracing Partitioned TLAS support. +//! Additional flags will be added as support becomes available. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_CAPS +{ + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_CAP_NONE = 0x0, //!< Partitioned TLAS support is not available. + //!< The application must not attempt to use any Partitioned TLAS entrypoints or flags. + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_CAP_STANDARD = NV_BIT(0) //!< Standard Partitioned TLAS support is available +} NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_CAPS; + +//! Flags specifying raytracing sphere support. +//! Additional flags will be added as support becomes available. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_SPHERES_CAP +{ + NVAPI_D3D12_RAYTRACING_SPHERES_CAP_NONE = 0x0, //!< Sphere primitive support is not available. + //!< The application must not attempt to use any sphere geometry. + NVAPI_D3D12_RAYTRACING_SPHERES_CAP_STANDARD = NV_BIT(0) //!< Standard sphere primitive support is available +} NVAPI_D3D12_RAYTRACING_SPHERES_CAPS; + +//! Flags specifying raytracing linear swept sphere (LSS) support. +//! Additional flags will be added as support becomes available. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAP +{ + NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAP_NONE = 0x0, //!< Linear swept sphere primitive support is not available. + //!< The application must not attempt to use any linear swept sphere geometry. + NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAP_STANDARD = NV_BIT(0) //!< Standard linear swept sphere primitive support is available +} NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAPS; + +//! List of Raytracing CAPS types that can be queried. +//! +//! 
\ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_CAPS_TYPE +{ + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_THREAD_REORDERING = 0, + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP = 1, + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_DISPLACEMENT_MICROMAP = 2, + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_CLUSTER_OPERATIONS = 3, + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_PARTITIONED_TLAS = 4, + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_SPHERES = 5, + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_LINEAR_SWEPT_SPHERES = 6, + NVAPI_D3D12_RAYTRACING_CAPS_TYPE_INVALID = -1 +} NVAPI_D3D12_RAYTRACING_CAPS_TYPE; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetRaytracingCaps +// +//! DESCRIPTION: Query raytracing capabilities of a device. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pDevice Pointer to the device on which raytracing caps should be queried from. +//! \param [in] type Raytracing caps type requested. (ex: NVAPI_D3D12_RAYTRACING_CAPS_TYPE_THREAD_REORDERING) +//! \param [out] pData Pointer to memory that receives caps. (ex: NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS*) +//! \param [in] dataSize Size in bytes to return to pData. Must match the size of the caps data requested. (ex: sizeof(NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS)) +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as an argument +//! \retval ::NVAPI_INVALID_ARGUMENT At least one of the arguments are invalid +//! \retval ::NVAPI_ERROR Error occurred +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingCaps( + __in ID3D12Device* pDevice, + __in NVAPI_D3D12_RAYTRACING_CAPS_TYPE type, + __out void* pData, + __in size_t dataSize); +#endif // defined(__cplusplus) && defined(__d3d12_h__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Flags specifying validation behaviour for raytracing operations. +//! \ingroup dx +//! See NvAPI_D3D12_EnableRaytracingValidation +typedef enum _NVAPI_D3D12_RAYTRACING_VALIDATION_FLAGS +{ + NVAPI_D3D12_RAYTRACING_VALIDATION_FLAG_NONE = 0x0, //!< No validation flags. +} NVAPI_D3D12_RAYTRACING_VALIDATION_FLAGS; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_EnableRaytracingValidation +// +//! DESCRIPTION: Enable raytracing validation for a device. +//! This function must be called before any other raytracing-related function +//! is invoked on the device. Raytracing validation can only be enabled when +//! the NV_ALLOW_RAYTRACING_VALIDATION envvar is set to 1. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 545 +//! +//! \param [in] pDevice Pointer to the device on which raytracing validation should be enabled. +//! \param [in] flags Raytracing validation flags. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as device argument +//! \retval ::NVAPI_INVALID_ARGUMENT An unsupported flag was specified +//! \retval ::NVAPI_INVALID_CALL The call was made too late (other raytracing-related calls have already been made) +//! 
\retval ::NVAPI_ACCESS_DENIED Validation is not allowed by envvar +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_EnableRaytracingValidation( + __in ID3D12Device5* pDevice, + __in NVAPI_D3D12_RAYTRACING_VALIDATION_FLAGS flags); + + + +//! Severity classification of validation messages. +//! \ingroup dx +//! See NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_CALLBACK +typedef enum _NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY +{ + NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY_ERROR = 0x0, //!< Error message (indicates likely bug) + NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY_WARNING = 0x1 //!< Warning message (indicates inadvisable usage or possible bug) +} NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY; + +//! Callback for raytracing validation messages. +//! \param [in] pUserData User data pointer as provided to callback registration. +//! \param [in] severity Severity of message. +//! \param [in] messageCode Type of reported validation message. +//! \param [in] message Human-readable description of what the message code means. +//! \param [in] messageDetails Additional human-readable context for validation message. May contain newlines. +//! \ingroup dx +//! See NvAPI_D3D12_RegisterRaytracingValidationMessageCallback +typedef void(__stdcall *NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_CALLBACK)(void* pUserData, NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY severity, const char* messageCode, const char* message, const char* messageDetails); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_RegisterRaytracingValidationMessageCallback +// +//! DESCRIPTION: Register a message callback for raytracing validation messages. +//! The provided callback may be invoked by the driver using any thread at any time until the callback is unregistered. +//! 
It is invalid to register/unregister callbacks from within the callback. +//! It is invalid to create or destroy objects for the device or record commands onto command lists from within the callback. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 545 +//! +//! \param [in] pDevice Pointer to the device from which to obtain raytracing validation messages. +//! \param [in] pfnMessageCallback Callback used to report validation messages. +//! \param [in] pUserData [optional] User data to pass as argument to message callback. +//! \param [out] pHandle Handle that may be used to unregister the callback. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as an argument +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_RegisterRaytracingValidationMessageCallback( + __in ID3D12Device5* pDevice, + __in NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_CALLBACK pfnMessageCallback, + __in_opt void* pUserData, + __out void** pHandle); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_UnregisterRaytracingValidationMessageCallback +// +//! DESCRIPTION: Unregister a previously registered message callback for raytracing validation messages. +//! The provided callback will not be invoked once the unregister call has returned. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 545 +//! +//! \param [in] pDevice Pointer to the device from which to stop obtaining raytracing validation messages. +//! \param [in] handle Handle to which callback should be unregistered, obtained at registration. +//! +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as device argument +//! \retval ::NVAPI_INVALID_ARGUMENT Callback handle not recognized +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_UnregisterRaytracingValidationMessageCallback( + __in ID3D12Device5* pDevice, + __in void* handle); + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_FlushRaytracingValidationMessages +// +//! DESCRIPTION: Flush any validation messages that have not yet been reported. +//! This guarantees that any validation messages for work which is known to be complete on the GPU +//! at the time of the call are reported to registered callbacks. +//! This operation is lightweight if the flushed device does not have raytracing validation enabled. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 545 +//! +//! \param [in] pDevice Pointer to the device on which raytracing validation messages should be flushed. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as device argument +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_FlushRaytracingValidationMessages( + __in ID3D12Device5* pDevice); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! 
SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(__d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)) + +// Types used by both device and command list functions. + +//! Flags specifying building instructions and hints when constructing a DMM Array. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAGS +{ + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAG_NONE = 0x0, //!< No options specified for the DMM Array build. + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE = NV_BIT(0), //!< Allow the DMM Array build to take a little longer in order to optimize for traversal performance. + //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD. + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD = NV_BIT(1) //!< Spend as little time as possible on the DMM Array build with some potential loss to traversal performance. + //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE. +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAGS; + +//! Specifies the input Displacement Micromap formats. +//! The DC1 (Displacement Compression 1) format follows the space-filling curve in barycentric space over the uniformly tessellated micro-triangles. +//! +//! \note This is a 16-bit value when used in #NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_DESC +//! +//! 
\ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_FORMAT +{ + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_FORMAT_DC1_64_TRIS_64_BYTES = 0x1, //!< 64 micro-triangles packed into 64 bytes + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_FORMAT_DC1_256_TRIS_128_BYTES = 0x2, //!< 256 micro-triangles packed into 128 bytes + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_FORMAT_DC1_1024_TRIS_128_BYTES = 0x3, //!< 1024 micro-triangles packed into 128 bytes + +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_FORMAT; + +//! Number of DMMs of a specific configuration in a DMM Array or BLAS build. +//! Used to compute conservative buffer size estimates for DMM Array and BLAS builds. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_USAGE_COUNT +{ + NvU32 count; //!< For DMM Array builds: total number of DMMs in the DMM Array with the particular \p subdivisionLevel and \p format specified in this descriptor. + //!< For BLAS builds: total number of DMMs with the \p subdivisionLevel and \p format combination that is referenced from the BLAS. + NvU32 subdivisionLevel; //!< Number of subdivisions for the DMM; valid inputs are [0, 5] (#NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_DC1_MAX_SUBDIVISION_LEVEL). + //!< The total number of micro-triangles is 4subdivisionLevel. + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_FORMAT format; //!< Displacement Micromap format. +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_USAGE_COUNT; + +//! Describes one Displacement Micromap. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_DESC +{ + NvU32 byteOffset; //!< Byte offset from the \c inputBuffer, specified in the input structure #NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_INPUTS, to where the input DMM data is located. + NvU16 subdivisionLevel; //!< Number of subdivisions for the DMM; valid inputs are [0, 5] (#NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_DC1_MAX_SUBDIVISION_LEVEL). 
+ //!< The total number of micro-triangles is 4subdivisionLevel. + NvU16 format; //!< Format of the DMM of type #NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_FORMAT. +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_DESC; + +//! Input structure to DMM Array construction. +//! Individual DMMs are accessed via indices when used in bottom-level acceleration structure (BLAS) construction. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_INPUTS +{ + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BUILD_FLAGS flags; //!< Flags which apply to all DMMs in the array. + NvU32 numDMMUsageCounts; //!< Number of DMM usage count entries in the \p pDMMUsageCounts array. + const NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_USAGE_COUNT* pDMMUsageCounts; //!< Usage counts for each subdivision level and format combination across all the DMM entries in the build. + D3D12_GPU_VIRTUAL_ADDRESS inputBuffer; //!< Address for raw DMM input data; it must be 256-byte aligned (#NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BYTE_ALIGNMENT) + //!< It is recommended to try to organize DMMs together in memory that are expected to be used close together spatially. + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE perDMMDescs; //!< GPU array with one #NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_DESC entry per DMM. +} NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_INPUTS; + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! Conservative memory requirements for building a DMM Array. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO +{ + NvU64 resultDataMaxSizeInBytes; //!< Size required to hold the result of a DMM Array build based on the specified inputs. 
+ NvU64 scratchDataSizeInBytes; //!< Scratch storage on GPU required during DMM Array build based on the specified inputs. +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO; + +//! Parameters given to NvAPI_D3D12_GetRaytracingDisplacementMicromapArrayPrebuildInfo(). +//! +//! \ingroup dx +typedef struct _NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_INPUTS* pDesc; //!< [in] Description of the DMM Array build. + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO* pInfo; //!< [out] Result of the query. +} NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1; +#define NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1, 1) +typedef NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1 NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS; +#define NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetRaytracingDisplacementMicromapArrayPrebuildInfo +// +//! DESCRIPTION: Query conservative memory requirements for building a DMM (Displacement Micromap) Array. +//! The returned size is conservative for DMM Array builds containing +//! a lower or equal number of entries for each resolution and format combination. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \deprecated Do not use this function - it is deprecated in release 570. +//! +//! \since Release: 525 +//! +//! 
\param [in] pDevice Device on which the DMM Array will be built. +//! \param [in,out] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 570.") +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingDisplacementMicromapArrayPrebuildInfo( + __in ID3D12Device5* pDevice, + __inout NVAPI_GET_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(__d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)) + +// Types used by both device and command list functions. + +//! Flags specifying building instructions and hints when constructing an OMM Array. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_NONE = 0x0, //!< No options specified for the OMM Array build. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE = NV_BIT(0), //!< Allow the OMM Array build to take a little longer in order to optimize for traversal performance. + //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD = NV_BIT(1) //!< Spend as little time as possible on the OMM Array build with some potential loss to traversal performance. 
+ //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS; + +//! Specifies the input Opacity Micromap formats. +//! The OC1 (Opacity Compression 1) format follows the space-filling curve in barycentric space over the uniformly tessellated micro-triangles. +//! +//! \note This is a 16-bit value when used in #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_2_STATE = 0x1, //!< 2-state (Transparent/Opaque) format. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_4_STATE = 0x2 //!< 4-state (Transparent/Opaque, Known/Unknown) format. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT; + +//! Number of OMMs of a specific configuration in an OMM Array. +//! Used to compute conservative buffer size estimates for OMM Array builds. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT +{ + NvU32 count; //!< Total number of OMMs in the OMM Array with the particular \p subdivisionLevel and \p format specified in this descriptor. + NvU32 subdivisionLevel; //!< Number of subdivisions for the OMM; valid inputs are [0, 12] (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL). + //!< The total number of micro-triangles is 4subdivisionLevel. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT format; //!< Opacity Micromap format. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT; + +//! Describes one Opacity Micromap. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC +{ + NvU32 byteOffset; //!< Byte offset from the \c inputBuffer, specified in the input structure #NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS, to where the input OMM data is located. 
+ NvU16 subdivisionLevel; //!< Number of subdivisions for the OMM; valid inputs are [0, 12] (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL). + //!< The total number of micro-triangles is 4subdivisionLevel. + NvU16 format; //!< Format of the OMM of type #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC; + +//! Input structure to OMM Array construction. +//! Individual OMMs are accessed via indices when used in bottom-level acceleration structure (BLAS) construction. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS flags; //!< Flags which apply to all OMMs in the array. + NvU32 numOMMUsageCounts; //!< Number of OMM usage count entries in the \p pOMMUsageCounts array. + const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT* pOMMUsageCounts; //!< Usage counts for each subdivision level and format combination across all the OMM entries in the build. + D3D12_GPU_VIRTUAL_ADDRESS inputBuffer; //!< Address for raw OMM input data; it must be 256-byte aligned. + //!< It is recommended to try to organize OMMs together in memory that are expected to be used close together spatially. + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE perOMMDescs; //!< GPU array with one #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC entry per OMM. +} NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS; + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! Conservative memory requirements for building an OMM Array. +//! +//! 
\ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO +{ + NvU64 resultDataMaxSizeInBytes; //!< Size required to hold the result of an OMM Array build based on the specified inputs. + NvU64 scratchDataSizeInBytes; //!< Scratch storage on GPU required during OMM Array build based on the specified inputs. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO; + +//! Parameters given to NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(). +//! +//! \ingroup dx +typedef struct _NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS* pDesc; //!< [in] Description of the OMM Array build. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO* pInfo; //!< [out] Result of the query. +} NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1; +#define NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1, 1) +typedef NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1 NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS; +#define NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo +// +//! DESCRIPTION: Query conservative memory requirements for building an OMM (Opacity Micromap) Array. +//! The returned size is conservative for OMM Array builds containing +//! a lower or equal number of entries for each resolution and format combination. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! 
\since Release: 520 +//! +//! \param [in] pDevice Device on which the OMM Array will be built. +//! \param [in,out] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo( + __in ID3D12Device5* pDevice, + __inout NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! Pipeline creation state flags. +//! Support should only be enabled for the specific features that are present, since they may incur a small penalty on traversal performance overall. +//! If the pipeline is not created with the correct primitive support, and that primitive type is encountered during traversal, behavior is undefined. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS +{ + NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_NONE = 0, //!< [in] No pipeline flags. + NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT = NV_BIT(0), //!< [in] Change whether raytracing pipelines are created with support for Opacity Micromaps. + NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_DMM_SUPPORT = NV_BIT(1), //!< [in] Change whether raytracing pipelines are created with support for Displacement Micromaps. + NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_CLUSTER_SUPPORT = NV_BIT(2), //!< [in] Change whether raytracing pipelines are created with support for Clustered BLAS. 
+ NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_SPHERE_SUPPORT = NV_BIT(3), //!< [in] Change whether raytracing pipelines are created with support for Spheres. + NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_LSS_SUPPORT = NV_BIT(4), //!< [in] Change whether raytracing pipelines are created with support for Linear Swept Spheres (LSS). +} NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS; + +//! State used when creating new pipelines. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER. + NvU32 flags; //!< [in] A bitwise OR of one or more #NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS flags for raytracing pipeline creation. +} NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1; +#define NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1, 1) +typedef NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1 NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS; +#define NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetCreatePipelineStateOptions +// +//! DESCRIPTION: Globally change the state affecting pipeline creations. +//! This affects all pipelines created after this call, and until this function is called again. +//! +//! \note Only supported on GPUs capable of DXR. +//! Some of the flags and fields have further restrictions, in which case their description will include a note with more details. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pDevice Device on which the pipelines will be created. +//! \param [in] pState State to be applied to all future pipeline creations. + +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_SetCreatePipelineStateOptions( + __in ID3D12Device5* pDevice, + __in const NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS* pState); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! Type of serialized data. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX +{ + // D3D12_SERIALIZED_DATA_TYPE flags + NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_ACCELERATION_STRUCTURE_EX = 0x0, //!< Serialized data contains a raytracing acceleration structure. + //!< Starting from offset 0, the first bytes of the serialized acceleration structure can be reinterpreted as \c D3D12_SERIALIZED_RAYTRACING_ACCELERATION_STRUCTURE_HEADER. + //!< That structure contains the identifier to be passed along to NvAPI_D3D12_CheckDriverMatchingIdentifierEx(). + + // NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX specific flags + NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_OPACITY_MICROMAP_ARRAY_EX = 0x1, //!< Data blob contains an OMM Array. + //!< Starting from offset 0, the first bytes of the OMM Array can be reinterpreted as \c D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER. + NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_EX = 0x2, //!< Data blob contains a DMM Array. + //!< Starting from offset 0, the first bytes of the DMM Array can be reinterpreted as \c D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER. + +} NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX; + +//! Parameters given to NvAPI_D3D12_CheckDriverMatchingIdentifierEx(). +//! +//! 
\ingroup dx +typedef struct _NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER. + NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX serializedDataType; //!< [in] Type of data to be deserialized; see #NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX. + const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER* pIdentifierToCheck; //!< [in] Identifier from the header of the serialized data to check with the driver; see \c D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER. + //!< Information about how to retrieve that identifier can be found in the description of each #NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX enum. + D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS checkStatus; //!< [out] Result of the check; see \c D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS. +} NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1; +#define NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1, 1) +typedef NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1 NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS; +#define NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_CheckDriverMatchingIdentifierEx +// +//! DESCRIPTION: This function is an extension of ID3D12Device5::CheckDriverMatchingIdentifier() with additional serialized data types. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pDevice Device on which the data will be deserialized. +//! \param [in,out] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_CheckDriverMatchingIdentifierEx( + __in ID3D12Device5* pDevice, + __inout NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! This enum extends \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS with modified and additional values. +//! Only modified/new values are fully described; for more information on the other values, please check Microsoft's DirectX Raytracing Specification. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX +{ + // D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS flags + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE_EX = 0x0, //!< No options specified for the acceleration structure build. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE_EX = NV_BIT(0), //!< Allow the acceleration structure to later be updated (via the flag #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE_EX), rather than always requiring a full rebuild. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_COMPACTION_EX = NV_BIT(1), //!< Allow for the acceleration structure to later be compacted. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE_EX = NV_BIT(2), //!< Favorize higher raytracing performance at the cost of longer build times. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD_EX = NV_BIT(3), //!< Favorize faster build times at the cost of lower raytracing performance. 
+ NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_MINIMIZE_MEMORY_EX = NV_BIT(4), //!< Minimize the memory footprint of the produced acceleration structure, potentially at the cost of longer build time or lower raytracing performance. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE_EX = NV_BIT(5), //!< Instead of rebuilding the acceleration structure from scratch, the existing acceleration structure will be updated. + //!< Added behaviour: If #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX is specified, OMM references may be changed along with positions when an update is performed. + + // NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX specific flags + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX = NV_BIT(6), //!< The acceleration structure (AS) supports updating OMM contents (base OMM Array and/or indices). + //!< Specifying this flag may result in larger AS size and may reduce traversal performance. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX = NV_BIT(7), //!< Only applicable for BLAS builds. If enabled, any instances referencing this BLAS are allowed to disable the OMM test through the #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX flag. + //!< Specifying this build flag may result in some reductions in traversal performance. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX = NV_BIT(8), //!< The acceleration structure (AS) supports updating OMM data (encoded opacity values). + //!< Specifying this flag may reduce traversal performance. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DATA_ACCESS_EX = NV_BIT(9), //!< Allows triangle and micro-triangle data to be accessed through the BLAS via shader intrinsics. +} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX; + +//! 
This enum extends \c D3D12_RAYTRACING_GEOMETRY_TYPE with additional values. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX +{ + // D3D12_RAYTRACING_GEOMETRY_TYPE flags + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX = 0x0, //!< This geometry is made of basic triangles. + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX = 0x1, //!< This geometry is made of axis-aligned bounding boxes (AABBs). + + // NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX specific flags + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX = 0x2, //!< Shares most fields with the basic triangle geometry type, but allows an OMM Array to be attached to the geometry. + //!< The basic triangle type and this OMM-enabled type geometries may be mixed in the same BLAS build. + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_DMM_TRIANGLES_EX = 0x3, //!< Triangle geometry with attached DMM data. + //!< This geometry cannot be mixed with other geometry types in the same BLAS. + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_SPHERES_EX = 0x4, //!< This geometry contains sphere primitives. + //!< Cannot be mixed with other geometry types in the same BLAS. + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_LSS_EX = 0x5, //!< This geometry contains linear swept sphere primitives. + //!< Cannot be mixed with other geometry types in the same BLAS. + +} NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX; + +//! If a triangle has a uniform OMM state in a BLAS build, it is preferable to signal this explicitly rather than attaching a single state OMM. +//! This can be accomplished by supplying these special indices as entries in \c opacityMicromapIndexBuffer, in #NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_TRANSPARENT = -1, //!< Uniform transparent OMM state. 
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_OPAQUE = -2, //!< Uniform opaque OMM state. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_UNKNOWN_TRANSPARENT = -3, //!< Uniform unknown-transparent OMM state. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_UNKNOWN_OPAQUE = -4, //!< Uniform unknown-opaque OMM state. + + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_CLUSTER_SKIP_OMM = -5, //!< Don't apply any OMM for triangle. Reverts to using the geometry supplied opaque/non-opaque state. This special index is only available for the opacity micromap index buffer supplied to + //!< NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_TEMPLATE_ARGS and NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_CLUSTER_ARGS. + //!< This state does _not_ require the AS to be built with NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAG_ALLOW_DISABLE_OMMS, as that only applies to the instance flag. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX; + +//! Geometry descriptor attachment with Opacity Micromaps. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE opacityMicromapIndexBuffer; //!< Optional buffer specifying which OMM index to use for each triangle; if \c NULL, there is a 1:1 mapping between input triangles and OMM Array entries. + //!< Special values can be used to encode OMMs with uniform state for individual triangles (see #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX). + //!< For BLAS updates, this input buffer must match that of the original build if the #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX build flag is not set. + DXGI_FORMAT opacityMicromapIndexFormat; //!< Format of \c opacityMicromapIndexBuffer, either \c DXGI_FORMAT_R32_UINT or \c DXGI_FORMAT_R16_UINT. 
+ NvU32 opacityMicromapBaseLocation; //!< Constant added to all non-negative OMM indices in \p opacityMicromapIndexBuffer. + D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray; //!< Pointer to an OMM Array used by this geometry; it may be set to \c NULL if no non-uniform OMMs are used. + //!< Unlike vertex, index, and transform buffers, this resource is dereferenced during raytracing. + + NvU32 numOMMUsageCounts; //!< Number of OMM usage count entries in the \p pOMMUsageCounts array. + const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT* pOMMUsageCounts; //!< Usage counts for each subdivision level and format combination across all the OMM entries referred-to by the OMM index buffer specified by this geometry. + +} NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC; + +//! The edge vA..vB is decimated: after subdivision the number of micro-triangles on that edge is halved. +//! (i.e. the neighboring primitive can have a lower subdivision level without introducing cracks) +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_PRIMITIVE_FLAGS +{ + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_PRIMITIVE_FLAG_DECIMATE_01 = NV_BIT(0), + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_PRIMITIVE_FLAG_DECIMATE_12 = NV_BIT(1), + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_PRIMITIVE_FLAG_DECIMATE_20 = NV_BIT(2), + +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_PRIMITIVE_FLAGS; + +//! Geometry descriptor attachment with Displacement Micromaps. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_DMM_ATTACHMENT_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE triangleMicromapIndexBuffer; //!< Optional buffer specifying which DMM index to use for each triangle; if \c NULL, there is a 1:1 mapping between input triangles and DMM Array entries. + //!< For BLAS updates, this input buffer must match that of the original build. 
+ DXGI_FORMAT triangleMicromapIndexFormat; //!< Format of \c displacementMicromapIndexBuffer, either \c DXGI_FORMAT_R32_UINT or \c DXGI_FORMAT_R16_UINT. + NvU32 triangleMicromapBaseLocation; //!< Constant added to all DMM indices in \p displacementMicromapIndexBuffer. + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE trianglePrimitiveFlagsBuffer; //!< Optional, per-triangle UINT8 mode flags (#NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_PRIMITIVE_FLAGS) + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE vertexBiasAndScaleBuffer; //!< Optional displacement base vertex bias and displacement vector scale buffer. If not supplied, bias defaults to 0 and scale to 1. + DXGI_FORMAT vertexBiasAndScaleFormat; //!< Format of \c displacementBiasAndScaleBuffer. Supported formats are \c DXGI_FORMAT_R16G16_FLOAT and \c DXGI_FORMAT_R32G32_FLOAT + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE vertexDisplacementVectorBuffer; //!< Per-vertex displacement vector buffer. This buffer is indexed using the index buffer from the base triangle geometry. + DXGI_FORMAT vertexDisplacementVectorFormat; //!< Format of \c displacementVectorBuffer. Supported formats are \c DXGI_FORMAT_R32G32B32_FLOAT, \c DXGI_FORMAT_R32G32B32A32_FLOAT, and \c DXGI_FORMAT_R16G16B16A16_FLOAT (The Alpha channel is ignored, and stride can be set accordingly). + + D3D12_GPU_VIRTUAL_ADDRESS displacementMicromapArray; //!< Pointer to a DMM Array used by this geometry. + //!< Unlike vertex, index, and transform buffers, this resource is dereferenced during raytracing. + + NvU32 numDMMUsageCounts; //!< Number of DMM usage count entries in the \p pDMMUsageCounts array. + const NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_USAGE_COUNT* pDMMUsageCounts; //!< Usage counts for each subdivision level and format combination across all the DMM entries referred-to by the DMM index buffer specified by this geometry. + +} NVAPI_D3D12_RAYTRACING_GEOMETRY_DMM_ATTACHMENT_DESC; + +//! Geometry triangle descriptor with attached augmented Displacement Micromaps. 
+//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_DMM_TRIANGLES_DESC +{ + D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC triangles; //!< Triangle mesh descriptor. + NVAPI_D3D12_RAYTRACING_GEOMETRY_DMM_ATTACHMENT_DESC dmmAttachment; //!< Displacement Micromap attachment descriptor. +} NVAPI_D3D12_RAYTRACING_GEOMETRY_DMM_TRIANGLES_DESC; + +//! Geometry triangle descriptor with attached augmented Opacity Micromaps. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC +{ + D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC triangles; //!< Triangle mesh descriptor. + NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC ommAttachment; //!< Opacity Micromap attachment descriptor. +} NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC; + +//! Sphere geometry descriptor. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_SPHERES_DESC +{ + NvU32 vertexCount; //!< The largest valid index plus one. + NvU32 indexCount; //!< Number of indices in index buffer. + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE vertexPositionBuffer; //!< Vertex position buffer and stride. + DXGI_FORMAT vertexPositionFormat; //!< Supports the same formats as the triangle vertex buffers. + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE vertexRadiusBuffer; //!< Vertex radius buffer and stride. Radii must be 0 or greater. The stride can be set to 0 to set a constant radius for all primitives in the geometry. + DXGI_FORMAT vertexRadiusFormat; //!< Supported formats are `DXGI_FORMAT_R32_FLOAT` and `DXGI_FORMAT_R16_FLOAT`. + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE indexBuffer; //!< Indices to positions and radii, one entry per sphere primitive. May be set to NULL to use the list of positions and radii from vertexPositionBuffer and vertexRadiusBuffer. + DXGI_FORMAT indexFormat; //!< Supported formats are `DXGI_FORMAT_R32_UINT`, `DXGI_FORMAT_R16_UINT`, `DXGI_FORMAT_R8_UINT`. + +} NVAPI_D3D12_RAYTRACING_GEOMETRY_SPHERES_DESC; + +//! 
Describes the endcap enable/disable behavior of LSS primitives. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_LSS_ENDCAP_MODE +{ + NVAPI_D3D12_RAYTRACING_LSS_ENDCAP_MODE_NONE = 0, //!< None of the primitives have endcaps enabled. Only midsections may be intersected. + NVAPI_D3D12_RAYTRACING_LSS_ENDCAP_MODE_CHAINED = 1, //!< The last primitive in each chain has both endcaps enabled. Preceding primitives within chains only have their first endcap enabled. + +} NVAPI_D3D12_RAYTRACING_LSS_ENDCAP_MODE; + +//! Describes how LSS primitives are constructed from vertex and index buffer inputs. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_LSS_PRIMITIVE_FORMAT +{ + NVAPI_D3D12_RAYTRACING_LSS_PRIMITIVE_FORMAT_LIST = 0, //!< Each LSS primitive is defined by a pair of vertices. The index buffer is optional for this format. + NVAPI_D3D12_RAYTRACING_LSS_PRIMITIVE_FORMAT_SUCCESSIVE_IMPLICIT = 1, //!< Each LSS is defined by two successive vertices (k, k + 1), where k is the entry in the index buffer. This format requires an index buffer to be supplied in the geometry descriptor. + +} NVAPI_D3D12_RAYTRACING_LSS_PRIMITIVE_FORMAT; + +//! Linear Swept Sphere (LSS) geometry descriptor. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_LSS_DESC +{ + NvU32 vertexCount; //!< The largest valid index plus one. + NvU32 indexCount; //!< Number of indices in index buffer. + NvU32 primitiveCount; //!< Specifies the total number of primitives (including inactive ones). + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE vertexPositionBuffer; //!< Vertex position buffer and stride. + DXGI_FORMAT vertexPositionFormat; //!< Supports the same formats as the triangle vertex buffers. + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE vertexRadiusBuffer; //!< Vertex radius buffer and stride. Radii must be 0 or greater. The stride can be set to 0 to set a constant radius for all primitives in the geometry. 
+ DXGI_FORMAT vertexRadiusFormat; //!< Supported formats are `DXGI_FORMAT_R32_FLOAT` and `DXGI_FORMAT_R16_FLOAT`. + + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE indexBuffer; //!< Indices to positions and radii, one entry per sphere primitive. May be set to NULL to use the list of positions and radii from vertexPositionBuffer and vertexRadiusBuffer. + DXGI_FORMAT indexFormat; //!< Supported formats are `DXGI_FORMAT_R32_UINT`, `DXGI_FORMAT_R16_UINT`, `DXGI_FORMAT_R8_UINT`. + + NVAPI_D3D12_RAYTRACING_LSS_ENDCAP_MODE endcapMode; //!< Specifies which endcaps are enabled and disabled over collections of LSS primitives within the geometry. + NVAPI_D3D12_RAYTRACING_LSS_PRIMITIVE_FORMAT primitiveFormat; //!< Selects how input buffers are to be interpreted to construct LSS primitives. + +} NVAPI_D3D12_RAYTRACING_GEOMETRY_LSS_DESC; + +//! This structure extends \c D3D12_RAYTRACING_GEOMETRY_DESC by supporting additional geometry types. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX +{ + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX type; //!< The type of geometry stored in the union of this structure. + D3D12_RAYTRACING_GEOMETRY_FLAGS flags; //!< Flags affecting how this geometry is processed by the raytracing pipeline. + union + { + D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC triangles; //!< Describes triangle geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + D3D12_RAYTRACING_GEOMETRY_AABBS_DESC aabbs; //!< Describes AABB geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC ommTriangles; //!< Describes triangle geometry which may optionally use Opacity Micromaps, if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX. 
+ //!< Otherwise, this parameter is unused (space repurposed in a union). + NVAPI_D3D12_RAYTRACING_GEOMETRY_DMM_TRIANGLES_DESC dmmTriangles; //!< Describes micro-triangle geometry, if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_DMM_TRIANGLES_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + NVAPI_D3D12_RAYTRACING_GEOMETRY_SPHERES_DESC spheres; //!< Describes sphere geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_SPHERES_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + NVAPI_D3D12_RAYTRACING_GEOMETRY_LSS_DESC lss; //!< Describes linear swept sphere geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_LSS_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + }; +} NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX; + +//! This enum extends \c D3D12_RAYTRACING_INSTANCE_FLAGS with additional values. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX +{ + // D3D12_RAYTRACING_INSTANCE_FLAGS flags + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_NONE_EX = 0x0, //!< No options specified for this instance. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE_EX = NV_BIT(0), //!< Disable triangle culling for this instance. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE_EX = NV_BIT(1), //!< Use counter-clockwise winding for defining front faces, instead of the default of clockwise winding. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OPAQUE_EX = NV_BIT(2), //!< Force all geometries in this instance to be opaque. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_NON_OPAQUE_EX = NV_BIT(3), //!< All geometries in this instance will be processed as if they never had the \c D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE flag applied to them. 
+ + // NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX specific flags + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OMM_2_STATE_EX = NV_BIT(4), //!< Ignore the Unknown state and only consider the Transparent/Opaque bit for all 4-state OMMs encountered during traversal. + //!< This flag has no effect if #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX is set. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX = NV_BIT(5) //!< Disable OMMs for all triangles, and revert to using geometry opaque/non-opaque state instead (legacy behavior). + //!< This flag is only valid if the referenced BLAS was built with the #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX flag; omitting that flag during BLAS build will result in undefined behavior. +} NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX; + +//! This structure extends \c D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS by supporting additional geometry types. +//! Only modified members are fully described below; for more information on the other members, please check Microsoft's DirectX Raytracing Specification. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX +{ + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE type; //!< Whether a top-level acceleration structure (TLAS) or bottom-level acceleration structure (BLAS) will be built using this information. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX flags; //!< Options influencing how the acceleration structure is built and which of its features can be used. + NvU32 numDescs; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TOP_LEVEL, it represents the number of descriptions stored in \c instanceDescs. + //!< Otherwise, it contains the number of geometry descriptions stored in \c pGeometryDescs or \c ppGeometryDescs. 
+ D3D12_ELEMENTS_LAYOUT descsLayout; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL, it specifies which of \c pGeometryDescs and \c ppGeometryDescs to use. + //!< Otherwise, this parameter is unused. + NvU32 geometryDescStrideInBytes; //!< Stride between consecutive geometry descriptors. Should typically be set to sizeof(NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX). + //!< Only used if \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL and \c descLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY. + //!< This field guarantees backwards compatibility, even if the geometry descriptor size increases in future NVAPI versions. + union + { + D3D12_GPU_VIRTUAL_ADDRESS instanceDescs; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TOP_LEVEL, the referenced instance structures can use the extended set of flags #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX in place of the \c D3D12_RAYTRACING_INSTANCE_FLAGS mentioned in \c D3D12_RAYTRACING_INSTANCE_DESC. + //!< Otherwise, this parameter is unused (space repurposed in a union). + const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX* pGeometryDescs; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL and \c descLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY, it contains the descriptions of all geometries to be built into a BLAS. + //!< Otherwise, this parameter is unused (space repurposed in a union). + const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX*const* ppGeometryDescs; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL and \c descLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS, it contains the addresses of descriptions for all geometries to be built into a BLAS. + //!< Otherwise, this parameter is unused (space repurposed in a union). + }; +} NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX; + +//! Parameters given to NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(). +//! +//! 
\ingroup dx +typedef struct _NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX* pDesc; //!< [in] Description of the acceleration-structure build. + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO* pInfo; //!< [out] Result of the query. +} NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1; +#define NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1, 1) +typedef NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1 NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS; +#define NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx +// +//! DESCRIPTION: This function is an extension of ID3D12Device5::GetRaytracingAccelerationStructurePrebuildInfo() with additional input types. +//! +//! \note Only supported on GPUs capable of DXR. +//! Some of the flags and fields have further restrictions, in which case their description will include a note with more details. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pDevice Device on which the acceleration structure will be built. +//! \param [in,out] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx( + __in ID3D12Device5* pDevice, + __inout NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Description of the inputs and memory areas used during the building of OMM Arrays. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS destOpacityMicromapArrayData; //!< Output location for the OMM Array build. + //!< NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() reports the amount of memory required for the result given a set of input parameters. + //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT). + NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS inputs; //!< Description of the input data for the OMM Array build. + D3D12_GPU_VIRTUAL_ADDRESS scratchOpacityMicromapArrayData; //!< Location where the build will store temporary data. + //!< NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() reports the amount of scratch memory the implementation will need for a given set of input parameters. + //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT). + //!< Contents of this memory going into a build on the GPU timeline are irrelevant and will not be preserved. + //!< After the build is complete on the GPU timeline, the memory is left with whatever undefined contents the build finished with. + //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS. 
+} NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC; + +//! Structure emitted by NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(), and optionally NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(), when \c type equals #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC +{ + NvU64 currentSizeInBytes; //!< Size of the OMM Array buffer. + //!< The queried size may be smaller than the size reported by NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(). + //!< This allows the application to move and relocate the OMM Array to a smaller buffer to reclaim any unused memory after the OMM Array build is complete. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC; + +//! Type of postbuild info to emit after an OMM Array build. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE = 0x0 //!< Size of the current OMM Array. May be smaller than reported by the NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() call. + //!< Unused memory can be reclaimed by copying the OMM Array into a new resource; see #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE; + +//! Description of the postbuild information to generate from an OMM Array. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS destBuffer; //!< Result storage. + //!< Size required and the layout of the contents written by the system depend on \p infoType. + //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS. 
+ //!< The memory must be aligned to the natural alignment for the members of the particular output structure being generated (e.g. 8 bytes for a struct with the largest member being \c NvU64). + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE infoType; //!< Type of postbuild information to retrieve. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC; + +//! Parameters given to NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(). +//! +//! \ingroup dx +typedef struct _NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC* pDesc; //!< [in] Description of the OMM Array build. + NvU32 numPostbuildInfoDescs; //!< [in] Size of postbuild info desc array. Set to 0 if none are needed. + const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pPostbuildInfoDescs; //!< [in] Optional array of descriptions for postbuild info to generate describing properties of the acceleration structure that was built. + //!< [in] Any given postbuild info type, \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_TYPE, can only be selected for output by at most one array entry. +} NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1; +#define NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1, 1) +typedef NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS; +#define NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingOpacityMicromapArray +// +//! DESCRIPTION: Construct OMM Array for a collection of OMMs on the GPU. 
+//! The CPU-side input buffers are not referenced after this call. +//! The GPU-side input resources are not referenced after the build has concluded after ExecuteCommandList(). +//! Additionally, the application may optionally output postbuild information immediately after the build. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pCommandList Command list on which the command will execute. +//! \param [in] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval NVAPI_INVALID_COMBINATION pParams->pPostbuildInfoDescs was set to \c NULL while pParams->numPostbuildInfoDescs is non zero. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingOpacityMicromapArray( + __in ID3D12GraphicsCommandList4* pCommandList, + __in NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Parameters given to NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(). +//! +//! \ingroup dx +typedef struct _NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER. + D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray; //!< [in] OMM Array current memory address; it must be 256-byte aligned (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT). 
+} NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1; +#define NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1, 1) +typedef NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS; +#define NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray +// +//! DESCRIPTION: Makes the OMM Array usable at its current location in memory. +//! An OMM Array that has been copied to a new location must be relocated using this function before it may be attached to any BLAS. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pCommandList Command list on which the command will execute. +//! \param [in] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray( + __in ID3D12GraphicsCommandList4* pCommandList, + __in const NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Description of the inputs and memory areas used during the building of DMM Arrays. +//! +//! 
\ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS destDisplacementMicromapArrayData; //!< Output location for the DMM Array build. + //!< NvAPI_D3D12_GetRaytracingDisplacementMicromapArrayPrebuildInfo() reports the amount of memory required for the result given a set of input parameters. + //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BYTE_ALIGNMENT). + NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_INPUTS inputs; //!< Description of the input data for the DMM Array build. + D3D12_GPU_VIRTUAL_ADDRESS scratchDisplacementMicromapArrayData; //!< Location where the build will store temporary data. + //!< NvAPI_D3D12_GetRaytracingDisplacementMicromapArrayPrebuildInfo() reports the amount of scratch memory the implementation will need for a given set of input parameters. + //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BYTE_ALIGNMENT). + //!< Contents of this memory going into a build on the GPU timeline are irrelevant and will not be preserved. + //!< After the build is complete on the GPU timeline, the memory is left with whatever undefined contents the build finished with. + //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS. +} NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_DESC; + +//! Structure emitted by NvAPI_D3D12_EmitRaytracingDisplacementMicromapArrayPostbuildInfo(), and optionally NvAPI_D3D12_BuildRaytracingDisplacementMicromapArray(), when \c type equals #NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC +{ + NvU64 currentSizeInBytes; //!< Size of the DMM Array buffer. 
+ //!< The queried size may be smaller than the size reported by NvAPI_D3D12_GetRaytracingDisplacementMicromapArrayPrebuildInfo(). + //!< This allows the application to move and relocate the DMM Array to a smaller buffer to reclaim any unused memory after the DMM Array build is complete. +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC; + +//! Type of postbuild info to emit after a DMM Array build. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE +{ + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE = 0x0, //!< Size of the current DMM Array. May be smaller than reported by the NvAPI_D3D12_GetRaytracingDisplacementMicromapArrayPrebuildInfo() call. + //!< Unused memory can be reclaimed by copying the DMM Array into a new resource; see #NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC. +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE; + +//! Description of the postbuild information to generate from a DMM Array. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS destBuffer; //!< Result storage. + //!< Size required and the layout of the contents written by the system depend on \p infoType. + //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS. + //!< The memory must be aligned to the natural alignment for the members of the particular output structure being generated (e.g. 8 bytes for a struct with the largest member being \c NvU64). + NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE infoType; //!< Type of postbuild information to retrieve. +} NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_DESC; + +//! Parameters given to NvAPI_D3D12_BuildRaytracingDisplacementMicromapArray(). +//! +//! 
\ingroup dx +typedef struct _NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_DESC* pDesc; //!< [in] Description of the DMM Array build. + NvU32 numPostbuildInfoDescs; //!< [in] Size of postbuild info desc array. Set to 0 if none are needed. + const NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pPostbuildInfoDescs; //!< [in] Optional array of descriptions for postbuild info to generate describing properties of the acceleration structure that was built. + //!< [in] Any given postbuild info type, \c D3D12_RAYTRACING_ACCEELRATION_STRUCTURE_POSTBUILD_INFO_TYPE, can only be selected for output by at most one array entry. +} NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1; +#define NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1, 1) +typedef NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1 NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS; +#define NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingDisplacementMicromapArray +// +//! DESCRIPTION: Construct DMM Array for a collection of DMMs on the GPU. +//! The CPU-side input buffers are not referenced after this call. +//! The GPU-side input resources are not referenced after the build has concluded after ExecuteCommandList(). +//! Additionally, the application may optionally output postbuild information immediately after the build. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \deprecated Do not use this function - it is deprecated in release 570. 
+//! +//! \since Release: 525 +//! +//! \param [in] pCommandList Command list on which the command will execute. +//! \param [in] pParams Wrapper around the inputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval NVAPI_INVALID_COMBINATION pParams->pPostbuildInfoDescs was set to \c NULL while pParams->numPostbuildInfoDescs is non zero. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 570.") +NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingDisplacementMicromapArray( + __in ID3D12GraphicsCommandList4* pCommandList, + __in NVAPI_BUILD_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Parameters given to NvAPI_D3D12_RelocateRaytracingDisplacementMicromapArray(). +//! +//! \ingroup dx +typedef struct _NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER. + D3D12_GPU_VIRTUAL_ADDRESS displacementMicromapArray; //!< [in] DMM Array current memory address; it must be 256-byte aligned (#NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BYTE_ALIGNMENT). 
+} NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1; +#define NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1, 1) +typedef NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_V1 NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS; +#define NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_RelocateRaytracingDisplacementMicromapArray +// +//! DESCRIPTION: Makes the DMM Array usable at its current location in memory. +//! A DMM Array that has been copied to a new location must be relocated using this function before it may be attached to any BLAS. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \deprecated Do not use this function - it is deprecated in release 570. +//! +//! \since Release: 525 +//! +//! \param [in] pCommandList Command list on which the command will execute. +//! \param [in] pParams Wrapper around the inputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 570.") +NVAPI_INTERFACE NvAPI_D3D12_RelocateRaytracingDisplacementMicromapArray( + __in ID3D12GraphicsCommandList4* pCommandList, + __in const NVAPI_RELOCATE_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Parameters given to NvAPI_D3D12_EmitRaytracingDisplacementMicromapArrayPostbuildInfo(). +//! +//! \ingroup dx +typedef struct _NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER. + const NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pDesc; //!< [in] Description of which postbuild info to emit. + NvU32 numSources; //!< [in] Number of DMM Arrays in \p pSources. + const D3D12_GPU_VIRTUAL_ADDRESS* pSources; //!< [in] List of DMM Arrays for which postbuild info should be emitted. 
+} NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1; +#define NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1, 1) +typedef NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1 NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS; +#define NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_EmitRaytracingDisplacementMicromapArrayPostbuildInfo +// +//! DESCRIPTION: Emits information about one or more DMM Arrays, only available after the DMM Array constructions have finished. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \deprecated Do not use this function - it is deprecated in release 570. +//! +//! \since Release: 525 +//! +//! \param [in] pCommandList Command list on which the command will execute. +//! \param [in] pParams Wrapper around the inputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 570.") +NVAPI_INTERFACE NvAPI_D3D12_EmitRaytracingDisplacementMicromapArrayPostbuildInfo( + __in ID3D12GraphicsCommandList4* pCommandList, + __in const NVAPI_EMIT_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Parameters given to NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(). +//! +//! \ingroup dx +typedef struct _NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER. + const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pDesc; //!< [in] Description of which postbuild info to emit. + NvU32 numSources; //!< [in] Number of OMM Arrays in \p pSources. + const D3D12_GPU_VIRTUAL_ADDRESS* pSources; //!< [in] List of OMM Arrays for which postbuild info should be emitted. 
+} NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1; +#define NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1, 1) +typedef NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1 NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS; +#define NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo +// +//! DESCRIPTION: Emits information about one or more OMM Arrays, only available after the OMM Array constructions have finished. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pCommandList Command list on which the command will execute. +//! \param [in] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo( + __in ID3D12GraphicsCommandList4* pCommandList, + __in const NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! This structure extends \c D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC by supporting additional geometry types as inputs. +//! 
For more information on the different members, please check Microsoft's DirectX Raytracing Specification. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX +{ + D3D12_GPU_VIRTUAL_ADDRESS destAccelerationStructureData; //!< Memory where the resulting acceleration structure will be stored. + NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX inputs; //!< The inputs to the build process. + D3D12_GPU_VIRTUAL_ADDRESS sourceAccelerationStructureData; //!< The acceleration structure to be updated. + //!< Otherwise if the acceleration structure should be rebuilt entirely, this value must be \c NULL. + D3D12_GPU_VIRTUAL_ADDRESS scratchAccelerationStructureData; //!< Memory that will be temporarily used during the building process. +} NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX; + +//! Parameters given to NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(). +//! +//! \ingroup dx +typedef struct _NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX* pDesc; //!< [in] Description of the acceleration structure to build. + NvU32 numPostbuildInfoDescs; //!< [in] Size of postbuild info desc array. Set to 0 if none are needed. + const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC* pPostbuildInfoDescs; //!< [in] Optional array of descriptions for postbuild info to generate describing properties of the acceleration structure that was built. + //!< Any given postbuild info type, \c D3D12_RAYTRACING_ACCEELRATION_STRUCTURE_POSTBUILD_INFO_TYPE, can only be selected for output by at most one array entry. 
+} NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1; +#define NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1, 1) +typedef NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1 NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS; +#define NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingAccelerationStructureEx +// +//! DESCRIPTION: Perform an acceleration structure build on the GPU. +//! Also optionally output postbuild information immediately after the build. +//! This function is an extension of ID3D12GraphicsCommandList4::BuildRaytracingAccelerationStructure() with additional serialized data types. +//! +//! \note Only supported on GPUs capable of DXR. +//! Some of the flags and fields have further restrictions, in which case their description will include a note with more details. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pCommandList Command list on which the command will execute. +//! \param [in] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval NVAPI_INVALID_COMBINATION pParams->pPostbuildInfoDescs was set to \c NULL while pParams->numPostbuildInfoDescs is non zero. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingAccelerationStructureEx( + __in ID3D12GraphicsCommandList4* pCommandList, + __in const NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +/////////////////////////////////////////////////////////////////////////////// +// +// Miscellaneous +// +/////////////////////////////////////////////////////////////////////////////// + +//! Opacity Micromap micro-triangle states. +//! Not part of any input, but listed here for convenience. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_TRANSPARENT = 0, //!< Transparent OMM state: hit is ignored. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_OPAQUE = 1, //!< Opaque OMM state: hit is committed. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT = 2, //!< Unknown-transparent OMM state. + //!< * If operating in 2-state mode, ignore hit. + //!< * If operating in 4-state mode, invoke any-hit shader. + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE = 3 //!< Unknown-opaque OMM state. + //!< * If operating in 2-state mode, commit hit. + //!< * If operating in 4-state mode, invoke any-hit shader. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE; + +//! Mandatory alignment for the address of an OMM Array. +//! +//! \ingroup dx +#define NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT 256 + +//! Highest subdivision-level allowed with OC1. +//! +//! \ingroup dx +#define NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL 12 + +//! A list of flags that can be given to the \c TraceRay() function in HLSL. +//! +//! 
\ingroup dx +typedef enum _NVAPI_RAY_FLAGS_EX +{ + // RAY_FLAGS flags + NVAPI_RAY_FLAG_NONE_EX = 0x0, //!< No flag specified. + NVAPI_RAY_FLAG_FORCE_OPAQUE_EX = NV_BIT( 0), //!< Consider all intersected geometries to be opaque, regardless of the flags specified at the geometry and instance level. + NVAPI_RAY_FLAG_FORCE_NON_OPAQUE_EX = NV_BIT( 1), //!< Consider all intersected geometries to be non-opaque, regardless of the flags specified at the geometry and instance level. + NVAPI_RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH_EX = NV_BIT( 2), //!< End the traversal as soon as a geometry is hit, and that hit is not ignored by the any hit shader. + NVAPI_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER_EX = NV_BIT( 3), //!< Do not invoke the closest hit shader once the traversal ends. + NVAPI_RAY_FLAG_CULL_BACK_FACING_TRIANGLES_EX = NV_BIT( 4), //!< Never intersect triangle geometries that are back facing with regard to the ray. + NVAPI_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES_EX = NV_BIT( 5), //!< Never intersect triangle geometries that are front facing with regard to the ray. + NVAPI_RAY_FLAG_CULL_OPAQUE_EX = NV_BIT( 6), //!< Never intersect geometries that were flagged as opaque. + NVAPI_RAY_FLAG_CULL_NON_OPAQUE_EX = NV_BIT( 7), //!< Never intersect geometries that were not flagged as opaque. + NVAPI_RAY_FLAG_SKIP_TRIANGLES_EX = NV_BIT( 8), //!< Never intersect triangle geometries. + NVAPI_RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES_EX = NV_BIT( 9), //!< Never intersect AABB geometries. + + // NVAPI_RAY_FLAGS_EX specific flags + NVAPI_RAY_FLAG_FORCE_OMM_2_STATE_EX = NV_BIT(10), //!< Treat unknown-opaque and unknown-transparent as opaque and transparent, respectively, during traversal. + //!< If an instance is flagged with #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX, that takes precedence over this flag. +} NVAPI_RAY_FLAG_EX; + +//! Mandatory alignment for the address of a DMM Array. +//! +//! 
\ingroup dx
+#define NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_ARRAY_BYTE_ALIGNMENT 256
+
+//! Highest subdivision-level allowed with DC1.
+//!
+//! \ingroup dx
+#define NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_DC1_MAX_SUBDIVISION_LEVEL 5
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+
+//! \ingroup DX
+typedef enum _NV_D3D12_WORKSTATION_FEATURE_TYPE
+{
+ NV_D3D12_WORKSTATION_FEATURE_TYPE_PRESENT_BARRIER = 1, // PresentBarrier feature
+ NV_D3D12_WORKSTATION_FEATURE_TYPE_RDMA_BAR1_SUPPORT = 2, // RDMA heap supported via Bar1 carveout
+} NV_D3D12_WORKSTATION_FEATURE_TYPE;
+
+// parameter structure for NV_D3D12_WORKSTATION_FEATURE_TYPE_RDMA_BAR1_SUPPORT related information
+typedef struct _NV_D3D12_WORKSTATION_FEATURE_RDMA_PROPERTIES
+{
+ NvU64 rdmaHeapSize; // maximum available Bar1 heap size for RDMA allocations
+} NV_D3D12_WORKSTATION_FEATURE_RDMA_PROPERTIES;
+
+// parameter structure for querying workstation feature information
+typedef struct _NV_D3D12_WORKSTATION_FEATURE_PROPERTIES
+{
+ NvU32 version; //!< (IN) Structure version
+ NV_D3D12_WORKSTATION_FEATURE_TYPE workstationFeatureType; //!< (IN) the type of workstation feature to be queried
+ NvBool supported; //!< (OUT) boolean returning if feature is supported
+ union
+ {
+ NV_D3D12_WORKSTATION_FEATURE_RDMA_PROPERTIES rdmaInfo; //!< (OUT) RDMA feature related information, returned only if
+ //!< workstationFeatureType is NV_D3D12_WORKSTATION_FEATURE_TYPE_RDMA_BAR1_SUPPORT
+ };
+} NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS_V1;
+
+#define NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS_V1,1)
+#define NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS_VER NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS_VER1
+#define NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS_V1
+
+
+#if defined(__cplusplus)
&& defined(__d3d12_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_QueryWorkstationFeatureProperties +// +//! DESCRIPTION: This API returns information about the properties of specific workstation features on the specified device. +//! +//! \since Release: 530 +//! +//! \param [in] pDevice The ID3D12Device device which is queried for feature properties +//! \param [inout] pWorkstationFeatureProperties Pointer to a structure containing workstation feature query information. +//! +//! \return ::NVAPI_OK the call succeeded +//! \return ::NVAPI_ERROR the call failed +//! \return ::NVAPI_NO_IMPLEMENTATION the API is not implemented +//! \return ::NVAPI_INVALID_POINTER an invalid pointer was passed as an argument +//! \retval ::NVAPI_API_NOT_INITIALIZED NvAPI not initialized +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_QueryWorkstationFeatureProperties(__in ID3D12Device *pDevice, __inout NVAPI_D3D12_WORKSTATION_FEATURE_PROPERTIES_PARAMS *pWorkstationFeatureProperties); +#endif // defined(__cplusplus) && defined(__d3d12_h__) + + +#if defined (__cplusplus) && defined(__d3d12_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_CreateCommittedRDMABuffer +// +//! \since Release: 530 +// +//! \code +//! DESCRIPTION: NvAPI_D3D12_CreateCommittedRDMABuffer is a wrapper of ID3D12Device::CreateCommittedResource +//! which allows to allocate linear memory which can be used for remote direct memory access (RDMA) from other devices. +//! It creates an implicit D3D12 heap of the requested size, allocates the resource and returns an RDMA address for remote direct memory access. +//! The created memory will reside on the specified device local memory and won't be cpu accessible. +//! +//! 
\param [in] pDevice A pointer to a D3D12 device. +//! \param [in] size Size in bytes of the linear buffer to be allocated for the resource. +//! \param [in] heapCreationNodeMask This mask indicates the node where the resource should be created. +//! \param [in] heapVisibleNodeMask This mask indicates on which nodes the resource is accessible. +//! \param [in] riidResource The globally unique identifier (GUID) for the resource interface. +//! \param [out] ppvResource A pointer to memory that receives the requested interface pointer to the created resource object. +//! \param [out] ppRDMAAddress A pointer to memory that receives the Bar1 memory region for remote direct memory access. + +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. +//! +//! \endcode +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_CreateCommittedRDMABuffer( + __in ID3D12Device* pDevice, + __in NvU64 size, + __in NvU32 heapCreationNodeMask, + __in NvU32 heapVisibleNodeMask, + __in REFIID riidResource, + __out void **ppvResource, + __out void **ppRDMAAddress); + +#endif //defined(__cplusplus) && defined(__d3d12_h__) + +#if defined(__cplusplus) && (defined(__d3d12_h__)) + +//! Underlying component types of a cooperative vector or matrix +//! 
\ingroup dx +typedef enum NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE { + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT16 = 0, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT32 = 1, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT64 = 2, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_SINT8 = 3, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_SINT16 = 4, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_SINT32 = 5, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_SINT64 = 6, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_UINT8 = 7, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_UINT16 = 8, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_UINT32 = 9, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_UINT64 = 10, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_SINT8_PACKED = 11, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_UINT8_PACKED = 12, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT_E4M3 = 13, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT_E5M2 = 14, + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_INVALID = 0x7FFFFFFF +} NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE; + +//! Device or Host address descriptor to specify source/destination addresses. Both source and destination must either be on host or both on device. +//! \ingroup dx +typedef struct NVAPI_DEVICE_OR_HOST_ADDRESS +{ + NvBool bIsDeviceAlloc; //!< [in] Is false if allocation is on CPU, true otherwise. The value of this also decides whether conversion will happen on host or device. + union + { + D3D12_GPU_VIRTUAL_ADDRESS deviceAddress; //!< [in] GPU VA of allocation + void* pHostAddress; //!< [in] Pointer to host allocation + }; +} NVAPI_DEVICE_OR_HOST_ADDRESS; + +//! Possible cooperative vector matrix layouts +//! 
\ingroup dx +typedef enum NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT { + NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT_ROW_MAJOR = 0, + NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT_COLUMN_MAJOR = 1, + NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT_INFERENCING_OPTIMAL = 2, + NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT_TRAINING_OPTIMAL = 3, + NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT_INVALID = 0x7FFFFFFF +} NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT; + +//! Structure specifying a request to convert the layout and type of a cooperative vector matrix +//! \ingroup dx +typedef struct _NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC_V1 +{ + NvU32 version; //!< [in] Struct version. + size_t srcSize; //!< [in] Is the length in bytes of srcData + NVAPI_DEVICE_OR_HOST_ADDRESS srcData; //!< [in] Is either `NULL` when calling the command to query the required size of the destination or a pointer to the source data in the source layout. + size_t* pDstSize; //!< [inout] Is a pointer to an integer storing the number of bytes required or requested to convert. + NVAPI_DEVICE_OR_HOST_ADDRESS dstData; //!< [inout] Is either `NULL` when calling the command to query the required size of the destination or a pointer to the destination data. + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE srcComponentType; //!< [in] Is the type of a source matrix element + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE dstComponentType; //!< [in] Is the type of a destination matrix element. + NvU32 numRows; //!< [in] Is the number of rows in the matrix. + NvU32 numColumns; //!< [in] Is the number of columns in the matrix. + NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT srcLayout; //!< [in] Is the layout of the source matrix. + size_t srcStride; //!< [in] Is the number of bytes between a consecutive row or column (depending on srcLayout) of the source matrix, if it is row-major or column-major. + NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT dstLayout; //!< [in] Is the layout the matrix is converted to. 
+ size_t dstStride; //!< [in] Is the number of bytes between a consecutive row or column (depending on dstLayout) of the destination matrix if it is row-major or column-major. +} NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC_V1; + +typedef NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC_V1 NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC; + +//! Macro for deriving version of NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC +#define NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC_VER1 MAKE_NVAPI_VERSION(NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC_V1,1) +#define NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC_VER NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_ConvertCooperativeVectorMatrix +// +//! DESCRIPTION: Queries the size of a cooperative vector matrix or converts a matrix to another layout and type. Performs a single conversion using descriptor pointed to by pDesc. +//! For multiple conversions in a single API call, use NvAPI_D3D12_ConvertCooperativeVectorMatrixMultiple. +//! If both the source (src) and destination (dst) allocations reside on the host, the conversion is run on the host. +//! Conversely, if both allocations are on the device, the conversion is run on the device. +//! Note that conversions between host and device allocations (and vice versa) are not supported. +//! If pDstData is `NULL`, then the number of bytes required to store the converted matrix is returned in pDstSize. +//! Otherwise, pDstSize must point to a variable set by the user to the number of bytes in pDstData, and on return the +//! variable is overwritten with the number of bytes actually written to pDstData. +//! pSrcData can be `NULL` when pDstData is `NULL`. If pDstSize is less than the number of bytes required to store the +//! 
converted matrix, no bytes will be written, and NVAPI_INSUFFICIENT_BUFFER will be returned instead of NVAPI_OK, to indicate that not enough space was provided. +//! The size of the destination is only a function of the destination layout information, and does not depend on the source layout information. +//! If srcLayout is row-major or column-major, then srcStride should be greater than the length of a row/column, and a multiple of the element size. +//! If dstLayout is row-major or column-major, then dstStride should be greater than the length of a row/column, and a multiple of the element size. +//! If srcComponentType is not a supported MatrixInterpretation value as reported by NvAPI_D3D12_GetPhysicalDeviceCooperativeVectorProperties() then srcComponentType should be `NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT32`. +//! If dstComponentType is not a supported MatrixInterpretation value as reported by NvAPI_D3D12_GetPhysicalDeviceCooperativeVectorProperties() then dstComponentType should be `NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT32`. +//! If srcComponentType and dstComponentType are not equal, then one should be `NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT32` or `NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT16` and the other should be a lower-precision floating-point type. +//! If dstComponentType is `NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT_E4M3` or `NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE_FLOAT_E5M2`, then dstLayout should be `NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT_INFERENCING_OPTIMAL` or `NVAPI_COOPERATIVE_VECTOR_MATRIX_LAYOUT_TRAINING_OPTIMAL`. +//! +//! \since Release: 570 +//! SUPPORTED OS: Windows 10 and higher +//! +//! \param [in] pDevice Pointer to the ID3D12Device created by application. +//! \param [in] pCommandList Pointer to ID3D12GraphicsCommandList for device side conversion. Ignored if host side conversion is desired. +//! \param [in] pDesc Pointer to NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC created by app +//! +//! 
\retval NVAPI_OK Conversion done succesfully. +//! \retval NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! \retval NVAPI_INVALID_POINTER pDevice and/or pDesc is NULL. +//! \retval NVAPI_INVALID_COMBINATION Either src and dst data are both not on CPU/GPU or both are on GPU but pCommandList is NULL. +//! \retval NVAPI_INSUFFICIENT_BUFFER Destination size passed was not enough for the conversion result. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION Incompatible structure version of pDesc. +//! \retval NVAPI_ERROR Generic error. +//! \retval NVAPI_OUT_OF_MEMORY Internal allocation failed. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_ConvertCooperativeVectorMatrix(__in ID3D12Device *pDevice, __in ID3D12GraphicsCommandList* pCommandList, __in NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC const* const pDesc); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_ConvertCooperativeVectorMatrixMultiple +// +//! DESCRIPTION: Queries the size of a cooperative vector matrix or converts a matrix to another layout and type. +//! Can perform multiple conversions in a single API call. The number of descriptors pointed to by pDesc is specified using descCount. +//! Function returns error if any of the conversions fail and does not process further conversions. +//! Behavior is same as NvAPI_D3D12_ConvertCooperativeVectorMatrix if descCount is equal to 1. +//! If both the source (src) and destination (dst) allocations reside on the host, the conversion is run on the host. +//! See NvAPI_D3D12_ConvertCooperativeVectorMatrix documentation for valid usage. +//! +//! \since Release: 570 +//! SUPPORTED OS: Windows 10 and higher +//! +//! \param [in] pDevice Pointer to the ID3D12Device created by application. +//! \param [in] pCommandList Pointer to ID3D12GraphicsCommandList for device side conversion. 
Ignored if host side conversion is desired. +//! \param [in] pDesc Pointer to NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC created by app. +//! \param [in] desCount Specifies the count of NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC descriptors pointed to by pDesc. Must be greater than 0. +//! +//! \retval NVAPI_OK Conversion done succesfully. +//! \retval NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! \retval NVAPI_INVALID_POINTER pDevice and/or pDesc is NULL. +//! \retval NVAPI_INVALID_ARGUMENT descCount is 0. +//! \retval NVAPI_INVALID_COMBINATION Either src and dst data are both not on CPU/GPU or both are on GPU but pCommandList is NULL. +//! \retval NVAPI_INSUFFICIENT_BUFFER Destination size passed was not enough for the conversion result. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION Incompatible structure version of pDesc. +//! \retval NVAPI_ERROR Generic error. +//! \retval NVAPI_OUT_OF_MEMORY Internal allocation failed. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_ConvertCooperativeVectorMatrixMultiple(__in ID3D12Device *pDevice, __in ID3D12GraphicsCommandList* pCommandList, __in NVAPI_CONVERT_COOPERATIVE_VECTOR_MATRIX_DESC const* const pDesc, __in NvU32 descCount); + +//! Each structure describes a single supported combination of types for a matrix-vector multiply (or multiply-add) operation +//! \ingroup dx +typedef struct _NVAPI_COOPERATIVE_VECTOR_PROPERTIES_V1 +{ + NvU32 version; //!< [out] Struct version. App must verify the value returned by API. + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE inputType; //!< [out] Is the component type of vector Input, of type NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE. + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE inputInterpretation; //!< [out] Is the value of InputInterpretation, of type NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE. 
+ NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE matrixInterpretation; //!< [out] Is the value of MatrixInterpretation, of type NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE. + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE biasInterpretation; //!< [out] Is the value of BiasInterpretation, of type NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE. + NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE resultType; //!< [out] Is the component type of Result Type, of type NVAPI_COOPERATIVE_VECTOR_COMPONENT_TYPE. + bool transpose; //!< [out] Is a boolean indicating whether opaque layout matrices with this combination of input and output types supports transposition. +}NVAPI_COOPERATIVE_VECTOR_PROPERTIES_V1; + +typedef NVAPI_COOPERATIVE_VECTOR_PROPERTIES_V1 NVAPI_COOPERATIVE_VECTOR_PROPERTIES; + +//! Macro for deriving version of NVAPI_COOPERATIVE_VECTOR_PROPERTIES +#define NVAPI_COOPERATIVE_VECTOR_PROPERTIES_VER1 MAKE_NVAPI_VERSION(NVAPI_COOPERATIVE_VECTOR_PROPERTIES_V1,1) +#define NVAPI_COOPERATIVE_VECTOR_PROPERTIES_VER NVAPI_COOPERATIVE_VECTOR_PROPERTIES_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetPhysicalDeviceCooperativeVectorProperties +// +//! DESCRIPTION: Enumerates supported cooperative vector types combinations. +//! If pProperties is `NULL`, then the number of cooperative vector properties available is returned in pPropertyCount. +//! Otherwise, pPropertyCount must point to a variable set by the user to the number of elements in the pProperties array, +//! and on return the variable is overwritten with the number of structures actually written to pProperties. +//! If pPropertyCount is less than the number of cooperative vector properties available, at most pPropertyCount structures will be +//! written, and status will be NVAPI_INSUFFICIENT_BUFFER to indicate that not all the available cooperative vector properties were returned. +//! +//! \since Release: 570 +//! SUPPORTED OS: Windows 10 and higher +//! +//! 
\param [in] pDevice Pointer to the ID3D12Device created by application. +//! \param [inout] propertyCount Number of cooperative vector properties available or queried +//! \param [inout] pProperties Is either `NULL` or a pointer to an array of NVAPI_COOPERATIVE_VECTOR_PROPERTIES structures. +//! +//! \retval NVAPI_OK Properties returned succesfully +//! \retval NVAPI_API_NOT_INITIALIZED NvAPI was not yet initialized. +//! \retval NVAPI_INSUFFICIENT_BUFFER Not an error but all supported properties could not be written into as propertyCount was insufficient. +//! \retval NVAPI_INVALID_POINTER pDevice is NULL. +//! \retval NVAPI_ERROR Generic error. +//! \retval NVAPI_OUT_OF_MEMORY Internal allocation failed. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetPhysicalDeviceCooperativeVectorProperties(__in ID3D12Device *pDevice, __inout NvU32 *pPropertyCount, __inout NVAPI_COOPERATIVE_VECTOR_PROPERTIES* pProperties); +#endif //defined(__cplusplus) && (defined(__d3d12_h__)) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! \ingroup dx +#define NVAPI_D3D12_RAYTRACING_CLAS_BYTE_ALIGNMENT 128 //!< The alignment required for storage of CLAS objects + +//! \ingroup dx +#define NVAPI_D3D12_RAYTRACING_CLUSTER_TEMPLATE_BYTE_ALIGNMENT 32 //!< The alignment required for storage of Cluster Templates objects + +//! \ingroup dx +#define NVAPI_D3D12_RAYTRACING_CLUSTER_TEMPLATE_BOUNDS_BYTE_ALIGNMENT 32 //!< The alignment required for the optional AABB provided to Cluster Template builds + +//! \ingroup dx +#define NVAPI_D3D12_RAYTRACING_MAXIMUM_GEOMETRY_INDEX 16777215 //!< The maximum supported geometry index for use with cluster objects + +//! Enumeration listing permitted flag values for NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS +//! +//! 
\ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAGS +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_NONE = 0x0, //!< No option specified for the multi indirect cluster operation + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_FAST_TRACE = NV_BIT(0), //!< Indicates that the operation should optimize results for trace performance at the cost of build performance + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_FAST_BUILD = NV_BIT(1), //!< Indicates that the operation should optimize results for build performance at the cost of trace performance + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_NO_OVERLAP = NV_BIT(2), //!< For cluster operations that permit input/output overlap this indicates that no such overlap exists, invalid to use when inputs and outputs are overlapping + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_ALLOW_OMM = NV_BIT(3), //!< Specifies that the operation will interact with Cluster BLAS, CLAS or Templates that reference OMMs. All Operation Types & Modes require this field to be set correctly when interacting (building or consuming) objects that contain OMMs. +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAGS; + +//! Enumeration listing permitted geometry flag values for Clusters +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAGS +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAG_NONE = 0x0, //!< No option specified for the argument of this cluster + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAG_ALLOW_DISABLE_OMMS = NV_BIT(0), //!< If set, any instances referencing a Cluster BLAS containing this CLAS are allowed to disable the OMM test through the `NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS` flag. +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAGS; + +//! 
Enumeration listing permitted geometry flag values for Cluster Geometry +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAGS +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAG_NONE = 0x0, //!< No option specified for the argument of this cluster geometry + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAG_CULL_DISABLE = NV_BIT(29), //!< Disables front and back face culling for affected triangles, see D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION = NV_BIT(30), //!< Same behavior as D3D12_RAYTRACING_GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION for non-cluster Geometry + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAG_OPAQUE = NV_BIT(31), //!< Same behavior as D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE for non-cluster Geometry +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAGS; + +//! Enumeration listing possible values for Cluster Operation types +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_MOVE_CLUSTER_OBJECT = 0, //!< Copies/moves CLAS, Cluster BLAS or Templates, use NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_MOVES_DESC as desc. Alignment requirement depends on the type of object moved. + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_BLAS_FROM_CLAS = 1, //!< Constructs Cluster BLAS from arrays of CLAS addresses, use NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_CLAS_DESC as desc. Alignment of Cluster BLAS is a multiple of D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT. 
+ NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLAS_FROM_TRIANGLES = 2, //!< Constructs CLAS from triangle data, use NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC as desc. Alignment of CLAS is a multiple of NVAPI_D3D12_RAYTRACING_CLAS_BYTE_ALIGNMENT. + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLUSTER_TEMPLATES_FROM_TRIANGLES = 3, //!< Constructs Cluster Templates from triangle data, use NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC as desc. Alignment of Cluster Templates is a multiple of NVAPI_D3D12_RAYTRACING_CLUSTER_TEMPLATE_BYTE_ALIGNMENT. + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_INSTANTIATE_CLUSTER_TEMPLATES = 4, //!< Instantiates Cluster Templates to create CLAS results, use NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC as desc. Alignment of CLAS is a multiple of NVAPI_D3D12_RAYTRACING_CLAS_BYTE_ALIGNMENT. +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE; + +//! Specifies the index format to use for cluster operations. +//! The values are chosen to match Vulkan's style of index size (in bytes) for the appropriate format +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT_8BIT = 1, //!< Use 8-bit indices + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT_16BIT = 2, //!< Use 16-bit indices + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT_32BIT = 4, //!< Use 32-bit indices +} NVAPI_3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT; + +//! Enumeration listing possible values for the Cluster Operation mode +//! +//! 
\ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_IMPLICIT_DESTINATIONS = 0, //!< User provides total buffer space, driver places results within, returns VAs and actual sizes + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_EXPLICIT_DESTINATIONS = 1, //!< User provides individual target VAs, driver places them there, returns actual sizes + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_GET_SIZES = 2, //!< Driver returns maximum sizes per element, results may only be used with NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_EXPLICIT_DESTINATIONS +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE; + +//! Desc used for operation type NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_BLAS_FROM_CLAS +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_CLAS_DESC +{ + NvU32 maxTotalClasCount; //!< The maximum total number of CLAS used by all BLAS that will be built by one call (reuse of the same CLAS counts multiple times) + NvU32 maxClasCountPerArg; //!< The maximum number of CLAS used by an individual Arg (equivalent to a BLAS being built). Maximum supported value is D3D12_RAYTRACING_MAXIMUM_PRIMITIVE_COUNT. +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_CLAS_DESC; + +//! Desc used for operation type either NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLAS_FROM_TRIANGLES, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLUSTER_TEMPLATES_FROM_TRIANGLES or NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_INSTANTIATE_CLUSTER_TEMPLATES +//! +//! 
\ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC +{ + NvU32 vertexFormat; //!< A value of `DXGI_FORMAT` describing the vertex format used in the vertex buffer passed for the cluster operation. For a list of supported vertex formats, see D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC::VertexFormat + NvU32 maxGeometryIndexValue; //!< The maximum value of any geometry index that will occur in the results of the cluster operation (including instanced CLAS from templates constructed by it). Maximum supported value is NVAPI_D3D12_RAYTRACING_MAXIMUM_GEOMETRY_INDEX. + NvU32 maxUniqueGeometryCountPerArg; //!< The maximum value number of unique geometry index values that will occur in each individual result of the cluster operation. A value of 0 is treated as a value of 1. Maximum supported value is 256. + NvU32 maxTriangleCountPerArg; //!< The maximum number of triangles that will occur in each individual result of the cluster operation. Maximum supported value is 256. + NvU32 maxVertexCountPerArg; //!< The maximum number of vertices that will occur in each individual result of the cluster operation. Maximum supported value is 256. + NvU32 maxTotalTriangleCount; //!< The maximum total value of summing up the number of triangles that will occur in each individual result of the cluster operation. + NvU32 maxTotalVertexCount; //!< The maximum total value of summing up the number of vertices that will occur in each individual result of the cluster operation. + NvU32 minPositionTruncateBitCount; //!< The minimum number of bits that will be truncated from vertex positions in the arguments of this cluster operation. Maximum supported value is 32. +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC; + +//! Enumeration listing the possible types used by NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_MOVES_DESC +//! +//! 
\ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE_BOTTOM_LEVEL_ACCELERATION_STRUCTURE = 0, //!< The moved objects are Clustered BLAS + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE_CLUSTER_LEVEL_ACCELERATION_STRUCTURE = 1, //!< The moved objects are CLAS + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE_TEMPLATE = 2, //!< The moved objects are Cluster Templates +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE; + +//! Desc used for operation type NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_MOVE_CLUSTER_OBJECT +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVES_DESC +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE type; //!< Determines the type of object to be copied/moved by this cluster operation + NvU32 maxBytesMoved; //!< Determines the maximum total number of bytes copied/moved by the operation (maximum sum of sizes of all copied/moved objects) +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_MOVES_DESC; + +//! Inputs describing the configuration of a cluster operation, used to determine the memory requirement for the operation +//! +//! 
\ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS +{ + NvU32 maxArgCount; //!< Represents the maximum number of arguments to process + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAGS flags; //!< Flags to modify the operation + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE type; //!< The type of multi indirect operation to execute + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE mode; //!< The operation mode executed + + //!< Different arguments depending on type, see NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE for details + union + { + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_CLAS_DESC clasDesc; //!< Use when type is equal to NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_BLAS_FROM_CLAS + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC trianglesDesc; //!< Use when type is equal to one of: NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLAS_FROM_TRIANGLES, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLUSTER_TEMPLATES_FROM_TRIANGLES, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_INSTANTIATE_CLUSTER_TEMPLATES + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_MOVES_DESC movesDesc; //!< Use when type is equal to NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_MOVE_CLUSTER_OBJECT + }; +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS; + +//! Structure describing the memory requirement of a cluster operation +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO +{ + NvU64 resultDataMaxSizeInBytes; //!< Allocated size required to hold the result of the multi indirect operation based on the specified inputs. + //!< For NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_EXPLICIT_DESTINATIONS this will be one per object. 
+ //!< For NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_IMPLICIT_DESTINATIONS this will be for all objects in the given call. + //!< For NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_GET_SIZES always 0. + NvU64 scratchDataSizeInBytes; //!< Scratch storage on GPU required during multi indirect operation based on the specified inputs. +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO; + +//! Parameter structure for NvAPI_D3D12_GetRaytracingMultiIndirectClusterOperationRequirementsInfo +//! +//! \ingroup dx +typedef struct _NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_PREBUILD_INFO_PARAMS_VER. + const NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS* pInput; //!< [in] Description of the multi indirect operation. + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO* pInfo; //!< [out] Result of the query. +} NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_V1; +#define NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_V1, 1) +typedef NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_V1 NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS; +#define NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_VER NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetRaytracingMultiIndirectClusterOperationRequirementsInfo +// +//! 
DESCRIPTION: Function call used to determine the memory requirements for a future call to NvAPI_D3D12_RaytracingExecuteMultiIndirectClusterOperation +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 560 +//! +//! \param [in] pDevice The D3D device that owns the command list the future call to NvAPI_D3D12_RaytracingExecuteMultiIndirectClusterOperation will be made from +//! \param [in] pParams API parameters +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as pDevice, pParams, pParams->pInput +//! or pParams->pInfo argument +//! \retval ::NVAPI_INVALID_ARGUMENT The pParams->pInfo parameter was set in an invalid way +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingMultiIndirectClusterOperationRequirementsInfo( + __in ID3D12Device5* pDevice, + __inout const NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS* pParams); + +//! Argument structure used on device for operation type NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_BLAS_FROM_CLAS +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_CLUSTER_ARGS +{ + NvU32 clusterCount; //!< [in] The size of the array referenced by clusterVAs. Must be less than or equal to NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_CLAS_DESC::maxClasCountPerArg + NvU32 reserved; //!< [in] Reserved, must be 0 + D3D12_GPU_VIRTUAL_ADDRESS clusterVAs; //!< [in] Address of an array of D3D12_GPU_VIRTUAL_ADDRESS holding valid addresses of CLAS previously constructed. 8 byte stride/alignment. +} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_CLUSTER_ARGS; + +//! 
Argument structure used on device for operation type NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLAS_FROM_TRIANGLES +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_CLUSTER_ARGS +{ + NvU32 clusterId; //!< [in] The user specified cluster Id to encode in the CLAS. + NvU32 clusterFlags; //!< [in] Values of NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAGS to use as Cluster Flags. + NvU32 triangleCount : 9; //!< [in] The number of triangles used by the CLAS (max NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::maxTriangleCountPerArg). + NvU32 vertexCount : 9; //!< [in] The number of vertices used by the CLAS (max NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::maxVertexCountPerArg). + NvU32 positionTruncateBitCount : 6; //!< [in] The number of bits to truncate from the position values (min NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::minPositionTruncateBitCount). + NvU32 indexFormat : 4; //!< [in] The index format to use for the indexBuffer, see NVAPI_3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT for possible values. + NvU32 opacityMicromapIndexFormat : 4; //!< [in] The index format to use for the opacityMicromapIndexBuffer, see NVAPI_3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT for possible values. + NvU32 baseGeometryIndexAndFlags; //!< [in] The base geometry index (lower 24 bit) and base geometry flags (NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAGS), see geometryIndexBuffer. + NvU16 indexBufferStride; //!< [in] The stride of the elements of indexBuffer, in bytes. If set to 0, will use index size as stride. + NvU16 vertexBufferStride; //!< [in] The stride of the elements of vertexBuffer, in bytes. If set to 0, will use vertex size as stride. 
+ NvU16 geometryIndexAndFlagsBufferStride; //!< [in] The stride of the elements of geometryIndexBuffer, in bytes. If set to 0, will use 4 byte size as stride. + NvU16 opacityMicromapIndexBufferStride; //!< [in] The stride of the elements of opacityMicromapIndexBuffer, in bytes. If set to 0, will use index size as stride. + D3D12_GPU_VIRTUAL_ADDRESS indexBuffer; //!< [in] The index buffer to construct the CLAS. The memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS vertexBuffer; //!< [in] The vertex buffer to construct the CLAS. The memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS geometryIndexAndFlagsBuffer; //!< [in] (optional) Address of an array of 32-bit geometry indices and geometry flags with size equal to the triangle count. + //!< Each 32-bit value is organized the same as baseGeometryIndexAndFlags. + //!< If non-zero, the geometry indices of the CLAS triangles will be equal to the lower 24-bit of geometryIndexBuffer[triangleIndex] + baseGeometryIndex. + //!< If non-zero, the geometry flags for each triangle will be the bitwise OR of the flags in the upper 8 bits of baseGeometryIndex and geometryIndexBuffer[triangleIndex]. + //!< Otherwise all triangles will have a geometry index equal to baseGeometryIndexAndFlags. + //!< The number of unique elements may not exceed NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::maxUniqueGeometryCountPerArg. + //!< If used, the memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray; //!< [in] (optional) Address of a valid OMM array, if used NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_ALLOW_OMM must be set on this and all other cluster operation calls interacting with the object(s) constructed. 
+ D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapIndexBuffer; //!< [in] (optional) Address of an array of indices into the OMM array. Note that an additional OMM special index is reserved and can be used to turn off OMM for specific triangles. +} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_CLUSTER_ARGS; + +//! Argument structure used on device for operation type NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLUSTER_TEMPLATES_FROM_TRIANGLES +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_TEMPLATE_ARGS +{ + NvU32 clusterId; //!< [in] The user specified cluster Id to encode in the cluster template. + NvU32 clusterFlags; //!< [in] Values of NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAGS to use as Cluster Flags. + NvU32 triangleCount : 9; //!< [in] The number of triangles used by the cluster template (max NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::maxTriangleCountPerArg). + NvU32 vertexCount : 9; //!< [in] The number of vertices used by the cluster template (max NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::maxTriangleCountPerArg). + NvU32 positionTruncateBitCount : 6; //!< [in] The number of bits to truncate from the position values (min NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::minPositionTruncateBitCount). + NvU32 indexFormat : 4; //!< [in] The index format to use for the indexBuffer, see NVAPI_3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT for possible values. + NvU32 opacityMicromapIndexFormat : 4; //!< [in] The index format to use for the opacityMicromapIndexBuffer, see NVAPI_3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT for possible values. 
+ NvU32 baseGeometryIndexAndFlags; //!< [in] The base geometry index (lower 24 bit) and base geometry flags (NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAGS), see geometryIndexBuffer. + NvU16 indexBufferStride; //!< [in] The stride of the elements of indexBuffer, in bytes. If set to 0, will use index size as stride. + NvU16 vertexBufferStride; //!< [in] The stride of the elements of vertexBuffer, in bytes. If set to 0, will use vertex size as stride. + NvU16 geometryIndexAndFlagsBufferStride; //!< [in] The stride of the elements of geometryIndexBuffer, in bytes. If set to 0, will use 4 byte size as stride. + NvU16 opacityMicromapIndexBufferStride; //!< [in] The stride of the elements of opacityMicromapIndexBuffer, in bytes. If set to 0, will use index size as stride. + D3D12_GPU_VIRTUAL_ADDRESS indexBuffer; //!< [in] The index buffer to construct the cluster template. The memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS vertexBuffer; //!< [in] (optional) The vertex buffer to optimize the cluster template, the vertices will not be stored in the cluster template. If used, the memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS geometryIndexAndFlagsBuffer; //!< [in] (optional) Address of an array of 32-bit geometry indices and geometry flags with size equal to the triangle count. + //!< Each 32-bit value is organized the same as baseGeometryIndexAndFlags. + //!< If non-zero, the geometry indices of the CLAS triangles will be equal to the lower 24-bit of geometryIndexBuffer[triangleIndex] + baseGeometryIndex + geometryIndexOffset. + //!< If non-zero, the geometry flags for each triangle will be the bitwise OR of the flags in the upper 8 bits of baseGeometryIndex and geometryIndexBuffer[triangleIndex]. + //!< Otherwise all triangles will have a geometry index equal to baseGeometryIndexAndFlags + geometryIndexOffset. 
+ //!< The number of unique elements may not exceed NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC::maxUniqueGeometryCountPerArg. + //!< If used, the memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray; //!< [in] (optional) Address of a valid OMM array, if used NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_ALLOW_OMM must be set on this and all other cluster operation calls interacting with the object(s) constructed. + D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapIndexBuffer; //!< [in] (optional) Address of an array of indices into the OMM array. Note that an additional OMM special index is reserved and can be used to turn off OMM for specific triangles. + D3D12_GPU_VIRTUAL_ADDRESS instantiationBoundingBoxLimit; //!< [in] (optional) Pointer to 6 floats with alignment NVAPI_D3D12_RAYTRACING_CLUSTER_TEMPLATE_BOUNDS_BYTE_ALIGNMENT representing the limits of the positions of any vertices the template will ever be instantiated with. If used, the memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. +} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_TEMPLATE_ARGS; + +//! Argument structure used on device for operation type NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_INSTANTIATE_CLUSTER_TEMPLATES +//! +//! 
\ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_INSTANTIATE_TEMPLATE_ARGS +{ + NvU32 clusterIdOffset; //!< [in] The offset added to the clusterId stored in the Cluster template to calculate the final clusterId that will be written to the instantiated CLAS + NvU32 geometryIndexOffset; //!< [in] The offset added to the geometry index stored for each triangle in the Cluster template to calculate the final geometry index that will be written to the triangles of the instantiated CLAS, the resulting value may not exceed maxGeometryIndexValue both of this call, and the call used to construct the original cluster template referenced + D3D12_GPU_VIRTUAL_ADDRESS clusterTemplate; //!< [in] Address of a previously built cluster template to be instantiated. + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE vertexBuffer; //!< [in] Vertex buffer with stride to use to fetch the vertex positions used for instantiation. May be NULL only when used with NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_GET_SIZES, which will cause the maximum size for all possible vertex inputs to be returned. If used, the memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. +} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_INSTANTIATE_TEMPLATE_ARGS; + +//! Argument structure used on device for operation type NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_MOVE_CLUSTER_OBJECT +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_MOVE_ARGS +{ + D3D12_GPU_VIRTUAL_ADDRESS srcAccelerationStructure; //!< [in] The address of the object to copy/move. The source object will not become clobbered unless the destinations of the call overlap with it. +} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_MOVE_ARGS; + + +//! Enumeration listing permitted address resolution flag values for NvAPI_D3D12_RaytracingMultiIndirectClusterOperation +//! 
Unless flags are set, each address in NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_DESC will point directly to the data value or array of data values +//! By setting these flags the behavior is changed (one flag per field) for the field to be interpreted as containing the address of a piece of memory holding the address to the data value or array of data values. +//! If used, the memory for any intermediate references must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. The requirements for the final argument contents remain unchanged. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAGS +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_NONE = 0x0, //!< Interpret all addresses as direct references to the destination data + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_INDIRECTED_BATCH_RESULT = NV_BIT(0), //!< Interpret the batchResultData as containing the device address of a D3D12_GPU_VIRTUAL_ADDRESS containing the address of data + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_INDIRECTED_BATCH_SCRATCH = NV_BIT(1), //!< Interpret the batchScratchData as containing the device address of a D3D12_GPU_VIRTUAL_ADDRESS containing the address of data + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_INDIRECTED_DESTINATION_ADDRESS_ARRAY = NV_BIT(2), //!< Interpret the destinationAddressArray as containing the device address of a D3D12_GPU_VIRTUAL_ADDRESS containing the address of data + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_INDIRECTED_RESULT_SIZE_ARRAY = NV_BIT(3), //!< Interpret the resultSizeArray as containing the device address of a D3D12_GPU_VIRTUAL_ADDRESS containing the address of data + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_INDIRECTED_INDIRECT_ARG_ARRAY = 
NV_BIT(4), //!< Interpret the indirectArgArray as containing the device address of a D3D12_GPU_VIRTUAL_ADDRESS containing the address of data + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_INDIRECTED_INDIRECT_ARG_COUNT = NV_BIT(5), //!< Interpret the indirectArgCount as containing the device address of a D3D12_GPU_VIRTUAL_ADDRESS containing the address of data +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAGS; + +//! Describes the parameters for a call to NvAPI_D3D12_RaytracingExecuteMultiIndirectClusterOperation +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_DESC +{ + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS inputs; //!< [in] The inputs previously used with NvAPI_D3D12_GetRaytracingMultiIndirectClusterOperationRequirementsInfo to determine the memory requirement for the operation + NvU32 addressResolutionFlags; //!< [in] Flags to control the resolution of address references by this structure (NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAGS) + D3D12_GPU_VIRTUAL_ADDRESS batchResultData; //!< [out] If inputs.mode is equal to NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_IMPLICIT_DESTINATIONS this must point to an memory that is sufficient for resultDataMaxSizeInBytes, otherwise ignored. If set, must be in D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE state, and aligned depending on the object type being processed. + D3D12_GPU_VIRTUAL_ADDRESS batchScratchData; //!< [in] Must point to memory that is sufficient for scratchDataSizeInBytes, with alignment of D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT and in D3D12_RESOURCE_STATE_UNORDERED_ACCESS state. May be NULL only if scratchDataSizeInBytes was 0 + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE destinationAddressArray; //!< [inout] Address and stride of an array of D3D12_GPU_VIRTUAL_ADDRESS. 
If inputs.mode is NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_IMPLICIT_DESTINATIONS this will be filled out by the call, otherwise if inputs.mode is NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_EXPLICIT_DESTINATIONS each element of the array must contain the destination address with sufficient memory for either resultDataMaxSizeInBytes or a previous call with NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_GET_SIZES. + //!< The array must be in D3D12_RESOURCE_STATE_UNORDERED_ACCESS state. If inputs.mode is NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_EXPLICIT_DESTINATIONS the addresses referenced by the array must be in D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE state. + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE resultSizeArray; //!< [out] Address and stride of an array of 32bit values. If inputs.mode is equal to NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_EXPLICIT_DESTINATIONS, will be populated by the call with the projected sizes of each result object based on the provided input. Otherwise, this field is optional and will be populated with the sizes of the objects written to destinationAddressArray. Must be in D3D12_RESOURCE_STATE_UNORDERED_ACCESS state. + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE indirectArgArray; //!< [in] Address and stride of an array of type determined by inputs.type, see NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_*_ARGS. Structures must be tightly packed and aligned to the default C structure alignment of the structures. The memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS indirectArgCount; //!< [in] Determines the argument count, if 0 the value of inputs.maxArgCount will be used instead. If non-zero, the arrays in destinationAddressArray, resultSizeArray and indirectArgArray must all be equal to the argument count. 
The memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. +} NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_DESC; + +//! Parameters given to NvAPI_D3D12_RaytracingMultiIndirectClusterOperation(). +//! +//! \ingroup dx +typedef struct _NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_VER. + const NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_DESC* pDesc; //!< [in] Description of the multi indirect operation. +} NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_V1; +#define NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_V1, 1) +typedef NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_V1 NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS; +#define NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_VER NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_RaytracingExecuteMultiIndirectClusterOperation +// +//! DESCRIPTION: Execute a multi indirect cluster operation +//! The CPU-side input buffers are not referenced after this call. +//! The GPU-side input resources are not referenced after the build has concluded after ExecuteCommandList(). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 560 +//! +//! \param [in] pCommandList DX command list +//! \param [in] pParams API parameters +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! 
\retval ::NVAPI_INVALID_POINTER A null pointer was passed as command list, pParams or a required field within the pParams argument +//! \retval ::NVAPI_INVALID_ARGUMENT The pParams parameter was set in an invalid way +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_RaytracingExecuteMultiIndirectClusterOperation( + __in ID3D12GraphicsCommandList4* pCommandList, + __in const NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS* pParams); + +//! Enumeration listing permitted flag values for NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_INPUTS +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAGS +{ + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAG_NONE = 0x0, //!< No option specified for the the partitioned TLAS + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAG_FAST_TRACE = NV_BIT(0), //!< Optimize the Partitioned TLAS for fast trace performance, mutually exclusive with NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAG_FAST_BUILD + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAG_FAST_BUILD = NV_BIT(1), //!< Optimize the Partitioned TLAS for fast update/build performance, mutually exclusive with NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAG_FAST_TRACE + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAG_ENABLE_PARTITION_TRANSLATION = NV_BIT(2), //!< Enable the partition translation feature of the Partitioned TLAS +} NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAGS; + +//! Inputs describing the configuration of a Partitioned TLAS, used to determine the memory requirement structure and updates +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_INPUTS +{ + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAGS flags; //!< See NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_FLAGS + NvU32 instanceCount; //!< The instance count of the Partitioned TLAS. Maximum supported value is 2^24. 
+ NvU32 maxInstancePerPartitionCount; //!< The maximum number of instances that will ever be referenced by any single partition (excluding the global partition) of the Partitioned TLAS. Maximum supported value is 2^24. + NvU32 partitionCount; //!< The number of partitions of the Partitioned TLAS. Maximum supported value is 2^24. + NvU32 maxInstanceInGlobalPartitionCount; //!< The maximum number of instances that will ever be referenced by the global partition of the Partitioned TLAS. Maximum supported value is 2^24. +} NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_INPUTS; + +//! Structure describing the memory requirement of a Partitioned TLAS +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO +{ + NvU64 resultDataMaxSizeInBytes; //!< Size of the resulting Partitioned TLAS in bytes based on the specified inputs + NvU64 scratchDataSizeInBytes; //!< Scratch storage on GPU required during builds/updates of the Partitioned TLAS based on the specified inputs +} NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO; + +//! Parameter structure for NvAPI_D3D12_GetRaytracingPartitionedTlasIndirectPrebuildInfo +//! +//! \ingroup dx +typedef struct _NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_INPUTS* pInput; //!< [in] Description of the partitioned TLAS build + NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO* pInfo; //!< [out] Result of the query. 
+} NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_V1; +#define NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_V1, 1) +typedef NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_V1 NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS; +#define NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_VER NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetRaytracingPartitionedTlasIndirectPrebuildInfo +// +//! DESCRIPTION: Function call used to determine the memory requirements for a Partitioned TLAS +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 560 +//! +//! \param [in] pDevice The D3D device that will own the Partitioned TLAS +//! \param [in,out] pParams API parameters +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as device or pParams argument +//! \retval ::NVAPI_INVALID_ARGUMENT The pParams parameter was set in an invalid way +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingPartitionedTlasIndirectPrebuildInfo( + __in ID3D12Device5* pDevice, + __inout const NVAPI_GET_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PREBUILD_INFO_PARAMS* pParams); + +//! Enumeration listing permitted instance flag values for NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_WRITE_INSTANCE +//! +//! 
\ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAGS +{ + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_NONE = 0x0, //!< [in] No options specified. + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE = NV_BIT(0), //!< [in] Identical to D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE = NV_BIT(1), //!< [in] Identical to D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_FORCE_OPAQUE = NV_BIT(2), //!< [in] Identical to D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OPAQUE + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_FORCE_NON_OPAQUE = NV_BIT(3), //!< [in] Identical to D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_NON_OPAQUE + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_ENABLE_EXPLICIT_AABB = NV_BIT(4), //!< [in] Enables the usage of explicitly provided partition-space (if partition translation is enabled) or world-space bounds, reduces cost of NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_UPDATE_PARTITION for affected instances +} NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAGS; + +//! Enumeration listing special reserved values for partition indices +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_PARTITION_INDEX +{ + NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_PARTITION_INDEX_GLOBAL_PARTITION = 0xffffffff, //!< [in] Used to select the global partition for a partition write operation +} NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_PARTITION_INDEX; + +//! Type determining what the type of an operation to apply to a partitioned TLAS, determines which argument structure is used +//! +//! 
\ingroup dx +enum NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_TYPE +{ + NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_WRITE_INSTANCE = 0, //!< [in] Write instance data, use with NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_WRITE_INSTANCE + NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_UPDATE_INSTANCE = 1, //!< [in] Update instance data, use with NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_UPDATE_INSTANCE + NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_WRITE_PARTITION = 2, //!< [in] Write partition data, use with NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_WRITE_PARTITION +}; + +//! Argument structure for operation type NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_WRITE_INSTANCE +//! +//! \ingroup dx +struct NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_WRITE_INSTANCE +{ + NvF32 transform[3][4]; //!< [in] Identical to Transform in D3D12_RAYTRACING_INSTANCE_DESC, except if set to all zero or if any value is NaN the instance is considered degenerate and will not be added to the resulting accelerations structure. + NvF32 userAABB[6]; //!< [in] 6 floating point values representing the min/max bounds of a world or partition space bounds of the vertices of the acceleration structure, ignored if NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_ENABLE_EXPLICIT_AABB is not set + NvU32 instanceID; //!< [in] Identical to InstanceID in D3D12_RAYTRACING_INSTANCE_DESC + NvU32 instanceMask; //!< [in] Identical to InstanceMask in D3D12_RAYTRACING_INSTANCE_DESC + NvU32 instanceContributionToHitGroupIndex; //!< [in] Identical to InstanceContributionToHitGroupIndex in D3D12_RAYTRACING_INSTANCE_DESC + NvU32 instanceFlags; //!< [in] See NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAGS + NvU32 instanceIndex; //!< [in] Select which instance to write with this argument struct, must be less than the instanceCount of the Partitioned TLAS. 
Each partition index may only be referenced by one instance write or update argument for each build call. + NvU32 partitionIndex; //!< [in] Select the partition the instance is part of. Must be less than the partitionCount of the Partitioned TLAS. + D3D12_GPU_VIRTUAL_ADDRESS accelerationStructure; //!< [in] Acceleration structure to set, see AccelerationStructure in D3D12_RAYTRACING_INSTANCE_DESC. If NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_ENABLE_EXPLICIT_AABB was used, the existing bounds must cover the assigned acceleration structure (after transform is applied) + //!< When NULL the instance is inactive but still included in the build, it will not participate in any trace operations but may become active again when a non-NULL accelerationStructure is assigned to it. +}; + +//! Argument structure for operation type NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_UPDATE_INSTANCE +//! +//! \ingroup dx +struct NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_UPDATE_INSTANCE +{ + NvU32 instanceIndex; //!< [in] Select which instance to update with this argument struct, must be less than the instanceCount of the Partitioned TLAS. Each partition index may only be referenced by one instance write or update argument for each build call. + NvU32 instanceContributionToHitGroupIndex; //!< [in] Identical to InstanceContributionToHitGroupIndex in D3D12_RAYTRACING_INSTANCE_DESC + D3D12_GPU_VIRTUAL_ADDRESS accelerationStructure; //!< [in] New acceleration structure to set, see AccelerationStructure in D3D12_RAYTRACING_INSTANCE_DESC. If NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_INSTANCE_FLAG_ENABLE_EXPLICIT_AABB was used, the existing bounds must cover the newly assigned acceleration structure (after transform is applied) + //!< When NULL the instance is inactive but still included in the build, it will not participate in any trace operations but may become active again when a non-NULL accelerationStructure is assigned to it. +}; + +//! 
Argument structure for operation type NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_WRITE_PARTITION +//! +//! \ingroup dx +typedef struct NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_WRITE_PARTITION +{ + NvU32 partitionIndex; //!< [in] Select which partition to write with this argument struct, must be either less than partition count or equal to NVAPI_D3D12_RAYTRACING_PARTITIONED_TLAS_PARTITION_INDEX_GLOBAL_PARTITION. Each partition index may only be referenced by one argument for each build call. + NvF32 partitionTranslation[3]; //!< [in] The partition translation, all instances within this partition are translated by the X, Y, Z coordinates provided by this vector +} NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_ARG_WRITE_PARTITION; + +//! Describes an individual operation described by a type and array of parameters to apply as a modification to a Partitioned TLAS +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP +{ + NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_TYPE type; //!< [in] Identify the type of operation to apply, determines the type of the array referenced by data. See NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP_TYPE. Each type may only be referenced once by each call to NvAPI_D3D12_BuildRaytracingPartitionedTlasIndirect + NvU32 count; //!< [in] The number of elements of the argument array referenced by data + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE data; //!< [in] The address and stride of a device array of argument structures determined by type. Must be aligned based on C structure alignment requirements of the referenced structure. +} NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP; + +//! Describes the parameters for building a Partitioned TLAS +//! +//! 
\ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_DESC +{ + NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_INPUTS inputs; //!< [in] The inputs previously used with NvAPI_D3D12_GetRaytracingPartitionedTlasIndirectPrebuildInfo to determine the memory requirement for the Partitioned TLAS + D3D12_GPU_VIRTUAL_ADDRESS srcAccelerationStructureData; //!< [in] Either 0 to construct a new Partitioned TLAS or the pointer to a previously constructed Partitioned TLAS, all its content will be inherited. If set, must be in state D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE and aligned to D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT. + D3D12_GPU_VIRTUAL_ADDRESS destAccelerationStructureData; //!< [in] The destination address to construct the new Partitioned TLAS, if it overlaps with the srcAccelerationStructureData address, the previous Partitioned TLAS will become invalid. Must have enough memory as determined by NvAPI_D3D12_GetRaytracingPartitionedTlasIndirectPrebuildInfo. Must be in state D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE and aligned to D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT. + D3D12_GPU_VIRTUAL_ADDRESS scratchAccelerationStructureData; //!< [in] Address of scratch memory used during the build call, size must be determined by NvAPI_D3D12_GetRaytracingPartitionedTlasIndirectPrebuildInfo. Must be in state D3D12_RESOURCE_STATE_UNORDERED_ACCESS and aligned to D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT. + D3D12_GPU_VIRTUAL_ADDRESS indirectOpCount; //!< [in] Address of device memory containing the 32-bit unsigned integer containing the size of the indirectOps array. The memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. + D3D12_GPU_VIRTUAL_ADDRESS indirectOps; //!< [in] Address of device memory containing the array of NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_OP with size equal to the value referenced by indirectOpCount. 
The memory pointed to must be in state D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE. +} NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_DESC; + +//! Parameters given to NvAPI_D3D12_BuildRaytracingPartitionedTlasIndirect(). +//! +//! \ingroup dx +typedef struct _NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_VER. + const NVAPI_D3D12_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_DESC* pDesc; //!< [in] Description of the Partitioned TLAS build. +} NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_V1; +#define NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_V1, 1) +typedef NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_V1 NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS; +#define NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_VER NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingPartitionedTlasIndirect +// +//! DESCRIPTION: Builds a Partitioned TLAS +//! The CPU-side input buffers are not referenced after this call. +//! The GPU-side input resources are not referenced after the build has concluded after ExecuteCommandList(). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 560 +//! +//! \param [in] pCommandList DX command list +//! \param [in] pParams API parameters +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \retval ::NVAPI_OK Completed request +//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as command list or pParams argument +//! 
\retval ::NVAPI_INVALID_ARGUMENT The pParams parameter was set in an invalid way +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingPartitionedTlasIndirect( + __in ID3D12GraphicsCommandList4* pCommandList, + __in const NVAPI_BUILD_RAYTRACING_PARTITIONED_TLAS_INDIRECT_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//----------------------------------------------------------------------------- +// NGX APIs +//----------------------------------------------------------------------------- + +typedef enum { + NV_NGX_DLSS_OVERRIDE_FLAG_INITIALIZED = NV_BIT(0), + NV_NGX_DLSS_OVERRIDE_FLAG_ENABLED = NV_BIT(1), + NV_NGX_DLSS_OVERRIDE_FLAG_DLL_EXISTS = NV_BIT(2), + NV_NGX_DLSS_OVERRIDE_FLAG_DLL_LOADED = NV_BIT(3), + NV_NGX_DLSS_OVERRIDE_FLAG_DLL_SELECTED = NV_BIT(4), + NV_NGX_DLSS_OVERRIDE_FLAG_PRESET = NV_BIT(5), + NV_NGX_DLSS_OVERRIDE_FLAG_PERF_MODE = NV_BIT(6), + NV_NGX_DLSS_OVERRIDE_FLAG_SCALING_RATIO = NV_BIT(7), + NV_NGX_DLSS_OVERRIDE_FLAG_OPTIMAL_SETTINGS = NV_BIT(8), + NV_NGX_DLSS_OVERRIDE_FLAG_CREATED = NV_BIT(9), + NV_NGX_DLSS_OVERRIDE_FLAG_EVALUATE = NV_BIT(10), + + NV_NGX_DLSS_OVERRIDE_FLAG_SR_DLAA_MODE = NV_BIT(14), + NV_NGX_DLSS_OVERRIDE_FLAG_FG_MULTI_FRAME = NV_BIT(15), + + NV_NGX_DLSS_OVERRIDE_FLAG_ERR_FAILED = NV_BIT(16), + NV_NGX_DLSS_OVERRIDE_FLAG_ERR_DENIED = NV_BIT(17), + NV_NGX_DLSS_OVERRIDE_FLAG_ERR_DRS = NV_BIT(18), + NV_NGX_DLSS_OVERRIDE_FLAG_ERR_NOT_FOUND = NV_BIT(19), + NV_NGX_DLSS_OVERRIDE_FLAG_ERR_DLL_LOAD = NV_BIT(20), +} NV_NGX_DLSS_OVERRIDE_BITFIELD; + +#define NV_NGX_DLSS_OVERRIDE_FEATURE_INDEX_SR 1 +#define NV_NGX_DLSS_OVERRIDE_FEATURE_INDEX_RR 2 +#define NV_NGX_DLSS_OVERRIDE_FEATURE_INDEX_FG 3 + +typedef struct _NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version + NvU32 processIdentifier; //!< [in] Process Identifier + NvU64 
feedbackMaskSR; //!< [out] Feedback bits for Super Resolution + NvU64 feedbackMaskRR; //!< [out] Feedback bits for Ray Reconstruction + NvU64 feedbackMaskFG; //!< [out] Feedback bits for Frame Generation + NvF32 scalingRatio; //!< [out] Scaling Ratio + NvU32 performanceMode; //!< [out] Performance Mode + NvU32 renderPreset; //!< [out] Render Preset for SR/RR + NvU32 frameGenerationCount; //!< [out] FG Override Frame Count Target + NvU64 reserved[2]; //!< Reserved for future use. Must be zero. +} NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS_V1; + +#define NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS_V1, 1) +#define NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS_VER NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS_VER1 +typedef NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS_V1 NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS; +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_NGX_GetNGXOverrideState +// +//! DESCRIPTION: This API returns the current state of NGX Override for a given process. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! +//! \since Release: 570 +//! +//! \param [inout] pGetOverrideStateParams //!< [inout] Sets listening version and PID returns state flags for active features +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_NGX_GetNGXOverrideState(__inout NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS* pGetOverrideStateParams); + +typedef struct _NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version + NvU32 processIdentifier; //!< [in] Process identifier of caller + NvU32 feature; //!< [in] DLSS feature ID + NvU64 feedbackMask; //!< [in] Feedback bits for DLSS feature + NvU64 reserved[4]; //!< Reserved for future use. Must be zero. 
+} NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS_V1; + +#define NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS_V1, 1) +#define NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS_VER NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS_VER1 +typedef NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS_V1 NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS; +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_NGX_SetNGXOverrideState +// +//! DESCRIPTION: This API sets the state of NGX Override from a filtered process. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! +//! \since Release: 570 +//! +//! \param [in] pSetOverrideStateParams //!< [in] Sets the override state using a preselected PID +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_NGX_SetNGXOverrideState(__in NV_NGX_DLSS_OVERRIDE_SET_STATE_PARAMS* pSetOverrideStateParams); + + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(__d3d12_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// NVAPI DIRECT TYPE NAME: INvAPI_DirectD3D12GraphicsCommandList +// +/////////////////////////////////////////////////////////////////////////////// +class INvAPI_DirectD3D12GraphicsCommandList +{ +public: + virtual bool IsValid() const = 0; + virtual ID3D12GraphicsCommandList* GetID3D12GraphicsCommandList() const = 0; + + void DispatchGraphics(NvU32 numDispatches); + void SetMarker(void* pMarkerData, NvU32 markerSize); +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// NVAPI DIRECT FUNCTION NAME: NvAPI_DirectD3D12GraphicsCommandList_Create +// +//! DESCRIPTION: Create the NvAPI_DirectD3D12GraphicsCommandList handle. +//! 
This function must be called after ID3D12Device::CreateCommandList. +//! +//! \param [in] pDXD3D12GraphicsCommandList The ID3D12GraphicsCommandList +//! \param [out] ppReturnD3D12GraphicsCommandList The corresponding NvAPI_DirectD3D12GraphicsCommandList handle +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DirectD3D12GraphicsCommandList_Create(__in ID3D12GraphicsCommandList *pDXD3D12GraphicsCommandList, + __out INvAPI_DirectD3D12GraphicsCommandList **ppReturnD3D12GraphicsCommandList); + +/////////////////////////////////////////////////////////////////////////////// +// +// NVAPI DIRECT FUNCTION NAME: NvAPI_DirectD3D12GraphicsCommandList_Release +// +//! DESCRIPTION: release the NvAPI_DirectD3D12GraphicsCommandList handle. +//! +//! \param [in] pD3D12GraphicsCommandList The NvAPI_DirectD3D12GraphicsCommandList handle to release +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DirectD3D12GraphicsCommandList_Release(__in INvAPI_DirectD3D12GraphicsCommandList *pD3D12GraphicsCommandList); + + +/////////////////////////////////////////////////////////////////////////////// +// +// NVAPI DIRECT FUNCTION NAME: NvAPI_DirectD3D12GraphicsCommandList_Reset +// +//! DESCRIPTION: reset the NvAPI_DirectD3D12GraphicsCommandList handle. +//! This function must be called after ID3D12GraphicsCommandList::Reset() and before any other NvAPI_Direct +//! function calls such as dispatchGraphics() etc. +//! +//! \param [in] pD3D12GraphicsCommandList The NvAPI_DirectD3D12GraphicsCommandList handle to reset +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DirectD3D12GraphicsCommandList_Reset(__in INvAPI_DirectD3D12GraphicsCommandList *pD3D12GraphicsCommandList); +#endif + + + +///////////////////////////////////////////////////////////////////////// +// Video Input Output (VIO) API +///////////////////////////////////////////////////////////////////////// + + + +//! \ingroup vidio +//! Unique identifier for VIO owner (process identifier or NVVIOOWNERID_NONE) +typedef NvU32 NVVIOOWNERID; + + +//! \addtogroup vidio +//! @{ + + +#define NVVIOOWNERID_NONE 0 //!< Unregistered ownerId + + +//! Owner type for device +typedef enum _NVVIOOWNERTYPE +{ + NVVIOOWNERTYPE_NONE , //!< No owner for the device + NVVIOOWNERTYPE_APPLICATION , //!< Application owns the device + NVVIOOWNERTYPE_DESKTOP , //!< Desktop transparent mode owns the device (not applicable for video input) +}NVVIOOWNERTYPE; + +// Access rights for NvAPI_VIO_Open() + +//! Read access (not applicable for video output) +#define NVVIO_O_READ 0x00000000 + +//! Write exclusive access (not applicable for video input) +#define NVVIO_O_WRITE_EXCLUSIVE 0x00010001 + +//! +#define NVVIO_VALID_ACCESSRIGHTS (NVVIO_O_READ | \ + NVVIO_O_WRITE_EXCLUSIVE ) + + +//! VIO_DATA.ulOwnerID high-bit is set only if device has been initialized by VIOAPI +//! examined at NvAPI_GetCapabilities|NvAPI_VIO_Open to determine if settings need to be applied from registry or POR state read +#define NVVIO_OWNERID_INITIALIZED 0x80000000 + +//! VIO_DATA.ulOwnerID next-bit is set only if device is currently in exclusive write access mode from NvAPI_VIO_Open() +#define NVVIO_OWNERID_EXCLUSIVE 0x40000000 + +//! VIO_DATA.ulOwnerID lower bits are: +//! NVGVOOWNERTYPE_xxx enumerations indicating use context +#define NVVIO_OWNERID_TYPEMASK 0x0FFFFFFF //!< mask for NVVIOOWNERTYPE_xxx + + +//! 
@} + +//--------------------------------------------------------------------- +// Enumerations +//--------------------------------------------------------------------- + + +//! \addtogroup vidio +//! @{ + +//! Video signal format and resolution +typedef enum _NVVIOSIGNALFORMAT +{ + NVVIOSIGNALFORMAT_NONE, //!< Invalid signal format + NVVIOSIGNALFORMAT_487I_59_94_SMPTE259_NTSC, //!< 01 487i 59.94Hz (SMPTE259) NTSC + NVVIOSIGNALFORMAT_576I_50_00_SMPTE259_PAL, //!< 02 576i 50.00Hz (SMPTE259) PAL + NVVIOSIGNALFORMAT_1035I_60_00_SMPTE260, //!< 03 1035i 60.00Hz (SMPTE260) + NVVIOSIGNALFORMAT_1035I_59_94_SMPTE260, //!< 04 1035i 59.94Hz (SMPTE260) + NVVIOSIGNALFORMAT_1080I_50_00_SMPTE295, //!< 05 1080i 50.00Hz (SMPTE295) + NVVIOSIGNALFORMAT_1080I_60_00_SMPTE274, //!< 06 1080i 60.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080I_59_94_SMPTE274, //!< 07 1080i 59.94Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080I_50_00_SMPTE274, //!< 08 1080i 50.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080P_30_00_SMPTE274, //!< 09 1080p 30.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080P_29_97_SMPTE274, //!< 10 1080p 29.97Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080P_25_00_SMPTE274, //!< 11 1080p 25.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080P_24_00_SMPTE274, //!< 12 1080p 24.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080P_23_976_SMPTE274, //!< 13 1080p 23.976Hz (SMPTE274) + NVVIOSIGNALFORMAT_720P_60_00_SMPTE296, //!< 14 720p 60.00Hz (SMPTE296) + NVVIOSIGNALFORMAT_720P_59_94_SMPTE296, //!< 15 720p 59.94Hz (SMPTE296) + NVVIOSIGNALFORMAT_720P_50_00_SMPTE296, //!< 16 720p 50.00Hz (SMPTE296) + NVVIOSIGNALFORMAT_1080I_48_00_SMPTE274, //!< 17 1080I 48.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080I_47_96_SMPTE274, //!< 18 1080I 47.96Hz (SMPTE274) + NVVIOSIGNALFORMAT_720P_30_00_SMPTE296, //!< 19 720p 30.00Hz (SMPTE296) + NVVIOSIGNALFORMAT_720P_29_97_SMPTE296, //!< 20 720p 29.97Hz (SMPTE296) + NVVIOSIGNALFORMAT_720P_25_00_SMPTE296, //!< 21 720p 25.00Hz (SMPTE296) + NVVIOSIGNALFORMAT_720P_24_00_SMPTE296, //!< 22 720p 24.00Hz (SMPTE296) + 
NVVIOSIGNALFORMAT_720P_23_98_SMPTE296, //!< 23 720p 23.98Hz (SMPTE296) + NVVIOSIGNALFORMAT_2048P_30_00_SMPTE372, //!< 24 2048p 30.00Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048P_29_97_SMPTE372, //!< 25 2048p 29.97Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048I_60_00_SMPTE372, //!< 26 2048i 60.00Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048I_59_94_SMPTE372, //!< 27 2048i 59.94Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048P_25_00_SMPTE372, //!< 28 2048p 25.00Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048I_50_00_SMPTE372, //!< 29 2048i 50.00Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048P_24_00_SMPTE372, //!< 30 2048p 24.00Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048P_23_98_SMPTE372, //!< 31 2048p 23.98Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048I_48_00_SMPTE372, //!< 32 2048i 48.00Hz (SMPTE372) + NVVIOSIGNALFORMAT_2048I_47_96_SMPTE372, //!< 33 2048i 47.96Hz (SMPTE372) + + NVVIOSIGNALFORMAT_1080PSF_25_00_SMPTE274, //!< 34 1080PsF 25.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080PSF_29_97_SMPTE274, //!< 35 1080PsF 29.97Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080PSF_30_00_SMPTE274, //!< 36 1080PsF 30.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080PSF_24_00_SMPTE274, //!< 37 1080PsF 24.00Hz (SMPTE274) + NVVIOSIGNALFORMAT_1080PSF_23_98_SMPTE274, //!< 38 1080PsF 23.98Hz (SMPTE274) + + NVVIOSIGNALFORMAT_1080P_50_00_SMPTE274_3G_LEVEL_A, //!< 39 1080P 50.00Hz (SMPTE274) 3G Level A + NVVIOSIGNALFORMAT_1080P_59_94_SMPTE274_3G_LEVEL_A, //!< 40 1080P 59.94Hz (SMPTE274) 3G Level A + NVVIOSIGNALFORMAT_1080P_60_00_SMPTE274_3G_LEVEL_A, //!< 41 1080P 60.00Hz (SMPTE274) 3G Level A + + NVVIOSIGNALFORMAT_1080P_60_00_SMPTE274_3G_LEVEL_B, //!< 42 1080p 60.00Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_1080I_60_00_SMPTE274_3G_LEVEL_B, //!< 43 1080i 60.00Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048I_60_00_SMPTE372_3G_LEVEL_B, //!< 44 2048i 60.00Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080P_50_00_SMPTE274_3G_LEVEL_B, //!< 45 1080p 50.00Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_1080I_50_00_SMPTE274_3G_LEVEL_B, //!< 46 1080i 50.00Hz (SMPTE274) 3G Level B + 
NVVIOSIGNALFORMAT_2048I_50_00_SMPTE372_3G_LEVEL_B, //!< 47 2048i 50.00Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080P_30_00_SMPTE274_3G_LEVEL_B, //!< 48 1080p 30.00Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048P_30_00_SMPTE372_3G_LEVEL_B, //!< 49 2048p 30.00Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080P_25_00_SMPTE274_3G_LEVEL_B, //!< 50 1080p 25.00Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048P_25_00_SMPTE372_3G_LEVEL_B, //!< 51 2048p 25.00Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080P_24_00_SMPTE274_3G_LEVEL_B, //!< 52 1080p 24.00Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048P_24_00_SMPTE372_3G_LEVEL_B, //!< 53 2048p 24.00Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080I_48_00_SMPTE274_3G_LEVEL_B, //!< 54 1080i 48.00Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048I_48_00_SMPTE372_3G_LEVEL_B, //!< 55 2048i 48.00Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080P_59_94_SMPTE274_3G_LEVEL_B, //!< 56 1080p 59.94Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_1080I_59_94_SMPTE274_3G_LEVEL_B, //!< 57 1080i 59.94Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048I_59_94_SMPTE372_3G_LEVEL_B, //!< 58 2048i 59.94Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080P_29_97_SMPTE274_3G_LEVEL_B, //!< 59 1080p 29.97Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048P_29_97_SMPTE372_3G_LEVEL_B, //!< 60 2048p 29.97Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080P_23_98_SMPTE274_3G_LEVEL_B, //!< 61 1080p 29.98Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048P_23_98_SMPTE372_3G_LEVEL_B, //!< 62 2048p 29.98Hz (SMPTE372) 3G Level B + NVVIOSIGNALFORMAT_1080I_47_96_SMPTE274_3G_LEVEL_B, //!< 63 1080i 47.96Hz (SMPTE274) 3G Level B + NVVIOSIGNALFORMAT_2048I_47_96_SMPTE372_3G_LEVEL_B, //!< 64 2048i 47.96Hz (SMPTE372) 3G Level B + + NVVIOSIGNALFORMAT_END //!< 65 To indicate end of signal format list + +}NVVIOSIGNALFORMAT; + +//! 
SMPTE standards format +typedef enum _NVVIOVIDEOSTANDARD +{ + NVVIOVIDEOSTANDARD_SMPTE259 , //!< SMPTE259 + NVVIOVIDEOSTANDARD_SMPTE260 , //!< SMPTE260 + NVVIOVIDEOSTANDARD_SMPTE274 , //!< SMPTE274 + NVVIOVIDEOSTANDARD_SMPTE295 , //!< SMPTE295 + NVVIOVIDEOSTANDARD_SMPTE296 , //!< SMPTE296 + NVVIOVIDEOSTANDARD_SMPTE372 , //!< SMPTE372 +}NVVIOVIDEOSTANDARD; + +//! HD or SD video type +typedef enum _NVVIOVIDEOTYPE +{ + NVVIOVIDEOTYPE_SD , //!< Standard-definition (SD) + NVVIOVIDEOTYPE_HD , //!< High-definition (HD) +}NVVIOVIDEOTYPE; + +//! Interlace mode +typedef enum _NVVIOINTERLACEMODE +{ + NVVIOINTERLACEMODE_PROGRESSIVE , //!< Progressive (p) + NVVIOINTERLACEMODE_INTERLACE , //!< Interlace (i) + NVVIOINTERLACEMODE_PSF , //!< Progressive Segment Frame (psf) +}NVVIOINTERLACEMODE; + +//! Video data format +typedef enum _NVVIODATAFORMAT +{ + NVVIODATAFORMAT_UNKNOWN = -1 , //!< Invalid DataFormat + NVVIODATAFORMAT_R8G8B8_TO_YCRCB444 , //!< R8:G8:B8 => YCrCb (4:4:4) + NVVIODATAFORMAT_R8G8B8A8_TO_YCRCBA4444 , //!< R8:G8:B8:A8 => YCrCbA (4:4:4:4) + NVVIODATAFORMAT_R8G8B8Z10_TO_YCRCBZ4444 , //!< R8:G8:B8:Z10 => YCrCbZ (4:4:4:4) + NVVIODATAFORMAT_R8G8B8_TO_YCRCB422 , //!< R8:G8:B8 => YCrCb (4:2:2) + NVVIODATAFORMAT_R8G8B8A8_TO_YCRCBA4224 , //!< R8:G8:B8:A8 => YCrCbA (4:2:2:4) + NVVIODATAFORMAT_R8G8B8Z10_TO_YCRCBZ4224 , //!< R8:G8:B8:Z10 => YCrCbZ (4:2:2:4) + NVVIODATAFORMAT_X8X8X8_444_PASSTHRU , //!< R8:G8:B8 => RGB (4:4:4) + NVVIODATAFORMAT_X8X8X8A8_4444_PASSTHRU , //!< R8:G8:B8:A8 => RGBA (4:4:4:4) + NVVIODATAFORMAT_X8X8X8Z10_4444_PASSTHRU , //!< R8:G8:B8:Z10 => RGBZ (4:4:4:4) + NVVIODATAFORMAT_X10X10X10_444_PASSTHRU , //!< Y10:CR10:CB10 => YCrCb (4:4:4) + NVVIODATAFORMAT_X10X8X8_444_PASSTHRU , //!< Y10:CR8:CB8 => YCrCb (4:4:4) + NVVIODATAFORMAT_X10X8X8A10_4444_PASSTHRU , //!< Y10:CR8:CB8:A10 => YCrCbA (4:4:4:4) + NVVIODATAFORMAT_X10X8X8Z10_4444_PASSTHRU , //!< Y10:CR8:CB8:Z10 => YCrCbZ (4:4:4:4) + NVVIODATAFORMAT_DUAL_R8G8B8_TO_DUAL_YCRCB422 , //!< R8:G8:B8 + R8:G8:B8 => 
YCrCb (4:2:2 + 4:2:2) + NVVIODATAFORMAT_DUAL_X8X8X8_TO_DUAL_422_PASSTHRU , //!< Y8:CR8:CB8 + Y8:CR8:CB8 => YCrCb (4:2:2 + 4:2:2) + NVVIODATAFORMAT_R10G10B10_TO_YCRCB422 , //!< R10:G10:B10 => YCrCb (4:2:2) + NVVIODATAFORMAT_R10G10B10_TO_YCRCB444 , //!< R10:G10:B10 => YCrCb (4:4:4) + NVVIODATAFORMAT_X12X12X12_444_PASSTHRU , //!< X12:X12:X12 => XXX (4:4:4) + NVVIODATAFORMAT_X12X12X12_422_PASSTHRU , //!< X12:X12:X12 => XXX (4:2:2) + NVVIODATAFORMAT_Y10CR10CB10_TO_YCRCB422 , //!< Y10:CR10:CB10 => YCrCb (4:2:2) + NVVIODATAFORMAT_Y8CR8CB8_TO_YCRCB422 , //!< Y8:CR8:CB8 => YCrCb (4:2:2) + NVVIODATAFORMAT_Y10CR8CB8A10_TO_YCRCBA4224 , //!< Y10:CR8:CB8:A10 => YCrCbA (4:2:2:4) + NVVIODATAFORMAT_R10G10B10_TO_RGB444 , //!< R10:G10:B10 => RGB (4:4:4) + NVVIODATAFORMAT_R12G12B12_TO_YCRCB444 , //!< R12:G12:B12 => YCrCb (4:4:4) + NVVIODATAFORMAT_R12G12B12_TO_YCRCB422 , //!< R12:G12:B12 => YCrCb (4:2:2) +}NVVIODATAFORMAT; + +//! Video output area +typedef enum _NVVIOOUTPUTAREA +{ + NVVIOOUTPUTAREA_FULLSIZE , //!< Output to entire video resolution (full size) + NVVIOOUTPUTAREA_SAFEACTION , //!< Output to centered 90% of video resolution (safe action) + NVVIOOUTPUTAREA_SAFETITLE , //!< Output to centered 80% of video resolution (safe title) +}NVVIOOUTPUTAREA; + +//! Synchronization source +typedef enum _NVVIOSYNCSOURCE +{ + NVVIOSYNCSOURCE_SDISYNC , //!< SDI Sync (Digital input) + NVVIOSYNCSOURCE_COMPSYNC , //!< COMP Sync (Composite input) +}NVVIOSYNCSOURCE; + +//! Composite synchronization type +typedef enum _NVVIOCOMPSYNCTYPE +{ + NVVIOCOMPSYNCTYPE_AUTO , //!< Auto-detect + NVVIOCOMPSYNCTYPE_BILEVEL , //!< Bi-level signal + NVVIOCOMPSYNCTYPE_TRILEVEL , //!< Tri-level signal +}NVVIOCOMPSYNCTYPE; + +//! 
Video input output status +typedef enum _NVVIOINPUTOUTPUTSTATUS +{ + NVINPUTOUTPUTSTATUS_OFF , //!< Not in use + NVINPUTOUTPUTSTATUS_ERROR , //!< Error detected + NVINPUTOUTPUTSTATUS_SDI_SD , //!< SDI (standard-definition) + NVINPUTOUTPUTSTATUS_SDI_HD , //!< SDI (high-definition) +}NVVIOINPUTOUTPUTSTATUS; + +//! Synchronization input status +typedef enum _NVVIOSYNCSTATUS +{ + NVVIOSYNCSTATUS_OFF , //!< Sync not detected + NVVIOSYNCSTATUS_ERROR , //!< Error detected + NVVIOSYNCSTATUS_SYNCLOSS , //!< Genlock in use, format mismatch with output + NVVIOSYNCSTATUS_COMPOSITE , //!< Composite sync + NVVIOSYNCSTATUS_SDI_SD , //!< SDI sync (standard-definition) + NVVIOSYNCSTATUS_SDI_HD , //!< SDI sync (high-definition) +}NVVIOSYNCSTATUS; + +//! Video Capture Status +typedef enum _NVVIOCAPTURESTATUS +{ + NVVIOSTATUS_STOPPED , //!< Sync not detected + NVVIOSTATUS_RUNNING , //!< Error detected + NVVIOSTATUS_ERROR , //!< Genlock in use, format mismatch with output +}NVVIOCAPTURESTATUS; + +//! Video Capture Status +typedef enum _NVVIOSTATUSTYPE +{ + NVVIOSTATUSTYPE_IN , //!< Input Status + NVVIOSTATUSTYPE_OUT , //!< Output Status +}NVVIOSTATUSTYPE; + + +//! Assumption, maximum 4 SDI input and 4 SDI output cards supported on a system +#define NVAPI_MAX_VIO_DEVICES 8 + +//! 4 physical jacks supported on each SDI input card. +#define NVAPI_MAX_VIO_JACKS 4 + + +//! Each physical jack an on SDI input card can have +//! two "channels" in the case of "3G" VideoFormats, as specified +//! by SMPTE 425; for non-3G VideoFormats, only the first channel within +//! a physical jack is valid. +#define NVAPI_MAX_VIO_CHANNELS_PER_JACK 2 + +//! 4 Streams, 1 per physical jack +#define NVAPI_MAX_VIO_STREAMS 4 + +#define NVAPI_MIN_VIO_STREAMS 1 + +//! SDI input supports a max of 2 links per stream +#define NVAPI_MAX_VIO_LINKS_PER_STREAM 2 + + +#define NVAPI_MAX_FRAMELOCK_MAPPING_MODES 20 + +//! Min number of capture images +#define NVAPI_GVI_MIN_RAW_CAPTURE_IMAGES 1 + +//! 
Max number of capture images +#define NVAPI_GVI_MAX_RAW_CAPTURE_IMAGES 32 + +//! Default number of capture images +#define NVAPI_GVI_DEFAULT_RAW_CAPTURE_IMAGES 5 + + + +// Data Signal notification events. These need a event handler in RM. +// Register/Unregister and PopEvent NVAPI's are already available. + +//! Device configuration +typedef enum _NVVIOCONFIGTYPE +{ + NVVIOCONFIGTYPE_IN , //!< Input Status + NVVIOCONFIGTYPE_OUT , //!< Output Status +}NVVIOCONFIGTYPE; + +typedef enum _NVVIOCOLORSPACE +{ + NVVIOCOLORSPACE_UNKNOWN, + NVVIOCOLORSPACE_YCBCR, + NVVIOCOLORSPACE_YCBCRA, + NVVIOCOLORSPACE_YCBCRD, + NVVIOCOLORSPACE_GBR, + NVVIOCOLORSPACE_GBRA, + NVVIOCOLORSPACE_GBRD, +} NVVIOCOLORSPACE; + +//! Component sampling +typedef enum _NVVIOCOMPONENTSAMPLING +{ + NVVIOCOMPONENTSAMPLING_UNKNOWN, + NVVIOCOMPONENTSAMPLING_4444, + NVVIOCOMPONENTSAMPLING_4224, + NVVIOCOMPONENTSAMPLING_444, + NVVIOCOMPONENTSAMPLING_422 +} NVVIOCOMPONENTSAMPLING; + +typedef enum _NVVIOBITSPERCOMPONENT +{ + NVVIOBITSPERCOMPONENT_UNKNOWN, + NVVIOBITSPERCOMPONENT_8, + NVVIOBITSPERCOMPONENT_10, + NVVIOBITSPERCOMPONENT_12, +} NVVIOBITSPERCOMPONENT; + +typedef enum _NVVIOLINKID +{ + NVVIOLINKID_UNKNOWN, + NVVIOLINKID_A, + NVVIOLINKID_B, + NVVIOLINKID_C, + NVVIOLINKID_D +} NVVIOLINKID; + + +typedef enum _NVVIOANCPARITYCOMPUTATION +{ + NVVIOANCPARITYCOMPUTATION_AUTO, + NVVIOANCPARITYCOMPUTATION_ON, + NVVIOANCPARITYCOMPUTATION_OFF +} NVVIOANCPARITYCOMPUTATION; + + + +//! @} + + +//--------------------------------------------------------------------- +// Structures +//--------------------------------------------------------------------- + +//! \addtogroup vidio +//! @{ + + +//! Supports Serial Digital Interface (SDI) output +#define NVVIOCAPS_VIDOUT_SDI 0x00000001 + +//! Supports Internal timing source +#define NVVIOCAPS_SYNC_INTERNAL 0x00000100 + +//! Supports Genlock timing source +#define NVVIOCAPS_SYNC_GENLOCK 0x00000200 + +//! 
Supports Serial Digital Interface (SDI) synchronization input +#define NVVIOCAPS_SYNCSRC_SDI 0x00001000 + +//! Supports Composite synchronization input +#define NVVIOCAPS_SYNCSRC_COMP 0x00002000 + +//! Supports Desktop transparent mode +#define NVVIOCAPS_OUTPUTMODE_DESKTOP 0x00010000 + +//! Supports OpenGL application mode +#define NVVIOCAPS_OUTPUTMODE_OPENGL 0x00020000 + +//! Supports Serial Digital Interface (SDI) input +#define NVVIOCAPS_VIDIN_SDI 0x00100000 + +//! Supports Packed ANC +#define NVVIOCAPS_PACKED_ANC_SUPPORTED 0x00200000 + +//! Supports ANC audio blanking +#define NVVIOCAPS_AUDIO_BLANKING_SUPPORTED 0x00400000 + +//! SDI-class interface: SDI output with two genlock inputs +#define NVVIOCLASS_SDI 0x00000001 + +//! Device capabilities +typedef struct _NVVIOCAPS +{ + NvU32 version; //!< Structure version + NvAPI_String adapterName; //!< Graphics adapter name + NvU32 adapterClass; //!< Graphics adapter classes (NVVIOCLASS_SDI mask) + NvU32 adapterCaps; //!< Graphics adapter capabilities (NVVIOCAPS_* mask) + NvU32 dipSwitch; //!< On-board DIP switch settings bits + NvU32 dipSwitchReserved; //!< On-board DIP switch settings reserved bits + NvU32 boardID; //!< Board ID + //! Driver version + struct // + { + NvU32 majorVersion; //!< Major version. For GVI, majorVersion contains MajorVersion(HIWORD) And MinorVersion(LOWORD) + NvU32 minorVersion; //!< Minor version. For GVI, minorVersion contains Revison(HIWORD) And Build(LOWORD) + } driver; // + //! Firmware version + struct + { + NvU32 majorVersion; //!< Major version. In version 2, for both GVI and GVO, majorVersion contains MajorVersion(HIWORD) And MinorVersion(LOWORD) + NvU32 minorVersion; //!< Minor version. 
In version 2, for both GVI and GVO, minorVersion contains Revison(HIWORD) And Build(LOWORD) + } firmWare; // + NVVIOOWNERID ownerId; //!< Unique identifier for owner of video output (NVVIOOWNERID_INVALID if free running) + NVVIOOWNERTYPE ownerType; //!< Owner type (OpenGL application or Desktop mode) +} NVVIOCAPS; + +//! Macro for constructing the version field of NVVIOCAPS +#define NVVIOCAPS_VER1 MAKE_NVAPI_VERSION(NVVIOCAPS,1) +#define NVVIOCAPS_VER2 MAKE_NVAPI_VERSION(NVVIOCAPS,2) +#define NVVIOCAPS_VER NVVIOCAPS_VER2 + +//! Input channel status +typedef struct _NVVIOCHANNELSTATUS +{ + NvU32 smpte352; //!< 4-byte SMPTE 352 video payload identifier + NVVIOSIGNALFORMAT signalFormat; //!< Signal format + NVVIOBITSPERCOMPONENT bitsPerComponent; //!< Bits per component + NVVIOCOMPONENTSAMPLING samplingFormat; //!< Sampling format + NVVIOCOLORSPACE colorSpace; //!< Color space + NVVIOLINKID linkID; //!< Link ID +} NVVIOCHANNELSTATUS; + +//! Input device status +typedef struct _NVVIOINPUTSTATUS +{ + NVVIOCHANNELSTATUS vidIn[NVAPI_MAX_VIO_JACKS][NVAPI_MAX_VIO_CHANNELS_PER_JACK]; //!< Video input status per channel within a jack + NVVIOCAPTURESTATUS captureStatus; //!< status of video capture +} NVVIOINPUTSTATUS; + +//! 
Output device status +typedef struct _NVVIOOUTPUTSTATUS +{ + NVVIOINPUTOUTPUTSTATUS vid1Out; //!< Video 1 output status + NVVIOINPUTOUTPUTSTATUS vid2Out; //!< Video 2 output status + NVVIOSYNCSTATUS sdiSyncIn; //!< SDI sync input status + NVVIOSYNCSTATUS compSyncIn; //!< Composite sync input status + NvU32 syncEnable; //!< Sync enable (TRUE if using syncSource) + NVVIOSYNCSOURCE syncSource; //!< Sync source + NVVIOSIGNALFORMAT syncFormat; //!< Sync format + NvU32 frameLockEnable; //!< Framelock enable flag + NvU32 outputVideoLocked; //!< Output locked status + NvU32 dataIntegrityCheckErrorCount; //!< Data integrity check error count + NvU32 dataIntegrityCheckEnabled; //!< Data integrity check status enabled + NvU32 dataIntegrityCheckFailed; //!< Data integrity check status failed + NvU32 uSyncSourceLocked; //!< genlocked to framelocked to ref signal + NvU32 uPowerOn; //!< TRUE: indicates there is sufficient power +} NVVIOOUTPUTSTATUS; + +//! Video device status. +typedef struct _NVVIOSTATUS +{ + NvU32 version; //!< Structure version + NVVIOSTATUSTYPE nvvioStatusType; //!< Input or Output status + union + { + NVVIOINPUTSTATUS inStatus; //!< Input device status + NVVIOOUTPUTSTATUS outStatus; //!< Output device status + }vioStatus; +} NVVIOSTATUS; + +//! Macro for constructingthe version field of NVVIOSTATUS +#define NVVIOSTATUS_VER MAKE_NVAPI_VERSION(NVVIOSTATUS,1) + +//! Output region +typedef struct _NVVIOOUTPUTREGION +{ + NvU32 x; //!< Horizontal origin in pixels + NvU32 y; //!< Vertical origin in pixels + NvU32 width; //!< Width of region in pixels + NvU32 height; //!< Height of region in pixels +} NVVIOOUTPUTREGION; + +//! Gamma ramp (8-bit index) +typedef struct _NVVIOGAMMARAMP8 +{ + NvU16 uRed[256]; //!< Red channel gamma ramp (8-bit index, 16-bit values) + NvU16 uGreen[256]; //!< Green channel gamma ramp (8-bit index, 16-bit values) + NvU16 uBlue[256]; //!< Blue channel gamma ramp (8-bit index, 16-bit values) +} NVVIOGAMMARAMP8; + +//! 
Gamma ramp (10-bit index) +typedef struct _NVVIOGAMMARAMP10 +{ + NvU16 uRed[1024]; //!< Red channel gamma ramp (10-bit index, 16-bit values) + NvU16 uGreen[1024]; //!< Green channel gamma ramp (10-bit index, 16-bit values) + NvU16 uBlue[1024]; //!< Blue channel gamma ramp (10-bit index, 16-bit values) +} NVVIOGAMMARAMP10; + + +//! Sync delay +typedef struct _NVVIOSYNCDELAY +{ + NvU32 version; //!< Structure version + NvU32 horizontalDelay; //!< Horizontal delay in pixels + NvU32 verticalDelay; //!< Vertical delay in lines +} NVVIOSYNCDELAY; + +//! Macro for constructing the version field of NVVIOSYNCDELAY +#define NVVIOSYNCDELAY_VER MAKE_NVAPI_VERSION(NVVIOSYNCDELAY,1) + + +//! Video mode information +typedef struct _NVVIOVIDEOMODE +{ + NvU32 horizontalPixels; //!< Horizontal resolution (in pixels) + NvU32 verticalLines; //!< Vertical resolution for frame (in lines) + float fFrameRate; //!< Frame rate + NVVIOINTERLACEMODE interlaceMode; //!< Interlace mode + NVVIOVIDEOSTANDARD videoStandard; //!< SMPTE standards format + NVVIOVIDEOTYPE videoType; //!< HD or SD signal classification +} NVVIOVIDEOMODE; + +//! Signal format details +typedef struct _NVVIOSIGNALFORMATDETAIL +{ + NVVIOSIGNALFORMAT signalFormat; //!< Signal format enumerated value + NVVIOVIDEOMODE videoMode; //!< Video mode for signal format +}NVVIOSIGNALFORMATDETAIL; + + +//! R8:G8:B8 +#define NVVIOBUFFERFORMAT_R8G8B8 0x00000001 + +//! R8:G8:B8:Z24 +#define NVVIOBUFFERFORMAT_R8G8B8Z24 0x00000002 + +//! R8:G8:B8:A8 +#define NVVIOBUFFERFORMAT_R8G8B8A8 0x00000004 + +//! R8:G8:B8:A8:Z24 +#define NVVIOBUFFERFORMAT_R8G8B8A8Z24 0x00000008 + +//! R16FP:G16FP:B16FP +#define NVVIOBUFFERFORMAT_R16FPG16FPB16FP 0x00000010 + +//! R16FP:G16FP:B16FP:Z24 +#define NVVIOBUFFERFORMAT_R16FPG16FPB16FPZ24 0x00000020 + +//! R16FP:G16FP:B16FP:A16FP +#define NVVIOBUFFERFORMAT_R16FPG16FPB16FPA16FP 0x00000040 + +//! R16FP:G16FP:B16FP:A16FP:Z24 +#define NVVIOBUFFERFORMAT_R16FPG16FPB16FPA16FPZ24 0x00000080 + + + +//! 
Data format details +typedef struct _NVVIODATAFORMATDETAIL +{ + NVVIODATAFORMAT dataFormat; //!< Data format enumerated value + NvU32 vioCaps; //!< Data format capabilities (NVVIOCAPS_* mask) +}NVVIODATAFORMATDETAIL; + +//! Colorspace conversion +typedef struct _NVVIOCOLORCONVERSION +{ + NvU32 version; //!< Structure version + float colorMatrix[3][3]; //!< Output[n] = + float colorOffset[3]; //!< Input[0] * colorMatrix[n][0] + + float colorScale[3]; //!< Input[1] * colorMatrix[n][1] + + //!< Input[2] * colorMatrix[n][2] + + //!< OutputRange * colorOffset[n] + //!< where OutputRange is the standard magnitude of + //!< Output[n][n] and colorMatrix and colorOffset + //!< values are within the range -1.0 to +1.0 + NvU32 compositeSafe; //!< compositeSafe constrains luminance range when using composite output +} NVVIOCOLORCONVERSION; + +//! macro for constructing the version field of _NVVIOCOLORCONVERSION. +#define NVVIOCOLORCONVERSION_VER MAKE_NVAPI_VERSION(NVVIOCOLORCONVERSION,1) + +//! Gamma correction +typedef struct _NVVIOGAMMACORRECTION +{ + NvU32 version; //!< Structure version + NvU32 vioGammaCorrectionType; //!< Gamma correction type (8-bit or 10-bit) + //! Gamma correction: + union + { + NVVIOGAMMARAMP8 gammaRamp8; //!< Gamma ramp (8-bit index, 16-bit values) + NVVIOGAMMARAMP10 gammaRamp10; //!< Gamma ramp (10-bit index, 16-bit values) + }gammaRamp; + float fGammaValueR; //!< Red Gamma value within gamma ranges. 0.5 - 6.0 + float fGammaValueG; //!< Green Gamma value within gamma ranges. 0.5 - 6.0 + float fGammaValueB; //!< Blue Gamma value within gamma ranges. 0.5 - 6.0 +} NVVIOGAMMACORRECTION; + +//! Macro for constructing thevesion field of _NVVIOGAMMACORRECTION +#define NVVIOGAMMACORRECTION_VER MAKE_NVAPI_VERSION(NVVIOGAMMACORRECTION,1) + +//! 
Maximum number of ranges per channel +#define MAX_NUM_COMPOSITE_RANGE 2 + + +typedef struct _NVVIOCOMPOSITERANGE +{ + NvU32 uRange; + NvU32 uEnabled; + NvU32 uMin; + NvU32 uMax; +} NVVIOCOMPOSITERANGE; + + + +// Device configuration (fields masks indicating NVVIOCONFIG fields to use for NvAPI_VIO_GetConfig/NvAPI_VIO_SetConfig() ) +// +#define NVVIOCONFIG_SIGNALFORMAT 0x00000001 //!< fields: signalFormat +#define NVVIOCONFIG_DATAFORMAT 0x00000002 //!< fields: dataFormat +#define NVVIOCONFIG_OUTPUTREGION 0x00000004 //!< fields: outputRegion +#define NVVIOCONFIG_OUTPUTAREA 0x00000008 //!< fields: outputArea +#define NVVIOCONFIG_COLORCONVERSION 0x00000010 //!< fields: colorConversion +#define NVVIOCONFIG_GAMMACORRECTION 0x00000020 //!< fields: gammaCorrection +#define NVVIOCONFIG_SYNCSOURCEENABLE 0x00000040 //!< fields: syncSource and syncEnable +#define NVVIOCONFIG_SYNCDELAY 0x00000080 //!< fields: syncDelay +#define NVVIOCONFIG_COMPOSITESYNCTYPE 0x00000100 //!< fields: compositeSyncType +#define NVVIOCONFIG_FRAMELOCKENABLE 0x00000200 //!< fields: EnableFramelock +#define NVVIOCONFIG_422FILTER 0x00000400 //!< fields: bEnable422Filter +#define NVVIOCONFIG_COMPOSITETERMINATE 0x00000800 //!< fields: bCompositeTerminate (Not supported on Quadro FX 4000 SDI) +#define NVVIOCONFIG_DATAINTEGRITYCHECK 0x00001000 //!< fields: bEnableDataIntegrityCheck (Not supported on Quadro FX 4000 SDI) +#define NVVIOCONFIG_CSCOVERRIDE 0x00002000 //!< fields: colorConversion override +#define NVVIOCONFIG_FLIPQUEUELENGTH 0x00004000 //!< fields: flipqueuelength control +#define NVVIOCONFIG_ANCTIMECODEGENERATION 0x00008000 //!< fields: bEnableANCTimeCodeGeneration +#define NVVIOCONFIG_COMPOSITE 0x00010000 //!< fields: bEnableComposite +#define NVVIOCONFIG_ALPHAKEYCOMPOSITE 0x00020000 //!< fields: bEnableAlphaKeyComposite +#define NVVIOCONFIG_COMPOSITE_Y 0x00040000 //!< fields: compRange +#define NVVIOCONFIG_COMPOSITE_CR 0x00080000 //!< fields: compRange +#define NVVIOCONFIG_COMPOSITE_CB 
0x00100000 //!< fields: compRange +#define NVVIOCONFIG_FULL_COLOR_RANGE 0x00200000 //!< fields: bEnableFullColorRange +#define NVVIOCONFIG_RGB_DATA 0x00400000 //!< fields: bEnableRGBData +#define NVVIOCONFIG_RESERVED_SDIOUTPUTENABLE 0x00800000 //!< fields: bEnableSDIOutput +#define NVVIOCONFIG_STREAMS 0x01000000 //!< fields: streams +#define NVVIOCONFIG_ANC_PARITY_COMPUTATION 0x02000000 //!< fields: ancParityComputation +#define NVVIOCONFIG_ANC_AUDIO_REPEAT 0x04000000 //!< fields: enableAudioBlanking + + +// Don't forget to update NVVIOCONFIG_VALIDFIELDS in nvapi.spec when NVVIOCONFIG_ALLFIELDS changes. +#define NVVIOCONFIG_ALLFIELDS ( NVVIOCONFIG_SIGNALFORMAT | \ + NVVIOCONFIG_DATAFORMAT | \ + NVVIOCONFIG_OUTPUTREGION | \ + NVVIOCONFIG_OUTPUTAREA | \ + NVVIOCONFIG_COLORCONVERSION | \ + NVVIOCONFIG_GAMMACORRECTION | \ + NVVIOCONFIG_SYNCSOURCEENABLE | \ + NVVIOCONFIG_SYNCDELAY | \ + NVVIOCONFIG_COMPOSITESYNCTYPE | \ + NVVIOCONFIG_FRAMELOCKENABLE | \ + NVVIOCONFIG_422FILTER | \ + NVVIOCONFIG_COMPOSITETERMINATE | \ + NVVIOCONFIG_DATAINTEGRITYCHECK | \ + NVVIOCONFIG_CSCOVERRIDE | \ + NVVIOCONFIG_FLIPQUEUELENGTH | \ + NVVIOCONFIG_ANCTIMECODEGENERATION | \ + NVVIOCONFIG_COMPOSITE | \ + NVVIOCONFIG_ALPHAKEYCOMPOSITE | \ + NVVIOCONFIG_COMPOSITE_Y | \ + NVVIOCONFIG_COMPOSITE_CR | \ + NVVIOCONFIG_COMPOSITE_CB | \ + NVVIOCONFIG_FULL_COLOR_RANGE | \ + NVVIOCONFIG_RGB_DATA | \ + NVVIOCONFIG_RESERVED_SDIOUTPUTENABLE | \ + NVVIOCONFIG_STREAMS | \ + NVVIOCONFIG_ANC_PARITY_COMPUTATION | \ + NVVIOCONFIG_ANC_AUDIO_REPEAT ) + +#define NVVIOCONFIG_VALIDFIELDS ( NVVIOCONFIG_SIGNALFORMAT | \ + NVVIOCONFIG_DATAFORMAT | \ + NVVIOCONFIG_OUTPUTREGION | \ + NVVIOCONFIG_OUTPUTAREA | \ + NVVIOCONFIG_COLORCONVERSION | \ + NVVIOCONFIG_GAMMACORRECTION | \ + NVVIOCONFIG_SYNCSOURCEENABLE | \ + NVVIOCONFIG_SYNCDELAY | \ + NVVIOCONFIG_COMPOSITESYNCTYPE | \ + NVVIOCONFIG_FRAMELOCKENABLE | \ + NVVIOCONFIG_RESERVED_SDIOUTPUTENABLE | \ + NVVIOCONFIG_422FILTER | \ + NVVIOCONFIG_COMPOSITETERMINATE | \ + 
NVVIOCONFIG_DATAINTEGRITYCHECK | \ + NVVIOCONFIG_CSCOVERRIDE | \ + NVVIOCONFIG_FLIPQUEUELENGTH | \ + NVVIOCONFIG_ANCTIMECODEGENERATION | \ + NVVIOCONFIG_COMPOSITE | \ + NVVIOCONFIG_ALPHAKEYCOMPOSITE | \ + NVVIOCONFIG_COMPOSITE_Y | \ + NVVIOCONFIG_COMPOSITE_CR | \ + NVVIOCONFIG_COMPOSITE_CB | \ + NVVIOCONFIG_FULL_COLOR_RANGE | \ + NVVIOCONFIG_RGB_DATA | \ + NVVIOCONFIG_RESERVED_SDIOUTPUTENABLE | \ + NVVIOCONFIG_STREAMS | \ + NVVIOCONFIG_ANC_PARITY_COMPUTATION | \ + NVVIOCONFIG_ANC_AUDIO_REPEAT) + +#define NVVIOCONFIG_DRIVERFIELDS ( NVVIOCONFIG_OUTPUTREGION | \ + NVVIOCONFIG_OUTPUTAREA | \ + NVVIOCONFIG_COLORCONVERSION | \ + NVVIOCONFIG_FLIPQUEUELENGTH) + +#define NVVIOCONFIG_GAMMAFIELDS ( NVVIOCONFIG_GAMMACORRECTION ) + +#define NVVIOCONFIG_RMCTRLFIELDS ( NVVIOCONFIG_SIGNALFORMAT | \ + NVVIOCONFIG_DATAFORMAT | \ + NVVIOCONFIG_SYNCSOURCEENABLE | \ + NVVIOCONFIG_COMPOSITESYNCTYPE | \ + NVVIOCONFIG_FRAMELOCKENABLE | \ + NVVIOCONFIG_422FILTER | \ + NVVIOCONFIG_COMPOSITETERMINATE | \ + NVVIOCONFIG_DATAINTEGRITYCHECK | \ + NVVIOCONFIG_COMPOSITE | \ + NVVIOCONFIG_ALPHAKEYCOMPOSITE | \ + NVVIOCONFIG_COMPOSITE_Y | \ + NVVIOCONFIG_COMPOSITE_CR | \ + NVVIOCONFIG_COMPOSITE_CB) + +#define NVVIOCONFIG_RMSKEWFIELDS ( NVVIOCONFIG_SYNCDELAY ) + +#define NVVIOCONFIG_ALLOWSDIRUNNING_FIELDS ( NVVIOCONFIG_DATAINTEGRITYCHECK | \ + NVVIOCONFIG_SYNCDELAY | \ + NVVIOCONFIG_CSCOVERRIDE | \ + NVVIOCONFIG_ANCTIMECODEGENERATION | \ + NVVIOCONFIG_COMPOSITE | \ + NVVIOCONFIG_ALPHAKEYCOMPOSITE | \ + NVVIOCONFIG_COMPOSITE_Y | \ + NVVIOCONFIG_COMPOSITE_CR | \ + NVVIOCONFIG_COMPOSITE_CB | \ + NVVIOCONFIG_ANC_PARITY_COMPUTATION) + + + #define NVVIOCONFIG_RMMODESET_FIELDS ( NVVIOCONFIG_SIGNALFORMAT | \ + NVVIOCONFIG_DATAFORMAT | \ + NVVIOCONFIG_SYNCSOURCEENABLE | \ + NVVIOCONFIG_FRAMELOCKENABLE | \ + NVVIOCONFIG_COMPOSITESYNCTYPE | \ + NVVIOCONFIG_ANC_AUDIO_REPEAT) + + +//! Output device configuration +// No members can be deleted from below structure. 
Only add new members at the +// end of the structure. +typedef struct _NVVIOOUTPUTCONFIG_V1 +{ + NVVIOSIGNALFORMAT signalFormat; //!< Signal format for video output + NVVIODATAFORMAT dataFormat; //!< Data format for video output + NVVIOOUTPUTREGION outputRegion; //!< Region for video output (Desktop mode) + NVVIOOUTPUTAREA outputArea; //!< Usable resolution for video output (safe area) + NVVIOCOLORCONVERSION colorConversion; //!< Color conversion. + NVVIOGAMMACORRECTION gammaCorrection; + NvU32 syncEnable; //!< Sync enable (TRUE to use syncSource) + NVVIOSYNCSOURCE syncSource; //!< Sync source + NVVIOSYNCDELAY syncDelay; //!< Sync delay + NVVIOCOMPSYNCTYPE compositeSyncType; //!< Composite sync type + NvU32 frameLockEnable; //!< Flag indicating whether framelock was on/off + NvU32 psfSignalFormat; //!< Indicates whether contained format is PSF Signal format + NvU32 enable422Filter; //!< Enables/Disables 4:2:2 filter + NvU32 compositeTerminate; //!< Composite termination + NvU32 enableDataIntegrityCheck; //!< Enable data integrity check: true - enable, false - disable + NvU32 cscOverride; //!< Use provided CSC color matrix to overwrite + NvU32 flipQueueLength; //!< Number of buffers used for the internal flipqueue + NvU32 enableANCTimeCodeGeneration; //!< Enable SDI ANC time code generation + NvU32 enableComposite; //!< Enable composite + NvU32 enableAlphaKeyComposite; //!< Enable Alpha key composite + NVVIOCOMPOSITERANGE compRange; //!< Composite ranges + NvU8 reservedData[256]; //!< Inicates last stored SDI output state TRUE-ON / FALSE-OFF + NvU32 enableFullColorRange; //!< Flag indicating Full Color Range + NvU32 enableRGBData; //!< Indicates data is in RGB format +} NVVIOOUTPUTCONFIG_V1; + +typedef struct _NVVIOOUTPUTCONFIG_V2 +{ + NVVIOSIGNALFORMAT signalFormat; //!< Signal format for video output + NVVIODATAFORMAT dataFormat; //!< Data format for video output + NVVIOOUTPUTREGION outputRegion; //!< Region for video output (Desktop mode) + NVVIOOUTPUTAREA 
outputArea; //!< Usable resolution for video output (safe area) + NVVIOCOLORCONVERSION colorConversion; //!< Color conversion. + NVVIOGAMMACORRECTION gammaCorrection; + NvU32 syncEnable; //!< Sync enable (TRUE to use syncSource) + NVVIOSYNCSOURCE syncSource; //!< Sync source + NVVIOSYNCDELAY syncDelay; //!< Sync delay + NVVIOCOMPSYNCTYPE compositeSyncType; //!< Composite sync type + NvU32 frameLockEnable; //!< Flag indicating whether framelock was on/off + NvU32 psfSignalFormat; //!< Indicates whether contained format is PSF Signal format + NvU32 enable422Filter; //!< Enables/Disables 4:2:2 filter + NvU32 compositeTerminate; //!< Composite termination + NvU32 enableDataIntegrityCheck; //!< Enable data integrity check: true - enable, false - disable + NvU32 cscOverride; //!< Use provided CSC color matrix to overwrite + NvU32 flipQueueLength; //!< Number of buffers used for the internal flip queue + NvU32 enableANCTimeCodeGeneration; //!< Enable SDI ANC time code generation + NvU32 enableComposite; //!< Enable composite + NvU32 enableAlphaKeyComposite; //!< Enable Alpha key composite + NVVIOCOMPOSITERANGE compRange; //!< Composite ranges + NvU8 reservedData[256]; //!< Indicates last stored SDI output state TRUE-ON / FALSE-OFF + NvU32 enableFullColorRange; //!< Flag indicating Full Color Range + NvU32 enableRGBData; //!< Indicates data is in RGB format + NVVIOANCPARITYCOMPUTATION ancParityComputation; //!< Enable HW ANC parity bit computation (auto/on/off) +} NVVIOOUTPUTCONFIG_V2; + +typedef struct _NVVIOOUTPUTCONFIG_V3 +{ + NVVIOSIGNALFORMAT signalFormat; //!< Signal format for video output + NVVIODATAFORMAT dataFormat; //!< Data format for video output + NVVIOOUTPUTREGION outputRegion; //!< Region for video output (Desktop mode) + NVVIOOUTPUTAREA outputArea; //!< Usable resolution for video output (safe area) + NVVIOCOLORCONVERSION colorConversion; //!< Color conversion. 
+ NVVIOGAMMACORRECTION gammaCorrection; + NvU32 syncEnable; //!< Sync enable (TRUE to use syncSource) + NVVIOSYNCSOURCE syncSource; //!< Sync source + NVVIOSYNCDELAY syncDelay; //!< Sync delay + NVVIOCOMPSYNCTYPE compositeSyncType; //!< Composite sync type + NvU32 frameLockEnable; //!< Flag indicating whether framelock was on/off + NvU32 psfSignalFormat; //!< Indicates whether contained format is PSF Signal format + NvU32 enable422Filter; //!< Enables/Disables 4:2:2 filter + NvU32 compositeTerminate; //!< Composite termination + NvU32 enableDataIntegrityCheck; //!< Enable data integrity check: true - enable, false - disable + NvU32 cscOverride; //!< Use provided CSC color matrix to overwrite + NvU32 flipQueueLength; //!< Number of buffers used for the internal flip queue + NvU32 enableANCTimeCodeGeneration; //!< Enable SDI ANC time code generation + NvU32 enableComposite; //!< Enable composite + NvU32 enableAlphaKeyComposite; //!< Enable Alpha key composite + NVVIOCOMPOSITERANGE compRange; //!< Composite ranges + NvU8 reservedData[256]; //!< Indicates last stored SDI output state TRUE-ON / FALSE-OFF + NvU32 enableFullColorRange; //!< Flag indicating Full Color Range + NvU32 enableRGBData; //!< Indicates data is in RGB format + NVVIOANCPARITYCOMPUTATION ancParityComputation; //!< Enable HW ANC parity bit computation (auto/on/off) + NvU32 enableAudioBlanking; //!< Enable HANC audio blanking on repeat frames +} NVVIOOUTPUTCONFIG_V3; + +//! Stream configuration +typedef struct _NVVIOSTREAM +{ + NvU32 bitsPerComponent; //!< Bits per component + NVVIOCOMPONENTSAMPLING sampling; //!< Sampling + NvU32 expansionEnable; //!< Enable/disable 4:2:2->4:4:4 expansion + NvU32 numLinks; //!< Number of active links + struct + { + NvU32 jack; //!< This stream's link[i] will use the specified (0-based) channel within the + NvU32 channel; //!< specified (0-based) jack + } links[NVAPI_MAX_VIO_LINKS_PER_STREAM]; +} NVVIOSTREAM; + +//! 
Input device configuration +typedef struct _NVVIOINPUTCONFIG +{ + NvU32 numRawCaptureImages; //!< numRawCaptureImages is the number of frames to keep in the capture queue. + //!< must be between NVAPI_GVI_MIN_RAW_CAPTURE_IMAGES and NVAPI_GVI_MAX_RAW_CAPTURE_IMAGES, + NVVIOSIGNALFORMAT signalFormat; //!< Signal format. + //!< Please note that both numRawCaptureImages and signalFormat should be set together. + NvU32 numStreams; //!< Number of active streams. + NVVIOSTREAM streams[NVAPI_MAX_VIO_STREAMS]; //!< Stream configurations + NvU32 bTestMode; //!< This attribute controls the GVI test mode. + //!< Possible values 0/1. When testmode enabled, the + //!< GVI device will generate fake data as quickly as possible. +} NVVIOINPUTCONFIG; + +typedef struct _NVVIOCONFIG_V1 +{ + NvU32 version; //!< Structure version + NvU32 fields; //!< Caller sets to NVVIOCONFIG_* mask for fields to use + NVVIOCONFIGTYPE nvvioConfigType; //!< Input or Output configuration + union + { + NVVIOINPUTCONFIG inConfig; //!< Input device configuration + NVVIOOUTPUTCONFIG_V1 outConfig; //!< Output device configuration + }vioConfig; +} NVVIOCONFIG_V1; + + +typedef struct _NVVIOCONFIG_V2 +{ + NvU32 version; //!< Structure version + NvU32 fields; //!< Caller sets to NVVIOCONFIG_* mask for fields to use + NVVIOCONFIGTYPE nvvioConfigType; //!< Input or Output configuration + union + { + NVVIOINPUTCONFIG inConfig; //!< Input device configuration + NVVIOOUTPUTCONFIG_V2 outConfig; //!< Output device configuration + }vioConfig; +} NVVIOCONFIG_V2; + +typedef struct _NVVIOCONFIG_V3 +{ + NvU32 version; //!< Structure version + NvU32 fields; //!< Caller sets to NVVIOCONFIG_* mask for fields to use + NVVIOCONFIGTYPE nvvioConfigType; //!< Input or Output configuration + union + { + NVVIOINPUTCONFIG inConfig; //!< Input device configuration + NVVIOOUTPUTCONFIG_V3 outConfig; //!< Output device configuration + }vioConfig; +} NVVIOCONFIG_V3; +typedef NVVIOOUTPUTCONFIG_V3 NVVIOOUTPUTCONFIG; +typedef NVVIOCONFIG_V3 
NVVIOCONFIG; + +#define NVVIOCONFIG_VER1 MAKE_NVAPI_VERSION(NVVIOCONFIG_V1,1) +#define NVVIOCONFIG_VER2 MAKE_NVAPI_VERSION(NVVIOCONFIG_V2,2) +#define NVVIOCONFIG_VER3 MAKE_NVAPI_VERSION(NVVIOCONFIG_V3,3) +#define NVVIOCONFIG_VER NVVIOCONFIG_VER3 + + +typedef struct +{ + NvPhysicalGpuHandle hPhysicalGpu; //!< Handle to Physical GPU (This could be NULL for GVI device if its not binded) + NvVioHandle hVioHandle; //!Create Stereo Handle->InitActivation->Reset Device +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 302 +//! +//! \param [in] stereoHandle Stereo handle corresponding to the device interface. +//! \param [in] bDelayed Use delayed activation +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED - Stereo part of NVAPI not initialized. +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// + +//! \addtogroup stereoapi +//! @{ + +//! InitActivation Flags +typedef enum _NVAPI_STEREO_INIT_ACTIVATION_FLAGS +{ + NVAPI_STEREO_INIT_ACTIVATION_IMMEDIATE = 0X00, + NVAPI_STEREO_INIT_ACTIVATION_DELAYED = 0x01, +} NVAPI_STEREO_INIT_ACTIVATION_FLAGS; + +NVAPI_INTERFACE NvAPI_Stereo_InitActivation(__in StereoHandle hStereoHandle, __in NVAPI_STEREO_INIT_ACTIVATION_FLAGS flags); + +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_Trigger_Activation +// +//! DESCRIPTION: This API allows an application to trigger creation of a stereo desktop, +//! in case the creation was stopped on application launch. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! \since Release: 302 +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! +//! 
\return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below. +//! \retval ::NVAPI_STEREO_INIT_ACTIVATION_NOT_DONE - Stereo InitActivation not called. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED - Stereo part of NVAPI not initialized. +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_Trigger_Activation(__in StereoHandle hStereoHandle); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_CapturePngImage +// +//! DESCRIPTION: This API captures the current stereo image in PNG stereo format. +//! Only the last capture call per flip will be effective. +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle corresponding to the device interface. +//! +//! \retval ::NVAPI_OK Image captured. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_CapturePngImage(StereoHandle stereoHandle); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_ReverseStereoBlitControl +// +//! DESCRIPTION: This API turns on/off reverse stereo blit. +//! +//! HOW TO USE: Use after the stereo handle for the device interface is created via successfull call to the appropriate +//! 
NvAPI_Stereo_CreateHandleFrom() function. +//! After reversed stereo blit control is turned on, blits from the stereo surface will +//! produce the right-eye image in the left side of the destination surface and the left-eye +//! image in the right side of the destination surface. +//! +//! In DirectX 9, the destination surface must be created as the render target, and StretchRect must be used. +//! Conditions: +//! - DstWidth == 2*SrcWidth +//! - DstHeight == SrcHeight +//! - Src surface is the stereo surface. +//! - SrcRect must be {0,0,SrcWidth,SrcHeight} +//! - DstRect must be {0,0,DstWidth,DstHeight} +//! +//! In DirectX 10, ResourceCopyRegion must be used. +//! Conditions: +//! - DstWidth == 2*SrcWidth +//! - DstHeight == SrcHeight +//! - dstX == 0, +//! - dstY == 0, +//! - dstZ == 0, +//! - SrcBox: left=top=front==0; right==SrcWidth; bottom==SrcHeight; back==1; +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] stereoHandle Stereo handle corresponding to the device interface. +//! \param [in] TurnOn != 0 : Turns on \n +//! == 0 : Turns off +//! +//! +//! \retval ::NVAPI_OK Retrieval of frustum adjust mode was successfull. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_ReverseStereoBlitControl(StereoHandle hStereoHandle, NvU8 TurnOn); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_SetNotificationMessage +// +//! DESCRIPTION: This API is a Setup notification message that the stereo driver uses to notify the application +//! when the user changes the stereo driver state. +//! +//! 
When the user changes the stereo state (Activated or Deactivated, separation or conversion) +//! the stereo driver posts a defined message with the following parameters: +//! +//! lParam is the current conversion. (Actual conversion is *(float*)&lParam ) +//! +//! wParam == MAKEWPARAM(l, h) where +//! - l == 0 if stereo is deactivated +//! - l == 1 if stereo is deactivated +//! - h is the current separation. (Actual separation is float(h*100.f/0xFFFF) +//! +//! Call this API with NULL hWnd to prohibit notification. +//! +//! WHEN TO USE: Use after the stereo handle for device interface is created via successful call to appropriate +//! NvAPI_Stereo_CreateHandleFrom() function. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! +//! \param [in] stereoHandle Stereo handle corresponding to the device interface. +//! \param [in] hWnd Window HWND that will be notified when the user changes the stereo driver state. +//! Actual HWND must be cast to an NvU64. +//! \param [in] messageID MessageID of the message that will be posted to hWnd +//! +//! \retval ::NVAPI_OK Notification set. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_SetNotificationMessage(StereoHandle hStereoHandle, NvU64 hWnd,NvU64 messageID); + + + + + + + + + + + + + + + +//! \ingroup stereoapi +#define NVAPI_STEREO_QUADBUFFERED_API_VERSION 0x2 + +//! 
\ingroup stereoapi + typedef enum _NV_StereoSwapChainMode + { + NVAPI_STEREO_SWAPCHAIN_DEFAULT = 0, + NVAPI_STEREO_SWAPCHAIN_STEREO = 1, + NVAPI_STEREO_SWAPCHAIN_MONO = 2, + } NV_STEREO_SWAPCHAIN_MODE; + +#if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D1x_CreateSwapChain +// +//! DESCRIPTION: This API allows the user to create a mono or a stereo swap chain. +//! +//! NOTE: NvAPI_D3D1x_CreateSwapChain is a wrapper of the method IDXGIFactory::CreateSwapChain which +//! additionally notifies the D3D driver of the mode in which stereo mode the swap chain is to be +//! created. +//! +//! \since Release: 285 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hStereoHandle Stereo handle that corresponds to the device interface. +//! A pointer to the device that will write 2D images to the swap chain. +//! \param [in] pDesc A pointer to the swap-chain description (DXGI_SWAP_CHAIN_DESC). This parameter cannot be NULL. +//! \param [out] ppSwapChain A pointer to the swap chain created. +//! \param [in] mode The stereo mode fot the swap chain. +//! NVAPI_STEREO_SWAPCHAIN_DEFAULT +//! NVAPI_STEREO_SWAPCHAIN_STEREO +//! NVAPI_STEREO_SWAPCHAIN_MONO +//! +//! \retval ::NVAPI_OK The swap chain was created successfully. +//! \retval ::NVAPI_ERROR The operation failed. +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D1x_CreateSwapChain(StereoHandle hStereoHandle, + DXGI_SWAP_CHAIN_DESC* pDesc, + IDXGISwapChain** ppSwapChain, + NV_STEREO_SWAPCHAIN_MODE mode); + +#endif //if defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) + + +#if defined(_D3D9_H_) //NvAPI_D3D9_CreateSwapChain +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D9_CreateSwapChain +// +//! 
DESCRIPTION: This API allows the user to create a mono or a stereo swap chain. +//! +//! NOTE: NvAPI_D3D9_CreateSwapChain is a wrapper of the method IDirect3DDevice9::CreateAdditionalSwapChain which +//! additionally notifies the D3D driver if the swap chain creation mode must be stereo or mono. +//! +//! +//! \since Release: 285 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hStereoHandle Stereo handle that corresponds to the device interface. +//! \param [in, out] pPresentationParameters A pointer to the swap-chain description (DXGI). This parameter cannot be NULL. +//! \param [out] ppSwapChain A pointer to the swap chain created. +//! \param [in] mode The stereo mode for the swap chain. +//! NVAPI_STEREO_SWAPCHAIN_DEFAULT +//! NVAPI_STEREO_SWAPCHAIN_STEREO +//! NVAPI_STEREO_SWAPCHAIN_MONO +//! +//! \retval ::NVAPI_OK The swap chain creation was successful +//! \retval ::NVAPI_ERROR The operation failed. +//! +//!\ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D9_CreateSwapChain(StereoHandle hStereoHandle, + D3DPRESENT_PARAMETERS *pPresentationParameters, + IDirect3DSwapChain9 **ppSwapChain, + NV_STEREO_SWAPCHAIN_MODE mode); +#endif //if defined(_D3D9_H_) //NvAPI_D3D9_CreateSwapChain + + + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(_WINNT_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Vulkan_InitLowLatencyDevice +// +// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!! +//! \deprecated Do not use this function - it is deprecated in release 565. +// +//! DESCRIPTION: This function has to be used to initialize a Vulkan device +//! as a low latency device. The driver initializes a set of parameters to +//! be used in subsequent low latency API calls for this device. +//! The returned NV_VULKAN_LOW_LATENCY_DEVICE_PARAMS has to be passed into +//! 
any low latency API call as a parameter to ensure use of these parameters. +//! +//! \since Release: 455 +//! \param [in] vkDevice The Vulkan device handle +//! \param [out] signalDemaphoreHandle pointer to a VkSemaphore handle that is signalling in Sleep +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_InitLowLatencyDevice(__in HANDLE vkDevice, __out HANDLE *signalSemaphoreHandle); +#endif // defined(__cplusplus) && defined(_WINNT_) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(_WINNT_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Vulkan_DestroyLowLatencyDevice +// +// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!! +//! \deprecated Do not use this function - it is deprecated in release 565. +// +//! DESCRIPTION: This function releases the set of low latency device +//! parameters. +//! +//! \since Release: 455 +//! \param [in] vkDevice The Vulkan device handle +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_DestroyLowLatencyDevice(__in HANDLE vkDevice); +#endif // defined(__cplusplus) && defined(_WINNT_) + + +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! Used to get sleep status +//! \ingroup oglapi +typedef struct _NV_VULKAN_GET_SLEEP_STATUS_PARAMS +{ + NvU32 version; //!< (IN) Structure version + NvBool bLowLatencyMode; //!< (OUT) Is low latency mode enabled? + NvU8 rsvd[128]; //!< (IN) Reserved. Must be set to 0s. +} NV_VULKAN_GET_SLEEP_STATUS_PARAMS_V1; + +typedef NV_VULKAN_GET_SLEEP_STATUS_PARAMS_V1 NV_VULKAN_GET_SLEEP_STATUS_PARAMS; +#define NV_VULKAN_GET_SLEEP_STATUS_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VULKAN_GET_SLEEP_STATUS_PARAMS_V1, 1) +#define NV_VULKAN_GET_SLEEP_STATUS_PARAMS_VER NV_VULKAN_GET_SLEEP_STATUS_PARAMS_VER1 + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(_WINNT_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Vulkan_GetSleepStatus +// +// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!! +//! \deprecated Do not use this function - it is deprecated in release 565. +// +//! DESCRIPTION: This function can be used to get the latest sleep status. +//! bLowLatencyMode indicates whether low latency mode is currently +//! enabled in the driver. +//! Note that it may not always reflect the previously requested sleep mode, +//! as the feature may not be available on the platform, or the setting has +//! been overridden by the control panel, for example. +//! +//! \since Release: 455 +//! \param [in] vkDevice The Vulkan device handle +//! \param [inout] pGetSleepStatusParams Sleep status params. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! 
\ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_GetSleepStatus(__in HANDLE vkDevice, __inout NV_VULKAN_GET_SLEEP_STATUS_PARAMS *pGetSleepStatusParams); +#endif // defined(__cplusplus) && defined(_WINNT_) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used to set sleep mode +//! \ingroup oglapi +#if defined(__cplusplus) && defined(_WINNT_) +typedef struct _NV_VULKAN_SET_SLEEP_MODE_PARAMS +{ + NvU32 version; //!< (IN) Structure version + NvBool bLowLatencyMode; //!< (IN) Low latency mode enable/disable. + NvBool bLowLatencyBoost; //!< (IN) Request maximum GPU clock frequency regardless of workload. + NvU32 minimumIntervalUs; //!< (IN) Minimum frame interval in microseconds. 0 = no frame rate limit. + NvU8 rsvd[32]; //!< (IN) Reserved. Must be set to 0s. +} NV_VULKAN_SET_SLEEP_MODE_PARAMS_V1; + +typedef NV_VULKAN_SET_SLEEP_MODE_PARAMS_V1 NV_VULKAN_SET_SLEEP_MODE_PARAMS; +#define NV_VULKAN_SET_SLEEP_MODE_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VULKAN_SET_SLEEP_MODE_PARAMS_V1, 1) +#define NV_VULKAN_SET_SLEEP_MODE_PARAMS_VER NV_VULKAN_SET_SLEEP_MODE_PARAMS_VER1 +#endif // defined(__cplusplus) && defined(_WINNT_) + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(_WINNT_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Vulkan_SetSleepMode +// +// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!! +//! \deprecated Do not use this function - it is deprecated in release 565. +// +//! DESCRIPTION: This function can be used to update sleep mode dynamically. +//! The settings are not dependent to each other, meaning low latency mode +//! can be enabled/disabled regardless of whether minimum interval is set or +//! not. The former is to intelligently lower latency without impacting frame +//! rate. 
The later is to limit frame rate (e.g. minimumIntervalUs = 10000 +//! limits frame rate to 100 FPS). They work well separately and/or together. +//! Note that minimumIntervalUs usage is not limited to lowering latency, so +//! feel free to use it to limit frame rate for menu, cut scenes, etc. +//! Note that low latency mode can be enabled, and/or minimum interval can +//! be set, even without using NvAPI_D3D_Sleep(). However, without it, the +//! sleep to achieve these features would happen at a less optimal point, +//! resulting in higher overall latency. +//! The bLowLatencyBoost will request the GPU run at max clocks even in +//! scenarios where it is idle most of the frame and would normally try +//! to save power. This can decrease latency in CPU-limited scenarios. +//! While this function can be called as often as needed, it is not +//! necessary nor recommended to call this too frequently (e.g. every frame), +//! as the settings persist for the target device. +//! +//! \since Release: 455 +//! \param [in] vkDevice The Vulkan device handle +//! \param [in] pSetSleepModeParams Sleep mode params. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_SetSleepMode(__in HANDLE vkDevice, __in NV_VULKAN_SET_SLEEP_MODE_PARAMS *pSetSleepModeParams); +#endif // defined(__cplusplus) && defined(_WINNT_) + +//! SUPPORTED OS: Windows 10 and higher +//! 
+#if defined(__cplusplus) && defined(_WINNT_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Vulkan_Sleep +// +// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!! +//! \deprecated Do not use this function - it is deprecated in release 565. +// +//! DESCRIPTION: It is recommended to call this function at the very start of +//! each frame (e.g. before input sampling). If there is a need to sleep, +//! due to frame rate limit and/or low latency features, for example, +//! this call provides an entry point for the driver to sleep at the most +//! optimal spot to achieve the lowest latency. +//! It is recommended to call this function even when low latency mode is +//! disabled and minimum interval is 0. Other features, such as Maximum Frame +//! Rate setting, could be enabled in the control panel to benefit from this. +//! It is OK to start (or stop) using this function at any time. However, +//! when using this function, it must be called exactly once on each frame. +//! If this function is not called, after several frames, the driver would +//! fallback to sleep at its less optimal spot. +//! +//! \since Release: 455 +//! \param [in] vkDevice The Vulkan device handle +//! \param [in] signalValue Value that will be signalled in signalDemaphoreHandle semaphore at Sleep +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_Sleep(__in HANDLE vkDevice, __in NvU64 signalValue); +#endif // defined(__cplusplus) && defined(_WINNT_) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! 
Used to get latency report. +//! \ingroup oglapi +typedef struct _NV_VULKAN_LATENCY_RESULT_PARAMS +{ + NvU32 version; //!< (IN) Structure version + struct vkFrameReport { + NvU64 frameID; + NvU64 inputSampleTime; + NvU64 simStartTime; + NvU64 simEndTime; + NvU64 renderSubmitStartTime; + NvU64 renderSubmitEndTime; + NvU64 presentStartTime; + NvU64 presentEndTime; + NvU64 driverStartTime; + NvU64 driverEndTime; + NvU64 osRenderQueueStartTime; + NvU64 osRenderQueueEndTime; + NvU64 gpuRenderStartTime; + NvU64 gpuRenderEndTime; + NvU8 rsvd[128]; + } frameReport[64]; + NvU8 rsvd[32]; +} NV_VULKAN_LATENCY_RESULT_PARAMS_V1; + +typedef NV_VULKAN_LATENCY_RESULT_PARAMS_V1 NV_VULKAN_LATENCY_RESULT_PARAMS; +#define NV_VULKAN_LATENCY_RESULT_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VULKAN_LATENCY_RESULT_PARAMS_V1, 1) +#define NV_VULKAN_LATENCY_RESULT_PARAMS_VER NV_VULKAN_LATENCY_RESULT_PARAMS_VER1 + +//! SUPPORTED OS: Windows 10 and higher +//! +#if defined(__cplusplus) && defined(_WINNT_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Vulkan_GetLatency +// +// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!! +//! \deprecated Do not use this function - it is deprecated in release 565. +// +//! DESCRIPTION: Get a latency report including the timestamps of the +//! application latency markers set with NvAPI_Vulkan_SetLatencyMarker as well +//! as driver, OS queue and graphics hardware times. Requires calling +//! NvAPI_Vulkan_SetLatencyMarker with incrementing frameID for valid results. +//! Rendering for at least 90 frames is recommended to properly fill out the +//! structure. The newest completed frame is at the end (element 63) and +//! is preceeded by older frames. If not enough frames are valid then all +//! frames are returned with all zeroes. +//! +//! \since Release: 455 +//! \param [in] vkDevice The Vulkan device handle +//! \param [inout] pGetLatencyParams The latency result structure. +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_GetLatency(__in HANDLE vkDevice, __inout NV_VULKAN_LATENCY_RESULT_PARAMS* pGetLatencyParams); +#endif // defined(__cplusplus) && defined(_WINNT_) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used define latency marker type +//! \ingroup oglapi +typedef enum +{ + VULKAN_SIMULATION_START = 0, + VULKAN_SIMULATION_END = 1, + VULKAN_RENDERSUBMIT_START = 2, + VULKAN_RENDERSUBMIT_END = 3, + VULKAN_PRESENT_START = 4, + VULKAN_PRESENT_END = 5, + VULKAN_INPUT_SAMPLE = 6, + VULKAN_TRIGGER_FLASH = 7, + VULKAN_PC_LATENCY_PING = 8, + VULKAN_OUT_OF_BAND_RENDERSUBMIT_START = 9, + VULKAN_OUT_OF_BAND_RENDERSUBMIT_END = 10, + VULKAN_OUT_OF_BAND_PRESENT_START = 11, + VULKAN_OUT_OF_BAND_PRESENT_END = 12, +} NV_VULKAN_LATENCY_MARKER_TYPE; + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used set latency markers +//! \ingroup oglapi +typedef struct _NV_VULKAN_LATENCY_MARKER_PARAMS +{ + NvU32 version; //!< (IN) Structure version + NvU64 frameID; + NV_VULKAN_LATENCY_MARKER_TYPE markerType; + NvU8 rsvd[64]; +} NV_VULKAN_LATENCY_MARKER_PARAMS_V1; + +typedef NV_VULKAN_LATENCY_MARKER_PARAMS_V1 NV_VULKAN_LATENCY_MARKER_PARAMS; +#define NV_VULKAN_LATENCY_MARKER_PARAMS_VER1 MAKE_NVAPI_VERSION(NV_VULKAN_LATENCY_MARKER_PARAMS_V1, 1) +#define NV_VULKAN_LATENCY_MARKER_PARAMS_VER NV_VULKAN_LATENCY_MARKER_PARAMS_VER1 + +//! SUPPORTED OS: Windows 10 and higher +//! 
+#if defined(__cplusplus) && defined(_WINNT_)
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_Vulkan_SetLatencyMarker
+//
+// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!!
+//! \deprecated Do not use this function - it is deprecated in release 565.
+//
+//! DESCRIPTION: Set a latency marker to be tracked by the
+//! NvAPI_Vulkan_GetLatency function. SIMULATION_START must be the first marker
+//! sent in a frame, after the previous frame's Sleep call (if used).
+//! INPUT_SAMPLE may be sent to record the moment user input was sampled and
+//! should come between SIMULATION_START and SIMULATION_END.
+//! RENDERSUBMIT_START should come before any Vulkan API calls are made for
+//! the given frame and RENDERSUBMIT_END should come before calling Present.
+//! PRESENT_START and END should wrap the Present call to inform the driver
+//! of a present block done by the OS before the driver receives the Present.
+//! TRIGGER_FLASH tells the driver to render its flash indicator for latency
+//! testing, typically driven by a mouse click.
+//! The frameID can start at an arbitrary moment in the application lifetime
+//! but must strictly increment from that point forward for consistent results.
+
+//!
+//! \since Release: 455
+//! \param [in] vkDevice The Vulkan device handle
+//! \param [in] pSetLatencyMarkerParams The latency marker structure
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//!
\ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_SetLatencyMarker(__in HANDLE vkDevice, __in NV_VULKAN_LATENCY_MARKER_PARAMS* pSetLatencyMarkerParams); +#endif // defined(__cplusplus) && defined(_WINNT_) + +//! SUPPORTED OS: Windows 10 and higher +//! +//! Used in NvAPI_Vulkan_NotifyOutOfBandVkQueue +//! \ingroup oglapi +typedef enum +{ + VULKAN_OUT_OF_BAND_QUEUE_TYPE_RENDER = 0, + VULKAN_OUT_OF_BAND_QUEUE_TYPE_PRESENT = 1, +} NV_VULKAN_OUT_OF_BAND_QUEUE_TYPE; + +#if defined(__cplusplus) && defined(_WINNT_) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Vulkan_NotifyOutOfBandVkQueue +// +// !!** DEPRECATED, please use VK_NV_low_latency2 instead **!! +//! \deprecated Do not use this function - it is deprecated in release 565. +// +//! DESCRIPTION: Notifies the driver that this command queue runs out of band +//! from the application's frame cadence. +//! +//! \since Release: 520 +//! \param [in] vkDevice The Vulkan device handle +//! \param [in] queueHandle The VkQueue +//! \param [in] queueType The type of out of band VkQueue +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup oglapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 565.") +NVAPI_INTERFACE NvAPI_Vulkan_NotifyOutOfBandVkQueue(__in HANDLE vkDevice, __in HANDLE queueHandle, __in NV_VULKAN_OUT_OF_BAND_QUEUE_TYPE queueType); +#endif // defined(__cplusplus) && defined(_WINNT_) + + +//! \addtogroup drsapi +//! 
@{ + + +// GPU Profile APIs + +NV_DECLARE_HANDLE(NvDRSSessionHandle); +NV_DECLARE_HANDLE(NvDRSProfileHandle); + +#define NVAPI_DRS_GLOBAL_PROFILE ((NvDRSProfileHandle) -1) + +#define NVAPI_SETTING_MAX_VALUES 100 + +typedef enum _NVDRS_SETTING_TYPE +{ + NVDRS_DWORD_TYPE, + NVDRS_BINARY_TYPE, + NVDRS_STRING_TYPE, + NVDRS_WSTRING_TYPE +} NVDRS_SETTING_TYPE; + +typedef enum _NVDRS_SETTING_LOCATION +{ + NVDRS_CURRENT_PROFILE_LOCATION, + NVDRS_GLOBAL_PROFILE_LOCATION, + NVDRS_BASE_PROFILE_LOCATION, + NVDRS_DEFAULT_PROFILE_LOCATION +} NVDRS_SETTING_LOCATION; + + +typedef struct _NVDRS_GPU_SUPPORT +{ + NvU32 geforce : 1; + NvU32 quadro : 1; + NvU32 nvs : 1; + NvU32 reserved4 : 1; + NvU32 reserved5 : 1; + NvU32 reserved6 : 1; + NvU32 reserved7 : 1; + NvU32 reserved8 : 1; + NvU32 reserved9 : 1; + NvU32 reserved10 : 1; + NvU32 reserved11 : 1; + NvU32 reserved12 : 1; + NvU32 reserved13 : 1; + NvU32 reserved14 : 1; + NvU32 reserved15 : 1; + NvU32 reserved16 : 1; + NvU32 reserved17 : 1; + NvU32 reserved18 : 1; + NvU32 reserved19 : 1; + NvU32 reserved20 : 1; + NvU32 reserved21 : 1; + NvU32 reserved22 : 1; + NvU32 reserved23 : 1; + NvU32 reserved24 : 1; + NvU32 reserved25 : 1; + NvU32 reserved26 : 1; + NvU32 reserved27 : 1; + NvU32 reserved28 : 1; + NvU32 reserved29 : 1; + NvU32 reserved30 : 1; + NvU32 reserved31 : 1; + NvU32 reserved32 : 1; +} NVDRS_GPU_SUPPORT; + +//! Enum to decide on the datatype of setting value. +typedef struct _NVDRS_BINARY_SETTING +{ + NvU32 valueLength; //!< valueLength should always be in number of bytes. + NvU8 valueData[NVAPI_BINARY_DATA_MAX]; +} NVDRS_BINARY_SETTING; + +typedef struct _NVDRS_SETTING_VALUES +{ + NvU32 version; //!< Structure Version + NvU32 numSettingValues; //!< Total number of values available in a setting. + NVDRS_SETTING_TYPE settingType; //!< Type of setting value. + union //!< Setting can hold either DWORD or Binary value or string. Not mixed types. + { + NvU32 u32DefaultValue; //!< Accessing default DWORD value of this setting. 
+ NVDRS_BINARY_SETTING binaryDefaultValue; //!< Accessing default Binary value of this setting. + //!< Must be allocated by caller with valueLength specifying buffer size, or only valueLength will be filled in. + NvAPI_UnicodeString wszDefaultValue; //!< Accessing default unicode string value of this setting. + }; + union //!< Setting values can be of either DWORD, Binary values or String type, + { //!< NOT mixed types. + NvU32 u32Value; //!< All possible DWORD values for a setting + NVDRS_BINARY_SETTING binaryValue; //!< All possible Binary values for a setting + NvAPI_UnicodeString wszValue; //!< Accessing current unicode string value of this setting. + }settingValues[NVAPI_SETTING_MAX_VALUES]; +} NVDRS_SETTING_VALUES; + +//! Macro for constructing the version field of ::_NVDRS_SETTING_VALUES +#define NVDRS_SETTING_VALUES_VER MAKE_NVAPI_VERSION(NVDRS_SETTING_VALUES,1) + +typedef struct _NVDRS_SETTING_V1 +{ + NvU32 version; //!< Structure Version + NvAPI_UnicodeString settingName; //!< String name of setting + NvU32 settingId; //!< 32 bit setting Id + NVDRS_SETTING_TYPE settingType; //!< Type of setting value. + NVDRS_SETTING_LOCATION settingLocation; //!< Describes where the value in CurrentValue comes from. + NvU32 isCurrentPredefined; //!< It is different than 0 if the currentValue is a predefined Value, + //!< 0 if the currentValue is a user value. + NvU32 isPredefinedValid; //!< It is different than 0 if the PredefinedValue union contains a valid value. + union //!< Setting can hold either DWORD or Binary value or string. Not mixed types. + { + NvU32 u32PredefinedValue; //!< Accessing default DWORD value of this setting. + NVDRS_BINARY_SETTING binaryPredefinedValue; //!< Accessing default Binary value of this setting. + //!< Must be allocated by caller with valueLength specifying buffer size, + //!< or only valueLength will be filled in. + NvAPI_UnicodeString wszPredefinedValue; //!< Accessing default unicode string value of this setting. 
+ }; + union //!< Setting can hold either DWORD or Binary value or string. Not mixed types. + { + NvU32 u32CurrentValue; //!< Accessing current DWORD value of this setting. + NVDRS_BINARY_SETTING binaryCurrentValue; //!< Accessing current Binary value of this setting. + //!< Must be allocated by caller with valueLength specifying buffer size, + //!< or only valueLength will be filled in. + NvAPI_UnicodeString wszCurrentValue; //!< Accessing current unicode string value of this setting. + }; +} NVDRS_SETTING_V1; + +//! Macro for constructing the version field of ::_NVDRS_SETTING +#define NVDRS_SETTING_VER1 MAKE_NVAPI_VERSION(NVDRS_SETTING_V1, 1) + +typedef NVDRS_SETTING_V1 NVDRS_SETTING; +#define NVDRS_SETTING_VER NVDRS_SETTING_VER1 + +typedef struct _NVDRS_APPLICATION_V1 +{ + NvU32 version; //!< Structure Version + NvU32 isPredefined; //!< Is the application userdefined/predefined + NvAPI_UnicodeString appName; //!< String name of the Application + NvAPI_UnicodeString userFriendlyName; //!< UserFriendly name of the Application + NvAPI_UnicodeString launcher; //!< Indicates the name (if any) of the launcher that starts the application +} NVDRS_APPLICATION_V1; + +typedef struct _NVDRS_APPLICATION_V2 +{ + NvU32 version; //!< Structure Version + NvU32 isPredefined; //!< Is the application userdefined/predefined + NvAPI_UnicodeString appName; //!< String name of the Application + NvAPI_UnicodeString userFriendlyName; //!< UserFriendly name of the Application + NvAPI_UnicodeString launcher; //!< Indicates the name (if any) of the launcher that starts the Application + NvAPI_UnicodeString fileInFolder; //!< Select this application only if this file is found. + //!< When specifying multiple files, separate them using the ':' character. 
+} NVDRS_APPLICATION_V2; + +typedef struct _NVDRS_APPLICATION_V3 +{ + NvU32 version; //!< Structure Version + NvU32 isPredefined; //!< Is the application userdefined/predefined + NvAPI_UnicodeString appName; //!< String name of the Application + NvAPI_UnicodeString userFriendlyName; //!< UserFriendly name of the Application + NvAPI_UnicodeString launcher; //!< Indicates the name (if any) of the launcher that starts the Application + NvAPI_UnicodeString fileInFolder; //!< Select this application only if this file is found. + //!< When specifying multiple files, separate them using the ':' character. + NvU32 isMetro:1; //!< Windows 8 style app + NvU32 isCommandLine:1; //!< Command line parsing for the application name + NvU32 reserved:30; //!< Reserved. Should be 0. +} NVDRS_APPLICATION_V3; + +typedef struct _NVDRS_APPLICATION_V4 +{ + NvU32 version; //!< Structure Version + NvU32 isPredefined; //!< Is the application userdefined/predefined + NvAPI_UnicodeString appName; //!< String name of the Application + NvAPI_UnicodeString userFriendlyName; //!< UserFriendly name of the Application + NvAPI_UnicodeString launcher; //!< Indicates the name (if any) of the launcher that starts the Application + NvAPI_UnicodeString fileInFolder; //!< Select this application only if this file is found. + //!< When specifying multiple files, separate them using the ':' character. + NvU32 isMetro:1; //!< Windows 8 style app + NvU32 isCommandLine:1; //!< Command line parsing for the application name + NvU32 reserved:30; //!< Reserved. Should be 0. + NvAPI_UnicodeString commandLine; //!< If isCommandLine is set to 0 this must be an empty. If isCommandLine is set to 1 + //!< this contains application's command line as if it was returned by GetCommandLineW. 
+} NVDRS_APPLICATION_V4; + +#define NVDRS_APPLICATION_VER_V1 MAKE_NVAPI_VERSION(NVDRS_APPLICATION_V1,1) +#define NVDRS_APPLICATION_VER_V2 MAKE_NVAPI_VERSION(NVDRS_APPLICATION_V2,2) +#define NVDRS_APPLICATION_VER_V3 MAKE_NVAPI_VERSION(NVDRS_APPLICATION_V3,3) +#define NVDRS_APPLICATION_VER_V4 MAKE_NVAPI_VERSION(NVDRS_APPLICATION_V4,4) + +typedef NVDRS_APPLICATION_V4 NVDRS_APPLICATION; +#define NVDRS_APPLICATION_VER NVDRS_APPLICATION_VER_V4 + +typedef struct _NVDRS_PROFILE_V1 +{ + NvU32 version; //!< Structure Version + NvAPI_UnicodeString profileName; //!< String name of the Profile + NVDRS_GPU_SUPPORT gpuSupport; //!< This read-only flag indicates the profile support on either + //!< Quadro, or Geforce, or both. + NvU32 isPredefined; //!< Is the Profile user-defined, or predefined + NvU32 numOfApps; //!< Total number of applications that belong to this profile. Read-only + NvU32 numOfSettings; //!< Total number of settings applied for this Profile. Read-only +} NVDRS_PROFILE_V1; + +typedef NVDRS_PROFILE_V1 NVDRS_PROFILE; + +//! Macro for constructing the version field of ::NVDRS_PROFILE +#define NVDRS_PROFILE_VER1 MAKE_NVAPI_VERSION(NVDRS_PROFILE_V1,1) +#define NVDRS_PROFILE_VER NVDRS_PROFILE_VER1 + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_CreateSession +// +//! DESCRIPTION: This API allocates memory and initializes the session. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [out] *phSession Return pointer to the session handle. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR: For miscellaneous errors. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_CreateSession(NvDRSSessionHandle *phSession); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_DestroySession +// +//! 
DESCRIPTION: This API frees the allocation: cleanup of NvDrsSession. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_DestroySession(NvDRSSessionHandle hSession); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_LoadSettings +// +//! DESCRIPTION: This API loads and parses the settings data. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_LoadSettings(NvDRSSessionHandle hSession); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_SaveSettings +// +//! DESCRIPTION: This API saves the settings data to the system. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_SaveSettings(NvDRSSessionHandle hSession); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_LoadSettingsFromFile +// +//! DESCRIPTION: This API loads settings from the given file path. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle +//! \param [in] fileName Binary File Name/Path +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. 
+// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_LoadSettingsFromFile(NvDRSSessionHandle hSession, NvAPI_UnicodeString fileName); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_SaveSettingsToFile +// +//! DESCRIPTION: This API saves settings to the given file path. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] fileName Binary File Name/Path +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +// +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_SaveSettingsToFile(NvDRSSessionHandle hSession, NvAPI_UnicodeString fileName); + +//! @} + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_CreateProfile +// +//! DESCRIPTION: This API creates an empty profile. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] *pProfileInfo Input pointer to NVDRS_PROFILE. +//! \param [in] *phProfile Returns pointer to profile handle. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_CreateProfile(NvDRSSessionHandle hSession, NVDRS_PROFILE *pProfileInfo, NvDRSProfileHandle *phProfile); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_DeleteProfile +// +//! DESCRIPTION: This API deletes a profile or sets it back to a predefined value. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! +//! 
\retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_DeleteProfile(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_SetCurrentGlobalProfile +// +//! DESCRIPTION: This API sets the current global profile in the driver. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] wszGlobalProfileName Input current Global profile name. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_SetCurrentGlobalProfile(NvDRSSessionHandle hSession, NvAPI_UnicodeString wszGlobalProfileName); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_GetCurrentGlobalProfile +// +//! DESCRIPTION: This API returns the handle to the current global profile. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [out] *phProfile Returns current Global profile handle. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_GetCurrentGlobalProfile(NvDRSSessionHandle hSession, NvDRSProfileHandle *phProfile); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_GetProfileInfo +// +//! DESCRIPTION: This API gets information about the given profile. User needs to specify the name of the Profile. +//! 
+//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [out] *pProfileInfo Return the profile info. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_GetProfileInfo(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NVDRS_PROFILE *pProfileInfo); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_SetProfileInfo +// +//! DESCRIPTION: Specifies flags for a given profile. Currently only the NVDRS_GPU_SUPPORT is +//! used to update the profile. Neither the name, number of settings or applications +//! or other profile information can be changed with this function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [in] *pProfileInfo Input the new profile info. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_SetProfileInfo(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NVDRS_PROFILE *pProfileInfo); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_FindProfileByName +// +//! DESCRIPTION: This API finds a profile in the current session. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] profileName Input profileName. +//! \param [out] phProfile Input profile handle. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! 
\retval ::NVAPI_PROFILE_NOT_FOUND if profile is not found
+//! \retval ::NVAPI_ERROR For miscellaneous errors.
+//!
+//! \ingroup drsapi
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DRS_FindProfileByName(NvDRSSessionHandle hSession, NvAPI_UnicodeString profileName, NvDRSProfileHandle* phProfile);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_DRS_EnumProfiles
+//
+//! DESCRIPTION: This API enumerates through all the profiles in the session.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \param [in] hSession Input to the session handle.
+//! \param [in] index Input the index for enumeration.
+//! \param [out] *phProfile Returns profile handle.
+//!
+//! RETURN STATUS: NVAPI_OK: SUCCESS if the profile is found
+//! NVAPI_ERROR: For miscellaneous errors.
+//! NVAPI_END_ENUMERATION: index exceeds the total number of available Profiles in DB.
+//!
+//! \ingroup drsapi
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DRS_EnumProfiles(NvDRSSessionHandle hSession, NvU32 index, NvDRSProfileHandle *phProfile);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_DRS_GetNumProfiles
+//
+//! DESCRIPTION: This API obtains the number of profiles in the current session object.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \param [in] hSession Input to the session handle.
+//! \param [out] *numProfiles Returns count of profiles in the current hSession.
+//!
+//! \retval ::NVAPI_OK SUCCESS
+//! \retval ::NVAPI_API_NOT_INITIALIZED Failed to initialize.
+//! \retval ::NVAPI_INVALID_ARGUMENT Invalid Arguments.
+//!
+//!
\ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_GetNumProfiles(NvDRSSessionHandle hSession, NvU32 *numProfiles); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_CreateApplication +// +//! DESCRIPTION: This API adds an executable name to a profile. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [in] *pApplication Input NVDRS_APPLICATION struct with the executable name to be added. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_CreateApplication(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NVDRS_APPLICATION *pApplication); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_DeleteApplicationEx +// +//! DESCRIPTION: This API removes an executable from a profile. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession - Input to the session handle. +//! \param [in] hProfile - Input profile handle. +//! \param [in] *pApp - Input all the information about the application to be removed. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! \retval ::NVAPI_EXECUTABLE_PATH_IS_AMBIGUOUS If the path provided could refer to two different executables, +//! this error will be returned. +//! +//! 
\ingroup drsapi
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DRS_DeleteApplicationEx(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NVDRS_APPLICATION *pApp);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_DRS_DeleteApplication
+//
+//! DESCRIPTION: This API removes an executable name from a profile.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \param [in] hSession Input to the session handle.
+//! \param [in] hProfile Input profile handle.
+//! \param [in] appName Input the executable name to be removed.
+//!
+//! \retval ::NVAPI_OK SUCCESS
+//! \retval ::NVAPI_ERROR For miscellaneous errors.
+//! \retval ::NVAPI_EXECUTABLE_PATH_IS_AMBIGUOUS If the path provided could refer to two different executables,
+//! this error will be returned
+//!
+//! \ingroup drsapi
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DRS_DeleteApplication(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NvAPI_UnicodeString appName);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_DRS_GetApplicationInfo
+//
+//! DESCRIPTION: This API gets information about the given application. The input application name
+//! must match exactly what the Profile has stored for the application.
+//! This function is better used to retrieve application information from a previous
+//! enumeration.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \param [in] hSession Input to the session handle.
+//! \param [in] hProfile Input profile handle.
+//! \param [in] appName Input application name.
+//! \param [out] *pApplication Returns NVDRS_APPLICATION struct with all the attributes.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!
If there are return error codes with specific meaning for this API,
+//! they are listed below.
+//! \retval ::NVAPI_EXECUTABLE_PATH_IS_AMBIGUOUS The application name could not
+//! single out only one executable.
+//! \retval ::NVAPI_EXECUTABLE_NOT_FOUND No application with that name is found on the profile.
+//!
+//! \ingroup drsapi
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DRS_GetApplicationInfo(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NvAPI_UnicodeString appName, NVDRS_APPLICATION *pApplication);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_DRS_EnumApplications
+//
+//! DESCRIPTION: This API enumerates all the applications in a given profile from the starting index to the maximum length.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \param [in] hSession Input to the session handle.
+//! \param [in] hProfile Input profile handle.
+//! \param [in] startIndex Indicates starting index for enumeration.
+//! \param [in,out] *appCount Input maximum length of the passed in arrays. Returns the actual length.
+//! \param [out] *pApplication Returns NVDRS_APPLICATION struct with all the attributes.
+//!
+//! \retval ::NVAPI_OK SUCCESS
+//! \retval ::NVAPI_ERROR For miscellaneous errors.
+//! \retval ::NVAPI_END_ENUMERATION startIndex exceeds the total appCount.
+//!
+//! \ingroup drsapi
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_DRS_EnumApplications(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NvU32 startIndex, NvU32 *appCount, NVDRS_APPLICATION *pApplication);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_DRS_FindApplicationByName
+//
+//! DESCRIPTION: This API searches the application and the associated profile for the given application name.
+//!
If a fully qualified path is provided, this function will always return the profile +//! the driver will apply upon running the application (on the path provided). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the hSession handle +//! \param [in] appName Input appName. For best results, provide a fully qualified path of the type +//! c:/Folder1/Folder2/App.exe +//! \param [out] *phProfile Returns profile handle. +//! \param [in,out] *pApplication Returns NVDRS_APPLICATION struct pointer. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, +//! they are listed below: +//! \retval ::NVAPI_APPLICATION_NOT_FOUND If App not found +//! \retval ::NVAPI_EXECUTABLE_PATH_IS_AMBIGUOUS If the input appName was not fully qualified, this error might return in the case of multiple matches +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_FindApplicationByName(__in NvDRSSessionHandle hSession, __in NvAPI_UnicodeString appName, __out NvDRSProfileHandle *phProfile, __inout NVDRS_APPLICATION *pApplication); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_SetSetting +// +//! DESCRIPTION: This API adds/modifies a setting to a profile. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [in] *pSetting Input NVDRS_SETTING struct pointer. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! 
\ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_SetSetting(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NVDRS_SETTING *pSetting); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_GetSetting +// +//! DESCRIPTION: This API gets information about the given setting. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [in] settingId Input settingId. +//! \param [out] *pSetting Returns all the setting info +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_GetSetting(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NvU32 settingId, NVDRS_SETTING *pSetting); + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_EnumSettings +// +//! DESCRIPTION: This API enumerates all the settings of a given profile from startIndex to the maximum length. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [in] startIndex Indicates starting index for enumeration. +//! \param [in,out] *settingsCount Input max length of the passed in arrays, Returns the actual length. +//! \param [out] *pSetting Returns all the settings info. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! \retval ::NVAPI_END_ENUMERATION startIndex exceeds the total appCount. +//! +//! 
\ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_EnumSettings(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NvU32 startIndex, NvU32 *settingsCount, NVDRS_SETTING *pSetting); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_EnumAvailableSettingIds +// +//! DESCRIPTION: This API enumerates all the Ids of all the settings recognized by NVAPI. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [out] pSettingIds User-provided array of length *pMaxCount that NVAPI will fill with IDs. +//! \param [in,out] pMaxCount Input max length of the passed in array, Returns the actual length. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! NVAPI_END_ENUMERATION: the provided pMaxCount is not enough to hold all settingIds. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_EnumAvailableSettingIds(NvU32 *pSettingIds, NvU32 *pMaxCount); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_EnumAvailableSettingValues +// +//! DESCRIPTION: This API enumerates all available setting values for a given setting. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] settingId Input settingId. +//! \param [in,out] pMaxNumValues Input max length of the passed in arrays, Returns the actual length. +//! \param [out] *pSettingValues Returns all available setting values and its count. +//! +//! \retval ::NVAPI_OK SUCCESS +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! 
\ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_EnumAvailableSettingValues(NvU32 settingId, NvU32 *pMaxNumValues, NVDRS_SETTING_VALUES *pSettingValues); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_GetSettingIdFromName +// +//! DESCRIPTION: This API gets the binary ID of a setting given the setting name. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] settingName Input Unicode settingName. +//! \param [out] *pSettingId Returns corresponding settingId. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_PROFILE_NOT_FOUND if profile is not found +//! \retval ::NVAPI_SETTING_NOT_FOUND if setting is not found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_GetSettingIdFromName(NvAPI_UnicodeString settingName, NvU32 *pSettingId); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_GetSettingNameFromId +// +//! DESCRIPTION: This API gets the setting name given the binary ID. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] settingId Input settingId. +//! \param [in] *pSettingName Returns corresponding Unicode settingName. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_PROFILE_NOT_FOUND if profile is not found +//! \retval ::NVAPI_SETTING_NOT_FOUND if setting is not found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! 
\ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_GetSettingNameFromId(NvU32 settingId, NvAPI_UnicodeString *pSettingName); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_DeleteProfileSetting +// +//! DESCRIPTION: This API deletes a setting or sets it back to predefined value. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [in] settingId Input settingId to be deleted. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_DeleteProfileSetting(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NvU32 settingId); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_RestoreAllDefaults +// +//! DESCRIPTION: This API restores the whole system to predefined(default) values. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_RestoreAllDefaults(NvDRSSessionHandle hSession); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_RestoreProfileDefault +// +//! DESCRIPTION: This API restores the given profile to predefined(default) values. +//! Any and all user specified modifications will be removed. +//! If the whole profile was set by the user, the profile will be removed. +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! \retval ::NVAPI_PROFILE_REMOVED SUCCESS, and the hProfile is no longer valid. +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_RestoreProfileDefault(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_RestoreProfileDefaultSetting +// +//! DESCRIPTION: This API restores the given profile setting to predefined(default) values. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] hProfile Input profile handle. +//! \param [in] settingId Input settingId. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! \ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_RestoreProfileDefaultSetting(NvDRSSessionHandle hSession, NvDRSProfileHandle hProfile, NvU32 settingId); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DRS_GetBaseProfile +// +//! DESCRIPTION: Returns the handle to the current global profile. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hSession Input to the session handle. +//! \param [in] *phProfile Returns Base profile handle. +//! +//! \retval ::NVAPI_OK SUCCESS if the profile is found +//! \retval ::NVAPI_ERROR For miscellaneous errors. +//! +//! 
\ingroup drsapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DRS_GetBaseProfile(NvDRSSessionHandle hSession, NvDRSProfileHandle *phProfile); + + + + +//! \addtogroup sysgeneral +//! @{ + +typedef struct +{ + NvU32 version; //!< structure version + NvU32 vendorId; //!< Chipset vendor identification + NvU32 deviceId; //!< Chipset device identification + NvAPI_ShortString szVendorName; //!< Chipset vendor Name + NvAPI_ShortString szChipsetName; //!< Chipset device Name + NvU32 flags; //!< Chipset info flags - obsolete + NvU32 subSysVendorId; //!< Chipset subsystem vendor identification + NvU32 subSysDeviceId; //!< Chipset subsystem device identification + NvAPI_ShortString szSubSysVendorName; //!< subsystem vendor Name + NvU32 HBvendorId; //!< Host bridge vendor identification + NvU32 HBdeviceId; //!< Host bridge device identification + NvU32 HBsubSysVendorId; //!< Host bridge subsystem vendor identification + NvU32 HBsubSysDeviceId; //!< Host bridge subsystem device identification + +} NV_CHIPSET_INFO_v4; + +typedef struct +{ + NvU32 version; //!< structure version + NvU32 vendorId; //!< vendor ID + NvU32 deviceId; //!< device ID + NvAPI_ShortString szVendorName; //!< vendor Name + NvAPI_ShortString szChipsetName; //!< device Name + NvU32 flags; //!< Chipset info flags - obsolete + NvU32 subSysVendorId; //!< subsystem vendor ID + NvU32 subSysDeviceId; //!< subsystem device ID + NvAPI_ShortString szSubSysVendorName; //!< subsystem vendor Name +} NV_CHIPSET_INFO_v3; + +typedef enum +{ + NV_CHIPSET_INFO_HYBRID = 0x00000001, +} NV_CHIPSET_INFO_FLAGS; + +typedef struct +{ + NvU32 version; //!< structure version + NvU32 vendorId; //!< vendor ID + NvU32 deviceId; //!< device ID + NvAPI_ShortString szVendorName; //!< vendor Name + NvAPI_ShortString szChipsetName; //!< device Name + NvU32 flags; //!< Chipset info flags +} NV_CHIPSET_INFO_v2; + +typedef struct +{ + NvU32 version; //structure version + NvU32 vendorId; 
//vendor ID + NvU32 deviceId; //device ID + NvAPI_ShortString szVendorName; //vendor Name + NvAPI_ShortString szChipsetName; //device Name +} NV_CHIPSET_INFO_v1; + +#define NV_CHIPSET_INFO_VER_1 MAKE_NVAPI_VERSION(NV_CHIPSET_INFO_v1,1) +#define NV_CHIPSET_INFO_VER_2 MAKE_NVAPI_VERSION(NV_CHIPSET_INFO_v2,2) +#define NV_CHIPSET_INFO_VER_3 MAKE_NVAPI_VERSION(NV_CHIPSET_INFO_v3,3) +#define NV_CHIPSET_INFO_VER_4 MAKE_NVAPI_VERSION(NV_CHIPSET_INFO_v4,4) + +#define NV_CHIPSET_INFO NV_CHIPSET_INFO_v4 +#define NV_CHIPSET_INFO_VER NV_CHIPSET_INFO_VER_4 + +//! @} + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_SYS_GetChipSetInfo +// +//! This function returns information about the system's chipset. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 95 +//! +//! \retval NVAPI_INVALID_ARGUMENT pChipSetInfo is NULL. +//! \retval NVAPI_OK *pChipSetInfo is now set. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION NV_CHIPSET_INFO version not compatible with driver. +//! \ingroup sysgeneral +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SYS_GetChipSetInfo(NV_CHIPSET_INFO *pChipSetInfo); + + +//! \ingroup sysgeneral +//! Lid and dock information - used in NvAPI_GetLidDockInfo() +typedef struct +{ + NvU32 version; //! Structure version, constructed from the macro #NV_LID_DOCK_PARAMS_VER + NvU32 currentLidState; + NvU32 currentDockState; + NvU32 currentLidPolicy; + NvU32 currentDockPolicy; + NvU32 forcedLidMechanismPresent; + NvU32 forcedDockMechanismPresent; +}NV_LID_DOCK_PARAMS; + + +//! ingroup sysgeneral +#define NV_LID_DOCK_PARAMS_VER MAKE_NVAPI_VERSION(NV_LID_DOCK_PARAMS,1) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GetLidDockInfo +// +//! DESCRIPTION: This function returns the current lid and dock information. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! 
\since Release: 177 +//! +//! \retval ::NVAPI_OK +//! \retval ::NVAPI_ERROR +//! \retval ::NVAPI_NOT_SUPPORTED +//! \retval ::NVAPI_HANDLE_INVALIDATED +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! +//! \ingroup sysgeneral +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SYS_GetLidAndDockInfo(NV_LID_DOCK_PARAMS *pLidAndDock); + + + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_SYS_GetDisplayIdFromGpuAndOutputId +// +//! DESCRIPTION: This API converts a Physical GPU handle and output ID to a +//! display ID. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] hPhysicalGpu Handle to the physical GPU +//! \param [in] outputId Connected display output ID on the +//! target GPU - must only have one bit set +//! \param [out] displayId Pointer to an NvU32 which contains +//! the display ID +//! +//! \retval ::NVAPI_OK - completed request +//! \retval ::NVAPI_API_NOT_INTIALIZED - NVAPI not initialized +//! \retval ::NVAPI_ERROR - miscellaneous error occurred +//! \retval ::NVAPI_INVALID_ARGUMENT - Invalid input parameter. +//! +//! \ingroup sysgeneral +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SYS_GetDisplayIdFromGpuAndOutputId(NvPhysicalGpuHandle hPhysicalGpu, NvU32 outputId, NvU32* displayId); + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_SYS_GetGpuAndOutputIdFromDisplayId +// +//! DESCRIPTION: This API converts a display ID to a Physical GPU handle and output ID. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] displayId Display ID of display to retrieve +//! GPU and outputId for +//! \param [out] hPhysicalGpu Handle to the physical GPU +//! \param [out] outputId ) Connected display output ID on the +//! target GPU will only have one bit set. +//! +//! \retval ::NVAPI_OK +//! 
\retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_ID_OUT_OF_RANGE The DisplayId corresponds to a +//! display which is not within the +//! normal outputId range. +//! \retval ::NVAPI_ERROR +//! \retval ::NVAPI_INVALID_ARGUMENT +//! +//! \ingroup sysgeneral +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SYS_GetGpuAndOutputIdFromDisplayId(NvU32 displayId, NvPhysicalGpuHandle *hPhysicalGpu, NvU32 *outputId); + + +/////////////////////////////////////////////////////////////////////////////// +// FUNCTION NAME: NvAPI_SYS_GetPhysicalGpuFromDisplayId +// +//! \code +//! DESCRIPTION: This API retrieves the Physical GPU handle of the connected display +//! +//! \since Release: 313 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! PARAMETERS: displayId(IN) - Display ID of display to retrieve +//! GPU handle +//! hPhysicalGpu(OUT) - Handle to the physical GPU +//! +//! RETURN STATUS: +//! NVAPI_OK - completed request +//! NVAPI_API_NOT_INTIALIZED - NVAPI not initialized +//! NVAPI_ERROR - miscellaneous error occurred +//! NVAPI_INVALID_ARGUMENT - Invalid input parameter. +//! \endcode +//! \ingroup sysgeneral +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SYS_GetPhysicalGpuFromDisplayId(NvU32 displayId, NvPhysicalGpuHandle *hPhysicalGpu); + +typedef struct _NV_DISPLAY_DRIVER_INFO +{ + NvU32 version; //!< Structure Version. + NvU32 driverVersion; //!< Contains the driver version after successful return. + NvAPI_ShortString szBuildBranch; //!< Contains the driver-branch string after successful return. + NvU32 bIsDCHDriver : 1; //!< Contains the driver DCH status after successful return. + //!< Value of 1 means that this is DCH driver. 
+ //!< Value of 0 means that this is not a DCH driver (NVAPI may be unable to query the DCH status of the driver due to some registry API errors, in that case the API will return with NVAPI_ERROR) + NvU32 bIsNVIDIAStudioPackage : 1; //!< On successful return, this field provides information about whether the installed driver is from an NVIDIA Studio Driver package. + //!< Value of 1 means that this driver is from the NVIDIA Studio Driver package. + NvU32 bIsNVIDIAGameReadyPackage : 1; //!< On successful return, this field provides information about whether the installed driver is from an NVIDIA Game Ready Driver package. + //!< Value of 1 means that this driver is from the NVIDIA Game Ready Driver package. + NvU32 bIsNVIDIARTXProductionBranchPackage : 1; //!< On successful return, this field confirms whether the installed driver package is from an NVIDIA RTX Enterprise Production Branch which offers ISV certifications, long life-cycle support, regular security updates, and access to the same functionality as corresponding NVIDIA Studio Driver Packages (i.e., of the same driver version number). + //!< Value of 1 means that this driver is from the NVIDIA RTX Enterprise Production Branch package. + NvU32 bIsNVIDIARTXNewFeatureBranchPackage : 1; //!< On successful return, this field confirms whether the installed driver package is from an NVIDIA RTX New Feature Branch. + //!< This driver typically gives access to new features, bug fixes, new operating system support, and other driver enhancements offered between NVIDIA RTX Enterprise Production Branch releases. Support duration for NVIDIA RTX New Feature Branches is shorter than that for NVIDIA RTX Enterprise Production Branches. + //!< Value of 1 means that this driver is from the NVIDIA RTX New Feature Branch package. + NvU32 reserved : 27; //!< Reserved for future use. +} NV_DISPLAY_DRIVER_INFO_V1; + +typedef struct _NV_DISPLAY_DRIVER_INFO_V2 +{ + NvU32 version; //!< Structure Version. 
+ NvU32 driverVersion; //!< Contains the driver version after successful return. + NvAPI_ShortString szBuildBranch; //!< Contains the driver-branch string after successful return. + NvU32 bIsDCHDriver : 1; //!< Contains the driver DCH status after successful return. + //!< Value of 1 means that this is DCH driver. + //!< Value of 0 means that this is not a DCH driver (NVAPI may be unable to query the DCH status of the driver due to some registry API errors, in that case the API will return with NVAPI_ERROR) + NvU32 bIsNVIDIAStudioPackage : 1; //!< On successful return, this field provides information about whether the installed driver is from an NVIDIA Studio Driver package. + //!< Value of 1 means that this driver is from the NVIDIA Studio Driver package. + NvU32 bIsNVIDIAGameReadyPackage : 1; //!< On successful return, this field provides information about whether the installed driver is from an NVIDIA Game Ready Driver package. + //!< Value of 1 means that this driver is from the NVIDIA Game Ready Driver package. + NvU32 bIsNVIDIARTXProductionBranchPackage : 1; //!< On successful return, this field confirms whether the installed driver package is from an NVIDIA RTX Enterprise Production Branch which offers ISV certifications, long life-cycle support, regular security updates, and access to the same functionality as corresponding NVIDIA Studio Driver Packages (i.e., of the same driver version number). + //!< Value of 1 means that this driver is from the NVIDIA RTX Enterprise Production Branch package. + NvU32 bIsNVIDIARTXNewFeatureBranchPackage : 1; //!< On successful return, this field confirms whether the installed driver package is from an NVIDIA RTX New Feature Branch. + //!< This driver typically gives access to new features, bug fixes, new operating system support, and other driver enhancements offered between NVIDIA RTX Enterprise Production Branch releases. 
Support duration for NVIDIA RTX New Feature Branches is shorter than that for NVIDIA RTX Enterprise Production Branches. + //!< Value of 1 means that this driver is from the NVIDIA RTX New Feature Branch package. + NvU32 reserved : 27; //!< Reserved for future use. + NvAPI_ShortString szBuildBaseBranch; //!< (OUT) Contains the driver base branch string after successful return. + NvU32 reservedEx; //!< Reserved for future use +} NV_DISPLAY_DRIVER_INFO_V2; + +#define NV_DISPLAY_DRIVER_INFO_VER1 MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_INFO_V1, 1) +#define NV_DISPLAY_DRIVER_INFO_VER2 MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_INFO_V2, 2) +typedef NV_DISPLAY_DRIVER_INFO_V2 NV_DISPLAY_DRIVER_INFO; +#define NV_DISPLAY_DRIVER_INFO_VER NV_DISPLAY_DRIVER_INFO_VER2 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_SYS_GetDisplayDriverInfo +// +//! DESCRIPTION: This API will return information related to the NVIDIA Display Driver. +//! Note that out of the driver types - Studio, Game Ready, RTX Production Branch, RTX New Feature Branch - only one driver type can be available in system. +//! If NVAPI is unable to get the information of particular driver type, we report all flags as 0 (Unknown). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 396 +//! +//! \param [inout] pDriverInfo - This structure will be filled with required information. +//! +//! \return This API can return any of the error codes enumerated in +//! #NvAPI_Status. If there are return error codes with specific +//! meaning for this API, they are listed below. +//! +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SYS_GetDisplayDriverInfo(__inout NV_DISPLAY_DRIVER_INFO *pDriverInfo); + + +typedef struct _NV_PHYSICAL_GPU_HANDLE_DATA +{ + NvPhysicalGpuHandle hPhysicalGpu; // 1399) && !defined(NVAPI_INTERNAL) && !defined(NVAPI_DEPRECATED_OLD) +#ifndef __nvapi_deprecated_function +#define __nvapi_deprecated_function(message) __declspec(deprecated(message)) +#endif +#ifndef __nvapi_deprecated_datatype +#define __nvapi_deprecated_datatype(FirstRelease) __declspec(deprecated("Do not use this data type - it is deprecated in release " #FirstRelease ".")) +#endif +#else +#ifndef __nvapi_deprecated_function +#define __nvapi_deprecated_function(message) +#endif +#ifndef __nvapi_deprecated_datatype +#define __nvapi_deprecated_datatype(FirstRelease) +#endif +#endif + +#define NV_U8_MAX (+255U) +#define NV_U16_MAX (+65535U) +#define NV_S32_MAX (+2147483647) +#define NV_U32_MIN (0U) +#define NV_U32_MAX (+4294967295U) +#define NV_U64_MAX (+18446744073709551615ULL) + +/* 64-bit types for compilers that support them, plus some obsolete variants */ +#if defined(__GNUC__) || defined(__arm) || defined(__IAR_SYSTEMS_ICC__) || defined(__ghs__) || defined(_WIN64) +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ +typedef long long NvS64; /* -9223372036854775808 to 9223372036854775807 */ +#else +typedef unsigned __int64 NvU64; /* 0 to 18446744073709551615 */ +typedef __int64 NvS64; /* -9223372036854775808 to 9223372036854775807 */ +#endif + +#ifndef NVAPI_USE_STDINT +#define NVAPI_USE_STDINT 0 +#endif + +#if NVAPI_USE_STDINT +typedef uint32_t NvV32; /* "void": enumerated or multiple fields */ +typedef uint32_t NvU32; /* 0 to 4294967295 */ +typedef int32_t NvS32; /* -2147483648 to 2147483647 */ + +#else +// mac os 32-bit still needs this +#if (defined(macintosh) || defined(__APPLE__)) && !defined(__LP64__) +typedef signed long NvS32; /* -2147483648 to 2147483647 */ 
+#else +typedef signed int NvS32; /* -2147483648 to 2147483647 */ +#endif + +#if !((defined(NV_UNIX)) || (defined(__unix))) +// mac os 32-bit still needs this +#if ( (defined(macintosh) && defined(__LP64__) && (__NVAPI_RESERVED0__)) || \ + (!defined(macintosh) && defined(__NVAPI_RESERVED0__)) ) +typedef unsigned int NvU32; /* 0 to 4294967295 */ +#elif defined(__clang__) +typedef unsigned int NvU32; /* 0 to 4294967295 */ +#else +typedef unsigned long NvU32; /* 0 to 4294967295 */ +#endif +#else +typedef unsigned int NvU32; /* 0 to 4294967295 */ +#endif +#endif + +typedef unsigned long temp_NvU32; /* 0 to 4294967295 */ +typedef signed short NvS16; +typedef unsigned short NvU16; +typedef unsigned char NvU8; +typedef signed char NvS8; +typedef float NvF32; +typedef double NvF64; + +/*! + * Macro to convert NvU32 to NvF32. + */ +#define NvU32TONvF32(_pData) *(NvF32 *)(_pData) +/*! + * Macro to convert NvF32 to NvU32. + */ +#define NvF32TONvU32(_pData) *(NvU32 *)(_pData) + +#define NVAPI_SDK_VERSION 58087 +/* Boolean type */ +typedef NvU8 NvBool; +#define NV_TRUE ((NvBool)(0 == 0)) +#define NV_FALSE ((NvBool)(0 != 0)) + +typedef struct _NV_RECT +{ + NvU32 left; + NvU32 top; + NvU32 right; + NvU32 bottom; +} NV_RECT; + + +#define NV_DECLARE_HANDLE(name) struct name##__ { int unused; }; typedef struct name##__ *name + +//! \addtogroup nvapihandles +//! NVAPI Handles - These handles are retrieved from various calls and passed in to others in NvAPI +//! These are meant to be opaque types. Do not assume they correspond to indices, HDCs, +//! display indexes or anything else. +//! +//! Most handles remain valid until a display re-configuration (display mode set) or GPU +//! reconfiguration (going into or out of SLI modes) occurs. If NVAPI_HANDLE_INVALIDATED +//! is received by an app, it should discard all handles, and re-enumerate them. +//! 
@{ +NV_DECLARE_HANDLE(NvLogicalGpuHandle); //!< One or more physical GPUs acting in concert (SLI) +NV_DECLARE_HANDLE(NvPhysicalGpuHandle); //!< A single physical GPU +NV_DECLARE_HANDLE(NvDisplayHandle); //!< Display Device driven by NVIDIA GPU(s) (an attached display) +NV_DECLARE_HANDLE(NvMonitorHandle); //!< Monitor handle +NV_DECLARE_HANDLE(NvUnAttachedDisplayHandle); //!< Unattached Display Device driven by NVIDIA GPU(s) +NV_DECLARE_HANDLE(NvVisualComputingDeviceHandle); //!< A handle to a Visual Computing Device +NV_DECLARE_HANDLE(NvEventHandle); //!< A handle to an event registration instance + + +NV_DECLARE_HANDLE(NvHICHandle); //!< A handle to a Host Interface Card +NV_DECLARE_HANDLE(NvGSyncDeviceHandle); //!< A handle to a Sync device +NV_DECLARE_HANDLE(NvVioHandle); //!< A handle to an SDI device +NV_DECLARE_HANDLE(NvTransitionHandle); //!< A handle to address a single transition request +NV_DECLARE_HANDLE(NvAudioHandle); //!< NVIDIA HD Audio Device +NV_DECLARE_HANDLE(Nv3DVPContextHandle); //!< A handle for a 3D Vision Pro (3DVP) context +NV_DECLARE_HANDLE(Nv3DVPTransceiverHandle); //!< A handle for a 3DVP RF transceiver +NV_DECLARE_HANDLE(Nv3DVPGlassesHandle); //!< A handle for a pair of 3DVP RF shutter glasses +NV_DECLARE_HANDLE(NvPcfClientHandle); //!< A handle for NVPCF clients + +typedef void* StereoHandle; //!< A stereo handle, that corresponds to the device interface + +NV_DECLARE_HANDLE(NvSourceHandle); //!< Unique source handle on the system +NV_DECLARE_HANDLE(NvTargetHandle); //!< Unique target handle on the system +NV_DECLARE_HANDLE(NVDX_SwapChainHandle); //!< DirectX SwapChain objects +static const NVDX_SwapChainHandle NVDX_SWAPCHAIN_NONE = 0; +NV_DECLARE_HANDLE(NvPresentBarrierClientHandle); //!< PresentBarrier client object +//! @} + +//! \ingroup nvapihandles +//! @{ +#define NVAPI_DEFAULT_HANDLE 0 +#define NV_BIT(x) (1 << (x)) +//! @} + + + +//! \addtogroup nvapitypes +//! 
@{ +#define NVAPI_GENERIC_STRING_MAX 4096 +#define NVAPI_LONG_STRING_MAX 256 +#define NVAPI_SHORT_STRING_MAX 64 + +typedef struct +{ + NvS32 sX; + NvS32 sY; + NvS32 sWidth; + NvS32 sHeight; +} NvSBox; + +#ifndef NvGUID_Defined +#define NvGUID_Defined + +typedef struct +{ + NvU32 data1; + NvU16 data2; + NvU16 data3; + NvU8 data4[8]; +} NvGUID, NvLUID; + + +#endif //#ifndef NvGUID_Defined +#define NVAPI_MAX_PHYSICAL_GPUS 64 + + +#define NVAPI_MAX_PHYSICAL_BRIDGES 100 +#define NVAPI_PHYSICAL_GPUS 32 +#define NVAPI_MAX_LOGICAL_GPUS 64 +#define NVAPI_MAX_AVAILABLE_GPU_TOPOLOGIES 256 +#define NVAPI_MAX_AVAILABLE_SLI_GROUPS 256 +#define NVAPI_MAX_GPU_TOPOLOGIES NVAPI_MAX_PHYSICAL_GPUS +#define NVAPI_MAX_GPU_PER_TOPOLOGY 8 +#define NVAPI_MAX_DISPLAY_HEADS 2 +#define NVAPI_ADVANCED_DISPLAY_HEADS 4 +#define NVAPI_MAX_DISPLAYS NVAPI_PHYSICAL_GPUS * NVAPI_ADVANCED_DISPLAY_HEADS +#define NVAPI_MAX_ACPI_IDS 16 +#define NVAPI_MAX_VIEW_MODES 8 + + +#define NVAPI_SYSTEM_MAX_HWBCS 128 +#define NVAPI_SYSTEM_HWBC_INVALID_ID 0xffffffff + +#define NVAPI_SYSTEM_MAX_DISPLAYS NVAPI_MAX_PHYSICAL_GPUS * NV_MAX_HEADS +#define NV_MAX_HEADS 4 //!< Maximum heads, each with NVAPI_DESKTOP_RES resolution +#define NVAPI_MAX_HEADS_PER_GPU 32 +#define NV_MAX_VID_STREAMS 4 //!< Maximum number of input video streams, each with a #NVAPI_VIDEO_SRC_INFO +#define NV_MAX_VID_STREAMS_EX 20 //!< Increasing MAX no. of input video streams, each with a #NVAPI_VIDEO_SRC_INFO +#define NV_MAX_VID_PROFILES 4 //!< Maximum number of output video profiles supported + +#define NVAPI_MAX_AUDIO_DEVICES 16 + + +typedef char NvAPI_String[NVAPI_GENERIC_STRING_MAX]; +typedef char NvAPI_LongString[NVAPI_LONG_STRING_MAX]; +typedef char NvAPI_ShortString[NVAPI_SHORT_STRING_MAX]; +typedef NvU16 NvAPI_UnicodeShortString[NVAPI_SHORT_STRING_MAX]; +//! @} + + +// ========================================================================================= +//! NvAPI Version Definition \n +//! 
Maintain per structure specific version define using the MAKE_NVAPI_VERSION macro. \n +//! Usage: #define NV_GENLOCK_STATUS_VER MAKE_NVAPI_VERSION(NV_GENLOCK_STATUS, 1) +//! \ingroup nvapitypes +// ========================================================================================= +#define MAKE_NVAPI_VERSION(typeName,ver) (NvU32)(sizeof(typeName) | ((ver)<<16)) + +//! \ingroup nvapitypes +#define GET_NVAPI_VERSION(ver) (NvU32)((ver)>>16) + +//! \ingroup nvapitypes +#define GET_NVAPI_SIZE(ver) (NvU32)((ver) & 0xffff) + + +// ==================================================== +//! NvAPI Status Values +//! All NvAPI functions return one of these codes. +//! \ingroup nvapistatus +// ==================================================== + + +typedef enum _NvAPI_Status +{ + NVAPI_OK = 0, //!< Success. Request is completed. + NVAPI_ERROR = -1, //!< Generic error + NVAPI_LIBRARY_NOT_FOUND = -2, //!< NVAPI support library cannot be loaded. + NVAPI_NO_IMPLEMENTATION = -3, //!< not implemented in current driver installation + NVAPI_API_NOT_INITIALIZED = -4, //!< NvAPI_Initialize has not been called (successfully) + NVAPI_INVALID_ARGUMENT = -5, //!< The argument/parameter value is not valid or NULL. + NVAPI_NVIDIA_DEVICE_NOT_FOUND = -6, //!< No NVIDIA display driver, or NVIDIA GPU driving a display, was found. 
+ NVAPI_END_ENUMERATION = -7, //!< No more items to enumerate + NVAPI_INVALID_HANDLE = -8, //!< Invalid handle + NVAPI_INCOMPATIBLE_STRUCT_VERSION = -9, //!< An argument's structure version is not supported + NVAPI_HANDLE_INVALIDATED = -10, //!< The handle is no longer valid (likely due to GPU or display re-configuration) + NVAPI_OPENGL_CONTEXT_NOT_CURRENT = -11, //!< No NVIDIA OpenGL context is current (but needs to be) + NVAPI_INVALID_POINTER = -14, //!< An invalid pointer, usually NULL, was passed as a parameter + NVAPI_NO_GL_EXPERT = -12, //!< OpenGL Expert is not supported by the current drivers + NVAPI_INSTRUMENTATION_DISABLED = -13, //!< OpenGL Expert is supported, but driver instrumentation is currently disabled + NVAPI_NO_GL_NSIGHT = -15, //!< OpenGL does not support Nsight + + NVAPI_EXPECTED_LOGICAL_GPU_HANDLE = -100, //!< Expected a logical GPU handle for one or more parameters + NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE = -101, //!< Expected a physical GPU handle for one or more parameters + NVAPI_EXPECTED_DISPLAY_HANDLE = -102, //!< Expected an NV display handle for one or more parameters + NVAPI_INVALID_COMBINATION = -103, //!< The combination of parameters is not valid. + NVAPI_NOT_SUPPORTED = -104, //!< Requested feature is not supported in the selected GPU + NVAPI_PORTID_NOT_FOUND = -105, //!< No port ID was found for the I2C transaction + NVAPI_EXPECTED_UNATTACHED_DISPLAY_HANDLE = -106, //!< Expected an unattached display handle as one of the input parameters. + NVAPI_INVALID_PERF_LEVEL = -107, //!< Invalid perf level + NVAPI_DEVICE_BUSY = -108, //!< Device is busy; request not fulfilled + NVAPI_NV_PERSIST_FILE_NOT_FOUND = -109, //!< NV persist file is not found + NVAPI_PERSIST_DATA_NOT_FOUND = -110, //!< NV persist data is not found + NVAPI_EXPECTED_TV_DISPLAY = -111, //!< Expected a TV output display + NVAPI_EXPECTED_TV_DISPLAY_ON_DCONNECTOR = -112, //!< Expected a TV output on the D Connector - HDTV_EIAJ4120. 
+ NVAPI_NO_ACTIVE_SLI_TOPOLOGY = -113, //!< SLI is not active on this device. + NVAPI_SLI_RENDERING_MODE_NOTALLOWED = -114, //!< Setup of SLI rendering mode is not possible right now. + NVAPI_EXPECTED_DIGITAL_FLAT_PANEL = -115, //!< Expected a digital flat panel. + NVAPI_ARGUMENT_EXCEED_MAX_SIZE = -116, //!< Argument exceeds the expected size. + NVAPI_DEVICE_SWITCHING_NOT_ALLOWED = -117, //!< Inhibit is ON due to one of the flags in NV_GPU_DISPLAY_CHANGE_INHIBIT or SLI active. + NVAPI_TESTING_CLOCKS_NOT_SUPPORTED = -118, //!< Testing of clocks is not supported. + NVAPI_UNKNOWN_UNDERSCAN_CONFIG = -119, //!< The specified underscan config is from an unknown source (e.g. INF) + NVAPI_TIMEOUT_RECONFIGURING_GPU_TOPO = -120, //!< Timeout while reconfiguring GPUs + NVAPI_DATA_NOT_FOUND = -121, //!< Requested data was not found + NVAPI_EXPECTED_ANALOG_DISPLAY = -122, //!< Expected an analog display + NVAPI_NO_VIDLINK = -123, //!< No SLI video bridge is present + NVAPI_REQUIRES_REBOOT = -124, //!< NVAPI requires a reboot for the settings to take effect + NVAPI_INVALID_HYBRID_MODE = -125, //!< The function is not supported with the current Hybrid mode. + NVAPI_MIXED_TARGET_TYPES = -126, //!< The target types are not all the same + NVAPI_SYSWOW64_NOT_SUPPORTED = -127, //!< The function is not supported from 32-bit on a 64-bit system. + NVAPI_IMPLICIT_SET_GPU_TOPOLOGY_CHANGE_NOT_ALLOWED = -128, //!< There is no implicit GPU topology active. Use NVAPI_SetHybridMode to change topology. + NVAPI_REQUEST_USER_TO_CLOSE_NON_MIGRATABLE_APPS = -129, //!< Prompt the user to close all non-migratable applications. + NVAPI_OUT_OF_MEMORY = -130, //!< Could not allocate sufficient memory to complete the call. + NVAPI_WAS_STILL_DRAWING = -131, //!< The previous operation that is transferring information to or from this surface is incomplete. + NVAPI_FILE_NOT_FOUND = -132, //!< The file was not found. 
+ NVAPI_TOO_MANY_UNIQUE_STATE_OBJECTS = -133, //!< There are too many unique instances of a particular type of state object. + NVAPI_INVALID_CALL = -134, //!< The method call is invalid. For example, a method's parameter may not be a valid pointer. + NVAPI_D3D10_1_LIBRARY_NOT_FOUND = -135, //!< d3d10_1.dll cannot be loaded. + NVAPI_FUNCTION_NOT_FOUND = -136, //!< Couldn't find the function in the loaded DLL. + NVAPI_INVALID_USER_PRIVILEGE = -137, //!< The application will require Administrator privileges to access this API. + //!< The application can be elevated to a higher permission level by selecting "Run as Administrator". + NVAPI_EXPECTED_NON_PRIMARY_DISPLAY_HANDLE = -138, //!< The handle corresponds to GDIPrimary. + NVAPI_EXPECTED_COMPUTE_GPU_HANDLE = -139, //!< Setting Physx GPU requires that the GPU is compute-capable. + NVAPI_STEREO_NOT_INITIALIZED = -140, //!< The Stereo part of NVAPI failed to initialize completely. Check if the stereo driver is installed. + NVAPI_STEREO_REGISTRY_ACCESS_FAILED = -141, //!< Access to stereo-related registry keys or values has failed. + NVAPI_STEREO_REGISTRY_PROFILE_TYPE_NOT_SUPPORTED = -142, //!< The given registry profile type is not supported. + NVAPI_STEREO_REGISTRY_VALUE_NOT_SUPPORTED = -143, //!< The given registry value is not supported. + NVAPI_STEREO_NOT_ENABLED = -144, //!< Stereo is not enabled and the function needed it to execute completely. + NVAPI_STEREO_NOT_TURNED_ON = -145, //!< Stereo is not turned on and the function needed it to execute completely. + NVAPI_STEREO_INVALID_DEVICE_INTERFACE = -146, //!< Invalid device interface. + NVAPI_STEREO_PARAMETER_OUT_OF_RANGE = -147, //!< Separation percentage or JPEG image capture quality is out of [0-100] range. + NVAPI_STEREO_FRUSTUM_ADJUST_MODE_NOT_SUPPORTED = -148, //!< The given frustum adjust mode is not supported. + NVAPI_TOPO_NOT_POSSIBLE = -149, //!< The mosaic topology is not possible given the current state of the hardware. 
+ NVAPI_MODE_CHANGE_FAILED = -150, //!< An attempt to do a display resolution mode change has failed. + NVAPI_D3D11_LIBRARY_NOT_FOUND = -151, //!< d3d11.dll/d3d11_beta.dll cannot be loaded. + NVAPI_INVALID_ADDRESS = -152, //!< Address is outside of valid range. + NVAPI_STRING_TOO_SMALL = -153, //!< The pre-allocated string is too small to hold the result. + NVAPI_MATCHING_DEVICE_NOT_FOUND = -154, //!< The input does not match any of the available devices. + NVAPI_DRIVER_RUNNING = -155, //!< Driver is running. + NVAPI_DRIVER_NOTRUNNING = -156, //!< Driver is not running. + NVAPI_ERROR_DRIVER_RELOAD_REQUIRED = -157, //!< A driver reload is required to apply these settings. + NVAPI_SET_NOT_ALLOWED = -158, //!< Intended setting is not allowed. + NVAPI_ADVANCED_DISPLAY_TOPOLOGY_REQUIRED = -159, //!< Information can't be returned due to "advanced display topology". + NVAPI_SETTING_NOT_FOUND = -160, //!< Setting is not found. + NVAPI_SETTING_SIZE_TOO_LARGE = -161, //!< Setting size is too large. + NVAPI_TOO_MANY_SETTINGS_IN_PROFILE = -162, //!< There are too many settings for a profile. + NVAPI_PROFILE_NOT_FOUND = -163, //!< Profile is not found. + NVAPI_PROFILE_NAME_IN_USE = -164, //!< Profile name is duplicated. + NVAPI_PROFILE_NAME_EMPTY = -165, //!< Profile name is empty. + NVAPI_EXECUTABLE_NOT_FOUND = -166, //!< Application not found in the Profile. + NVAPI_EXECUTABLE_ALREADY_IN_USE = -167, //!< Application already exists in the other profile. + NVAPI_DATATYPE_MISMATCH = -168, //!< Data Type mismatch + NVAPI_PROFILE_REMOVED = -169, //!< The profile passed as parameter has been removed and is no longer valid. + NVAPI_UNREGISTERED_RESOURCE = -170, //!< An unregistered resource was passed as a parameter. + NVAPI_ID_OUT_OF_RANGE = -171, //!< The DisplayId corresponds to a display which is not within the normal outputId range. + NVAPI_DISPLAYCONFIG_VALIDATION_FAILED = -172, //!< Display topology is not valid so the driver cannot do a mode set on this configuration. 
+ NVAPI_DPMST_CHANGED = -173, //!< Display Port Multi-Stream topology has been changed. + NVAPI_INSUFFICIENT_BUFFER = -174, //!< Input buffer is insufficient to hold the contents. + NVAPI_ACCESS_DENIED = -175, //!< No access to the caller. + NVAPI_MOSAIC_NOT_ACTIVE = -176, //!< The requested action cannot be performed without Mosaic being enabled. + NVAPI_SHARE_RESOURCE_RELOCATED = -177, //!< The surface is relocated away from video memory. + NVAPI_REQUEST_USER_TO_DISABLE_DWM = -178, //!< The user should disable DWM before calling NvAPI. + NVAPI_D3D_DEVICE_LOST = -179, //!< D3D device status is D3DERR_DEVICELOST or D3DERR_DEVICENOTRESET - the user has to reset the device. + NVAPI_INVALID_CONFIGURATION = -180, //!< The requested action cannot be performed in the current state. + NVAPI_STEREO_HANDSHAKE_NOT_DONE = -181, //!< Call failed as stereo handshake not completed. + NVAPI_EXECUTABLE_PATH_IS_AMBIGUOUS = -182, //!< The path provided was too short to determine the correct NVDRS_APPLICATION + NVAPI_DEFAULT_STEREO_PROFILE_IS_NOT_DEFINED = -183, //!< Default stereo profile is not currently defined + NVAPI_DEFAULT_STEREO_PROFILE_DOES_NOT_EXIST = -184, //!< Default stereo profile does not exist + NVAPI_CLUSTER_ALREADY_EXISTS = -185, //!< A cluster is already defined with the given configuration. + NVAPI_DPMST_DISPLAY_ID_EXPECTED = -186, //!< The input display id is not that of a multi stream enabled connector or a display device in a multi stream topology + NVAPI_INVALID_DISPLAY_ID = -187, //!< The input display id is not valid or the monitor associated to it does not support the current operation + NVAPI_STREAM_IS_OUT_OF_SYNC = -188, //!< While playing secure audio stream, stream goes out of sync + NVAPI_INCOMPATIBLE_AUDIO_DRIVER = -189, //!< Older audio driver version than required + NVAPI_VALUE_ALREADY_SET = -190, //!< Value already set, setting again not allowed. 
+ NVAPI_TIMEOUT = -191, //!< Requested operation timed out + NVAPI_GPU_WORKSTATION_FEATURE_INCOMPLETE = -192, //!< The requested workstation feature set has incomplete driver internal allocation resources + NVAPI_STEREO_INIT_ACTIVATION_NOT_DONE = -193, //!< Call failed because InitActivation was not called. + NVAPI_SYNC_NOT_ACTIVE = -194, //!< The requested action cannot be performed without Sync being enabled. + NVAPI_SYNC_MASTER_NOT_FOUND = -195, //!< The requested action cannot be performed without Sync Master being enabled. + NVAPI_INVALID_SYNC_TOPOLOGY = -196, //!< Invalid displays passed in the NV_GSYNC_DISPLAY pointer. + NVAPI_ECID_SIGN_ALGO_UNSUPPORTED = -197, //!< The specified signing algorithm is not supported. Either an incorrect value was entered or the current installed driver/hardware does not support the input value. + NVAPI_ECID_KEY_VERIFICATION_FAILED = -198, //!< The encrypted public key verification has failed. + NVAPI_FIRMWARE_OUT_OF_DATE = -199, //!< The device's firmware is out of date. + NVAPI_FIRMWARE_REVISION_NOT_SUPPORTED = -200, //!< The device's firmware is not supported. + NVAPI_LICENSE_CALLER_AUTHENTICATION_FAILED = -201, //!< The caller is not authorized to modify the License. + NVAPI_D3D_DEVICE_NOT_REGISTERED = -202, //!< The user tried to use a deferred context without registering the device first + NVAPI_RESOURCE_NOT_ACQUIRED = -203, //!< Head or SourceId was not reserved for the VR Display before doing the Modeset or the dedicated display. + NVAPI_TIMING_NOT_SUPPORTED = -204, //!< Provided timing is not supported. + NVAPI_HDCP_ENCRYPTION_FAILED = -205, //!< HDCP Encryption Failed for the device. Would be applicable when the device is HDCP Capable. + NVAPI_PCLK_LIMITATION_FAILED = -206, //!< Provided mode is over sink device pclk limitation. + NVAPI_NO_CONNECTOR_FOUND = -207, //!< No connector on GPU found. + NVAPI_HDCP_DISABLED = -208, //!< When a non-HDCP capable HMD is connected, we would inform user by this code. 
+ NVAPI_API_IN_USE = -209, //!< Atleast an API is still being called + NVAPI_NVIDIA_DISPLAY_NOT_FOUND = -210, //!< No display found on Nvidia GPU(s). + NVAPI_PRIV_SEC_VIOLATION = -211, //!< Priv security violation, improper access to a secured register. + NVAPI_INCORRECT_VENDOR = -212, //!< NVAPI cannot be called by this vendor + NVAPI_DISPLAY_IN_USE = -213, //!< DirectMode Display is already in use + NVAPI_UNSUPPORTED_CONFIG_NON_HDCP_HMD = -214, //!< The Config is having Non-NVidia GPU with Non-HDCP HMD connected + NVAPI_MAX_DISPLAY_LIMIT_REACHED = -215, //!< GPU's Max Display Limit has Reached + NVAPI_INVALID_DIRECT_MODE_DISPLAY = -216, //!< DirectMode not Enabled on the Display + NVAPI_GPU_IN_DEBUG_MODE = -217, //!< GPU is in debug mode, OC is NOT allowed. + NVAPI_D3D_CONTEXT_NOT_FOUND = -218, //!< No NvAPI context was found for this D3D object + NVAPI_STEREO_VERSION_MISMATCH = -219, //!< there is version mismatch between stereo driver and dx driver + NVAPI_GPU_NOT_POWERED = -220, //!< GPU is not powered and so the request cannot be completed. + NVAPI_ERROR_DRIVER_RELOAD_IN_PROGRESS = -221, //!< The display driver update in progress. + NVAPI_WAIT_FOR_HW_RESOURCE = -222, //!< Wait for HW resources allocation + NVAPI_REQUIRE_FURTHER_HDCP_ACTION = -223, //!< operation requires further HDCP action + NVAPI_DISPLAY_MUX_TRANSITION_FAILED = -224, //!< Dynamic Mux transition failure + NVAPI_INVALID_DSC_VERSION = -225, //!< Invalid DSC version + NVAPI_INVALID_DSC_SLICECOUNT = -226, //!< Invalid DSC slice count + NVAPI_INVALID_DSC_OUTPUT_BPP = -227, //!< Invalid DSC output BPP + NVAPI_FAILED_TO_LOAD_FROM_DRIVER_STORE = -228, //!< There was an error while loading nvapi.dll from the driver store. + NVAPI_NO_VULKAN = -229, //!< OpenGL does not export Vulkan fake extensions + NVAPI_REQUEST_PENDING = -230, //!< A request for NvTOPPs telemetry CData has already been made and is pending a response. 
+ NVAPI_RESOURCE_IN_USE = -231, //!< Operation cannot be performed because the resource is in use. + NVAPI_INVALID_IMAGE = -232, //!< Device kernel image is invalid + NVAPI_INVALID_PTX = -233, //!< PTX JIT compilation failed + NVAPI_NVLINK_UNCORRECTABLE = -234, //!< Uncorrectable NVLink error was detected during the execution + NVAPI_JIT_COMPILER_NOT_FOUND = -235, //!< PTX JIT compiler library was not found. + NVAPI_INVALID_SOURCE = -236, //!< Device kernel source is invalid. + NVAPI_ILLEGAL_INSTRUCTION = -237, //!< While executing a kernel, the device encountered an illegal instruction. + NVAPI_INVALID_PC = -238, //!< While executing a kernel, the device program counter wrapped its address space + NVAPI_LAUNCH_FAILED = -239, //!< An exception occurred on the device while executing a kernel + NVAPI_NOT_PERMITTED = -240, //!< Attempted operation is not permitted. + NVAPI_CALLBACK_ALREADY_REGISTERED = -241, //!< The callback function has already been registered. + NVAPI_CALLBACK_NOT_FOUND = -242, //!< The callback function is not found or not registered. + NVAPI_INVALID_OUTPUT_WIRE_FORMAT = -243, //!< Invalid Wire Format for the VR HMD +} NvAPI_Status; + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_SYS_GetDriverAndBranchVersion +// +//! DESCRIPTION: This API returns display driver version and driver-branch string. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [out] pDriverVersion Contains the driver version after successful return. +//! \param [out] szBuildBranchString Contains the driver-branch string after successful return. +//! +//! \retval ::NVAPI_INVALID_ARGUMENT: either pDriverVersion is NULL or enum index too big +//! \retval ::NVAPI_OK - completed request +//! \retval ::NVAPI_API_NOT_INTIALIZED - NVAPI not initialized +//! \retval ::NVAPI_ERROR - miscellaneous error occurred +//! +//! 
\ingroup driverapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_SYS_GetDriverAndBranchVersion(NvU32* pDriverVersion, NvAPI_ShortString szBuildBranchString); +//! \ingroup driverapi +//! Used in NvAPI_GPU_GetMemoryInfo(). +typedef struct +{ + NvU32 version; //!< Version info + NvU32 dedicatedVideoMemory; //!< Size(in kb) of the physical framebuffer. + NvU32 availableDedicatedVideoMemory; //!< Size(in kb) of the available physical framebuffer for allocating video memory surfaces. + NvU32 systemVideoMemory; //!< Size(in kb) of system memory the driver allocates at load time. + NvU32 sharedSystemMemory; //!< Size(in kb) of shared system memory that driver is allowed to commit for surfaces across all allocations. + +} NV_DISPLAY_DRIVER_MEMORY_INFO_V1; + + +//! \ingroup driverapi +//! Used in NvAPI_GPU_GetMemoryInfo(). +typedef struct +{ + NvU32 version; //!< Version info + NvU32 dedicatedVideoMemory; //!< Size(in kb) of the physical framebuffer. + NvU32 availableDedicatedVideoMemory; //!< Size(in kb) of the available physical framebuffer for allocating video memory surfaces. + NvU32 systemVideoMemory; //!< Size(in kb) of system memory the driver allocates at load time. + NvU32 sharedSystemMemory; //!< Size(in kb) of shared system memory that driver is allowed to commit for surfaces across all allocations. + NvU32 curAvailableDedicatedVideoMemory; //!< Size(in kb) of the current available physical framebuffer for allocating video memory surfaces. + +} NV_DISPLAY_DRIVER_MEMORY_INFO_V2; + +//! \ingroup driverapi +//! Used in NvAPI_GPU_GetMemoryInfo(). +typedef struct +{ + NvU32 version; //!< Version info + NvU32 dedicatedVideoMemory; //!< Size(in kb) of the physical framebuffer. + NvU32 availableDedicatedVideoMemory; //!< Size(in kb) of the available physical framebuffer for allocating video memory surfaces. + NvU32 systemVideoMemory; //!< Size(in kb) of system memory the driver allocates at load time. 
+ NvU32 sharedSystemMemory; //!< Size(in kb) of shared system memory that driver is allowed to commit for surfaces across all allocations. + NvU32 curAvailableDedicatedVideoMemory; //!< Size(in kb) of the current available physical framebuffer for allocating video memory surfaces. + NvU32 dedicatedVideoMemoryEvictionsSize; //!< Size(in kb) of the total size of memory released as a result of the evictions. + NvU32 dedicatedVideoMemoryEvictionCount; //!< Indicates the number of eviction events that caused an allocation to be removed from dedicated video memory to free GPU + //!< video memory to make room for other allocations. +} NV_DISPLAY_DRIVER_MEMORY_INFO_V3; + +//! \ingroup driverapi +typedef NV_DISPLAY_DRIVER_MEMORY_INFO_V3 NV_DISPLAY_DRIVER_MEMORY_INFO; + +//! \ingroup driverapi +//! Macro for constructing the version field of NV_DISPLAY_DRIVER_MEMORY_INFO_V1 +#define NV_DISPLAY_DRIVER_MEMORY_INFO_VER_1 MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_MEMORY_INFO_V1,1) + +//! \ingroup driverapi +//! Macro for constructing the version field of NV_DISPLAY_DRIVER_MEMORY_INFO_V2 +#define NV_DISPLAY_DRIVER_MEMORY_INFO_VER_2 MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_MEMORY_INFO_V2,2) + +//! \ingroup driverapi +//! Macro for constructing the version field of NV_DISPLAY_DRIVER_MEMORY_INFO_V3 +#define NV_DISPLAY_DRIVER_MEMORY_INFO_VER_3 MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_MEMORY_INFO_V3,3) + +//! \ingroup driverapi +#define NV_DISPLAY_DRIVER_MEMORY_INFO_VER NV_DISPLAY_DRIVER_MEMORY_INFO_VER_3 + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetMemoryInfo +// +//! DESCRIPTION: This function retrieves the available driver memory footprint for the specified GPU. +//! If the GPU is in TCC Mode, only dedicatedVideoMemory will be returned in pMemoryInfo (NV_DISPLAY_DRIVER_MEMORY_INFO). +//! +//! \deprecated Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetMemoryInfoEx. +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! \since Release: 177 +//! +//! \param [in] hPhysicalGpu Handle of the physical GPU for which the memory information is to be extracted. +//! \param [out] pMemoryInfo The memory footprint available in the driver. See NV_DISPLAY_DRIVER_MEMORY_INFO. +//! +//! \retval NVAPI_INVALID_ARGUMENT pMemoryInfo is NULL. +//! \retval NVAPI_OK Call successful. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION NV_DISPLAY_DRIVER_MEMORY_INFO structure version mismatch. +//! +//! \ingroup driverapi +/////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetMemoryInfoEx.") +NVAPI_INTERFACE NvAPI_GPU_GetMemoryInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_DISPLAY_DRIVER_MEMORY_INFO *pMemoryInfo); + + +//! \ingroup driverapi +//! Used in NvAPI_GPU_GetMemoryInfoEx(). +typedef struct +{ + NvU32 version; //!< Structure version + NvU64 dedicatedVideoMemory; //!< Size(in bytes) of the physical framebuffer. Refers to the dedicated video memory on discrete GPUs. + //! It is more performant for GPU operations than the reserved systemVideoMemory. + NvU64 availableDedicatedVideoMemory; //!< Size(in bytes) of the available physical framebuffer for allocating video memory surfaces. + NvU64 systemVideoMemory; //!< Size(in bytes) of system memory the driver allocates at load time. It is a substitute for dedicated video memory. + //!< Typically used with integrated GPUs that do not have dedicated video memory. + NvU64 sharedSystemMemory; //!< Size(in bytes) of shared system memory that driver is allowed to commit for surfaces across all allocations. + //!< On discrete GPUs, it is used to utilize system memory for various operations. It does not need to be reserved during boot. 
+ //!< It may be used by both GPU and CPU, and has an "on-demand" type of usage. + NvU64 curAvailableDedicatedVideoMemory; //!< Size(in bytes) of the current available physical framebuffer for allocating video memory surfaces. + NvU64 dedicatedVideoMemoryEvictionsSize; //!< Size(in bytes) of the total size of memory released as a result of the evictions. + NvU64 dedicatedVideoMemoryEvictionCount; //!< Indicates the number of eviction events that caused an allocation to be removed from dedicated video memory to free GPU + //!< video memory to make room for other allocations. + NvU64 dedicatedVideoMemoryPromotionsSize; //!< Size(in bytes) of the total size of memory allocated as a result of the promotions. + NvU64 dedicatedVideoMemoryPromotionCount; //!< Indicates the number of promotion events that caused an allocation to be promoted to dedicated video memory +} NV_GPU_MEMORY_INFO_EX_V1; + +//! \ingroup driverapi +typedef NV_GPU_MEMORY_INFO_EX_V1 NV_GPU_MEMORY_INFO_EX; + +//! \ingroup driverapi +//! Macro for constructing the version field of NV_GPU_MEMORY_INFO_EX_V1 +#define NV_GPU_MEMORY_INFO_EX_VER_1 MAKE_NVAPI_VERSION(NV_GPU_MEMORY_INFO_EX_V1,1) + +//! \ingroup driverapi +#define NV_GPU_MEMORY_INFO_EX_VER NV_GPU_MEMORY_INFO_EX_VER_1 + + + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetMemoryInfoEx +// +//! DESCRIPTION: This function retrieves the available driver memory footprint for the specified GPU. +//! If the GPU is in TCC Mode, only dedicatedVideoMemory will be returned in pMemoryInfo (NV_GPU_MEMORY_INFO_EX). +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! MCDM_SUPPORTED +//! +//! \since Release: 520 +//! +//! \param [in] hPhysicalGpu Handle of the physical GPU for which the memory information is to be extracted. +//! \param [out] pMemoryInfo The memory footprint available in the driver. See NV_GPU_MEMORY_INFO_EX. +//! +//! 
\retval NVAPI_INVALID_ARGUMENT pMemoryInfo is NULL. +//! \retval NVAPI_OK Call successful. +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found. +//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION NV_GPU_MEMORY_INFO_EX structure version mismatch. +//! +//! \ingroup driverapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetMemoryInfoEx(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_MEMORY_INFO_EX *pMemoryInfo); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_EnumPhysicalGPUs +// +//! This function returns an array of physical GPU handles. +//! Each handle represents a physical GPU present in the system. +//! That GPU may be part of an SLI configuration, or may not be visible to the OS directly. +//! +//! At least one GPU must be present in the system and running an NVIDIA display driver. +//! +//! The array nvGPUHandle will be filled with physical GPU handle values. The returned +//! gpuCount determines how many entries in the array are valid. +//! +//! \note In drivers older than 105.00, all physical GPU handles get invalidated on a +//! modeset. So the calling applications need to renum the handles after every modeset.\n +//! With drivers 105.00 and up, all physical GPU handles are constant. +//! Physical GPU handles are constant as long as the GPUs are not physically moved and +//! the SBIOS VGA order is unchanged. +//! +//! For GPU handles in TCC MODE please use NvAPI_EnumTCCPhysicalGPUs() +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \par Introduced in +//! \since Release: 80 +//! +//! \retval NVAPI_INVALID_ARGUMENT nvGPUHandle or pGpuCount is NULL +//! \retval NVAPI_OK One or more handles were returned +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND No NVIDIA GPU driving a display was found +//! 
\ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_EnumPhysicalGPUs(NvPhysicalGpuHandle nvGPUHandle[NVAPI_MAX_PHYSICAL_GPUS], NvU32 *pGpuCount); +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +NV_DECLARE_HANDLE(NVDX_ObjectHandle); // DX Objects +static const NVDX_ObjectHandle NVDX_OBJECT_NONE = 0; + +#endif //if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_GetObjectHandleForResource +// +//! DESCRIPTION: This API gets a handle to a resource. +//! +//! \param [in] pDev The ID3D11Device, ID3D10Device or IDirect3DDevice9 or ID3D11DeviceContext to use +//! \param [in] pResource The ID3D11Resource, ID3D10Resource or IDirect3DResource9 from which +//! we want the NvAPI handle +//! \param [out] pHandle A handle to the resource +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \return ::NVAPI_OK if the handle was populated. +//! +//! 
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_GetObjectHandleForResource( + IUnknown *pDevice, + IUnknown *pResource, + NVDX_ObjectHandle *pHandle); + + +#endif //if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) || defined(__d3d12_h__) + +#include"nvapi_lite_salend.h" +#ifdef __cplusplus +} +#endif +#pragma pack(pop) diff --git a/Source/ThirdParty/nvapi/nvapi_lite_d3dext.h b/Source/ThirdParty/nvapi/nvapi_lite_d3dext.h new file mode 100644 index 000000000..416d3a51d --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi_lite_d3dext.h @@ -0,0 +1,184 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. *| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ + +#pragma once +#include"nvapi_lite_salstart.h" +#include"nvapi_lite_common.h" +#pragma pack(push,8) +#ifdef __cplusplus +extern "C" { +#endif +#if defined(__cplusplus) && (defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) +//! \ingroup dx +//! D3D_FEATURE_LEVEL supported - used in NvAPI_D3D11_CreateDevice() and NvAPI_D3D11_CreateDeviceAndSwapChain() +typedef enum +{ + NVAPI_DEVICE_FEATURE_LEVEL_NULL = -1, + NVAPI_DEVICE_FEATURE_LEVEL_10_0 = 0, + NVAPI_DEVICE_FEATURE_LEVEL_10_0_PLUS = 1, + NVAPI_DEVICE_FEATURE_LEVEL_10_1 = 2, + NVAPI_DEVICE_FEATURE_LEVEL_11_0 = 3, +} NVAPI_DEVICE_FEATURE_LEVEL; + +#endif //defined(__cplusplus) && (defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__)) +#if defined(__cplusplus) && defined(__d3d11_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateDevice +// +//! DESCRIPTION: This function tries to create a DirectX 11 device. If the call fails (if we are running +//! on pre-DirectX 11 hardware), depending on the type of hardware it will try to create a DirectX 10.1 OR DirectX 10.0+ +//! OR DirectX 10.0 device. The function call is the same as D3D11CreateDevice(), but with an extra +//! argument (D3D_FEATURE_LEVEL supported by the device) that the function fills in. This argument +//! can contain -1 (NVAPI_DEVICE_FEATURE_LEVEL_NULL), if the requested featureLevel is less than DirecX 10.0. +//! +//! NOTE: When NvAPI_D3D11_CreateDevice is called with 10+ feature level we have an issue on few set of +//! 
tesla hardware (G80/G84/G86/G92/G94/G96) which does not support all feature level 10+ functionality +//! e.g. calling driver with mismatch between RenderTarget and Depth Buffer. App developers should +//! take into consideration such limitation when using NVAPI on such tesla hardwares. +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pAdapter +//! \param [in] DriverType +//! \param [in] Software +//! \param [in] Flags +//! \param [in] *pFeatureLevels +//! \param [in] FeatureLevels +//! \param [in] SDKVersion +//! \param [in] **ppDevice +//! \param [in] *pFeatureLevel +//! \param [in] **ppImmediateContext +//! \param [in] *pSupportedLevel D3D_FEATURE_LEVEL supported +//! +//! \return NVAPI_OK if the createDevice call succeeded. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_CreateDevice(IDXGIAdapter* pAdapter, + D3D_DRIVER_TYPE DriverType, + HMODULE Software, + UINT Flags, + CONST D3D_FEATURE_LEVEL *pFeatureLevels, + UINT FeatureLevels, + UINT SDKVersion, + ID3D11Device **ppDevice, + D3D_FEATURE_LEVEL *pFeatureLevel, + ID3D11DeviceContext **ppImmediateContext, + NVAPI_DEVICE_FEATURE_LEVEL *pSupportedLevel); + + +#endif //defined(__cplusplus) && defined(__d3d11_h__) +#if defined(__cplusplus) && defined(__d3d11_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_CreateDeviceAndSwapChain +// +//! DESCRIPTION: This function tries to create a DirectX 11 device and swap chain. If the call fails (if we are +//! running on pre=DirectX 11 hardware), depending on the type of hardware it will try to create a DirectX 10.1 OR +//! DirectX 10.0+ OR DirectX 10.0 device. The function call is the same as D3D11CreateDeviceAndSwapChain, +//! but with an extra argument (D3D_FEATURE_LEVEL supported by the device) that the function fills +//! in. 
This argument can contain -1 (NVAPI_DEVICE_FEATURE_LEVEL_NULL), if the requested featureLevel +//! is less than DirectX 10.0. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pAdapter +//! \param [in] DriverType +//! \param [in] Software +//! \param [in] Flags +//! \param [in] *pFeatureLevels +//! \param [in] FeatureLevels +//! \param [in] SDKVersion +//! \param [in] *pSwapChainDesc +//! \param [in] **ppSwapChain +//! \param [in] **ppDevice +//! \param [in] *pFeatureLevel +//! \param [in] **ppImmediateContext +//! \param [in] *pSupportedLevel D3D_FEATURE_LEVEL supported +//! +//!return NVAPI_OK if the createDevice with swap chain call succeeded. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_CreateDeviceAndSwapChain(IDXGIAdapter* pAdapter, + D3D_DRIVER_TYPE DriverType, + HMODULE Software, + UINT Flags, + CONST D3D_FEATURE_LEVEL *pFeatureLevels, + UINT FeatureLevels, + UINT SDKVersion, + CONST DXGI_SWAP_CHAIN_DESC *pSwapChainDesc, + IDXGISwapChain **ppSwapChain, + ID3D11Device **ppDevice, + D3D_FEATURE_LEVEL *pFeatureLevel, + ID3D11DeviceContext **ppImmediateContext, + NVAPI_DEVICE_FEATURE_LEVEL *pSupportedLevel); + + + +#endif //defined(__cplusplus) && defined(__d3d11_h__) +#if defined(__cplusplus) && defined(__d3d11_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D11_SetDepthBoundsTest +// +//! DESCRIPTION: This function enables/disables the depth bounds test +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] pDeviceOrContext The device or device context to set depth bounds test +//! \param [in] bEnable Enable(non-zero)/disable(zero) the depth bounds test +//! \param [in] fMinDepth The minimum depth for depth bounds test +//! \param [in] fMaxDepth The maximum depth for depth bounds test +//! 
The valid values for fMinDepth and fMaxDepth +//! are such that 0 <= fMinDepth <= fMaxDepth <= 1 +//! +//! \return ::NVAPI_OK if the depth bounds test was correcly enabled or disabled +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D11_SetDepthBoundsTest(IUnknown* pDeviceOrContext, + NvU32 bEnable, + float fMinDepth, + float fMaxDepth); + +#endif //defined(__cplusplus) && defined(__d3d11_h__) + +#include"nvapi_lite_salend.h" +#ifdef __cplusplus +} +#endif +#pragma pack(pop) diff --git a/Source/ThirdParty/nvapi/nvapi_lite_salend.h b/Source/ThirdParty/nvapi/nvapi_lite_salend.h new file mode 100644 index 000000000..d05c23b0d --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi_lite_salend.h @@ -0,0 +1,809 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. *| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ + +#ifndef __NVAPI_EMPTY_SAL +#ifdef __nvapi_undef__ecount + #undef __ecount + #undef __nvapi_undef__ecount +#endif +#ifdef __nvapi_undef__bcount + #undef __bcount + #undef __nvapi_undef__bcount +#endif +#ifdef __nvapi_undef__in + #undef __in + #undef __nvapi_undef__in +#endif +#ifdef __nvapi_undef__in_ecount + #undef __in_ecount + #undef __nvapi_undef__in_ecount +#endif +#ifdef __nvapi_undef__in_bcount + #undef __in_bcount + #undef __nvapi_undef__in_bcount +#endif +#ifdef __nvapi_undef__in_z + #undef __in_z + #undef __nvapi_undef__in_z +#endif +#ifdef __nvapi_undef__in_ecount_z + #undef __in_ecount_z + #undef __nvapi_undef__in_ecount_z +#endif +#ifdef __nvapi_undef__in_bcount_z + #undef __in_bcount_z + #undef __nvapi_undef__in_bcount_z +#endif +#ifdef __nvapi_undef__in_nz + #undef __in_nz + #undef __nvapi_undef__in_nz +#endif +#ifdef __nvapi_undef__in_ecount_nz + #undef __in_ecount_nz + #undef __nvapi_undef__in_ecount_nz +#endif +#ifdef __nvapi_undef__in_bcount_nz + #undef __in_bcount_nz + #undef __nvapi_undef__in_bcount_nz +#endif +#ifdef __nvapi_undef__out + #undef __out + #undef __nvapi_undef__out +#endif +#ifdef __nvapi_undef__out_ecount + #undef __out_ecount + #undef __nvapi_undef__out_ecount +#endif +#ifdef __nvapi_undef__out_bcount + #undef __out_bcount + #undef __nvapi_undef__out_bcount +#endif +#ifdef __nvapi_undef__out_ecount_part + #undef __out_ecount_part + #undef __nvapi_undef__out_ecount_part +#endif +#ifdef __nvapi_undef__out_bcount_part + #undef __out_bcount_part + #undef __nvapi_undef__out_bcount_part +#endif +#ifdef 
__nvapi_undef__out_ecount_full + #undef __out_ecount_full + #undef __nvapi_undef__out_ecount_full +#endif +#ifdef __nvapi_undef__out_bcount_full + #undef __out_bcount_full + #undef __nvapi_undef__out_bcount_full +#endif +#ifdef __nvapi_undef__out_z + #undef __out_z + #undef __nvapi_undef__out_z +#endif +#ifdef __nvapi_undef__out_z_opt + #undef __out_z_opt + #undef __nvapi_undef__out_z_opt +#endif +#ifdef __nvapi_undef__out_ecount_z + #undef __out_ecount_z + #undef __nvapi_undef__out_ecount_z +#endif +#ifdef __nvapi_undef__out_bcount_z + #undef __out_bcount_z + #undef __nvapi_undef__out_bcount_z +#endif +#ifdef __nvapi_undef__out_ecount_part_z + #undef __out_ecount_part_z + #undef __nvapi_undef__out_ecount_part_z +#endif +#ifdef __nvapi_undef__out_bcount_part_z + #undef __out_bcount_part_z + #undef __nvapi_undef__out_bcount_part_z +#endif +#ifdef __nvapi_undef__out_ecount_full_z + #undef __out_ecount_full_z + #undef __nvapi_undef__out_ecount_full_z +#endif +#ifdef __nvapi_undef__out_bcount_full_z + #undef __out_bcount_full_z + #undef __nvapi_undef__out_bcount_full_z +#endif +#ifdef __nvapi_undef__out_nz + #undef __out_nz + #undef __nvapi_undef__out_nz +#endif +#ifdef __nvapi_undef__out_nz_opt + #undef __out_nz_opt + #undef __nvapi_undef__out_nz_opt +#endif +#ifdef __nvapi_undef__out_ecount_nz + #undef __out_ecount_nz + #undef __nvapi_undef__out_ecount_nz +#endif +#ifdef __nvapi_undef__out_bcount_nz + #undef __out_bcount_nz + #undef __nvapi_undef__out_bcount_nz +#endif +#ifdef __nvapi_undef__inout + #undef __inout + #undef __nvapi_undef__inout +#endif +#ifdef __nvapi_undef__inout_ecount + #undef __inout_ecount + #undef __nvapi_undef__inout_ecount +#endif +#ifdef __nvapi_undef__inout_bcount + #undef __inout_bcount + #undef __nvapi_undef__inout_bcount +#endif +#ifdef __nvapi_undef__inout_ecount_part + #undef __inout_ecount_part + #undef __nvapi_undef__inout_ecount_part +#endif +#ifdef __nvapi_undef__inout_bcount_part + #undef __inout_bcount_part + #undef 
__nvapi_undef__inout_bcount_part +#endif +#ifdef __nvapi_undef__inout_ecount_full + #undef __inout_ecount_full + #undef __nvapi_undef__inout_ecount_full +#endif +#ifdef __nvapi_undef__inout_bcount_full + #undef __inout_bcount_full + #undef __nvapi_undef__inout_bcount_full +#endif +#ifdef __nvapi_undef__inout_z + #undef __inout_z + #undef __nvapi_undef__inout_z +#endif +#ifdef __nvapi_undef__inout_ecount_z + #undef __inout_ecount_z + #undef __nvapi_undef__inout_ecount_z +#endif +#ifdef __nvapi_undef__inout_bcount_z + #undef __inout_bcount_z + #undef __nvapi_undef__inout_bcount_z +#endif +#ifdef __nvapi_undef__inout_nz + #undef __inout_nz + #undef __nvapi_undef__inout_nz +#endif +#ifdef __nvapi_undef__inout_ecount_nz + #undef __inout_ecount_nz + #undef __nvapi_undef__inout_ecount_nz +#endif +#ifdef __nvapi_undef__inout_bcount_nz + #undef __inout_bcount_nz + #undef __nvapi_undef__inout_bcount_nz +#endif +#ifdef __nvapi_undef__ecount_opt + #undef __ecount_opt + #undef __nvapi_undef__ecount_opt +#endif +#ifdef __nvapi_undef__bcount_opt + #undef __bcount_opt + #undef __nvapi_undef__bcount_opt +#endif +#ifdef __nvapi_undef__in_opt + #undef __in_opt + #undef __nvapi_undef__in_opt +#endif +#ifdef __nvapi_undef__in_ecount_opt + #undef __in_ecount_opt + #undef __nvapi_undef__in_ecount_opt +#endif +#ifdef __nvapi_undef__in_bcount_opt + #undef __in_bcount_opt + #undef __nvapi_undef__in_bcount_opt +#endif +#ifdef __nvapi_undef__in_z_opt + #undef __in_z_opt + #undef __nvapi_undef__in_z_opt +#endif +#ifdef __nvapi_undef__in_ecount_z_opt + #undef __in_ecount_z_opt + #undef __nvapi_undef__in_ecount_z_opt +#endif +#ifdef __nvapi_undef__in_bcount_z_opt + #undef __in_bcount_z_opt + #undef __nvapi_undef__in_bcount_z_opt +#endif +#ifdef __nvapi_undef__in_nz_opt + #undef __in_nz_opt + #undef __nvapi_undef__in_nz_opt +#endif +#ifdef __nvapi_undef__in_ecount_nz_opt + #undef __in_ecount_nz_opt + #undef __nvapi_undef__in_ecount_nz_opt +#endif +#ifdef __nvapi_undef__in_bcount_nz_opt + #undef 
__in_bcount_nz_opt + #undef __nvapi_undef__in_bcount_nz_opt +#endif +#ifdef __nvapi_undef__out_opt + #undef __out_opt + #undef __nvapi_undef__out_opt +#endif +#ifdef __nvapi_undef__out_ecount_opt + #undef __out_ecount_opt + #undef __nvapi_undef__out_ecount_opt +#endif +#ifdef __nvapi_undef__out_bcount_opt + #undef __out_bcount_opt + #undef __nvapi_undef__out_bcount_opt +#endif +#ifdef __nvapi_undef__out_ecount_part_opt + #undef __out_ecount_part_opt + #undef __nvapi_undef__out_ecount_part_opt +#endif +#ifdef __nvapi_undef__out_bcount_part_opt + #undef __out_bcount_part_opt + #undef __nvapi_undef__out_bcount_part_opt +#endif +#ifdef __nvapi_undef__out_ecount_full_opt + #undef __out_ecount_full_opt + #undef __nvapi_undef__out_ecount_full_opt +#endif +#ifdef __nvapi_undef__out_bcount_full_opt + #undef __out_bcount_full_opt + #undef __nvapi_undef__out_bcount_full_opt +#endif +#ifdef __nvapi_undef__out_ecount_z_opt + #undef __out_ecount_z_opt + #undef __nvapi_undef__out_ecount_z_opt +#endif +#ifdef __nvapi_undef__out_bcount_z_opt + #undef __out_bcount_z_opt + #undef __nvapi_undef__out_bcount_z_opt +#endif +#ifdef __nvapi_undef__out_ecount_part_z_opt + #undef __out_ecount_part_z_opt + #undef __nvapi_undef__out_ecount_part_z_opt +#endif +#ifdef __nvapi_undef__out_bcount_part_z_opt + #undef __out_bcount_part_z_opt + #undef __nvapi_undef__out_bcount_part_z_opt +#endif +#ifdef __nvapi_undef__out_ecount_full_z_opt + #undef __out_ecount_full_z_opt + #undef __nvapi_undef__out_ecount_full_z_opt +#endif +#ifdef __nvapi_undef__out_bcount_full_z_opt + #undef __out_bcount_full_z_opt + #undef __nvapi_undef__out_bcount_full_z_opt +#endif +#ifdef __nvapi_undef__out_ecount_nz_opt + #undef __out_ecount_nz_opt + #undef __nvapi_undef__out_ecount_nz_opt +#endif +#ifdef __nvapi_undef__out_bcount_nz_opt + #undef __out_bcount_nz_opt + #undef __nvapi_undef__out_bcount_nz_opt +#endif +#ifdef __nvapi_undef__inout_opt + #undef __inout_opt + #undef __nvapi_undef__inout_opt +#endif +#ifdef 
__nvapi_undef__inout_ecount_opt + #undef __inout_ecount_opt + #undef __nvapi_undef__inout_ecount_opt +#endif +#ifdef __nvapi_undef__inout_bcount_opt + #undef __inout_bcount_opt + #undef __nvapi_undef__inout_bcount_opt +#endif +#ifdef __nvapi_undef__inout_ecount_part_opt + #undef __inout_ecount_part_opt + #undef __nvapi_undef__inout_ecount_part_opt +#endif +#ifdef __nvapi_undef__inout_bcount_part_opt + #undef __inout_bcount_part_opt + #undef __nvapi_undef__inout_bcount_part_opt +#endif +#ifdef __nvapi_undef__inout_ecount_full_opt + #undef __inout_ecount_full_opt + #undef __nvapi_undef__inout_ecount_full_opt +#endif +#ifdef __nvapi_undef__inout_bcount_full_opt + #undef __inout_bcount_full_opt + #undef __nvapi_undef__inout_bcount_full_opt +#endif +#ifdef __nvapi_undef__inout_z_opt + #undef __inout_z_opt + #undef __nvapi_undef__inout_z_opt +#endif +#ifdef __nvapi_undef__inout_ecount_z_opt + #undef __inout_ecount_z_opt + #undef __nvapi_undef__inout_ecount_z_opt +#endif +#ifdef __nvapi_undef__inout_ecount_z_opt + #undef __inout_ecount_z_opt + #undef __nvapi_undef__inout_ecount_z_opt +#endif +#ifdef __nvapi_undef__inout_bcount_z_opt + #undef __inout_bcount_z_opt + #undef __nvapi_undef__inout_bcount_z_opt +#endif +#ifdef __nvapi_undef__inout_nz_opt + #undef __inout_nz_opt + #undef __nvapi_undef__inout_nz_opt +#endif +#ifdef __nvapi_undef__inout_ecount_nz_opt + #undef __inout_ecount_nz_opt + #undef __nvapi_undef__inout_ecount_nz_opt +#endif +#ifdef __nvapi_undef__inout_bcount_nz_opt + #undef __inout_bcount_nz_opt + #undef __nvapi_undef__inout_bcount_nz_opt +#endif +#ifdef __nvapi_undef__deref_ecount + #undef __deref_ecount + #undef __nvapi_undef__deref_ecount +#endif +#ifdef __nvapi_undef__deref_bcount + #undef __deref_bcount + #undef __nvapi_undef__deref_bcount +#endif +#ifdef __nvapi_undef__deref_out + #undef __deref_out + #undef __nvapi_undef__deref_out +#endif +#ifdef __nvapi_undef__deref_out_ecount + #undef __deref_out_ecount + #undef __nvapi_undef__deref_out_ecount 
+#endif +#ifdef __nvapi_undef__deref_out_bcount + #undef __deref_out_bcount + #undef __nvapi_undef__deref_out_bcount +#endif +#ifdef __nvapi_undef__deref_out_ecount_part + #undef __deref_out_ecount_part + #undef __nvapi_undef__deref_out_ecount_part +#endif +#ifdef __nvapi_undef__deref_out_bcount_part + #undef __deref_out_bcount_part + #undef __nvapi_undef__deref_out_bcount_part +#endif +#ifdef __nvapi_undef__deref_out_ecount_full + #undef __deref_out_ecount_full + #undef __nvapi_undef__deref_out_ecount_full +#endif +#ifdef __nvapi_undef__deref_out_bcount_full + #undef __deref_out_bcount_full + #undef __nvapi_undef__deref_out_bcount_full +#endif +#ifdef __nvapi_undef__deref_out_z + #undef __deref_out_z + #undef __nvapi_undef__deref_out_z +#endif +#ifdef __nvapi_undef__deref_out_ecount_z + #undef __deref_out_ecount_z + #undef __nvapi_undef__deref_out_ecount_z +#endif +#ifdef __nvapi_undef__deref_out_bcount_z + #undef __deref_out_bcount_z + #undef __nvapi_undef__deref_out_bcount_z +#endif +#ifdef __nvapi_undef__deref_out_nz + #undef __deref_out_nz + #undef __nvapi_undef__deref_out_nz +#endif +#ifdef __nvapi_undef__deref_out_ecount_nz + #undef __deref_out_ecount_nz + #undef __nvapi_undef__deref_out_ecount_nz +#endif +#ifdef __nvapi_undef__deref_out_bcount_nz + #undef __deref_out_bcount_nz + #undef __nvapi_undef__deref_out_bcount_nz +#endif +#ifdef __nvapi_undef__deref_inout + #undef __deref_inout + #undef __nvapi_undef__deref_inout +#endif +#ifdef __nvapi_undef__deref_inout_z + #undef __deref_inout_z + #undef __nvapi_undef__deref_inout_z +#endif +#ifdef __nvapi_undef__deref_inout_ecount + #undef __deref_inout_ecount + #undef __nvapi_undef__deref_inout_ecount +#endif +#ifdef __nvapi_undef__deref_inout_bcount + #undef __deref_inout_bcount + #undef __nvapi_undef__deref_inout_bcount +#endif +#ifdef __nvapi_undef__deref_inout_ecount_part + #undef __deref_inout_ecount_part + #undef __nvapi_undef__deref_inout_ecount_part +#endif +#ifdef __nvapi_undef__deref_inout_bcount_part 
+ #undef __deref_inout_bcount_part + #undef __nvapi_undef__deref_inout_bcount_part +#endif +#ifdef __nvapi_undef__deref_inout_ecount_full + #undef __deref_inout_ecount_full + #undef __nvapi_undef__deref_inout_ecount_full +#endif +#ifdef __nvapi_undef__deref_inout_bcount_full + #undef __deref_inout_bcount_full + #undef __nvapi_undef__deref_inout_bcount_full +#endif +#ifdef __nvapi_undef__deref_inout_z + #undef __deref_inout_z + #undef __nvapi_undef__deref_inout_z +#endif +#ifdef __nvapi_undef__deref_inout_ecount_z + #undef __deref_inout_ecount_z + #undef __nvapi_undef__deref_inout_ecount_z +#endif +#ifdef __nvapi_undef__deref_inout_bcount_z + #undef __deref_inout_bcount_z + #undef __nvapi_undef__deref_inout_bcount_z +#endif +#ifdef __nvapi_undef__deref_inout_nz + #undef __deref_inout_nz + #undef __nvapi_undef__deref_inout_nz +#endif +#ifdef __nvapi_undef__deref_inout_ecount_nz + #undef __deref_inout_ecount_nz + #undef __nvapi_undef__deref_inout_ecount_nz +#endif +#ifdef __nvapi_undef__deref_inout_bcount_nz + #undef __deref_inout_bcount_nz + #undef __nvapi_undef__deref_inout_bcount_nz +#endif +#ifdef __nvapi_undef__deref_ecount_opt + #undef __deref_ecount_opt + #undef __nvapi_undef__deref_ecount_opt +#endif +#ifdef __nvapi_undef__deref_bcount_opt + #undef __deref_bcount_opt + #undef __nvapi_undef__deref_bcount_opt +#endif +#ifdef __nvapi_undef__deref_out_opt + #undef __deref_out_opt + #undef __nvapi_undef__deref_out_opt +#endif +#ifdef __nvapi_undef__deref_out_ecount_opt + #undef __deref_out_ecount_opt + #undef __nvapi_undef__deref_out_ecount_opt +#endif +#ifdef __nvapi_undef__deref_out_bcount_opt + #undef __deref_out_bcount_opt + #undef __nvapi_undef__deref_out_bcount_opt +#endif +#ifdef __nvapi_undef__deref_out_ecount_part_opt + #undef __deref_out_ecount_part_opt + #undef __nvapi_undef__deref_out_ecount_part_opt +#endif +#ifdef __nvapi_undef__deref_out_bcount_part_opt + #undef __deref_out_bcount_part_opt + #undef __nvapi_undef__deref_out_bcount_part_opt +#endif 
+#ifdef __nvapi_undef__deref_out_ecount_full_opt + #undef __deref_out_ecount_full_opt + #undef __nvapi_undef__deref_out_ecount_full_opt +#endif +#ifdef __nvapi_undef__deref_out_bcount_full_opt + #undef __deref_out_bcount_full_opt + #undef __nvapi_undef__deref_out_bcount_full_opt +#endif +#ifdef __nvapi_undef__deref_out_z_opt + #undef __deref_out_z_opt + #undef __nvapi_undef__deref_out_z_opt +#endif +#ifdef __nvapi_undef__deref_out_ecount_z_opt + #undef __deref_out_ecount_z_opt + #undef __nvapi_undef__deref_out_ecount_z_opt +#endif +#ifdef __nvapi_undef__deref_out_bcount_z_opt + #undef __deref_out_bcount_z_opt + #undef __nvapi_undef__deref_out_bcount_z_opt +#endif +#ifdef __nvapi_undef__deref_out_nz_opt + #undef __deref_out_nz_opt + #undef __nvapi_undef__deref_out_nz_opt +#endif +#ifdef __nvapi_undef__deref_out_ecount_nz_opt + #undef __deref_out_ecount_nz_opt + #undef __nvapi_undef__deref_out_ecount_nz_opt +#endif +#ifdef __nvapi_undef__deref_out_bcount_nz_opt + #undef __deref_out_bcount_nz_opt + #undef __nvapi_undef__deref_out_bcount_nz_opt +#endif +#ifdef __nvapi_undef__deref_inout_opt + #undef __deref_inout_opt + #undef __nvapi_undef__deref_inout_opt +#endif +#ifdef __nvapi_undef__deref_inout_ecount_opt + #undef __deref_inout_ecount_opt + #undef __nvapi_undef__deref_inout_ecount_opt +#endif +#ifdef __nvapi_undef__deref_inout_bcount_opt + #undef __deref_inout_bcount_opt + #undef __nvapi_undef__deref_inout_bcount_opt +#endif +#ifdef __nvapi_undef__deref_inout_ecount_part_opt + #undef __deref_inout_ecount_part_opt + #undef __nvapi_undef__deref_inout_ecount_part_opt +#endif +#ifdef __nvapi_undef__deref_inout_bcount_part_opt + #undef __deref_inout_bcount_part_opt + #undef __nvapi_undef__deref_inout_bcount_part_opt +#endif +#ifdef __nvapi_undef__deref_inout_ecount_full_opt + #undef __deref_inout_ecount_full_opt + #undef __nvapi_undef__deref_inout_ecount_full_opt +#endif +#ifdef __nvapi_undef__deref_inout_bcount_full_opt + #undef __deref_inout_bcount_full_opt + #undef 
__nvapi_undef__deref_inout_bcount_full_opt +#endif +#ifdef __nvapi_undef__deref_inout_z_opt + #undef __deref_inout_z_opt + #undef __nvapi_undef__deref_inout_z_opt +#endif +#ifdef __nvapi_undef__deref_inout_ecount_z_opt + #undef __deref_inout_ecount_z_opt + #undef __nvapi_undef__deref_inout_ecount_z_opt +#endif +#ifdef __nvapi_undef__deref_inout_bcount_z_opt + #undef __deref_inout_bcount_z_opt + #undef __nvapi_undef__deref_inout_bcount_z_opt +#endif +#ifdef __nvapi_undef__deref_inout_nz_opt + #undef __deref_inout_nz_opt + #undef __nvapi_undef__deref_inout_nz_opt +#endif +#ifdef __nvapi_undef__deref_inout_ecount_nz_opt + #undef __deref_inout_ecount_nz_opt + #undef __nvapi_undef__deref_inout_ecount_nz_opt +#endif +#ifdef __nvapi_undef__deref_inout_bcount_nz_opt + #undef __deref_inout_bcount_nz_opt + #undef __nvapi_undef__deref_inout_bcount_nz_opt +#endif +#ifdef __nvapi_undef__deref_opt_ecount + #undef __deref_opt_ecount + #undef __nvapi_undef__deref_opt_ecount +#endif +#ifdef __nvapi_undef__deref_opt_bcount + #undef __deref_opt_bcount + #undef __nvapi_undef__deref_opt_bcount +#endif +#ifdef __nvapi_undef__deref_opt_out + #undef __deref_opt_out + #undef __nvapi_undef__deref_opt_out +#endif +#ifdef __nvapi_undef__deref_opt_out_z + #undef __deref_opt_out_z + #undef __nvapi_undef__deref_opt_out_z +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount + #undef __deref_opt_out_ecount + #undef __nvapi_undef__deref_opt_out_ecount +#endif +#ifdef __nvapi_undef__deref_opt_out_bcount + #undef __deref_opt_out_bcount + #undef __nvapi_undef__deref_opt_out_bcount +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount_part + #undef __deref_opt_out_ecount_part + #undef __nvapi_undef__deref_opt_out_ecount_part +#endif +#ifdef __nvapi_undef__deref_opt_out_bcount_part + #undef __deref_opt_out_bcount_part + #undef __nvapi_undef__deref_opt_out_bcount_part +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount_full + #undef __deref_opt_out_ecount_full + #undef 
__nvapi_undef__deref_opt_out_ecount_full +#endif +#ifdef __nvapi_undef__deref_opt_out_bcount_full + #undef __deref_opt_out_bcount_full + #undef __nvapi_undef__deref_opt_out_bcount_full +#endif +#ifdef __nvapi_undef__deref_opt_inout + #undef __deref_opt_inout + #undef __nvapi_undef__deref_opt_inout +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount + #undef __deref_opt_inout_ecount + #undef __nvapi_undef__deref_opt_inout_ecount +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount + #undef __deref_opt_inout_bcount + #undef __nvapi_undef__deref_opt_inout_bcount +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_part + #undef __deref_opt_inout_ecount_part + #undef __nvapi_undef__deref_opt_inout_ecount_part +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_part + #undef __deref_opt_inout_bcount_part + #undef __nvapi_undef__deref_opt_inout_bcount_part +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_full + #undef __deref_opt_inout_ecount_full + #undef __nvapi_undef__deref_opt_inout_ecount_full +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_full + #undef __deref_opt_inout_bcount_full + #undef __nvapi_undef__deref_opt_inout_bcount_full +#endif +#ifdef __nvapi_undef__deref_opt_inout_z + #undef __deref_opt_inout_z + #undef __nvapi_undef__deref_opt_inout_z +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_z + #undef __deref_opt_inout_ecount_z + #undef __nvapi_undef__deref_opt_inout_ecount_z +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_z + #undef __deref_opt_inout_bcount_z + #undef __nvapi_undef__deref_opt_inout_bcount_z +#endif +#ifdef __nvapi_undef__deref_opt_inout_nz + #undef __deref_opt_inout_nz + #undef __nvapi_undef__deref_opt_inout_nz +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_nz + #undef __deref_opt_inout_ecount_nz + #undef __nvapi_undef__deref_opt_inout_ecount_nz +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_nz + #undef __deref_opt_inout_bcount_nz + #undef __nvapi_undef__deref_opt_inout_bcount_nz +#endif +#ifdef 
__nvapi_undef__deref_opt_ecount_opt + #undef __deref_opt_ecount_opt + #undef __nvapi_undef__deref_opt_ecount_opt +#endif +#ifdef __nvapi_undef__deref_opt_bcount_opt + #undef __deref_opt_bcount_opt + #undef __nvapi_undef__deref_opt_bcount_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_opt + #undef __deref_opt_out_opt + #undef __nvapi_undef__deref_opt_out_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount_opt + #undef __deref_opt_out_ecount_opt + #undef __nvapi_undef__deref_opt_out_ecount_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_bcount_opt + #undef __deref_opt_out_bcount_opt + #undef __nvapi_undef__deref_opt_out_bcount_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount_part_opt + #undef __deref_opt_out_ecount_part_opt + #undef __nvapi_undef__deref_opt_out_ecount_part_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_bcount_part_opt + #undef __deref_opt_out_bcount_part_opt + #undef __nvapi_undef__deref_opt_out_bcount_part_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount_full_opt + #undef __deref_opt_out_ecount_full_opt + #undef __nvapi_undef__deref_opt_out_ecount_full_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_bcount_full_opt + #undef __deref_opt_out_bcount_full_opt + #undef __nvapi_undef__deref_opt_out_bcount_full_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_z_opt + #undef __deref_opt_out_z_opt + #undef __nvapi_undef__deref_opt_out_z_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount_z_opt + #undef __deref_opt_out_ecount_z_opt + #undef __nvapi_undef__deref_opt_out_ecount_z_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_bcount_z_opt + #undef __deref_opt_out_bcount_z_opt + #undef __nvapi_undef__deref_opt_out_bcount_z_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_nz_opt + #undef __deref_opt_out_nz_opt + #undef __nvapi_undef__deref_opt_out_nz_opt +#endif +#ifdef __nvapi_undef__deref_opt_out_ecount_nz_opt + #undef __deref_opt_out_ecount_nz_opt + #undef __nvapi_undef__deref_opt_out_ecount_nz_opt +#endif +#ifdef 
__nvapi_undef__deref_opt_out_bcount_nz_opt + #undef __deref_opt_out_bcount_nz_opt + #undef __nvapi_undef__deref_opt_out_bcount_nz_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_opt + #undef __deref_opt_inout_opt + #undef __nvapi_undef__deref_opt_inout_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_opt + #undef __deref_opt_inout_ecount_opt + #undef __nvapi_undef__deref_opt_inout_ecount_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_opt + #undef __deref_opt_inout_bcount_opt + #undef __nvapi_undef__deref_opt_inout_bcount_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_part_opt + #undef __deref_opt_inout_ecount_part_opt + #undef __nvapi_undef__deref_opt_inout_ecount_part_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_part_opt + #undef __deref_opt_inout_bcount_part_opt + #undef __nvapi_undef__deref_opt_inout_bcount_part_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_full_opt + #undef __deref_opt_inout_ecount_full_opt + #undef __nvapi_undef__deref_opt_inout_ecount_full_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_full_opt + #undef __deref_opt_inout_bcount_full_opt + #undef __nvapi_undef__deref_opt_inout_bcount_full_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_z_opt + #undef __deref_opt_inout_z_opt + #undef __nvapi_undef__deref_opt_inout_z_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_z_opt + #undef __deref_opt_inout_ecount_z_opt + #undef __nvapi_undef__deref_opt_inout_ecount_z_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_z_opt + #undef __deref_opt_inout_bcount_z_opt + #undef __nvapi_undef__deref_opt_inout_bcount_z_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_nz_opt + #undef __deref_opt_inout_nz_opt + #undef __nvapi_undef__deref_opt_inout_nz_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_ecount_nz_opt + #undef __deref_opt_inout_ecount_nz_opt + #undef __nvapi_undef__deref_opt_inout_ecount_nz_opt +#endif +#ifdef __nvapi_undef__deref_opt_inout_bcount_nz_opt + 
#undef __deref_opt_inout_bcount_nz_opt + #undef __nvapi_undef__deref_opt_inout_bcount_nz_opt +#endif +#ifdef __nvapi_success + #undef __success + #undef __nvapi_success +#endif +#ifdef __nvapi__Ret_notnull_ + #undef __nvapi__Ret_notnull_ + #undef _Ret_notnull_ +#endif +#ifdef __nvapi__Post_writable_byte_size_ + #undef __nvapi__Post_writable_byte_size_ + #undef _Post_writable_byte_size_ +#endif +#ifdef __nvapi_Outptr_ + #undef __nvapi_Outptr_ + #undef _Outptr_ +#endif + +#endif // __NVAPI_EMPTY_SAL diff --git a/Source/ThirdParty/nvapi/nvapi_lite_salstart.h b/Source/ThirdParty/nvapi/nvapi_lite_salstart.h new file mode 100644 index 000000000..493dbdcd9 --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi_lite_salstart.h @@ -0,0 +1,813 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. *| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ + +// ==================================================== +// SAL related support +// ==================================================== + +#ifndef __ecount + #define __nvapi_undef__ecount + #define __ecount(size) +#endif +#ifndef __bcount + #define __nvapi_undef__bcount + #define __bcount(size) +#endif +#ifndef __in + #define __nvapi_undef__in + #define __in +#endif +#ifndef __in_ecount + #define __nvapi_undef__in_ecount + #define __in_ecount(size) +#endif +#ifndef __in_bcount + #define __nvapi_undef__in_bcount + #define __in_bcount(size) +#endif +#ifndef __in_z + #define __nvapi_undef__in_z + #define __in_z +#endif +#ifndef __in_ecount_z + #define __nvapi_undef__in_ecount_z + #define __in_ecount_z(size) +#endif +#ifndef __in_bcount_z + #define __nvapi_undef__in_bcount_z + #define __in_bcount_z(size) +#endif +#ifndef __in_nz + #define __nvapi_undef__in_nz + #define __in_nz +#endif +#ifndef __in_ecount_nz + #define __nvapi_undef__in_ecount_nz + #define __in_ecount_nz(size) +#endif +#ifndef __in_bcount_nz + #define __nvapi_undef__in_bcount_nz + #define __in_bcount_nz(size) +#endif +#ifndef __out + #define __nvapi_undef__out + #define __out +#endif +#ifndef __out_ecount + #define __nvapi_undef__out_ecount + #define __out_ecount(size) +#endif +#ifndef __out_bcount + #define __nvapi_undef__out_bcount + #define __out_bcount(size) +#endif +#ifndef __out_ecount_part + #define __nvapi_undef__out_ecount_part + #define __out_ecount_part(size,length) +#endif +#ifndef __out_bcount_part + #define __nvapi_undef__out_bcount_part + #define __out_bcount_part(size,length) 
+#endif +#ifndef __out_ecount_full + #define __nvapi_undef__out_ecount_full + #define __out_ecount_full(size) +#endif +#ifndef __out_bcount_full + #define __nvapi_undef__out_bcount_full + #define __out_bcount_full(size) +#endif +#ifndef __out_z + #define __nvapi_undef__out_z + #define __out_z +#endif +#ifndef __out_z_opt + #define __nvapi_undef__out_z_opt + #define __out_z_opt +#endif +#ifndef __out_ecount_z + #define __nvapi_undef__out_ecount_z + #define __out_ecount_z(size) +#endif +#ifndef __out_bcount_z + #define __nvapi_undef__out_bcount_z + #define __out_bcount_z(size) +#endif +#ifndef __out_ecount_part_z + #define __nvapi_undef__out_ecount_part_z + #define __out_ecount_part_z(size,length) +#endif +#ifndef __out_bcount_part_z + #define __nvapi_undef__out_bcount_part_z + #define __out_bcount_part_z(size,length) +#endif +#ifndef __out_ecount_full_z + #define __nvapi_undef__out_ecount_full_z + #define __out_ecount_full_z(size) +#endif +#ifndef __out_bcount_full_z + #define __nvapi_undef__out_bcount_full_z + #define __out_bcount_full_z(size) +#endif +#ifndef __out_nz + #define __nvapi_undef__out_nz + #define __out_nz +#endif +#ifndef __out_nz_opt + #define __nvapi_undef__out_nz_opt + #define __out_nz_opt +#endif +#ifndef __out_ecount_nz + #define __nvapi_undef__out_ecount_nz + #define __out_ecount_nz(size) +#endif +#ifndef __out_bcount_nz + #define __nvapi_undef__out_bcount_nz + #define __out_bcount_nz(size) +#endif +#ifndef __inout + #define __nvapi_undef__inout + #define __inout +#endif +#ifndef __inout_ecount + #define __nvapi_undef__inout_ecount + #define __inout_ecount(size) +#endif +#ifndef __inout_bcount + #define __nvapi_undef__inout_bcount + #define __inout_bcount(size) +#endif +#ifndef __inout_ecount_part + #define __nvapi_undef__inout_ecount_part + #define __inout_ecount_part(size,length) +#endif +#ifndef __inout_bcount_part + #define __nvapi_undef__inout_bcount_part + #define __inout_bcount_part(size,length) +#endif +#ifndef __inout_ecount_full + 
#define __nvapi_undef__inout_ecount_full + #define __inout_ecount_full(size) +#endif +#ifndef __inout_bcount_full + #define __nvapi_undef__inout_bcount_full + #define __inout_bcount_full(size) +#endif +#ifndef __inout_z + #define __nvapi_undef__inout_z + #define __inout_z +#endif +#ifndef __inout_ecount_z + #define __nvapi_undef__inout_ecount_z + #define __inout_ecount_z(size) +#endif +#ifndef __inout_bcount_z + #define __nvapi_undef__inout_bcount_z + #define __inout_bcount_z(size) +#endif +#ifndef __inout_nz + #define __nvapi_undef__inout_nz + #define __inout_nz +#endif +#ifndef __inout_ecount_nz + #define __nvapi_undef__inout_ecount_nz + #define __inout_ecount_nz(size) +#endif +#ifndef __inout_bcount_nz + #define __nvapi_undef__inout_bcount_nz + #define __inout_bcount_nz(size) +#endif +#ifndef __ecount_opt + #define __nvapi_undef__ecount_opt + #define __ecount_opt(size) +#endif +#ifndef __bcount_opt + #define __nvapi_undef__bcount_opt + #define __bcount_opt(size) +#endif +#ifndef __in_opt + #define __nvapi_undef__in_opt + #define __in_opt +#endif +#ifndef __in_ecount_opt + #define __nvapi_undef__in_ecount_opt + #define __in_ecount_opt(size) +#endif +#ifndef __in_bcount_opt + #define __nvapi_undef__in_bcount_opt + #define __in_bcount_opt(size) +#endif +#ifndef __in_z_opt + #define __nvapi_undef__in_z_opt + #define __in_z_opt +#endif +#ifndef __in_ecount_z_opt + #define __nvapi_undef__in_ecount_z_opt + #define __in_ecount_z_opt(size) +#endif +#ifndef __in_bcount_z_opt + #define __nvapi_undef__in_bcount_z_opt + #define __in_bcount_z_opt(size) +#endif +#ifndef __in_nz_opt + #define __nvapi_undef__in_nz_opt + #define __in_nz_opt +#endif +#ifndef __in_ecount_nz_opt + #define __nvapi_undef__in_ecount_nz_opt + #define __in_ecount_nz_opt(size) +#endif +#ifndef __in_bcount_nz_opt + #define __nvapi_undef__in_bcount_nz_opt + #define __in_bcount_nz_opt(size) +#endif +#ifndef __out_opt + #define __nvapi_undef__out_opt + #define __out_opt +#endif +#ifndef __out_ecount_opt + 
#define __nvapi_undef__out_ecount_opt + #define __out_ecount_opt(size) +#endif +#ifndef __out_bcount_opt + #define __nvapi_undef__out_bcount_opt + #define __out_bcount_opt(size) +#endif +#ifndef __out_ecount_part_opt + #define __nvapi_undef__out_ecount_part_opt + #define __out_ecount_part_opt(size,length) +#endif +#ifndef __out_bcount_part_opt + #define __nvapi_undef__out_bcount_part_opt + #define __out_bcount_part_opt(size,length) +#endif +#ifndef __out_ecount_full_opt + #define __nvapi_undef__out_ecount_full_opt + #define __out_ecount_full_opt(size) +#endif +#ifndef __out_bcount_full_opt + #define __nvapi_undef__out_bcount_full_opt + #define __out_bcount_full_opt(size) +#endif +#ifndef __out_ecount_z_opt + #define __nvapi_undef__out_ecount_z_opt + #define __out_ecount_z_opt(size) +#endif +#ifndef __out_bcount_z_opt + #define __nvapi_undef__out_bcount_z_opt + #define __out_bcount_z_opt(size) +#endif +#ifndef __out_ecount_part_z_opt + #define __nvapi_undef__out_ecount_part_z_opt + #define __out_ecount_part_z_opt(size,length) +#endif +#ifndef __out_bcount_part_z_opt + #define __nvapi_undef__out_bcount_part_z_opt + #define __out_bcount_part_z_opt(size,length) +#endif +#ifndef __out_ecount_full_z_opt + #define __nvapi_undef__out_ecount_full_z_opt + #define __out_ecount_full_z_opt(size) +#endif +#ifndef __out_bcount_full_z_opt + #define __nvapi_undef__out_bcount_full_z_opt + #define __out_bcount_full_z_opt(size) +#endif +#ifndef __out_ecount_nz_opt + #define __nvapi_undef__out_ecount_nz_opt + #define __out_ecount_nz_opt(size) +#endif +#ifndef __out_bcount_nz_opt + #define __nvapi_undef__out_bcount_nz_opt + #define __out_bcount_nz_opt(size) +#endif +#ifndef __inout_opt + #define __nvapi_undef__inout_opt + #define __inout_opt +#endif +#ifndef __inout_ecount_opt + #define __nvapi_undef__inout_ecount_opt + #define __inout_ecount_opt(size) +#endif +#ifndef __inout_bcount_opt + #define __nvapi_undef__inout_bcount_opt + #define __inout_bcount_opt(size) +#endif +#ifndef 
__inout_ecount_part_opt + #define __nvapi_undef__inout_ecount_part_opt + #define __inout_ecount_part_opt(size,length) +#endif +#ifndef __inout_bcount_part_opt + #define __nvapi_undef__inout_bcount_part_opt + #define __inout_bcount_part_opt(size,length) +#endif +#ifndef __inout_ecount_full_opt + #define __nvapi_undef__inout_ecount_full_opt + #define __inout_ecount_full_opt(size) +#endif +#ifndef __inout_bcount_full_opt + #define __nvapi_undef__inout_bcount_full_opt + #define __inout_bcount_full_opt(size) +#endif +#ifndef __inout_z_opt + #define __nvapi_undef__inout_z_opt + #define __inout_z_opt +#endif +#ifndef __inout_ecount_z_opt + #define __nvapi_undef__inout_ecount_z_opt + #define __inout_ecount_z_opt(size) +#endif +#ifndef __inout_ecount_z_opt + #define __nvapi_undef__inout_ecount_z_opt + #define __inout_ecount_z_opt(size) +#endif +#ifndef __inout_bcount_z_opt + #define __nvapi_undef__inout_bcount_z_opt + #define __inout_bcount_z_opt(size) +#endif +#ifndef __inout_nz_opt + #define __nvapi_undef__inout_nz_opt + #define __inout_nz_opt +#endif +#ifndef __inout_ecount_nz_opt + #define __nvapi_undef__inout_ecount_nz_opt + #define __inout_ecount_nz_opt(size) +#endif +#ifndef __inout_bcount_nz_opt + #define __nvapi_undef__inout_bcount_nz_opt + #define __inout_bcount_nz_opt(size) +#endif +#ifndef __deref_ecount + #define __nvapi_undef__deref_ecount + #define __deref_ecount(size) +#endif +#ifndef __deref_bcount + #define __nvapi_undef__deref_bcount + #define __deref_bcount(size) +#endif +#ifndef __deref_out + #define __nvapi_undef__deref_out + #define __deref_out +#endif +#ifndef __deref_out_ecount + #define __nvapi_undef__deref_out_ecount + #define __deref_out_ecount(size) +#endif +#ifndef __deref_out_bcount + #define __nvapi_undef__deref_out_bcount + #define __deref_out_bcount(size) +#endif +#ifndef __deref_out_ecount_part + #define __nvapi_undef__deref_out_ecount_part + #define __deref_out_ecount_part(size,length) +#endif +#ifndef __deref_out_bcount_part + #define 
__nvapi_undef__deref_out_bcount_part + #define __deref_out_bcount_part(size,length) +#endif +#ifndef __deref_out_ecount_full + #define __nvapi_undef__deref_out_ecount_full + #define __deref_out_ecount_full(size) +#endif +#ifndef __deref_out_bcount_full + #define __nvapi_undef__deref_out_bcount_full + #define __deref_out_bcount_full(size) +#endif +#ifndef __deref_out_z + #define __nvapi_undef__deref_out_z + #define __deref_out_z +#endif +#ifndef __deref_out_ecount_z + #define __nvapi_undef__deref_out_ecount_z + #define __deref_out_ecount_z(size) +#endif +#ifndef __deref_out_bcount_z + #define __nvapi_undef__deref_out_bcount_z + #define __deref_out_bcount_z(size) +#endif +#ifndef __deref_out_nz + #define __nvapi_undef__deref_out_nz + #define __deref_out_nz +#endif +#ifndef __deref_out_ecount_nz + #define __nvapi_undef__deref_out_ecount_nz + #define __deref_out_ecount_nz(size) +#endif +#ifndef __deref_out_bcount_nz + #define __nvapi_undef__deref_out_bcount_nz + #define __deref_out_bcount_nz(size) +#endif +#ifndef __deref_inout + #define __nvapi_undef__deref_inout + #define __deref_inout +#endif +#ifndef __deref_inout_z + #define __nvapi_undef__deref_inout_z + #define __deref_inout_z +#endif +#ifndef __deref_inout_ecount + #define __nvapi_undef__deref_inout_ecount + #define __deref_inout_ecount(size) +#endif +#ifndef __deref_inout_bcount + #define __nvapi_undef__deref_inout_bcount + #define __deref_inout_bcount(size) +#endif +#ifndef __deref_inout_ecount_part + #define __nvapi_undef__deref_inout_ecount_part + #define __deref_inout_ecount_part(size,length) +#endif +#ifndef __deref_inout_bcount_part + #define __nvapi_undef__deref_inout_bcount_part + #define __deref_inout_bcount_part(size,length) +#endif +#ifndef __deref_inout_ecount_full + #define __nvapi_undef__deref_inout_ecount_full + #define __deref_inout_ecount_full(size) +#endif +#ifndef __deref_inout_bcount_full + #define __nvapi_undef__deref_inout_bcount_full + #define __deref_inout_bcount_full(size) +#endif 
+#ifndef __deref_inout_z + #define __nvapi_undef__deref_inout_z + #define __deref_inout_z +#endif +#ifndef __deref_inout_ecount_z + #define __nvapi_undef__deref_inout_ecount_z + #define __deref_inout_ecount_z(size) +#endif +#ifndef __deref_inout_bcount_z + #define __nvapi_undef__deref_inout_bcount_z + #define __deref_inout_bcount_z(size) +#endif +#ifndef __deref_inout_nz + #define __nvapi_undef__deref_inout_nz + #define __deref_inout_nz +#endif +#ifndef __deref_inout_ecount_nz + #define __nvapi_undef__deref_inout_ecount_nz + #define __deref_inout_ecount_nz(size) +#endif +#ifndef __deref_inout_bcount_nz + #define __nvapi_undef__deref_inout_bcount_nz + #define __deref_inout_bcount_nz(size) +#endif +#ifndef __deref_ecount_opt + #define __nvapi_undef__deref_ecount_opt + #define __deref_ecount_opt(size) +#endif +#ifndef __deref_bcount_opt + #define __nvapi_undef__deref_bcount_opt + #define __deref_bcount_opt(size) +#endif +#ifndef __deref_out_opt + #define __nvapi_undef__deref_out_opt + #define __deref_out_opt +#endif +#ifndef __deref_out_ecount_opt + #define __nvapi_undef__deref_out_ecount_opt + #define __deref_out_ecount_opt(size) +#endif +#ifndef __deref_out_bcount_opt + #define __nvapi_undef__deref_out_bcount_opt + #define __deref_out_bcount_opt(size) +#endif +#ifndef __deref_out_ecount_part_opt + #define __nvapi_undef__deref_out_ecount_part_opt + #define __deref_out_ecount_part_opt(size,length) +#endif +#ifndef __deref_out_bcount_part_opt + #define __nvapi_undef__deref_out_bcount_part_opt + #define __deref_out_bcount_part_opt(size,length) +#endif +#ifndef __deref_out_ecount_full_opt + #define __nvapi_undef__deref_out_ecount_full_opt + #define __deref_out_ecount_full_opt(size) +#endif +#ifndef __deref_out_bcount_full_opt + #define __nvapi_undef__deref_out_bcount_full_opt + #define __deref_out_bcount_full_opt(size) +#endif +#ifndef __deref_out_z_opt + #define __nvapi_undef__deref_out_z_opt + #define __deref_out_z_opt +#endif +#ifndef __deref_out_ecount_z_opt + 
#define __nvapi_undef__deref_out_ecount_z_opt + #define __deref_out_ecount_z_opt(size) +#endif +#ifndef __deref_out_bcount_z_opt + #define __nvapi_undef__deref_out_bcount_z_opt + #define __deref_out_bcount_z_opt(size) +#endif +#ifndef __deref_out_nz_opt + #define __nvapi_undef__deref_out_nz_opt + #define __deref_out_nz_opt +#endif +#ifndef __deref_out_ecount_nz_opt + #define __nvapi_undef__deref_out_ecount_nz_opt + #define __deref_out_ecount_nz_opt(size) +#endif +#ifndef __deref_out_bcount_nz_opt + #define __nvapi_undef__deref_out_bcount_nz_opt + #define __deref_out_bcount_nz_opt(size) +#endif +#ifndef __deref_inout_opt + #define __nvapi_undef__deref_inout_opt + #define __deref_inout_opt +#endif +#ifndef __deref_inout_ecount_opt + #define __nvapi_undef__deref_inout_ecount_opt + #define __deref_inout_ecount_opt(size) +#endif +#ifndef __deref_inout_bcount_opt + #define __nvapi_undef__deref_inout_bcount_opt + #define __deref_inout_bcount_opt(size) +#endif +#ifndef __deref_inout_ecount_part_opt + #define __nvapi_undef__deref_inout_ecount_part_opt + #define __deref_inout_ecount_part_opt(size,length) +#endif +#ifndef __deref_inout_bcount_part_opt + #define __nvapi_undef__deref_inout_bcount_part_opt + #define __deref_inout_bcount_part_opt(size,length) +#endif +#ifndef __deref_inout_ecount_full_opt + #define __nvapi_undef__deref_inout_ecount_full_opt + #define __deref_inout_ecount_full_opt(size) +#endif +#ifndef __deref_inout_bcount_full_opt + #define __nvapi_undef__deref_inout_bcount_full_opt + #define __deref_inout_bcount_full_opt(size) +#endif +#ifndef __deref_inout_z_opt + #define __nvapi_undef__deref_inout_z_opt + #define __deref_inout_z_opt +#endif +#ifndef __deref_inout_ecount_z_opt + #define __nvapi_undef__deref_inout_ecount_z_opt + #define __deref_inout_ecount_z_opt(size) +#endif +#ifndef __deref_inout_bcount_z_opt + #define __nvapi_undef__deref_inout_bcount_z_opt + #define __deref_inout_bcount_z_opt(size) +#endif +#ifndef __deref_inout_nz_opt + #define 
__nvapi_undef__deref_inout_nz_opt + #define __deref_inout_nz_opt +#endif +#ifndef __deref_inout_ecount_nz_opt + #define __nvapi_undef__deref_inout_ecount_nz_opt + #define __deref_inout_ecount_nz_opt(size) +#endif +#ifndef __deref_inout_bcount_nz_opt + #define __nvapi_undef__deref_inout_bcount_nz_opt + #define __deref_inout_bcount_nz_opt(size) +#endif +#ifndef __deref_opt_ecount + #define __nvapi_undef__deref_opt_ecount + #define __deref_opt_ecount(size) +#endif +#ifndef __deref_opt_bcount + #define __nvapi_undef__deref_opt_bcount + #define __deref_opt_bcount(size) +#endif +#ifndef __deref_opt_out + #define __nvapi_undef__deref_opt_out + #define __deref_opt_out +#endif +#ifndef __deref_opt_out_z + #define __nvapi_undef__deref_opt_out_z + #define __deref_opt_out_z +#endif +#ifndef __deref_opt_out_ecount + #define __nvapi_undef__deref_opt_out_ecount + #define __deref_opt_out_ecount(size) +#endif +#ifndef __deref_opt_out_bcount + #define __nvapi_undef__deref_opt_out_bcount + #define __deref_opt_out_bcount(size) +#endif +#ifndef __deref_opt_out_ecount_part + #define __nvapi_undef__deref_opt_out_ecount_part + #define __deref_opt_out_ecount_part(size,length) +#endif +#ifndef __deref_opt_out_bcount_part + #define __nvapi_undef__deref_opt_out_bcount_part + #define __deref_opt_out_bcount_part(size,length) +#endif +#ifndef __deref_opt_out_ecount_full + #define __nvapi_undef__deref_opt_out_ecount_full + #define __deref_opt_out_ecount_full(size) +#endif +#ifndef __deref_opt_out_bcount_full + #define __nvapi_undef__deref_opt_out_bcount_full + #define __deref_opt_out_bcount_full(size) +#endif +#ifndef __deref_opt_inout + #define __nvapi_undef__deref_opt_inout + #define __deref_opt_inout +#endif +#ifndef __deref_opt_inout_ecount + #define __nvapi_undef__deref_opt_inout_ecount + #define __deref_opt_inout_ecount(size) +#endif +#ifndef __deref_opt_inout_bcount + #define __nvapi_undef__deref_opt_inout_bcount + #define __deref_opt_inout_bcount(size) +#endif +#ifndef 
__deref_opt_inout_ecount_part + #define __nvapi_undef__deref_opt_inout_ecount_part + #define __deref_opt_inout_ecount_part(size,length) +#endif +#ifndef __deref_opt_inout_bcount_part + #define __nvapi_undef__deref_opt_inout_bcount_part + #define __deref_opt_inout_bcount_part(size,length) +#endif +#ifndef __deref_opt_inout_ecount_full + #define __nvapi_undef__deref_opt_inout_ecount_full + #define __deref_opt_inout_ecount_full(size) +#endif +#ifndef __deref_opt_inout_bcount_full + #define __nvapi_undef__deref_opt_inout_bcount_full + #define __deref_opt_inout_bcount_full(size) +#endif +#ifndef __deref_opt_inout_z + #define __nvapi_undef__deref_opt_inout_z + #define __deref_opt_inout_z +#endif +#ifndef __deref_opt_inout_ecount_z + #define __nvapi_undef__deref_opt_inout_ecount_z + #define __deref_opt_inout_ecount_z(size) +#endif +#ifndef __deref_opt_inout_bcount_z + #define __nvapi_undef__deref_opt_inout_bcount_z + #define __deref_opt_inout_bcount_z(size) +#endif +#ifndef __deref_opt_inout_nz + #define __nvapi_undef__deref_opt_inout_nz + #define __deref_opt_inout_nz +#endif +#ifndef __deref_opt_inout_ecount_nz + #define __nvapi_undef__deref_opt_inout_ecount_nz + #define __deref_opt_inout_ecount_nz(size) +#endif +#ifndef __deref_opt_inout_bcount_nz + #define __nvapi_undef__deref_opt_inout_bcount_nz + #define __deref_opt_inout_bcount_nz(size) +#endif +#ifndef __deref_opt_ecount_opt + #define __nvapi_undef__deref_opt_ecount_opt + #define __deref_opt_ecount_opt(size) +#endif +#ifndef __deref_opt_bcount_opt + #define __nvapi_undef__deref_opt_bcount_opt + #define __deref_opt_bcount_opt(size) +#endif +#ifndef __deref_opt_out_opt + #define __nvapi_undef__deref_opt_out_opt + #define __deref_opt_out_opt +#endif +#ifndef __deref_opt_out_ecount_opt + #define __nvapi_undef__deref_opt_out_ecount_opt + #define __deref_opt_out_ecount_opt(size) +#endif +#ifndef __deref_opt_out_bcount_opt + #define __nvapi_undef__deref_opt_out_bcount_opt + #define __deref_opt_out_bcount_opt(size) +#endif 
+#ifndef __deref_opt_out_ecount_part_opt + #define __nvapi_undef__deref_opt_out_ecount_part_opt + #define __deref_opt_out_ecount_part_opt(size,length) +#endif +#ifndef __deref_opt_out_bcount_part_opt + #define __nvapi_undef__deref_opt_out_bcount_part_opt + #define __deref_opt_out_bcount_part_opt(size,length) +#endif +#ifndef __deref_opt_out_ecount_full_opt + #define __nvapi_undef__deref_opt_out_ecount_full_opt + #define __deref_opt_out_ecount_full_opt(size) +#endif +#ifndef __deref_opt_out_bcount_full_opt + #define __nvapi_undef__deref_opt_out_bcount_full_opt + #define __deref_opt_out_bcount_full_opt(size) +#endif +#ifndef __deref_opt_out_z_opt + #define __nvapi_undef__deref_opt_out_z_opt + #define __deref_opt_out_z_opt +#endif +#ifndef __deref_opt_out_ecount_z_opt + #define __nvapi_undef__deref_opt_out_ecount_z_opt + #define __deref_opt_out_ecount_z_opt(size) +#endif +#ifndef __deref_opt_out_bcount_z_opt + #define __nvapi_undef__deref_opt_out_bcount_z_opt + #define __deref_opt_out_bcount_z_opt(size) +#endif +#ifndef __deref_opt_out_nz_opt + #define __nvapi_undef__deref_opt_out_nz_opt + #define __deref_opt_out_nz_opt +#endif +#ifndef __deref_opt_out_ecount_nz_opt + #define __nvapi_undef__deref_opt_out_ecount_nz_opt + #define __deref_opt_out_ecount_nz_opt(size) +#endif +#ifndef __deref_opt_out_bcount_nz_opt + #define __nvapi_undef__deref_opt_out_bcount_nz_opt + #define __deref_opt_out_bcount_nz_opt(size) +#endif +#ifndef __deref_opt_inout_opt + #define __nvapi_undef__deref_opt_inout_opt + #define __deref_opt_inout_opt +#endif +#ifndef __deref_opt_inout_ecount_opt + #define __nvapi_undef__deref_opt_inout_ecount_opt + #define __deref_opt_inout_ecount_opt(size) +#endif +#ifndef __deref_opt_inout_bcount_opt + #define __nvapi_undef__deref_opt_inout_bcount_opt + #define __deref_opt_inout_bcount_opt(size) +#endif +#ifndef __deref_opt_inout_ecount_part_opt + #define __nvapi_undef__deref_opt_inout_ecount_part_opt + #define __deref_opt_inout_ecount_part_opt(size,length) 
+#endif +#ifndef __deref_opt_inout_bcount_part_opt + #define __nvapi_undef__deref_opt_inout_bcount_part_opt + #define __deref_opt_inout_bcount_part_opt(size,length) +#endif +#ifndef __deref_opt_inout_ecount_full_opt + #define __nvapi_undef__deref_opt_inout_ecount_full_opt + #define __deref_opt_inout_ecount_full_opt(size) +#endif +#ifndef __deref_opt_inout_bcount_full_opt + #define __nvapi_undef__deref_opt_inout_bcount_full_opt + #define __deref_opt_inout_bcount_full_opt(size) +#endif +#ifndef __deref_opt_inout_z_opt + #define __nvapi_undef__deref_opt_inout_z_opt + #define __deref_opt_inout_z_opt +#endif +#ifndef __deref_opt_inout_ecount_z_opt + #define __nvapi_undef__deref_opt_inout_ecount_z_opt + #define __deref_opt_inout_ecount_z_opt(size) +#endif +#ifndef __deref_opt_inout_bcount_z_opt + #define __nvapi_undef__deref_opt_inout_bcount_z_opt + #define __deref_opt_inout_bcount_z_opt(size) +#endif +#ifndef __deref_opt_inout_nz_opt + #define __nvapi_undef__deref_opt_inout_nz_opt + #define __deref_opt_inout_nz_opt +#endif +#ifndef __deref_opt_inout_ecount_nz_opt + #define __nvapi_undef__deref_opt_inout_ecount_nz_opt + #define __deref_opt_inout_ecount_nz_opt(size) +#endif +#ifndef __deref_opt_inout_bcount_nz_opt + #define __nvapi_undef__deref_opt_inout_bcount_nz_opt + #define __deref_opt_inout_bcount_nz_opt(size) +#endif +#ifndef __success + #define __nvapi_success + #define __success(epxr) +#endif +#ifndef _Ret_notnull_ + #define __nvapi__Ret_notnull_ + #define _Ret_notnull_ +#endif +#ifndef _Post_writable_byte_size_ + #define __nvapi__Post_writable_byte_size_ + #define _Post_writable_byte_size_(n) +#endif +#ifndef _Outptr_ + #define __nvapi_Outptr_ + #define _Outptr_ +#endif + + +#define NVAPI_INTERFACE extern __success(return == NVAPI_OK) NvAPI_Status __cdecl diff --git a/Source/ThirdParty/nvapi/nvapi_lite_sli.h b/Source/ThirdParty/nvapi/nvapi_lite_sli.h new file mode 100644 index 000000000..dde7e7fb5 --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi_lite_sli.h @@ 
-0,0 +1,247 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. *| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ + +#pragma once +#include"nvapi_lite_salstart.h" +#include"nvapi_lite_common.h" +#pragma pack(push,8) +#ifdef __cplusplus +extern "C" { +#endif +//----------------------------------------------------------------------------- +// DirectX APIs +//----------------------------------------------------------------------------- + + +//! \ingroup dx +//! Used in NvAPI_D3D10_GetCurrentSLIState(), and NvAPI_D3D_GetCurrentSLIState(). 
+typedef struct +{ + NvU32 version; //!< Structure version + NvU32 maxNumAFRGroups; //!< [OUT] The maximum possible value of numAFRGroups + NvU32 numAFRGroups; //!< [OUT] The number of AFR groups enabled in the system + NvU32 currentAFRIndex; //!< [OUT] The AFR group index for the frame currently being rendered + NvU32 nextFrameAFRIndex; //!< [OUT] What the AFR group index will be for the next frame (i.e. after calling Present) + NvU32 previousFrameAFRIndex; //!< [OUT] The AFR group index that was used for the previous frame (~0 if more than one frame has not been rendered yet) + NvU32 bIsCurAFRGroupNew; //!< [OUT] Boolean: Is this frame the first time running on the current AFR group + +} NV_GET_CURRENT_SLI_STATE_V1; + +typedef struct +{ + NvU32 version; //!< Structure version + NvU32 maxNumAFRGroups; //!< [OUT] The maximum possible value of numAFRGroups + NvU32 numAFRGroups; //!< [OUT] The number of AFR groups enabled in the system + NvU32 currentAFRIndex; //!< [OUT] The AFR group index for the frame currently being rendered + NvU32 nextFrameAFRIndex; //!< [OUT] What the AFR group index will be for the next frame (i.e. after calling Present) + NvU32 previousFrameAFRIndex; //!< [OUT] The AFR group index that was used for the previous frame (~0 if more than one frame has not been rendered yet) + NvU32 bIsCurAFRGroupNew; //!< [OUT] Boolean: Is this frame the first time running on the current AFR group + NvU32 numVRSLIGpus; //!< [OUT] The number of GPUs used in VR-SLI. If it is 0 VR-SLI is not active + +} NV_GET_CURRENT_SLI_STATE_V2; + +//! 
\ingroup dx +#define NV_GET_CURRENT_SLI_STATE_VER1 MAKE_NVAPI_VERSION(NV_GET_CURRENT_SLI_STATE_V1,1) +#define NV_GET_CURRENT_SLI_STATE_VER2 MAKE_NVAPI_VERSION(NV_GET_CURRENT_SLI_STATE_V2,1) +#define NV_GET_CURRENT_SLI_STATE_VER NV_GET_CURRENT_SLI_STATE_VER2 +#define NV_GET_CURRENT_SLI_STATE NV_GET_CURRENT_SLI_STATE_V2 +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_GetCurrentSLIState +// +//! DESCRIPTION: This function returns the current SLI state for the specified device. The structure +//! contains the number of AFR groups, the current AFR group index, +//! and what the AFR group index will be for the next frame. \p +//! pDevice can be either a IDirect3DDevice9 or ID3D10Device pointer. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 173 +//! +//! \retval NVAPI_OK Completed request +//! \retval NVAPI_ERROR Error occurred +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_GetCurrentSLIState(IUnknown *pDevice, NV_GET_CURRENT_SLI_STATE *pSliState); +#endif //if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_SetResourceHint +// +//! \fn NvAPI_D3D_SetResourceHint(IUnknown *pDev, NVDX_ObjectHandle obj, +//! NVAPI_D3D_SETRESOURCEHINT_CATEGORY dwHintCategory, +//! NvU32 dwHintName, +//! NvU32 *pdwHintValue) +//! +//! DESCRIPTION: This is a general purpose function for passing down various resource +//! related hints to the driver. Hints are divided into categories +//! and types within each category. For DX11 devices this function is free-threaded. +//! 
An application is responsible to complete this call before making use of the resource +//! in a rendering context (therefore applying inter-thread synchronization as appropriate). +//! As a debug help to an application the driver enforces that a resource in this call was never bound. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pDev The ID3D10Device or IDirect3DDevice9 that is a using the resource +//! \param [in] obj Previously obtained HV resource handle +//! \param [in] dwHintCategory Category of the hints +//! \param [in] dwHintName A hint within this category +//! \param [in/out] *pdwHintValue Pointer to location containing hint value, function returns previous hint value in this slot +//! +//! \return an int which could be an NvAPI status or DX HRESULT code +//! +//! \retval ::NVAPI_OK +//! \retval ::NVAPI_INVALID_ARGUMENT +//! \retval ::NVAPI_INVALID_CALL It is illegal to change a hint dynamically when the resource is already bound. +// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +//! \ingroup dx +//! Valid categories for NvAPI_D3D_SetResourceHint() +typedef enum _NVAPI_D3D_SETRESOURCEHINT_CATEGORY +{ + NVAPI_D3D_SRH_CATEGORY_SLI = 1 +} NVAPI_D3D_SETRESOURCEHINT_CATEGORY; + + +// +// NVAPI_D3D_SRH_SLI_APP_CONTROLLED_INTERFRAME_CONTENT_SYNC: +// NVAPI_D3D_SRH_SLI_ASK_FOR_BROADCAST_USING: +// NVAPI_D3D_SRH_SLI_RESPECT_DRIVER_INTERFRAME_CONTENT_SYNC: + + +//! \ingroup dx +//! Types of SLI hints; \n +//! NVAPI_D3D_SRH_SLI_APP_CONTROLLED_INTERFRAME_CONTENT_SYNC: Valid values : 0 or 1 \n +//! Default value: 0 \n +//! Explanation: If the value is 1, the driver will not track any rendering operations that would mark this resource as dirty, +//! avoiding any form of synchronization across frames rendered in parallel in multiple GPUs in AFR mode. +//! +//! NVAPI_D3D_SRH_SLI_ASK_FOR_BROADCAST_USAGE: Valid values : 0 or 1 \n +//! 
Default value: 0 \n +//! Explanation: If the value is 1, the driver will try to perform operations which involved target resource in broadcast, +//! where it's possible. Hint is static and must be set before resource starts using. +//! +//! NVAPI_D3D_SRH_SLI_RESPECT_DRIVER_INTERFRAME_CONTENT_SYNC: Valid values : 0 or 1 \n +//! Default value: 0 \n +//! Explanation: If the value is 1, the driver will do dirty resource resolve regardless of discard flags in the application profile or +//! AFR-FriendlyD3DHints.exe name using. +//! +typedef enum _NVAPI_D3D_SETRESOURCEHINT_SLI +{ + NVAPI_D3D_SRH_SLI_APP_CONTROLLED_INTERFRAME_CONTENT_SYNC = 1, + NVAPI_D3D_SRH_SLI_ASK_FOR_BROADCAST_USAGE = 2, + NVAPI_D3D_SRH_SLI_RESPECT_DRIVER_INTERFRAME_CONTENT_SYNC = 3 +} NVAPI_D3D_SETRESOURCEHINT_SLI; + +//! \ingroup dx +NVAPI_INTERFACE NvAPI_D3D_SetResourceHint(IUnknown *pDev, NVDX_ObjectHandle obj, + NVAPI_D3D_SETRESOURCEHINT_CATEGORY dwHintCategory, + NvU32 dwHintName, + NvU32 *pdwHintValue); +#endif //defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) + +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_BeginResourceRendering +// +//! \fn NvAPI_D3D_BeginResourceRendering(IUnknown *pDeviceOrContext, NVDX_ObjectHandle obj, NvU32 Flags) +//! DESCRIPTION: This function tells the driver that the resource will begin to receive updates. It must be used in combination with NvAPI_D3D_EndResourceRendering(). +//! The primary use of this function is allow the driver to initiate early inter-frame synchronization of resources while running in AFR SLI mode. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pDev IDirect3DDevice9, ID3D10Device, ID3D11Device or ID3D11DeviceContext that is using the resource +//! \param [in] obj Previously obtained HV resource handle +//! 
\param [in] Flags The flags for functionality applied to resource while being used. +//! +//! \retval ::NVAPI_OK Function succeeded, if used properly and driver can initiate proper sync'ing of the resources. +//! \retval ::NVAPI_INVALID_ARGUMENT Bad argument(s) or invalid flag values +//! \retval ::NVAPI_INVALID_CALL Mismatched begin/end calls +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup dx +//! Used in NvAPI_D3D_BeginResourceRendering(). +typedef enum _NVAPI_D3D_RESOURCERENDERING_FLAG +{ + NVAPI_D3D_RR_FLAG_DEFAULTS = 0x00000000, //!< All bits set to 0 are defaults. + NVAPI_D3D_RR_FLAG_FORCE_DISCARD_CONTENT = 0x00000001, //!< (bit 0) The flag forces to discard previous content of the resource regardless of the NvApiHints_Sli_Disable_InterframeSync hint + NVAPI_D3D_RR_FLAG_FORCE_KEEP_CONTENT = 0x00000002, //!< (bit 1) The flag forces to respect previous content of the resource regardless of the NvApiHints_Sli_Disable_InterframeSync hint + NVAPI_D3D_RR_FLAG_MULTI_FRAME = 0x00000004 //!< (bit 2) The flag hints the driver that content will be used for many frames. If not specified then the driver assumes that content is used only on the next frame +} NVAPI_D3D_RESOURCERENDERING_FLAG; + +//! \ingroup dx +NVAPI_INTERFACE NvAPI_D3D_BeginResourceRendering(IUnknown *pDeviceOrContext, NVDX_ObjectHandle obj, NvU32 Flags); + +#endif //defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) + +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D_EndResourceRendering +// +//! DESCRIPTION: This function tells the driver that the resource is done receiving updates. It must be used in combination with +//! NvAPI_D3D_BeginResourceRendering(). +//! The primary use of this function is allow the driver to initiate early inter-frame syncs of resources while running in AFR SLI mode. +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 185 +//! +//! \param [in] pDev IDirect3DDevice9, ID3D10Device, ID3D11Device or ID3D11DeviceContext that is using the resource +//! \param [in] obj Previously obtained HV resource handle +//! \param [in] Flags Reserved, must be zero +// +//! \retval ::NVAPI_OK Function succeeded, if used properly and driver can initiate proper sync'ing of the resources. +//! \retval ::NVAPI_INVALID_ARGUMENT Bad argument(s) or invalid flag values +//! \retval ::NVAPI_INVALID_CALL Mismatched begin/end calls +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D_EndResourceRendering(IUnknown *pDeviceOrContext, NVDX_ObjectHandle obj, NvU32 Flags); +#endif //if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__) + +#include"nvapi_lite_salend.h" +#ifdef __cplusplus +} +#endif +#pragma pack(pop) diff --git a/Source/ThirdParty/nvapi/nvapi_lite_stereo.h b/Source/ThirdParty/nvapi/nvapi_lite_stereo.h new file mode 100644 index 000000000..ebcfddfb0 --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi_lite_stereo.h @@ -0,0 +1,592 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. *| +|* *| +|* *| +\*********************************************************************************************************/ + +#pragma once +#include"nvapi_lite_salstart.h" +#include"nvapi_lite_common.h" +#pragma pack(push,8) +#ifdef __cplusplus +extern "C" { +#endif +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_Enable +// +//! DESCRIPTION: This APU enables stereo mode in the registry. +//! Calls to this function affect the entire system. +//! If stereo is not enabled, then calls to functions that require that stereo is enabled have no effect, +//! and will return the appropriate error code. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \retval ::NVAPI_OK Stereo is now enabled. +//! 
\retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_Enable(void); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_Disable +// +//! DESCRIPTION: This API disables stereo mode in the registry. +//! Calls to this function affect the entire system. +//! If stereo is not enabled, then calls to functions that require that stereo is enabled have no effect, +//! and will return the appropriate error code. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \retval ::NVAPI_OK Stereo is now disabled. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_Disable(void); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_IsEnabled +// +//! DESCRIPTION: This API checks if stereo mode is enabled in the registry. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [out] pIsStereoEnabled Address where the result of the inquiry will be placed. +//! +//! \retval ::NVAPI_OK Check was sucessfully completed and result reflects current state of stereo availability. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! 
\ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_IsEnabled(NvU8 *pIsStereoEnabled); +#if defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d11_h__)|| defined(__d3d12_h__) + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_CreateHandleFromIUnknown +// +//! DESCRIPTION: This API creates a stereo handle that is used in subsequent calls related to a given device interface. +//! This must be called before any other NvAPI_Stereo_ function for that handle. +//! Multiple devices can be used at one time using multiple calls to this function (one per each device). +//! +//! HOW TO USE: After the Direct3D device is created, create the stereo handle. +//! On call success: +//! -# Use all other NvAPI_Stereo_ functions that have stereo handle as first parameter. +//! -# After the device interface that corresponds to the the stereo handle is destroyed, +//! the application should call NvAPI_DestroyStereoHandle() for that stereo handle. +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] pDevice Pointer to IUnknown interface that is IDirect3DDevice9* in DX9, ID3D10Device*. +//! \param [out] pStereoHandle Pointer to the newly created stereo handle. +//! +//! \retval ::NVAPI_OK Stereo handle is created for given device interface. +//! \retval ::NVAPI_INVALID_ARGUMENT Provided device interface is invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! 
\ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_CreateHandleFromIUnknown(IUnknown *pDevice, StereoHandle *pStereoHandle); + +#endif // defined(_D3D9_H_) || defined(__d3d10_h__) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_DestroyHandle +// +//! DESCRIPTION: This API destroys the stereo handle created with one of the NvAPI_Stereo_CreateHandleFrom() functions. +//! This should be called after the device corresponding to the handle has been destroyed. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle that is to be destroyed. +//! +//! \retval ::NVAPI_OK Stereo handle is destroyed. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_DestroyHandle(StereoHandle stereoHandle); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_Activate +// +//! DESCRIPTION: This API activates stereo for the device interface corresponding to the given stereo handle. +//! Activating stereo is possible only if stereo was enabled previously in the registry. +//! If stereo is not activated, then calls to functions that require that stereo is activated have no effect, +//! and will return the appropriate error code. +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle corresponding to the device interface. +//! +//! 
\retval ::NVAPI_OK Stereo is turned on. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_Activate(StereoHandle stereoHandle); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_Deactivate +// +//! DESCRIPTION: This API deactivates stereo for the given device interface. +//! If stereo is not activated, then calls to functions that require that stereo is activated have no effect, +//! and will return the appropriate error code. +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! +//! \retval ::NVAPI_OK Stereo is turned off. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_Deactivate(StereoHandle stereoHandle); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_IsActivated +// +//! DESCRIPTION: This API checks if stereo is activated for the given device interface. +//! +//! 
WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! \param [in] pIsStereoOn Address where result of the inquiry will be placed. +//! +//! \retval ::NVAPI_OK - Check was sucessfully completed and result reflects current state of stereo (on/off). +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE - Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED - NVAPI not initialized. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED - Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR - Something is wrong (generic error). +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_IsActivated(StereoHandle stereoHandle, NvU8 *pIsStereoOn); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_GetSeparation +// +//! DESCRIPTION: This API gets current separation value (in percents). +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! \param [out] pSeparationPercentage Address of @c float type variable to store current separation percentage in. +//! +//! \retval ::NVAPI_OK Retrieval of separation percentage was successfull. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! 
\retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_GetSeparation(StereoHandle stereoHandle, float *pSeparationPercentage); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_SetSeparation +// +//! DESCRIPTION: This API sets separation to given percentage. +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! \param [in] newSeparationPercentage New value for separation percentage. +//! +//! \retval ::NVAPI_OK Setting of separation percentage was successfull. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED NVAPI not initialized. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_STEREO_PARAMETER_OUT_OF_RANGE Given separation percentage is out of [0..100] range. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_SetSeparation(StereoHandle stereoHandle, float newSeparationPercentage); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_GetConvergence +// +//! DESCRIPTION: This API gets the current convergence value. +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! 
SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! \param [out] pConvergence Address of @c float type variable to store current convergence value in. +//! +//! \retval ::NVAPI_OK Retrieval of convergence value was successfull. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_GetConvergence(StereoHandle stereoHandle, float *pConvergence); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_SetConvergence +// +//! DESCRIPTION: This API sets convergence to the given value. +//! +//! WHEN TO USE: After the stereo handle for the device interface is created via successfull call to the appropriate NvAPI_Stereo_CreateHandleFrom() function. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 180 +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! \param [in] newConvergence New value for convergence. +//! +//! \retval ::NVAPI_OK Setting of convergence value was successfull. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR +//! +//! 
\ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_SetConvergence(StereoHandle stereoHandle, float newConvergence); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_SetActiveEye +// +//! \fn NvAPI_Stereo_SetActiveEye(StereoHandle hStereoHandle, NV_STEREO_ACTIVE_EYE StereoEye); +//! DESCRIPTION: This API sets the back buffer to left or right in Direct stereo mode. +//! +//! HOW TO USE: After the stereo handle for device interface is created via successfull call to appropriate +//! NvAPI_Stereo_CreateHandleFrom function. +//! +//! \since Release: 285 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! \param [in] StereoEye Defines active eye in Direct stereo mode +//! +//! \retval ::NVAPI_OK - Active eye is set. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE - Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED - NVAPI not initialized. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED - Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_INVALID_ARGUMENT - StereoEye parameter has not allowed value. +//! \retval ::NVAPI_SET_NOT_ALLOWED - Current stereo mode is not Direct +//! \retval ::NVAPI_ERROR - Something is wrong (generic error). +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup stereoapi +typedef enum _NV_StereoActiveEye +{ + NVAPI_STEREO_EYE_RIGHT = 1, + NVAPI_STEREO_EYE_LEFT = 2, + NVAPI_STEREO_EYE_MONO = 3, +} NV_STEREO_ACTIVE_EYE; + +//! \ingroup stereoapi +NVAPI_INTERFACE NvAPI_Stereo_SetActiveEye(StereoHandle hStereoHandle, NV_STEREO_ACTIVE_EYE StereoEye); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_SetDriverMode +// +//! 
\fn NvAPI_Stereo_SetDriverMode( NV_STEREO_DRIVER_MODE mode ); +//! DESCRIPTION: This API sets the 3D stereo driver mode: Direct or Automatic +//! +//! HOW TO USE: This API must be called before the device is created. +//! Applies to DirectX 9 and higher. +//! +//! \since Release: 285 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] mode Defines the 3D stereo driver mode: Direct or Automatic +//! +//! \retval ::NVAPI_OK Active eye is set. +//! \retval ::NVAPI_API_NOT_INTIALIZED NVAPI not initialized. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_INVALID_ARGUMENT mode parameter has not allowed value. +//! \retval ::NVAPI_ERROR Something is wrong (generic error). +// +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup stereoapi +typedef enum _NV_StereoDriverMode +{ + NVAPI_STEREO_DRIVER_MODE_AUTOMATIC = 0, + NVAPI_STEREO_DRIVER_MODE_DIRECT = 2, +} NV_STEREO_DRIVER_MODE; + +//! \ingroup stereoapi +NVAPI_INTERFACE NvAPI_Stereo_SetDriverMode( NV_STEREO_DRIVER_MODE mode ); + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_GetEyeSeparation +// +//! DESCRIPTION: This API returns eye separation as a ratio of /. +//! +//! HOW TO USE: After the stereo handle for device interface is created via successfull call to appropriate API. Applies only to DirectX 9 and up. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [in] stereoHandle Stereo handle that corresponds to the device interface. +//! \param [out] pSeparation Eye separation. +//! +//! \retval ::NVAPI_OK Active eye is set. +//! \retval ::NVAPI_STEREO_INVALID_DEVICE_INTERFACE Device interface is not valid. Create again, then attach again. +//! \retval ::NVAPI_API_NOT_INTIALIZED NVAPI not initialized. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! 
\retval ::NVAPI_ERROR (generic error). +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_GetEyeSeparation(StereoHandle hStereoHandle, float *pSeparation ); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_IsWindowedModeSupported +// +//! DESCRIPTION: This API returns availability of windowed mode stereo +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \param [out] bSupported(OUT) != 0 - supported, \n +//! == 0 - is not supported +//! +//! +//! \retval ::NVAPI_OK Retrieval of frustum adjust mode was successfull. +//! \retval ::NVAPI_API_NOT_INTIALIZED NVAPI not initialized. +//! \retval ::NVAPI_STEREO_NOT_INITIALIZED Stereo part of NVAPI not initialized. +//! \retval ::NVAPI_ERROR Something is wrong (generic error). +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_IsWindowedModeSupported(NvU8* bSupported); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_SetSurfaceCreationMode +// +//! \function NvAPI_Stereo_SetSurfaceCreationMode(StereoHandle hStereoHandle, NVAPI_STEREO_SURFACECREATEMODE creationMode) +//! \param [in] hStereoHandle Stereo handle that corresponds to the device interface. +//! \param [in] creationMode New surface creation mode for this device interface. +//! +//! \since Release: 285 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! DESCRIPTION: This API sets surface creation mode for this device interface. +//! +//! WHEN TO USE: After the stereo handle for device interface is created via successful call to appropriate NvAPI_Stereo_CreateHandleFrom function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! There are no return error codes with specific meaning for this API. 
+//! +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup stereoapi +typedef enum _NVAPI_STEREO_SURFACECREATEMODE +{ + NVAPI_STEREO_SURFACECREATEMODE_AUTO, //!< Use driver registry profile settings for surface creation mode. + NVAPI_STEREO_SURFACECREATEMODE_FORCESTEREO, //!< Always create stereo surfaces. + NVAPI_STEREO_SURFACECREATEMODE_FORCEMONO //!< Always create mono surfaces. +} NVAPI_STEREO_SURFACECREATEMODE; + +//! \ingroup stereoapi +NVAPI_INTERFACE NvAPI_Stereo_SetSurfaceCreationMode(__in StereoHandle hStereoHandle, __in NVAPI_STEREO_SURFACECREATEMODE creationMode); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_GetSurfaceCreationMode +// +//! \function NvAPI_Stereo_GetSurfaceCreationMode(StereoHandle hStereoHandle, NVAPI_STEREO_SURFACECREATEMODE* pCreationMode) +//! \param [in] hStereoHandle Stereo handle that corresponds to the device interface. +//! \param [out] pCreationMode The current creation mode for this device interface. +//! +//! \since Release: 295 +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! DESCRIPTION: This API gets surface creation mode for this device interface. +//! +//! WHEN TO USE: After the stereo handle for device interface is created via successful call to appropriate NvAPI_Stereo_CreateHandleFrom function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! There are no return error codes with specific meaning for this API. +//! +/////////////////////////////////////////////////////////////////////////////// + +//! \ingroup stereoapi +NVAPI_INTERFACE NvAPI_Stereo_GetSurfaceCreationMode(__in StereoHandle hStereoHandle, __in NVAPI_STEREO_SURFACECREATEMODE* pCreationMode); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_Debug_WasLastDrawStereoized +// +//! 
\param [in] hStereoHandle Stereo handle that corresponds to the device interface. +//! \param [out] pWasStereoized Address where result of the inquiry will be placed. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! DESCRIPTION: This API checks if the last draw call was stereoized. It is a very expensive to call and should be used for debugging purpose *only*. +//! +//! WHEN TO USE: After the stereo handle for device interface is created via successful call to appropriate NvAPI_Stereo_CreateHandleFrom function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! There are no return error codes with specific meaning for this API. +//! +//! \ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_Debug_WasLastDrawStereoized(__in StereoHandle hStereoHandle, __out NvU8 *pWasStereoized); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_SetDefaultProfile +// +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! DESCRIPTION: This API defines the stereo profile used by the driver in case the application has no associated profile. +//! +//! WHEN TO USE: To take effect, this API must be called before D3D device is created. Calling once a device has been created will not affect the current device. +//! +//! \param [in] szProfileName Default profile name. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! Error codes specific to this API are described below. +//! +//! \retval NVAPI_SUCCESS - Default stereo profile name has been copied into szProfileName. +//! \retval NVAPI_INVALID_ARGUMENT - szProfileName == NULL. +//! \retval NVAPI_DEFAULT_STEREO_PROFILE_DOES_NOT_EXIST - Default stereo profile does not exist +//! +//! 
\ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_SetDefaultProfile(__in const char* szProfileName); +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Stereo_GetDefaultProfile +// +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! DESCRIPTION: This API retrieves the current default stereo profile. +//! +//! After call cbSizeOut contain 0 if default profile is not set required buffer size cbSizeOut. +//! To get needed buffer size this function can be called with szProfileName==0 and cbSizeIn == 0. +//! +//! WHEN TO USE: This API can be called at any time. +//! +//! +//! \param [in] cbSizeIn Size of buffer allocated for default stereo profile name. +//! \param [out] szProfileName Default stereo profile name. +//! \param [out] pcbSizeOut Required buffer size. +//! # ==0 - there is no default stereo profile name currently set +//! # !=0 - size of buffer required for currently set default stereo profile name including trailing '0'. +//! +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! Error codes specific to this API are described below. +//! +//! \retval NVAPI_SUCCESS - Default stereo profile name has been copied into szProfileName. +//! \retval NVAPI_DEFAULT_STEREO_PROFILE_IS_NOT_DEFINED - There is no default stereo profile set at this time. +//! \retval NVAPI_INVALID_ARGUMENT - pcbSizeOut == 0 or cbSizeIn >= *pcbSizeOut && szProfileName == 0 +//! \retval NVAPI_INSUFFICIENT_BUFFER - cbSizeIn < *pcbSizeOut +//! +//! 
\ingroup stereoapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Stereo_GetDefaultProfile( __in NvU32 cbSizeIn, __out_bcount_part_opt(cbSizeIn, *pcbSizeOut) char* szProfileName, __out NvU32 *pcbSizeOut); + +#include"nvapi_lite_salend.h" +#ifdef __cplusplus +} +#endif +#pragma pack(pop) diff --git a/Source/ThirdParty/nvapi/nvapi_lite_surround.h b/Source/ThirdParty/nvapi/nvapi_lite_surround.h new file mode 100644 index 000000000..bb58bfefb --- /dev/null +++ b/Source/ThirdParty/nvapi/nvapi_lite_surround.h @@ -0,0 +1,95 @@ +/*********************************************************************************************************\ +|* *| +|* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. *| +|* SPDX-License-Identifier: MIT *| +|* *| +|* Permission is hereby granted, free of charge, to any person obtaining a *| +|* copy of this software and associated documentation files (the "Software"), *| +|* to deal in the Software without restriction, including without limitation *| +|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *| +|* and/or sell copies of the Software, and to permit persons to whom the *| +|* Software is furnished to do so, subject to the following conditions: *| +|* *| +|* The above copyright notice and this permission notice shall be included in *| +|* all copies or substantial portions of the Software. *| +|* *| +|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *| +|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *| +|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *| +|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *| +|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *| +|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *| +|* DEALINGS IN THE SOFTWARE. 
*| +|* *| +|* *| +\*********************************************************************************************************/ + +#pragma once +#include"nvapi_lite_salstart.h" +#include"nvapi_lite_common.h" +#pragma pack(push,8) +#ifdef __cplusplus +extern "C" { +#endif +//! SUPPORTED OS: Windows 10 and higher +//! +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_DISP_GetGDIPrimaryDisplayId +// +//! DESCRIPTION: This API returns the Display ID of the GDI Primary. +//! +//! \param [out] displayId Display ID of the GDI Primary display. +//! +//! \retval ::NVAPI_OK: Capabilties have been returned. +//! \retval ::NVAPI_NVIDIA_DEVICE_NOT_FOUND: GDI Primary not on an NVIDIA GPU. +//! \retval ::NVAPI_INVALID_ARGUMENT: One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED: The NvAPI API needs to be initialized first +//! \retval ::NVAPI_NO_IMPLEMENTATION: This entrypoint not available +//! \retval ::NVAPI_ERROR: Miscellaneous error occurred +//! +//! \ingroup dispcontrol +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_DISP_GetGDIPrimaryDisplayId(NvU32* displayId); +#define NV_MOSAIC_MAX_DISPLAYS (64) +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_Mosaic_GetDisplayViewportsByResolution +// +//! DESCRIPTION: This API returns the viewports that would be applied on +//! the requested display. +//! +//! \param [in] displayId Display ID of a single display in the active +//! mosaic topology to query. +//! \param [in] srcWidth Width of full display topology. If both +//! width and height are 0, the current +//! resolution is used. +//! \param [in] srcHeight Height of full display topology. If both +//! width and height are 0, the current +//! resolution is used. +//! \param [out] viewports Array of NV_RECT viewports. +//! SUPPORTED OS: Windows 10 and higher +//! +//! 
If the requested resolution is a single-wide +//! resolution, only viewports[0] will +//! contain the viewport details, regardless +//! of which display is driving the display. +//! \param [out] bezelCorrected Returns 1 if the requested resolution is +//! bezel corrected. May be NULL. +//! +//! \retval ::NVAPI_OK Capabilties have been returned. +//! \retval ::NVAPI_INVALID_ARGUMENT One or more args passed in are invalid. +//! \retval ::NVAPI_API_NOT_INTIALIZED The NvAPI API needs to be initialized first +//! \retval ::NVAPI_MOSAIC_NOT_ACTIVE The display does not belong to an active Mosaic Topology +//! \retval ::NVAPI_NO_IMPLEMENTATION This entrypoint not available +//! \retval ::NVAPI_ERROR Miscellaneous error occurred +//! +//! \ingroup mosaicapi +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_Mosaic_GetDisplayViewportsByResolution(NvU32 displayId, NvU32 srcWidth, NvU32 srcHeight, NV_RECT viewports[NV_MOSAIC_MAX_DISPLAYS], NvU8* bezelCorrected); + +#include"nvapi_lite_salend.h" +#ifdef __cplusplus +} +#endif +#pragma pack(pop) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs new file mode 100644 index 000000000..68ef2eaf7 --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs @@ -0,0 +1,57 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using Flax.Build; +using System.IO; + +namespace Flax.Deps.Dependencies +{ + /// + /// NVAPI is NVIDIA's core software development kit that allows direct access to NVIDIA GPUs and drivers on supported platforms. 
+ /// https://github.com/NVIDIA/nvapi + /// + /// + class nvapi : Dependency + { + /// + public override TargetPlatform[] Platforms + { + get => new[] { TargetPlatform.Windows }; + } + + /// + public override void Build(BuildOptions options) + { + var root = options.IntermediateFolder; + var moduleFolder = Path.Combine(options.ThirdPartyFolder, "nvapi"); + + // Get the source + CloneGitRepoFast(root, "https://github.com/NVIDIA/nvapi.git"); + + // Copy files + foreach (var platform in options.Platforms) + { + BuildStarted(platform); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + Utilities.FileCopy(Path.Combine(root, "amd64/nvapi64.lib"), Path.Combine(depsFolder, "nvapi64.lib")); + } + + // Copy license and header files + Utilities.FileCopy(Path.Combine(root, "License.txt"), Path.Combine(moduleFolder, "LICENSE.txt")); + var files = new[] + { + "nvHLSLExtns.h", + "nvHLSLExtnsInternal.h", + "nvapi.h", + "nvapi_lite_common.h", + "nvapi_lite_d3dext.h", + "nvapi_lite_salstart.h", + "nvapi_lite_salend.h", + "nvapi_lite_sli.h", + "nvapi_lite_stereo.h", + "nvapi_lite_surround.h", + }; + foreach (var file in files) + Utilities.FileCopy(Path.Combine(root, file), Path.Combine(moduleFolder, file)); + } + } +} From 4fd6343fb9b0d3c071dc12a50dc44d80a2dd681e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 10 Aug 2025 16:04:24 +0200 Subject: [PATCH 177/211] Fix missing memory barrier on particle indirect args building to ensure buffer copies are done afetr whole buffer update ends --- Source/Engine/Particles/Particles.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 98fc230c6..46649f82a 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -813,6 +813,9 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) // Upload default arguments context->UpdateBuffer(GPUIndirectArgsBuffer, 
indirectArgsMemory, indirectArgsOffset); + // Wait for whole buffer write end before submitting buffer copies + pass.MemoryBarrier(); + // Copy particle counts into draw commands indirectArgsOffset = 0; for (GPUEmitterDraw& draw : GPUEmitterDraws) From c1c806490ff51b1e760eb38ce153936283377d04 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 10 Aug 2025 16:05:18 +0200 Subject: [PATCH 178/211] Add `nvapi` lib to D3D11 for efficient UAV writes overlaps on NVIDIA GPUs --- Source/Engine/Graphics/GPUPass.h | 6 ++++ .../DirectX/DX11/GPUContextDX11.cpp | 21 +++++++++++++ .../DirectX/DX11/GPUContextDX11.h | 1 + .../DirectX/DX11/GPUDeviceDX11.cpp | 30 ++++++++++++++++++- .../DirectX/DX11/GraphicsDeviceDX11.Build.cs | 2 ++ 5 files changed, 59 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/GPUPass.h b/Source/Engine/Graphics/GPUPass.h index 59f8608e2..5a0520ec0 100644 --- a/Source/Engine/Graphics/GPUPass.h +++ b/Source/Engine/Graphics/GPUPass.h @@ -46,6 +46,12 @@ struct FLAXENGINE_API GPUMemoryPass : GPUPass { Context->MemoryBarrier(); } + + // Inserts a global memory barrier on data copies between resources. Use to ensure all writes end before submitting other commands.
+ void MemoryBarrier() + { + Context->MemoryBarrier(); + } }; /// diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 3d94cdd96..6d0957014 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -14,6 +14,10 @@ #include "Engine/Core/Math/Viewport.h" #include "Engine/Core/Math/Rectangle.h" #include "Engine/Profiler/RenderStats.h" +#if COMPILE_WITH_NVAPI +#include +extern bool EnableNvapi; +#endif #define DX11_CLEAR_SR_ON_STAGE_DISABLE 0 @@ -903,6 +907,23 @@ void GPUContextDX11::CopySubresource(GPUResource* dstResource, uint32 dstSubreso _context->CopySubresourceRegion(dstResourceDX11->GetResource(), dstSubresource, 0, 0, 0, srcResourceDX11->GetResource(), srcSubresource, nullptr); } +void GPUContextDX11::OverlapUA(bool end) +{ + // DirectX 11 doesn't support UAV barrier control, but custom GPU driver extensions allow manually specifying overlap sections.
+#if COMPILE_WITH_NVAPI + if (EnableNvapi) + { + if (end) + NvAPI_D3D11_EndUAVOverlap(_context); + else + NvAPI_D3D11_BeginUAVOverlap(_context); + return; + } +#endif + // TODO: add support for AMD extensions to overlap UAV writes (agsDriverExtensionsDX11_BeginUAVOverlap/agsDriverExtensionsDX11_EndUAVOverlap) + // TODO: add support for Intel extensions to overlap UAV writes (INTC_D3D11_BeginUAVOverlap/INTC_D3D11_EndUAVOverlap) +} + void GPUContextDX11::flushSRVs() { #define FLUSH_STAGE(STAGE) if (Current##STAGE) _context->STAGE##SetShaderResources(0, ARRAY_COUNT(_srHandles), _srHandles) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index 6d1877534..ccdac0d70 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -167,6 +167,7 @@ public: void CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBuffer* srcBuffer) override; void CopyResource(GPUResource* dstResource, GPUResource* srcResource) override; void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override; + void OverlapUA(bool end) override; }; #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index 411d9dd92..ca479a7c0 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -20,7 +20,10 @@ #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Engine/CommandLine.h" #include "Engine/Profiler/ProfilerMemory.h" - +#if COMPILE_WITH_NVAPI +#include +bool EnableNvapi = false; +#endif #if !USE_EDITOR && PLATFORM_WINDOWS #include "Engine/Core/Config/PlatformSettings.h" #endif @@ -420,6 +423,31 @@ bool GPUDeviceDX11::Init() { HRESULT result; + // Driver extensions +#if COMPILE_WITH_NVAPI + if 
(_adapter->IsNVIDIA()) + { + NvAPI_Status status = NvAPI_Initialize(); + if (status == NVAPI_OK) + { + EnableNvapi = true; + + NvU32 driverVersion; + NvAPI_ShortString buildBranch(""); + if (NvAPI_SYS_GetDriverAndBranchVersion(&driverVersion, buildBranch) == NVAPI_OK) + { + LOG(Info, "NvApi driver version: {}, {}", driverVersion, StringAsUTF16<>(buildBranch).Get()); + } + } + else + { + NvAPI_ShortString desc; + NvAPI_GetErrorMessage(status, desc); + LOG(Warning, "NvAPI_Initialize failed with result {} ({})", (int32)status, String(desc)); + } + } +#endif + // Get DXGI adapter ComPtr adapter; if (_factoryDXGI->EnumAdapters(_adapter->Index, &adapter) == DXGI_ERROR_NOT_FOUND || adapter == nullptr) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs b/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs index a5f288594..b8ffd2629 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs @@ -14,5 +14,7 @@ public class GraphicsDeviceDX11 : GraphicsDeviceBaseModule options.PublicDefinitions.Add("GRAPHICS_API_DIRECTX11"); options.OutputFiles.Add("d3d11.lib"); + if (nvapi.Use(options)) + options.PrivateDependencies.Add("nvapi"); } } From bc0e1f81e7a6b67d8a0631c4473a996883f032d3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 10 Aug 2025 17:16:09 +0200 Subject: [PATCH 179/211] Add `AGS` third party module AGS 6.3 --- .../Binaries/ThirdParty/x64/amd_ags_x64.dll | 3 + .../Binaries/ThirdParty/x64/amd_ags_x64.lib | 3 + Source/ThirdParty/AGS/AGS.Build.cs | 51 + Source/ThirdParty/AGS/LICENSE.txt | 19 + .../AGS/ags_shader_intrinsics_dx11.hlsl | 3599 +++++++++++++++ .../AGS/ags_shader_intrinsics_dx12.hlsl | 3958 +++++++++++++++++ Source/ThirdParty/AGS/amd_ags.h | 1394 ++++++ .../nvapi/{LICENSE.txt => License.txt} | 0 .../Tools/Flax.Build/Deps/Dependencies/AGS.cs | 46 + .../Flax.Build/Deps/Dependencies/nvapi.cs | 2 +- 10 files 
changed, 9074 insertions(+), 1 deletion(-) create mode 100644 Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.dll create mode 100644 Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.lib create mode 100644 Source/ThirdParty/AGS/AGS.Build.cs create mode 100644 Source/ThirdParty/AGS/LICENSE.txt create mode 100644 Source/ThirdParty/AGS/ags_shader_intrinsics_dx11.hlsl create mode 100644 Source/ThirdParty/AGS/ags_shader_intrinsics_dx12.hlsl create mode 100644 Source/ThirdParty/AGS/amd_ags.h rename Source/ThirdParty/nvapi/{LICENSE.txt => License.txt} (100%) create mode 100644 Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.dll b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.dll new file mode 100644 index 000000000..acbf0f736 --- /dev/null +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27b070ca39dc37984fb3dde0187515d36094e72cb881d7a99bd1055befd8da2 +size 179408 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.lib new file mode 100644 index 000000000..166cc964a --- /dev/null +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b10497284289dcef89117fbcc93cff4ad53f9d9fdd80a31e3427ab62636001e +size 13506 diff --git a/Source/ThirdParty/AGS/AGS.Build.cs b/Source/ThirdParty/AGS/AGS.Build.cs new file mode 100644 index 000000000..5ed6a2800 --- /dev/null +++ b/Source/ThirdParty/AGS/AGS.Build.cs @@ -0,0 +1,51 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+ +using System.IO; +using Flax.Build; +using Flax.Build.NativeCpp; + +/// +/// https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK +/// +public class AGS : DepsModule +{ + public static bool Use(BuildOptions options) + { + return options.Platform.Target == TargetPlatform.Windows && options.Architecture == TargetArchitecture.x64; + } + + /// + public override void Init() + { + base.Init(); + + LicenseType = LicenseTypes.MIT; + LicenseFilePath = "LICENSE.txt"; + + // Merge third-party modules into engine binary + BinaryModuleName = "FlaxEngine"; + } + + /// + public override void Setup(BuildOptions options) + { + base.Setup(options); + + var depsRoot = options.DepsFolder; + options.PublicDefinitions.Add("COMPILE_WITH_AGS"); + switch (options.Platform.Target) + { + case TargetPlatform.Windows: + switch (options.Architecture) + { + case TargetArchitecture.x64: + options.OutputFiles.Add(Path.Combine(depsRoot, "amd_ags_x64.lib")); + options.OptionalDependencyFiles.Add(Path.Combine(depsRoot, "amd_ags_x64.dll")); + break; + default: throw new InvalidArchitectureException(options.Architecture); + } + break; + default: throw new InvalidPlatformException(options.Platform.Target); + } + } +} diff --git a/Source/ThirdParty/AGS/LICENSE.txt b/Source/ThirdParty/AGS/LICENSE.txt new file mode 100644 index 000000000..d4e81a71c --- /dev/null +++ b/Source/ThirdParty/AGS/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/Source/ThirdParty/AGS/ags_shader_intrinsics_dx11.hlsl b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx11.hlsl new file mode 100644 index 000000000..e3c7ad358 --- /dev/null +++ b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx11.hlsl @@ -0,0 +1,3599 @@ +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +/** +************************************************************************************************************* +* @file ags_shader_intrinsics_dx11.hlsl +* +* @brief +* AMD D3D Shader Intrinsics API hlsl file. +* This include file contains the shader intrinsics definitions (structures, enums, constant) +* and HLSL shader intrinsics functions. +* +* @version 2.3 +* +************************************************************************************************************* +*/ + +#ifndef _AMDDXEXTSHADERINTRINSICS_HLSL_ +#define _AMDDXEXTSHADERINTRINSICS_HLSL_ + +/** +************************************************************************************************************* +* Definitions to construct the intrinsic instruction composed of an opcode and optional immediate data. 
+************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsics_MagicCodeShift 28 +#define AmdDxExtShaderIntrinsics_MagicCodeMask 0xf +#define AmdDxExtShaderIntrinsics_OpcodePhaseShift 24 +#define AmdDxExtShaderIntrinsics_OpcodePhaseMask 0x3 +#define AmdDxExtShaderIntrinsics_DataShift 8 +#define AmdDxExtShaderIntrinsics_DataMask 0xffff +#define AmdDxExtShaderIntrinsics_OpcodeShift 0 +#define AmdDxExtShaderIntrinsics_OpcodeMask 0xff + +#define AmdDxExtShaderIntrinsics_MagicCode 0x5 + + +/** +************************************************************************************************************* +* Intrinsic opcodes. +************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsOpcode_Readfirstlane 0x01 +#define AmdDxExtShaderIntrinsicsOpcode_Readlane 0x02 +#define AmdDxExtShaderIntrinsicsOpcode_LaneId 0x03 +#define AmdDxExtShaderIntrinsicsOpcode_Swizzle 0x04 +#define AmdDxExtShaderIntrinsicsOpcode_Ballot 0x05 +#define AmdDxExtShaderIntrinsicsOpcode_MBCnt 0x06 +#define AmdDxExtShaderIntrinsicsOpcode_Min3U 0x08 +#define AmdDxExtShaderIntrinsicsOpcode_Min3F 0x09 +#define AmdDxExtShaderIntrinsicsOpcode_Med3U 0x0a +#define AmdDxExtShaderIntrinsicsOpcode_Med3F 0x0b +#define AmdDxExtShaderIntrinsicsOpcode_Max3U 0x0c +#define AmdDxExtShaderIntrinsicsOpcode_Max3F 0x0d +#define AmdDxExtShaderIntrinsicsOpcode_BaryCoord 0x0e +#define AmdDxExtShaderIntrinsicsOpcode_VtxParam 0x0f +#define AmdDxExtShaderIntrinsicsOpcode_ViewportIndex 0x10 +#define AmdDxExtShaderIntrinsicsOpcode_RtArraySlice 0x11 +#define AmdDxExtShaderIntrinsicsOpcode_WaveReduce 0x12 +#define AmdDxExtShaderIntrinsicsOpcode_WaveScan 0x13 +#define AmdDxExtShaderIntrinsicsOpcode_Reserved1 0x14 +#define AmdDxExtShaderIntrinsicsOpcode_Reserved2 0x15 +#define AmdDxExtShaderIntrinsicsOpcode_Reserved3 0x16 +#define 
AmdDxExtShaderIntrinsicsOpcode_DrawIndex 0x17 +#define AmdDxExtShaderIntrinsicsOpcode_AtomicU64 0x18 +#define AmdDxExtShaderIntrinsicsOpcode_GetWaveSize 0x19 +#define AmdDxExtShaderIntrinsicsOpcode_BaseInstance 0x1a +#define AmdDxExtShaderIntrinsicsOpcode_BaseVertex 0x1b + + +/** +************************************************************************************************************* +* Intrinsic opcode phases. +************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsOpcodePhase_0 0x0 +#define AmdDxExtShaderIntrinsicsOpcodePhase_1 0x1 +#define AmdDxExtShaderIntrinsicsOpcodePhase_2 0x2 +#define AmdDxExtShaderIntrinsicsOpcodePhase_3 0x3 + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsicsSwizzle defines for common swizzles. Can be used as the operation parameter for +* the AmdDxExtShaderIntrinsics_Swizzle intrinsic. 
+************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsSwizzle_SwapX1 0x041f +#define AmdDxExtShaderIntrinsicsSwizzle_SwapX2 0x081f +#define AmdDxExtShaderIntrinsicsSwizzle_SwapX4 0x101f +#define AmdDxExtShaderIntrinsicsSwizzle_SwapX8 0x201f +#define AmdDxExtShaderIntrinsicsSwizzle_SwapX16 0x401f +#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX2 0x041f +#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX4 0x0c1f +#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX8 0x1c1f +#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX16 0x3c1f +#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX32 0x7c1f +#define AmdDxExtShaderIntrinsicsSwizzle_BCastX2 0x003e +#define AmdDxExtShaderIntrinsicsSwizzle_BCastX4 0x003c +#define AmdDxExtShaderIntrinsicsSwizzle_BCastX8 0x0038 +#define AmdDxExtShaderIntrinsicsSwizzle_BCastX16 0x0030 +#define AmdDxExtShaderIntrinsicsSwizzle_BCastX32 0x0020 + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsicsBarycentric defines for barycentric interpolation mode. To be used with +* AmdDxExtShaderIntrinsicsOpcode_IjBarycentricCoords to specify the interpolation mode. 
+************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsBarycentric_LinearCenter 0x1 +#define AmdDxExtShaderIntrinsicsBarycentric_LinearCentroid 0x2 +#define AmdDxExtShaderIntrinsicsBarycentric_LinearSample 0x3 +#define AmdDxExtShaderIntrinsicsBarycentric_PerspCenter 0x4 +#define AmdDxExtShaderIntrinsicsBarycentric_PerspCentroid 0x5 +#define AmdDxExtShaderIntrinsicsBarycentric_PerspSample 0x6 +#define AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel 0x7 + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsicsBarycentric defines for specifying vertex and parameter indices. To be used as +* the inputs to the AmdDxExtShaderIntrinsicsOpcode_VertexParameter function +************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsBarycentric_Vertex0 0x0 +#define AmdDxExtShaderIntrinsicsBarycentric_Vertex1 0x1 +#define AmdDxExtShaderIntrinsicsBarycentric_Vertex2 0x2 + +#define AmdDxExtShaderIntrinsicsBarycentric_Param0 0x00 +#define AmdDxExtShaderIntrinsicsBarycentric_Param1 0x01 +#define AmdDxExtShaderIntrinsicsBarycentric_Param2 0x02 +#define AmdDxExtShaderIntrinsicsBarycentric_Param3 0x03 +#define AmdDxExtShaderIntrinsicsBarycentric_Param4 0x04 +#define AmdDxExtShaderIntrinsicsBarycentric_Param5 0x05 +#define AmdDxExtShaderIntrinsicsBarycentric_Param6 0x06 +#define AmdDxExtShaderIntrinsicsBarycentric_Param7 0x07 +#define AmdDxExtShaderIntrinsicsBarycentric_Param8 0x08 +#define AmdDxExtShaderIntrinsicsBarycentric_Param9 0x09 +#define AmdDxExtShaderIntrinsicsBarycentric_Param10 0x0a +#define AmdDxExtShaderIntrinsicsBarycentric_Param11 0x0b +#define AmdDxExtShaderIntrinsicsBarycentric_Param12 0x0c +#define AmdDxExtShaderIntrinsicsBarycentric_Param13 0x0d +#define AmdDxExtShaderIntrinsicsBarycentric_Param14 
0x0e +#define AmdDxExtShaderIntrinsicsBarycentric_Param15 0x0f +#define AmdDxExtShaderIntrinsicsBarycentric_Param16 0x10 +#define AmdDxExtShaderIntrinsicsBarycentric_Param17 0x11 +#define AmdDxExtShaderIntrinsicsBarycentric_Param18 0x12 +#define AmdDxExtShaderIntrinsicsBarycentric_Param19 0x13 +#define AmdDxExtShaderIntrinsicsBarycentric_Param20 0x14 +#define AmdDxExtShaderIntrinsicsBarycentric_Param21 0x15 +#define AmdDxExtShaderIntrinsicsBarycentric_Param22 0x16 +#define AmdDxExtShaderIntrinsicsBarycentric_Param23 0x17 +#define AmdDxExtShaderIntrinsicsBarycentric_Param24 0x18 +#define AmdDxExtShaderIntrinsicsBarycentric_Param25 0x19 +#define AmdDxExtShaderIntrinsicsBarycentric_Param26 0x1a +#define AmdDxExtShaderIntrinsicsBarycentric_Param27 0x1b +#define AmdDxExtShaderIntrinsicsBarycentric_Param28 0x1c +#define AmdDxExtShaderIntrinsicsBarycentric_Param29 0x1d +#define AmdDxExtShaderIntrinsicsBarycentric_Param30 0x1e +#define AmdDxExtShaderIntrinsicsBarycentric_Param31 0x1f + +#define AmdDxExtShaderIntrinsicsBarycentric_ComponentX 0x0 +#define AmdDxExtShaderIntrinsicsBarycentric_ComponentY 0x1 +#define AmdDxExtShaderIntrinsicsBarycentric_ComponentZ 0x2 +#define AmdDxExtShaderIntrinsicsBarycentric_ComponentW 0x3 + +#define AmdDxExtShaderIntrinsicsBarycentric_ParamShift 0 +#define AmdDxExtShaderIntrinsicsBarycentric_ParamMask 0x1f +#define AmdDxExtShaderIntrinsicsBarycentric_VtxShift 0x5 +#define AmdDxExtShaderIntrinsicsBarycentric_VtxMask 0x3 +#define AmdDxExtShaderIntrinsicsBarycentric_ComponentShift 0x7 +#define AmdDxExtShaderIntrinsicsBarycentric_ComponentMask 0x3 + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsicsWaveOp defines for supported operations. Can be used as the parameter for the +* AmdDxExtShaderIntrinsicsOpcode_WaveOp intrinsic. 
+************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsWaveOp_AddF 0x01 +#define AmdDxExtShaderIntrinsicsWaveOp_AddI 0x02 +#define AmdDxExtShaderIntrinsicsWaveOp_AddU 0x03 +#define AmdDxExtShaderIntrinsicsWaveOp_MulF 0x04 +#define AmdDxExtShaderIntrinsicsWaveOp_MulI 0x05 +#define AmdDxExtShaderIntrinsicsWaveOp_MulU 0x06 +#define AmdDxExtShaderIntrinsicsWaveOp_MinF 0x07 +#define AmdDxExtShaderIntrinsicsWaveOp_MinI 0x08 +#define AmdDxExtShaderIntrinsicsWaveOp_MinU 0x09 +#define AmdDxExtShaderIntrinsicsWaveOp_MaxF 0x0a +#define AmdDxExtShaderIntrinsicsWaveOp_MaxI 0x0b +#define AmdDxExtShaderIntrinsicsWaveOp_MaxU 0x0c +#define AmdDxExtShaderIntrinsicsWaveOp_And 0x0d // Reduction only +#define AmdDxExtShaderIntrinsicsWaveOp_Or 0x0e // Reduction only +#define AmdDxExtShaderIntrinsicsWaveOp_Xor 0x0f // Reduction only + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsicsWaveOp masks and shifts for opcode and flags +************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift 0 +#define AmdDxExtShaderIntrinsicsWaveOp_OpcodeMask 0xff +#define AmdDxExtShaderIntrinsicsWaveOp_FlagShift 8 +#define AmdDxExtShaderIntrinsicsWaveOp_FlagMask 0xff + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsicsWaveOp flags for use with AmdDxExtShaderIntrinsicsOpcode_WaveScan. 
+************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsWaveOp_Inclusive 0x01 +#define AmdDxExtShaderIntrinsicsWaveOp_Exclusive 0x02 + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsicsAtomic defines for supported operations. Can be used as the parameter for the +* AmdDxExtShaderIntrinsicsOpcode_AtomicU64 intrinsic. +************************************************************************************************************* +*/ +#define AmdDxExtShaderIntrinsicsAtomicOp_MinU64 0x01 +#define AmdDxExtShaderIntrinsicsAtomicOp_MaxU64 0x02 +#define AmdDxExtShaderIntrinsicsAtomicOp_AndU64 0x03 +#define AmdDxExtShaderIntrinsicsAtomicOp_OrU64 0x04 +#define AmdDxExtShaderIntrinsicsAtomicOp_XorU64 0x05 +#define AmdDxExtShaderIntrinsicsAtomicOp_AddU64 0x06 +#define AmdDxExtShaderIntrinsicsAtomicOp_XchgU64 0x07 +#define AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64 0x08 + + +/** +************************************************************************************************************* +* Resource slots for intrinsics using imm_atomic_cmp_exch. +************************************************************************************************************* +*/ +#ifndef AmdDxExtShaderIntrinsicsUAVSlot +#define AmdDxExtShaderIntrinsicsUAVSlot u7 +#endif + +RWByteAddressBuffer AmdDxExtShaderIntrinsicsUAV : register(AmdDxExtShaderIntrinsicsUAVSlot); + +/** +************************************************************************************************************* +* Resource and sampler slots for intrinsics using sample_l. 
+************************************************************************************************************* +*/ +#ifndef AmdDxExtShaderIntrinsicsResSlot +#define AmdDxExtShaderIntrinsicsResSlot t127 +#endif + +#ifndef AmdDxExtShaderIntrinsicsSamplerSlot +#define AmdDxExtShaderIntrinsicsSamplerSlot s15 +#endif + +SamplerState AmdDxExtShaderIntrinsicsSamplerState : register (AmdDxExtShaderIntrinsicsSamplerSlot); +Texture3D AmdDxExtShaderIntrinsicsResource : register (AmdDxExtShaderIntrinsicsResSlot); + +/** +************************************************************************************************************* +* MakeAmdShaderIntrinsicsInstruction +* +* Creates instruction from supplied opcode and immediate data. +* NOTE: This is an internal function and should not be called by the source HLSL shader directly. +* +************************************************************************************************************* +*/ +uint MakeAmdShaderIntrinsicsInstruction(uint opcode, uint opcodePhase, uint immediateData) +{ + return ((AmdDxExtShaderIntrinsics_MagicCode << AmdDxExtShaderIntrinsics_MagicCodeShift) | + (immediateData << AmdDxExtShaderIntrinsics_DataShift) | + (opcodePhase << AmdDxExtShaderIntrinsics_OpcodePhaseShift) | + (opcode << AmdDxExtShaderIntrinsics_OpcodeShift)); +} + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_ReadfirstlaneF +* +* Returns the value of float src for the first active lane of the wavefront. 
+* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_ReadfirstlaneF(float src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readfirstlane, + 0, 0); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + return asfloat(retVal); +} + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_ReadfirstlaneU +* +* Returns the value of unsigned integer src for the first active lane of the wavefront. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_ReadfirstlaneU(uint src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readfirstlane, + 0, 0); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_ReadlaneF +* +* Returns the value of float src for the lane within the wavefront specified by laneId. 
+* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_ReadlaneF(float src, uint laneId) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readlane, 0, + laneId); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_ReadlaneU +* +* Returns the value of unsigned integer src for the lane within the wavefront specified by laneId. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_ReadlaneU(uint src, uint laneId) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readlane, 0, + laneId); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_LaneId +* +* Returns the current lane id for the thread within the wavefront. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_LaneId() +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_LaneId, 0, 0); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetWaveSize +* +* Returns the wave size for the current shader, including active, inactive and helper lanes. 
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetWaveSize() +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_GetWaveSize, 0, 0); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Swizzle +* +* Generic instruction to shuffle the float src value among different lanes as specified by the +* operation. +* Note that the operation parameter must be an immediately specified value not a value from a variable. +* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_SwizzleF(float src, uint operation) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Swizzle, 0, + operation); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_SwizzleU +* +* Generic instruction to shuffle the unsigned integer src value among different lanes as specified by the +* operation. +* Note that the operation parameter must be an immediately specified value not a value from a variable. 
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_SwizzleU(uint src, uint operation) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Swizzle, 0, + operation); + + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Ballot +* +* Given an input predicate returns a bit mask indicating for which lanes the predicate is true. +* Inactive or non-existent lanes will always return 0. The number of existent lanes is the +* wavefront size. +* +************************************************************************************************************* +*/ +uint2 AmdDxExtShaderIntrinsics_Ballot(bool predicate) +{ + uint instruction; + + uint retVal1; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Ballot, + AmdDxExtShaderIntrinsicsOpcodePhase_0, 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, predicate, 0, retVal1); + + uint retVal2; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Ballot, + AmdDxExtShaderIntrinsicsOpcodePhase_1, 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, predicate, 0, retVal2); + + return uint2(retVal1, retVal2); +} + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_BallotAny +* +* Convenience routine that uses Ballot and returns true if for any of the active lanes the predicate +* is true. 
+* +************************************************************************************************************* +*/ +bool AmdDxExtShaderIntrinsics_BallotAny(bool predicate) +{ + uint2 retVal = AmdDxExtShaderIntrinsics_Ballot(predicate); + + return ((retVal.x | retVal.y) != 0 ? true : false); +} + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_BallotAll +* +* Convenience routine that uses Ballot and returns true if for all of the active lanes the predicate +* is true. +* +************************************************************************************************************* +*/ +bool AmdDxExtShaderIntrinsics_BallotAll(bool predicate) +{ + uint2 ballot = AmdDxExtShaderIntrinsics_Ballot(predicate); + + uint2 execMask = AmdDxExtShaderIntrinsics_Ballot(true); + + return ((ballot.x == execMask.x) && (ballot.y == execMask.y)); +} + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_MBCnt +* +* Returns the masked bit count of the source register for this thread within all the active threads +* within a wavefront. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_MBCnt(uint2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_MBCnt, 0, 0); + + uint retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, src.y, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Min3F +* +* Returns the minimum value of the three floating point source arguments. 
+* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_Min3F(float src0, float src1, float src2) +{ + uint minimum; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3F, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), minimum); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3F, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), minimum, minimum); + + return asfloat(minimum); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Min3U +* +* Returns the minimum value of the three unsigned integer source arguments. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_Min3U(uint src0, uint src1, uint src2) +{ + uint minimum; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3U, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, minimum); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3U, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, minimum, minimum); + + return minimum; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Med3F +* +* Returns the median value of the three floating point source arguments. 
+* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_Med3F(float src0, float src1, float src2) +{ + uint median; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3F, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), median); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3F, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), median, median); + + return asfloat(median); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Med3U +* +* Returns the median value of the three unsigned integer source arguments. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_Med3U(uint src0, uint src1, uint src2) +{ + uint median; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3U, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, median); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3U, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, median, median); + + return median; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Max3F +* +* Returns the maximum value of the three floating point source arguments. 
+* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_Max3F(float src0, float src1, float src2) +{ + uint maximum; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3F, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), maximum); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3F, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), maximum, maximum); + + return asfloat(maximum); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_Max3U +* +* Returns the maximum value of the three unsigned integer source arguments. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_Max3U(uint src0, uint src1, uint src2) +{ + uint maximum; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3U, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, maximum); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3U, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, maximum, maximum); + + return maximum; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_IjBarycentricCoords +* +* Returns the (i, j) barycentric coordinate pair for this shader invocation with the specified +* interpolation mode at the specified pixel 
location. Should not be used for "pull-model" interpolation, +* PullModelBarycentricCoords should be used instead +* +* Can only be used in pixel shader stages. +* +************************************************************************************************************* +*/ +float2 AmdDxExtShaderIntrinsics_IjBarycentricCoords(uint interpMode) +{ + uint2 retVal; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + interpMode); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, 0, 0, retVal.x); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + interpMode); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, retVal.x, 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_PullModelBarycentricCoords +* +* Returns the (1/W,1/I,1/J) coordinates at the pixel center which can be used for custom interpolation at +* any location in the pixel. +* +* Can only be used in pixel shader stages. 
+* +************************************************************************************************************* +*/ +float3 AmdDxExtShaderIntrinsics_PullModelBarycentricCoords() +{ + uint3 retVal; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, 0, 0, retVal.x); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, retVal.x, 0, retVal.y); + + uint instruction3 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_2, + AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction3, retVal.y, 0, retVal.z); + + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_VertexParameter +* +* Returns the triangle's parameter information at the specified triangle vertex. +* The vertex and parameter indices must specified as immediate values. +* +* Only available in pixel shader stages. 
+* +************************************************************************************************************* +*/ +float4 AmdDxExtShaderIntrinsics_VertexParameter(uint vertexIdx, uint parameterIdx) +{ + uint4 retVal; + uint4 instruction; + + instruction.x = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentX << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.x, 0, 0, retVal.x); + + instruction.y = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentY << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.y, 0, 0, retVal.y); + + instruction.z = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentZ << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.z, 0, 0, retVal.z); + + instruction.w = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentW << 
AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.w, 0, 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_VertexParameterComponent +* +* Returns the triangle's parameter information at the specified triangle vertex and component. +* The vertex, parameter and component indices must be specified as immediate values. +* +* Only available in pixel shader stages. +* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_VertexParameterComponent(uint vertexIdx, uint parameterIdx, uint componentIdx) +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (componentIdx << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetViewportIndex +* +* Returns current viewport index for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Only available in vertex/geometry/domain shader stages. 
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetViewportIndex() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_ViewportIndex, 0, 0); + + retVal = asuint(AmdDxExtShaderIntrinsicsResource.SampleLevel(AmdDxExtShaderIntrinsicsSamplerState, + float3(0, 0, 0), + asfloat(instruction)).x); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetViewportIndexPsOnly +* +* Returns current viewport index for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Only available in pixel shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetViewportIndexPsOnly() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_ViewportIndex, 0, 0); + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetRTArraySlice +* +* Returns current RT array slice for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Only available in vertex/geometry/domain shader stages. 
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetRTArraySlice() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_RtArraySlice, 0, 0); + + retVal = asuint(AmdDxExtShaderIntrinsicsResource.SampleLevel(AmdDxExtShaderIntrinsicsSamplerState, + float3(0, 0, 0), + asfloat(instruction)).x); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetRTArraySlicePsOnly +* +* Returns current RT array slice for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Only available in pixel shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetRTArraySlicePsOnly() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_RtArraySlice, 0, 0); + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce +* +* The following functions perform the specified reduction operation across a wavefront. +* +* Available in all shader stages. 
+* +************************************************************************************************************* +*/ + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float2 +************************************************************************************************************* +*/ +float2 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint2 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float3 +************************************************************************************************************* +*/ +float3 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float3 src) +{ + uint 
instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint3 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float4 +************************************************************************************************************* +*/ +float4 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint4 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int +************************************************************************************************************* +*/ +int AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int src) +{ + 
uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int2 +************************************************************************************************************* +*/ +int2 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint2 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int3 +************************************************************************************************************* +*/ +int3 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int3 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint3 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return retVal; +} + 
+/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int4 +************************************************************************************************************* +*/ +int4 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint4 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return retVal; +} + + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan +* +* The following functions perform the specified scan operation across a wavefront. +* +* Available in all shader stages. 
+* +************************************************************************************************************* +*/ + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float2 +************************************************************************************************************* +*/ +float2 AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint2 retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float3 
+************************************************************************************************************* +*/ +float3 AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float3 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint3 retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float4 +************************************************************************************************************* +*/ +float4 AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint4 retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** 
+************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetDrawIndex +* +* Returns the 0-based draw index in an indirect draw. Always returns 0 for direct draws. +* +* Only available in vertex shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetDrawIndex() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_DrawIndex, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetBaseInstance +* +* Returns the StartInstanceLocation parameter passed to direct or indirect drawing commands. +* +* Only available in vertex shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetBaseInstance() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaseInstance, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetBaseVertex +* +* For non-indexed draw commands, returns the StartVertexLocation parameter. For indexed draw commands, +* returns the BaseVertexLocation parameter. +* +* Only available in vertex shader stage. 
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetBaseVertex() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaseVertex, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_MakeAtomicInstructions +* +* Creates uint4 with x/y/z/w components containing phase 0/1/2/3 for atomic instructions. +* NOTE: This is an internal function and should not be called by the source HLSL shader directly. +* +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_MakeAtomicInstructions(uint op) +{ + uint4 instructions; + instructions.x = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_0, op); + instructions.y = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_1, op); + instructions.z = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_2, op); + instructions.w = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_3, op); + return instructions; +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicOp +* +* Creates intrinstic instructions for the specified atomic op. +* NOTE: These are internal functions and should not be called by the source HLSL shader directly. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWByteAddressBuffer uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav.Store(retVal.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWTexture1D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[retVal.x] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWTexture2D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint2(retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWTexture3D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = 
AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint3(retVal.x, retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWByteAddressBuffer uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav.Store(retVal.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWTexture1D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[retVal.x] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWTexture2D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 
retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint2(retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint3(retVal.x, retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicMinU64 +* +* Performs 64-bit atomic minimum of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicMaxU64 +* +* Performs 64-bit atomic maximum of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicAndU64 +* +* Performs 64-bit atomic AND of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AndU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AndU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AndU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AndU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicOrU64 +* +* Performs 64-bit atomic OR of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_OrU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_OrU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_OrU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_OrU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicXorU64 +* +* Performs 64-bit atomic XOR of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XorU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XorU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XorU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XorU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicAddU64 +* +* Performs 64-bit atomic add of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AddU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AddU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AddU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_AddU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicXchgU64 +* +* Performs 64-bit atomic exchange of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicCmpXchgU64 +* +* Performs 64-bit atomic compare of comparison value with UAV at address, stores value if values match, +* returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64( + RWByteAddressBuffer uav, uint address, uint2 compare_value, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), compare_value, value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64( + RWTexture1D uav, uint address, uint2 compare_value, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), compare_value, value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64( + RWTexture2D uav, uint2 address, uint2 compare_value, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), compare_value, value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64( + RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), compare_value, value, op); +} + + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +* +* Performs reduction operation across a wave and returns the result of the reduction (sum of all threads in a wave) +* to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WaveActiveSum(float src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WaveActiveSum(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WaveActiveSum(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WaveActiveSum(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveSum(int src) +{ + return 
AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveSum(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveSum(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveSum(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveSum(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src); +} + +/** +*********************************************************************************************************************** +* 
AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveSum(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveSum(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveSum(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +* +* Performs reduction operation across a wave and returns the result of the reduction (product of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WaveActiveProduct(float src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WaveActiveProduct(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WaveActiveProduct(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WaveActiveProduct(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int 
AmdDxExtShaderIntrinsics_WaveActiveProduct(int src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveProduct(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveProduct(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveProduct(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveProduct(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveProduct(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveProduct(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveProduct(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +* +* Performs reduction operation across a wave and returns the result of the reduction (minimum of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WaveActiveMin(float src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WaveActiveMin(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WaveActiveMin(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WaveActiveMin(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveMin(int src) +{ + return 
AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveMin(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveMin(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveMin(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveMin(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src); +} + +/** +*********************************************************************************************************************** +* 
AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveMin(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveMin(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveMin(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +* +* Performs reduction operation across a wave and returns the result of the reduction (maximum of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WaveActiveMax(float src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WaveActiveMax(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WaveActiveMax(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WaveActiveMax(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveMax(int src) +{ + return 
AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveMax(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveMax(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveMax(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveMax(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* 
AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveMax(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveMax(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveMax(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +* +* Performs reduction operation across a wave and returns the result of the reduction (Bitwise AND of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* Performs reduction 
operation across a wave and returns the result of the reduction (Bitwise OR of all threads in a
+* wave) to all participating lanes.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/

+/**
+***********************************************************************************************************************
+* AmdDxExtShaderIntrinsics_WaveActiveBitOr
+***********************************************************************************************************************
+*/
+int AmdDxExtShaderIntrinsics_WaveActiveBitOr(int src)
+{
+    return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src);
+}

+/**
+***********************************************************************************************************************
+* AmdDxExtShaderIntrinsics_WaveActiveBitOr
+***********************************************************************************************************************
+*/
+int2 AmdDxExtShaderIntrinsics_WaveActiveBitOr(int2 src)
+{
+    return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src);
+}

+/**
+***********************************************************************************************************************
+* AmdDxExtShaderIntrinsics_WaveActiveBitOr
+***********************************************************************************************************************
+*/
+int3 AmdDxExtShaderIntrinsics_WaveActiveBitOr(int3 src)
+{
+    return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src);
+}

+/**
+***********************************************************************************************************************
+* AmdDxExtShaderIntrinsics_WaveActiveBitOr
+***********************************************************************************************************************
+*/
+int4 AmdDxExtShaderIntrinsics_WaveActiveBitOr(int4 src)
+{
+    
return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* Performs reduction operation across a wave and returns the result of the reduction (Bitwise XOR of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveBitXor(int src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveBitXor(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveBitXor(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* 
AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveBitXor(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint4 
AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* Performs a prefix (exclusive) scan operation across a wave and returns the resulting sum to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixSum(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixSum(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixSum(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixSum(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixSum(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan( + AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixSum(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixSum(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixSum(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixSum(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixSum(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixSum(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixSum(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* Performs a prefix scan operation across a wave and returns the resulting product to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixProduct(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixProduct(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixProduct(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixProduct(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixProduct(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixProduct(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixProduct(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixProduct(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixProduct(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixProduct(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixProduct(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixProduct(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* Performs a prefix scan operation across a wave and returns the resulting minimum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixMin(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixMin(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixMin(float3 src) +{ + 
return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixMin(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixMin(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixMin(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixMin(int3 src) +{ + return 
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixMin(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixMin(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixMin(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixMin(uint3 src) +{ + return 
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixMin(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* Performs a prefix scan operation across a wave and returns the resulting maximum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixMax(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixMax(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax 
+*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixMax(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixMax(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixMax(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixMax(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax 
+*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixMax(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixMax(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixMax(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixMax(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax 
+*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixMax(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixMax(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +* +* Performs a Postfix (Inclusive) scan operation across a wave and returns the resulting sum to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixSum(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixSum(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixSum(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixSum(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum 
+*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixSum(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixSum(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixSum(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixSum(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum 
+*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixSum(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixSum(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixSum(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixSum(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +* +* Performs a Postfix scan 
operation across a wave and returns the resulting product to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixProduct(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixProduct(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixProduct(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixProduct(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixProduct(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixProduct(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixProduct(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixProduct(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixProduct(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixProduct(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixProduct(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixProduct(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +* +* Performs a Postfix scan operation across a wave and returns the resulting minimum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixMin(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixMin(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixMin(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixMin(float4 src) +{ + 
return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixMin(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixMin(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixMin(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixMin(int4 src) +{ + return 
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixMin(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixMin(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixMin(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixMin(uint4 src) +{ + return 
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +* +* Performs a Postfix scan operation across a wave and returns the resulting maximum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixMax(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixMax(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixMax(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixMax(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixMax(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixMax(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixMax(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixMax(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixMax(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixMax(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixMax(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixMax(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + + +#endif // _AMDDXEXTSHADERINTRINSICS_HLSL_ diff --git a/Source/ThirdParty/AGS/ags_shader_intrinsics_dx12.hlsl b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx12.hlsl new file mode 100644 index 000000000..a46fb02ed --- /dev/null +++ b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx12.hlsl @@ -0,0 +1,3958 @@ +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +/** +*********************************************************************************************************************** +* @file ags_shader_intrinsics_dx12.hlsl +* @brief +* AMD D3D Shader Intrinsics HLSL include file. +* This include file contains the Shader Intrinsics definitions used in shader code by the application. +* @note +* This does not work with immediate values or values that the compiler determines can produces denorms +* +*********************************************************************************************************************** +*/ + +#ifndef _AMDEXTD3DSHADERINTRINICS_HLSL +#define _AMDEXTD3DSHADERINTRINICS_HLSL + +// Default AMD shader intrinsics designated SpaceId. +#define AmdExtD3DShaderIntrinsicsSpaceId space2147420894 + +// Dummy UAV used to access shader intrinsics. Applications need to add a root signature entry for this resource in +// order to use shader extensions. Applications may specify an alternate UAV binding by defining AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE. +#ifdef AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE +RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE); +#else +RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderIntrinsicsSpaceId); +#endif + +/** +*********************************************************************************************************************** +* Definitions to construct the intrinsic instruction composed of an opcode and optional immediate data. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsics_MagicCodeShift 28 +#define AmdExtD3DShaderIntrinsics_MagicCodeMask 0xf +#define AmdExtD3DShaderIntrinsics_OpcodePhaseShift 24 +#define AmdExtD3DShaderIntrinsics_OpcodePhaseMask 0x3 +#define AmdExtD3DShaderIntrinsics_DataShift 8 +#define AmdExtD3DShaderIntrinsics_DataMask 0xffff +#define AmdExtD3DShaderIntrinsics_OpcodeShift 0 +#define AmdExtD3DShaderIntrinsics_OpcodeMask 0xff + +#define AmdExtD3DShaderIntrinsics_MagicCode 0x5 + + +/** +*********************************************************************************************************************** +* Intrinsic opcodes. +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsOpcode_Readfirstlane 0x01 +#define AmdExtD3DShaderIntrinsicsOpcode_Readlane 0x02 +#define AmdExtD3DShaderIntrinsicsOpcode_LaneId 0x03 +#define AmdExtD3DShaderIntrinsicsOpcode_Swizzle 0x04 +#define AmdExtD3DShaderIntrinsicsOpcode_Ballot 0x05 +#define AmdExtD3DShaderIntrinsicsOpcode_MBCnt 0x06 +#define AmdExtD3DShaderIntrinsicsOpcode_Min3U 0x07 +#define AmdExtD3DShaderIntrinsicsOpcode_Min3F 0x08 +#define AmdExtD3DShaderIntrinsicsOpcode_Med3U 0x09 +#define AmdExtD3DShaderIntrinsicsOpcode_Med3F 0x0a +#define AmdExtD3DShaderIntrinsicsOpcode_Max3U 0x0b +#define AmdExtD3DShaderIntrinsicsOpcode_Max3F 0x0c +#define AmdExtD3DShaderIntrinsicsOpcode_BaryCoord 0x0d +#define AmdExtD3DShaderIntrinsicsOpcode_VtxParam 0x0e +#define AmdExtD3DShaderIntrinsicsOpcode_Reserved1 0x0f +#define AmdExtD3DShaderIntrinsicsOpcode_Reserved2 0x10 +#define AmdExtD3DShaderIntrinsicsOpcode_Reserved3 0x11 +#define AmdExtD3DShaderIntrinsicsOpcode_WaveReduce 0x12 +#define AmdExtD3DShaderIntrinsicsOpcode_WaveScan 0x13 +#define AmdExtD3DShaderIntrinsicsOpcode_LoadDwAtAddr 0x14 +#define 
AmdExtD3DShaderIntrinsicsOpcode_DrawIndex 0x17 +#define AmdExtD3DShaderIntrinsicsOpcode_AtomicU64 0x18 +#define AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize 0x19 +#define AmdExtD3DShaderIntrinsicsOpcode_BaseInstance 0x1a +#define AmdExtD3DShaderIntrinsicsOpcode_BaseVertex 0x1b +#define AmdExtD3DShaderIntrinsicsOpcode_FloatConversion 0x1c +#define AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt 0x1d +#define AmdExtD3DShaderIntrinsicsOpcode_ShaderClock 0x1f +#define AmdExtD3DShaderIntrinsicsOpcode_ShaderRealtimeClock 0x20 + +/** +*********************************************************************************************************************** +* Intrinsic opcode phases. +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsOpcodePhase_0 0x0 +#define AmdExtD3DShaderIntrinsicsOpcodePhase_1 0x1 +#define AmdExtD3DShaderIntrinsicsOpcodePhase_2 0x2 +#define AmdExtD3DShaderIntrinsicsOpcodePhase_3 0x3 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsWaveOp defines for supported operations. Can be used as the parameter for the +* AmdExtD3DShaderIntrinsicsOpcode_WaveOp intrinsic. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsWaveOp_AddF 0x01 +#define AmdExtD3DShaderIntrinsicsWaveOp_AddI 0x02 +#define AmdExtD3DShaderIntrinsicsWaveOp_AddU 0x03 +#define AmdExtD3DShaderIntrinsicsWaveOp_MulF 0x04 +#define AmdExtD3DShaderIntrinsicsWaveOp_MulI 0x05 +#define AmdExtD3DShaderIntrinsicsWaveOp_MulU 0x06 +#define AmdExtD3DShaderIntrinsicsWaveOp_MinF 0x07 +#define AmdExtD3DShaderIntrinsicsWaveOp_MinI 0x08 +#define AmdExtD3DShaderIntrinsicsWaveOp_MinU 0x09 +#define AmdExtD3DShaderIntrinsicsWaveOp_MaxF 0x0a +#define AmdExtD3DShaderIntrinsicsWaveOp_MaxI 0x0b +#define AmdExtD3DShaderIntrinsicsWaveOp_MaxU 0x0c +#define AmdExtD3DShaderIntrinsicsWaveOp_And 0x0d // Reduction only +#define AmdExtD3DShaderIntrinsicsWaveOp_Or 0x0e // Reduction only +#define AmdExtD3DShaderIntrinsicsWaveOp_Xor 0x0f // Reduction only + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsWaveOp masks and shifts for opcode and flags +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift 0 +#define AmdExtD3DShaderIntrinsicsWaveOp_OpcodeMask 0xff +#define AmdExtD3DShaderIntrinsicsWaveOp_FlagShift 8 +#define AmdExtD3DShaderIntrinsicsWaveOp_FlagMask 0xff + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsWaveOp flags for use with AmdExtD3DShaderIntrinsicsOpcode_WaveScan. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsWaveOp_Inclusive 0x01 +#define AmdExtD3DShaderIntrinsicsWaveOp_Exclusive 0x02 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsSwizzle defines for common swizzles. Can be used as the operation parameter for the +* AmdExtD3DShaderIntrinsics_Swizzle intrinsic. +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX1 0x041f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX2 0x081f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX4 0x101f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX8 0x201f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX16 0x401f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX2 0x041f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4 0x0c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX8 0x1c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX16 0x3c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX32 0x7c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX2 0x003e +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX4 0x003c +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX8 0x0038 +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX16 0x0030 +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX32 0x0020 + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsBarycentric defines for barycentric interpolation mode. To be used with +* AmdExtD3DShaderIntrinsicsOpcode_IjBarycentricCoords to specify the interpolation mode. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsBarycentric_LinearCenter 0x1 +#define AmdExtD3DShaderIntrinsicsBarycentric_LinearCentroid 0x2 +#define AmdExtD3DShaderIntrinsicsBarycentric_LinearSample 0x3 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspCenter 0x4 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspCentroid 0x5 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspSample 0x6 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel 0x7 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsBarycentric defines for specifying vertex and parameter indices. To be used as inputs to +* the AmdExtD3DShaderIntrinsicsOpcode_VertexParameter function +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsBarycentric_Vertex0 0x0 +#define AmdExtD3DShaderIntrinsicsBarycentric_Vertex1 0x1 +#define AmdExtD3DShaderIntrinsicsBarycentric_Vertex2 0x2 + +#define AmdExtD3DShaderIntrinsicsBarycentric_Param0 0x00 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param1 0x01 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param2 0x02 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param3 0x03 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param4 0x04 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param5 0x05 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param6 0x06 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param7 0x07 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param8 0x08 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param9 0x09 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param10 0x0a +#define AmdExtD3DShaderIntrinsicsBarycentric_Param11 0x0b +#define AmdExtD3DShaderIntrinsicsBarycentric_Param12 0x0c +#define AmdExtD3DShaderIntrinsicsBarycentric_Param13 0x0d 
+#define AmdExtD3DShaderIntrinsicsBarycentric_Param14 0x0e +#define AmdExtD3DShaderIntrinsicsBarycentric_Param15 0x0f +#define AmdExtD3DShaderIntrinsicsBarycentric_Param16 0x10 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param17 0x11 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param18 0x12 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param19 0x13 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param20 0x14 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param21 0x15 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param22 0x16 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param23 0x17 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param24 0x18 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param25 0x19 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param26 0x1a +#define AmdExtD3DShaderIntrinsicsBarycentric_Param27 0x1b +#define AmdExtD3DShaderIntrinsicsBarycentric_Param28 0x1c +#define AmdExtD3DShaderIntrinsicsBarycentric_Param29 0x1d +#define AmdExtD3DShaderIntrinsicsBarycentric_Param30 0x1e +#define AmdExtD3DShaderIntrinsicsBarycentric_Param31 0x1f + +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentX 0x0 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentY 0x1 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentZ 0x2 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentW 0x3 + +#define AmdExtD3DShaderIntrinsicsBarycentric_ParamShift 0 +#define AmdExtD3DShaderIntrinsicsBarycentric_ParamMask 0x1f +#define AmdExtD3DShaderIntrinsicsBarycentric_VtxShift 0x5 +#define AmdExtD3DShaderIntrinsicsBarycentric_VtxMask 0x3 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift 0x7 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentMask 0x3 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsAtomic defines for supported operations. Can be used as the parameter for the +* AmdExtD3DShaderIntrinsicsOpcode_AtomicU64 intrinsic. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsAtomicOp_MinU64 0x01 +#define AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64 0x02 +#define AmdExtD3DShaderIntrinsicsAtomicOp_AndU64 0x03 +#define AmdExtD3DShaderIntrinsicsAtomicOp_OrU64 0x04 +#define AmdExtD3DShaderIntrinsicsAtomicOp_XorU64 0x05 +#define AmdExtD3DShaderIntrinsicsAtomicOp_AddU64 0x06 +#define AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64 0x07 +#define AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64 0x08 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsFloatConversion defines for supported rounding modes from float to float16 conversions. +* To be used as an input AmdExtD3DShaderIntrinsicsOpcode_FloatConversion instruction +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near 0x01 +#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf 0x02 +#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf 0x03 + + +/** +*********************************************************************************************************************** +* MakeAmdShaderIntrinsicsInstruction +* +* Creates instruction from supplied opcode and immediate data. +* NOTE: This is an internal function and should not be called by the source HLSL shader directly. 
+* +*********************************************************************************************************************** +*/ +uint MakeAmdShaderIntrinsicsInstruction(uint opcode, uint opcodePhase, uint immediateData) +{ + return ((AmdExtD3DShaderIntrinsics_MagicCode << AmdExtD3DShaderIntrinsics_MagicCodeShift) | + (immediateData << AmdExtD3DShaderIntrinsics_DataShift) | + (opcodePhase << AmdExtD3DShaderIntrinsics_OpcodePhaseShift) | + (opcode << AmdExtD3DShaderIntrinsics_OpcodeShift)); +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ReadfirstlaneF +* +* Returns the value of float src for the first active lane of the wavefront. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_ReadfirstlaneF(float src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readfirstlane, 0, 0); + + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + return asfloat(retVal); +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ReadfirstlaneU +* +* Returns the value of unsigned integer src for the first active lane of the wavefront. 
+* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_ReadfirstlaneU(uint src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readfirstlane, 0, 0); + + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Readlane +* +* Returns the value of float src for the lane within the wavefront specified by laneId. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_ReadlaneF(float src, uint laneId) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readlane, 0, laneId); + + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + return asfloat(retVal); +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ReadlaneU +* +* Returns the value of unsigned integer src for the lane within the wavefront specified by laneId. 
+* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_ReadlaneU(uint src, uint laneId) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readlane, 0, laneId); + + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_LaneId +* +* Returns the current lane id for the thread within the wavefront. +* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_LaneId() +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_LaneId, 0, 0); + + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_GetWaveSize +* +* Returns the wave size for the current shader, including active, inactive and helper lanes. 
+* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_GetWaveSize() +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize, 0, 0); + + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Swizzle +* +* Generic instruction to shuffle the float src value among different lanes as specified by the operation. +* Note that the operation parameter must be an immediately specified value not a value from a variable. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_SwizzleF(float src, uint operation) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Swizzle, 0, operation); + + uint retVal; + //InterlockedCompareExchange(AmdExtD3DShaderIntrinsicsUAV[instruction], asuint(src), 0, retVal); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + return asfloat(retVal); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_SwizzleU +* +* Generic instruction to shuffle the unsigned integer src value among different lanes as specified by the operation. +* Note that the operation parameter must be an immediately specified value not a value from a variable. 
+* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_SwizzleU(uint src, uint operation) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Swizzle, 0, operation); + + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Ballot +* +* Given an input predicate returns a bit mask indicating for which lanes the predicate is true. +* Inactive or non-existent lanes will always return 0. The number of existent lanes is the wavefront size. +* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_Ballot(bool predicate) +{ + uint instruction; + + uint retVal1; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Ballot, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, predicate, 0, retVal1); + + uint retVal2; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Ballot, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, predicate, 0, retVal2); + + return uint2(retVal1, retVal2); +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_BallotAny +* +* Convenience routine that uses Ballot and returns true if for any of the active lanes the predicate is true. 
+* +*********************************************************************************************************************** +*/ +bool AmdExtD3DShaderIntrinsics_BallotAny(bool predicate) +{ + uint2 retVal = AmdExtD3DShaderIntrinsics_Ballot(predicate); + + return ((retVal.x | retVal.y) != 0 ? true : false); +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_BallotAll +* +* Convenience routine that uses Ballot and returns true if for all of the active lanes the predicate is true. +* +*********************************************************************************************************************** +*/ +bool AmdExtD3DShaderIntrinsics_BallotAll(bool predicate) +{ + uint2 ballot = AmdExtD3DShaderIntrinsics_Ballot(predicate); + + uint2 execMask = AmdExtD3DShaderIntrinsics_Ballot(true); + + return ((ballot.x == execMask.x) && (ballot.y == execMask.y)); +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_MBCnt +* +* Returns the masked bit count of the source register for this thread within all the active threads within a +* wavefront. +* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_MBCnt(uint2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_MBCnt, 0, 0); + + uint retVal; + + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, src.y, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Min3F +* +* Returns the minimum value of the three floating point source arguments. 
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_Min3F(float src0, float src1, float src2) +{ + uint minimum; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3F, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), minimum); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3F, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), minimum, minimum); + + return asfloat(minimum); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Min3U +* +* Returns the minimum value of the three unsigned integer source arguments. +* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_Min3U(uint src0, uint src1, uint src2) +{ + uint minimum; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3U, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, minimum); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3U, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, minimum, minimum); + + return minimum; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Med3F +* +* Returns the median value of the three floating point source arguments. 
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_Med3F(float src0, float src1, float src2) +{ + uint median; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3F, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), median); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3F, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), median, median); + + return asfloat(median); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Med3U +* +* Returns the median value of the three unsigned integer source arguments. +* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_Med3U(uint src0, uint src1, uint src2) +{ + uint median; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3U, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, median); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3U, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, median, median); + + return median; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Max3F +* +* Returns the maximum value of the three floating point source arguments. 
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_Max3F(float src0, float src1, float src2) +{ + uint maximum; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3F, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), maximum); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3F, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), maximum, maximum); + + return asfloat(maximum); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_Max3U +* +* Returns the maximum value of the three unsigned integer source arguments. 
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_Max3U(uint src0, uint src1, uint src2)
+{
+    uint maximum;
+
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3U,
+                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                           0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, maximum);
+
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3U,
+                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                           0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, maximum, maximum);
+
+    return maximum;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_IjBarycentricCoords
+*
+* Returns the (i, j) barycentric coordinate pair for this shader invocation with the specified interpolation mode at
+* the specified pixel location. It should not be used for "pull-model" interpolation; use
+* PullModelBarycentricCoords instead.
+*
+* Can only be used in pixel shader stages.
+* +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_IjBarycentricCoords(uint interpMode) +{ + uint2 retVal; + + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + interpMode); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, 0, 0, retVal.x); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + interpMode); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, retVal.x, 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_PullModelBarycentricCoords +* +* Returns the (1/W,1/I,1/J) coordinates at the pixel center which can be used for custom interpolation at any +* location in the pixel. +* +* Can only be used in pixel shader stages. 
+*
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_PullModelBarycentricCoords()
+{
+    uint3 retVal;
+
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                           AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, 0, 0, retVal.x);
+
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                           AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, retVal.x, 0, retVal.y);
+
+    uint instruction3 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_2,
+                                                           AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction3, retVal.y, 0, retVal.z);
+
+    return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z));
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_VertexParameter
+*
+* Returns the triangle's parameter information at the specified triangle vertex.
+* The vertex and parameter indices must be specified as immediate values.
+*
+* Only available in pixel shader stages.
+* +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_VertexParameter(uint vertexIdx, uint parameterIdx) +{ + uint4 retVal; + uint4 instruction; + + instruction.x = MakeAmdShaderIntrinsicsInstruction( + AmdExtD3DShaderIntrinsicsOpcode_VtxParam, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdExtD3DShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdExtD3DShaderIntrinsicsBarycentric_ParamShift) | + (AmdExtD3DShaderIntrinsicsBarycentric_ComponentX << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift))); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.x, 0, 0, retVal.x); + + instruction.y = MakeAmdShaderIntrinsicsInstruction( + AmdExtD3DShaderIntrinsicsOpcode_VtxParam, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdExtD3DShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdExtD3DShaderIntrinsicsBarycentric_ParamShift) | + (AmdExtD3DShaderIntrinsicsBarycentric_ComponentY << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift))); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.y, 0, 0, retVal.y); + + instruction.z = MakeAmdShaderIntrinsicsInstruction( + AmdExtD3DShaderIntrinsicsOpcode_VtxParam, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdExtD3DShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdExtD3DShaderIntrinsicsBarycentric_ParamShift) | + (AmdExtD3DShaderIntrinsicsBarycentric_ComponentZ << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift))); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.z, 0, 0, retVal.z); + + instruction.w = MakeAmdShaderIntrinsicsInstruction( + AmdExtD3DShaderIntrinsicsOpcode_VtxParam, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdExtD3DShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdExtD3DShaderIntrinsicsBarycentric_ParamShift) | + 
(AmdExtD3DShaderIntrinsicsBarycentric_ComponentW << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift))); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.w, 0, 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_VertexParameterComponent +* +* Returns the triangle's parameter information at the specified triangle vertex and component. +* The vertex, parameter and component indices must be specified as immediate values. +* +* Only available in pixel shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_VertexParameterComponent(uint vertexIdx, uint parameterIdx, uint componentIdx) +{ + uint retVal; + uint instruction = + MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_VtxParam, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdExtD3DShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdExtD3DShaderIntrinsicsBarycentric_ParamShift) | + (componentIdx << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift))); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return asfloat(retVal); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce +* +* Performs reduction operation on wavefront (thread group) data. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : float +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return asfloat(retVal); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : float2 +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + + uint2 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : float3 +*********************************************************************************************************************** 
+*/ +float3 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float3 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + + uint3 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : float4 +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + + uint4 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : int 
+*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + + int retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : int2 +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + + int2 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.y, 0, retVal.y); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : int3 +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int3 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + + int3 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, 0, retVal.x); + 
AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.y, 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.z, 0, retVal.z); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveReduce : int4 +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)); + + int4 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.y, 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.z, 0, retVal.z); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.w, 0, retVal.w); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan +* +* Performs scan operation on wavefront (thread group) data. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : float +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + uint retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return asfloat(retVal); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : float2 +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float2 src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + + uint2 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : float3 +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float3 src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + + uint3 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : float4 +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float4 src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + + uint4 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + 
AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : int +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + + int retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : int2 +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int2 src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + + int2 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.y, 0, retVal.y); + + return retVal; +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : int3 +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int3 src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + + int3 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.y, 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.z, 0, retVal.z); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveScan : int4 +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int4 src) +{ + const uint waveScanOp = (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift); + + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + waveScanOp); + + int4 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.y, 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.z, 0, retVal.z); + 
AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.w, 0, retVal.w); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_LoadDwordAtAddr +* +* Loads a DWORD from GPU memory from a given 64-bit GPU VA and 32-bit offset. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_LoadDwordAtAddr +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(uint gpuVaLoBits, uint gpuVaHiBits, uint offset) +{ + uint retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_LoadDwAtAddr, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, gpuVaLoBits, gpuVaHiBits, retVal); + + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_LoadDwAtAddr, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, offset, 0, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx2 +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx2(uint gpuVaLoBits, uint gpuVaHiBits, uint offset) +{ + uint2 retVal; + + retVal.x = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset); + 
retVal.y = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 0x4); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx4 +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx4(uint gpuVaLoBits, uint gpuVaHiBits, uint offset) +{ + uint4 retVal; + + retVal.x = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset); + retVal.y = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 0x4); + retVal.z = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 0x8); + retVal.w = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 0xC); + + return retVal; +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_GetDrawIndex +* +* Returns the 0-based draw index in an indirect draw. Always returns 0 for direct draws. +* +* Available in vertex shader stage only. +* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_GetDrawIndex() +{ + uint retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_DrawIndex, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_GetBaseInstance +* +* Returns the StartInstanceLocation parameter passed to direct or indirect drawing commands. 
+* +* Available in vertex shader stage only. +* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_GetBaseInstance() +{ + uint retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaseInstance, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_GetBaseVertex +* +* For non-indexed draw commands, returns the StartVertexLocation parameter. For indexed draw commands, returns the +* BaseVertexLocation parameter. +* +* Available in vertex shader stage only. +* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_GetBaseVertex() +{ + uint retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaseVertex, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ReadlaneAt : uint +* +* Returns the value of the source for the given lane index within the specified wave. The lane index +* can be non-uniform across the wave. 
+* +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_ReadlaneAt(uint src, uint laneId) +{ + uint retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, laneId, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ReadlaneAt : int +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_ReadlaneAt(int src, uint laneId) +{ + uint retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), laneId, retVal); + + return asint(retVal); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ReadlaneAt : float +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_ReadlaneAt(float src, uint laneId) +{ + uint retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), laneId, retVal); + + return asfloat(retVal); +} + +/** +*********************************************************************************************************************** +* 
AmdExtD3DShaderIntrinsics_ConvertF32toF16 +* +* Converts 32bit floating point numbers into 16bit floating point number using a specified rounding mode +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ConvertF32toF16 - helper to convert f32 to f16 number +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16(in uint convOp, in float3 val) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_FloatConversion, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + convOp); + + uint3 retVal; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(val.x), 0, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(val.y), 0, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(val.z), 0, retVal.z); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ConvertF32toF16Near - convert f32 to f16 number using nearest rounding mode +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16Near(in float3 inVec) +{ + return AmdExtD3DShaderIntrinsics_ConvertF32toF16(AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near, inVec); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ConvertF32toF16Near - convert f32 to f16 number using -inf 
rounding mode
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf(in float3 inVec)
+{
+    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf, inVec);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf - convert f32 to f16 number using +inf rounding mode
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf(in float3 inVec)
+{
+    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf, inVec);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ShaderClock
+*
+* Returns the current value of the timestamp clock. The value monotonically increments and will wrap after it
+* exceeds the maximum representable value. The units are not defined and need not be constant, and the value
+* is not guaranteed to be dynamically uniform across a single draw or dispatch.
+*
+* The function serves as a code motion barrier. Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ShaderClock +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_ShaderClock() +{ + uint2 retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderClock, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal.x); + + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderClock, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal.y); + + return retVal; +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ShaderRealtimeClock +* +* Returns a value representing the real-time clock that is globally coherent by all invocations on the GPU. +* The units are not defined and the value will wrap after exceeding the maximum representable value. +* +* The function serves as a code motion barrier. Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_ShaderRealtimeClock +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_ShaderRealtimeClock() +{ + uint2 retVal; + + uint instruction; + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderRealtimeClock, + AmdExtD3DShaderIntrinsicsOpcodePhase_0, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal.x); + + instruction = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderRealtimeClock, + AmdExtD3DShaderIntrinsicsOpcodePhase_1, + 0); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal.y); + + return retVal; +} + + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_MakeAtomicInstructions +* +* Creates uint4 with x/y/z/w components containing phase 0/1/2/3 for atomic instructions. +* NOTE: This is an internal function and should not be called by the source HLSL shader directly. 
+*
+***********************************************************************************************************************
+*/
+uint4 AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(uint op)
+{
+    uint4 instructions;
+    instructions.x = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_0, op);
+    instructions.y = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_1, op);
+    instructions.z = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_2, op);
+    instructions.w = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_3, op);
+    return instructions;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicOp
+*
+* Creates intrinsic instructions for the specified atomic op.
+* NOTE: These are internal functions and should not be called by the source HLSL shader directly. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWByteAddressBuffer uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav.Store(retVal.x, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWTexture1D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[retVal.x] = retVal.y; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWTexture2D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint2(retVal.x, retVal.x)] = retVal.y; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWTexture3D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 
instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint3(retVal.x, retVal.x, retVal.x)] = retVal.y; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOp( + RWByteAddressBuffer uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav.Store(retVal.x, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOp( + RWTexture1D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[retVal.x] = retVal.y; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOp( + RWTexture2D uav, uint3 address, uint2 compare_value, 
uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint2(retVal.x, retVal.x)] = retVal.y; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOp( + RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[uint3(retVal.x, retVal.x, retVal.x)] = retVal.y; + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicMinU64 +* +* Performs 64-bit atomic minimum of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicMaxU64 +* +* Performs 64-bit atomic maximum of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicAndU64 +* +* Performs 64-bit atomic AND of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicOrU64 +* +* Performs 64-bit atomic OR of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicXorU64 +* +* Performs 64-bit atomic XOR of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicAddU64 +* +* Performs 64-bit atomic add of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicXchgU64 +* +* Performs 64-bit atomic exchange of value with the UAV at address, returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64 +* +* Performs 64-bit atomic compare of comparison value with UAV at address, stores value if values match, +* returns the original value. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64( + RWByteAddressBuffer uav, uint address, uint2 compare_value, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), compare_value, value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64( + RWTexture1D uav, uint address, uint2 compare_value, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), compare_value, value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64( + RWTexture2D uav, uint2 address, uint2 compare_value, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), compare_value, value, op); +} + +uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64( + RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value) +{ + const uint op = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64; + return AmdExtD3DShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), compare_value, value, op); +} + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +* +* Performs reduction operation across a wave and returns the result of the reduction (sum of all threads in a wave) +* to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WaveActiveSum(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WaveActiveSum(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WaveActiveSum(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WaveActiveSum(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveSum(int src) +{ + return 
AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveSum(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveSum(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveSum(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveSum(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveSum(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveSum(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveSum +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveSum(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +* +* Performs reduction operation across a wave and returns the result of the reduction (product of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WaveActiveProduct(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WaveActiveProduct(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WaveActiveProduct(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WaveActiveProduct(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int 
AmdExtD3DShaderIntrinsics_WaveActiveProduct(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveProduct(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveProduct(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveProduct(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveProduct +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +* +* Performs reduction operation across a wave and returns the result of the reduction (minimum of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WaveActiveMin(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WaveActiveMin(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WaveActiveMin(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WaveActiveMin(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveMin(int src) +{ + return 
AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveMin(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveMin(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveMin(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveMin(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveMin(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveMin(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMin +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveMin(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +* +* Performs reduction operation across a wave and returns the result of the reduction (maximum of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WaveActiveMax(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WaveActiveMax(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WaveActiveMax(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WaveActiveMax(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveMax(int src) +{ + return 
AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveMax(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveMax(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveMax(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveMax(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveMax(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveMax(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveMax(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +* +* Performs reduction operation across a wave and returns the result of the reduction (Bitwise AND of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +* +* Performs 
reduction operation across a wave and returns the result of the reduction (Bitwise OR of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int4 
AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +* +* Performs reduction operation across a wave and returns the result of the reduction (Bitwise XOR of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* 
AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint4 
AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +* +* Performs a prefix (exclusive) scan operation across a wave and returns the resulting sum to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePrefixSum(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePrefixSum(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePrefixSum(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePrefixSum(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePrefixSum(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePrefixSum(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePrefixSum(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePrefixSum(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePrefixSum(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePrefixSum(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePrefixSum(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum 
+***********************************************************************************************************************
+*/
+uint4 AmdExtD3DShaderIntrinsics_WavePrefixSum(uint4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU,
+                                              AmdExtD3DShaderIntrinsicsWaveOp_Exclusive,
+                                              src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WavePrefixProduct
+*
+* Performs a prefix (exclusive) scan operation across a wave and returns the resulting product to all participating lanes.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_WavePrefixProduct(float src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF,
+                                              AmdExtD3DShaderIntrinsicsWaveOp_Exclusive,
+                                              src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WavePrefixProduct
+***********************************************************************************************************************
+*/
+float2 AmdExtD3DShaderIntrinsics_WavePrefixProduct(float2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF,
+                                              AmdExtD3DShaderIntrinsicsWaveOp_Exclusive,
+                                              src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WavePrefixProduct
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_WavePrefixProduct(float3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF,
+                                              AmdExtD3DShaderIntrinsicsWaveOp_Exclusive,
+                                              src);
+}
+
+/**
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePrefixProduct(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePrefixProduct(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePrefixProduct(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePrefixProduct(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} 

// int4 overload of AmdExtD3DShaderIntrinsics_WavePrefixProduct.
int4 AmdExtD3DShaderIntrinsics_WavePrefixProduct(int4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint overload of AmdExtD3DShaderIntrinsics_WavePrefixProduct.
uint AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint2 overload of AmdExtD3DShaderIntrinsics_WavePrefixProduct.
uint2 AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint3 overload of AmdExtD3DShaderIntrinsics_WavePrefixProduct.
uint3 AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint4 overload of AmdExtD3DShaderIntrinsics_WavePrefixProduct.
uint4 AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

//====================================================================================================================
// AmdExtD3DShaderIntrinsics_WavePrefixMin
//
// Performs a prefix (exclusive) scan across the wave and returns the running minimum value to all participating
// lanes. Available in all shader stages.
//====================================================================================================================
float AmdExtD3DShaderIntrinsics_WavePrefixMin(float src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// float2 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
float2 AmdExtD3DShaderIntrinsics_WavePrefixMin(float2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// float3 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
float3 AmdExtD3DShaderIntrinsics_WavePrefixMin(float3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// float4 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
float4 AmdExtD3DShaderIntrinsics_WavePrefixMin(float4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
int AmdExtD3DShaderIntrinsics_WavePrefixMin(int src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int2 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
int2 AmdExtD3DShaderIntrinsics_WavePrefixMin(int2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int3 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
int3 AmdExtD3DShaderIntrinsics_WavePrefixMin(int3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int4 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
int4 AmdExtD3DShaderIntrinsics_WavePrefixMin(int4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
uint AmdExtD3DShaderIntrinsics_WavePrefixMin(uint src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint2 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
uint2 AmdExtD3DShaderIntrinsics_WavePrefixMin(uint2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint3 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
uint3 AmdExtD3DShaderIntrinsics_WavePrefixMin(uint3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint4 overload of AmdExtD3DShaderIntrinsics_WavePrefixMin.
uint4 AmdExtD3DShaderIntrinsics_WavePrefixMin(uint4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

/**
***********************************************************************************************************************
* AmdExtD3DShaderIntrinsics_WavePrefixMax
*
* Performs a prefix scan operation across a wave and returns the resulting maximum value to all participating lanes.
*
* Available in all shader stages.
*
***********************************************************************************************************************
*/
float AmdExtD3DShaderIntrinsics_WavePrefixMax(float src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// float2 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
float2 AmdExtD3DShaderIntrinsics_WavePrefixMax(float2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// float3 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
float3 AmdExtD3DShaderIntrinsics_WavePrefixMax(float3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// float4 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
float4 AmdExtD3DShaderIntrinsics_WavePrefixMax(float4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
int AmdExtD3DShaderIntrinsics_WavePrefixMax(int src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int2 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
int2 AmdExtD3DShaderIntrinsics_WavePrefixMax(int2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int3 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
int3 AmdExtD3DShaderIntrinsics_WavePrefixMax(int3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// int4 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
int4 AmdExtD3DShaderIntrinsics_WavePrefixMax(int4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
uint AmdExtD3DShaderIntrinsics_WavePrefixMax(uint src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint2 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
uint2 AmdExtD3DShaderIntrinsics_WavePrefixMax(uint2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint3 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
uint3 AmdExtD3DShaderIntrinsics_WavePrefixMax(uint3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

// uint4 overload of AmdExtD3DShaderIntrinsics_WavePrefixMax.
uint4 AmdExtD3DShaderIntrinsics_WavePrefixMax(uint4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, src);
}

/**
***********************************************************************************************************************
* AmdExtD3DShaderIntrinsics_WavePostfixSum
*
* Performs a
postfix (inclusive) scan operation across a wave and returns the resulting sum to all participating lanes.
*
* Available in all shader stages.
*
***********************************************************************************************************************
*/
float AmdExtD3DShaderIntrinsics_WavePostfixSum(float src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float2 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
float2 AmdExtD3DShaderIntrinsics_WavePostfixSum(float2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float3 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
float3 AmdExtD3DShaderIntrinsics_WavePostfixSum(float3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float4 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
float4 AmdExtD3DShaderIntrinsics_WavePostfixSum(float4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
int AmdExtD3DShaderIntrinsics_WavePostfixSum(int src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int2 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
int2 AmdExtD3DShaderIntrinsics_WavePostfixSum(int2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int3 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
int3 AmdExtD3DShaderIntrinsics_WavePostfixSum(int3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int4 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
int4 AmdExtD3DShaderIntrinsics_WavePostfixSum(int4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
uint AmdExtD3DShaderIntrinsics_WavePostfixSum(uint src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint2 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
uint2 AmdExtD3DShaderIntrinsics_WavePostfixSum(uint2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint3 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
uint3 AmdExtD3DShaderIntrinsics_WavePostfixSum(uint3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint4 overload of AmdExtD3DShaderIntrinsics_WavePostfixSum.
uint4 AmdExtD3DShaderIntrinsics_WavePostfixSum(uint4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

/**
***********************************************************************************************************************
* AmdExtD3DShaderIntrinsics_WavePostfixProduct
*
* Performs a postfix (inclusive) scan operation across a wave and returns the resulting product to all participating
* lanes.
*
* Available in all shader stages.
*
***********************************************************************************************************************
*/
float AmdExtD3DShaderIntrinsics_WavePostfixProduct(float src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float2 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
float2 AmdExtD3DShaderIntrinsics_WavePostfixProduct(float2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float3 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
float3 AmdExtD3DShaderIntrinsics_WavePostfixProduct(float3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float4 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
float4 AmdExtD3DShaderIntrinsics_WavePostfixProduct(float4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
int AmdExtD3DShaderIntrinsics_WavePostfixProduct(int src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int2 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
int2 AmdExtD3DShaderIntrinsics_WavePostfixProduct(int2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int3 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
int3 AmdExtD3DShaderIntrinsics_WavePostfixProduct(int3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int4 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
int4 AmdExtD3DShaderIntrinsics_WavePostfixProduct(int4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
uint AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint2 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
uint2 AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint3 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
uint3 AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint4 overload of AmdExtD3DShaderIntrinsics_WavePostfixProduct.
uint4 AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

//====================================================================================================================
// AmdExtD3DShaderIntrinsics_WavePostfixMin
//
// Performs a postfix (inclusive) scan across the wave and returns the running minimum value to all participating
// lanes. Available in all shader stages.
//====================================================================================================================
float AmdExtD3DShaderIntrinsics_WavePostfixMin(float src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float2 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
float2 AmdExtD3DShaderIntrinsics_WavePostfixMin(float2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float3 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
float3 AmdExtD3DShaderIntrinsics_WavePostfixMin(float3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float4 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
float4 AmdExtD3DShaderIntrinsics_WavePostfixMin(float4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
int AmdExtD3DShaderIntrinsics_WavePostfixMin(int src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int2 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
int2 AmdExtD3DShaderIntrinsics_WavePostfixMin(int2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int3 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
int3 AmdExtD3DShaderIntrinsics_WavePostfixMin(int3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// int4 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
int4 AmdExtD3DShaderIntrinsics_WavePostfixMin(int4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
uint AmdExtD3DShaderIntrinsics_WavePostfixMin(uint src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint2 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
uint2 AmdExtD3DShaderIntrinsics_WavePostfixMin(uint2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint3 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
uint3 AmdExtD3DShaderIntrinsics_WavePostfixMin(uint3 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// uint4 overload of AmdExtD3DShaderIntrinsics_WavePostfixMin.
uint4 AmdExtD3DShaderIntrinsics_WavePostfixMin(uint4 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

//====================================================================================================================
// AmdExtD3DShaderIntrinsics_WavePostfixMax
//
// Performs a postfix (inclusive) scan across the wave and returns the running maximum value to all participating
// lanes. Available in all shader stages.
//====================================================================================================================
float AmdExtD3DShaderIntrinsics_WavePostfixMax(float src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

// float2 overload of AmdExtD3DShaderIntrinsics_WavePostfixMax.
float2 AmdExtD3DShaderIntrinsics_WavePostfixMax(float2 src)
{
    return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, src);
}

/**
***********************************************************************************************************************
* AmdExtD3DShaderIntrinsics_WavePostfixMax
+*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePostfixMax(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePostfixMax(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePostfixMax(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePostfixMax(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePostfixMax(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePostfixMax(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePostfixMax(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +#if defined (AGS_RAY_HIT_TOKEN) + +//===================================================================================================================== +struct AmdExtRtHitToken +{ + uint dword[2]; +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsRT structure when included in a Ray Tracing payload will indicate to the driver +* that the dwords are already supplied in AmdExtRtHitTokenIn and only requires a call to intersect +* ray, bypassing the traversal of the acceleration structure. +*********************************************************************************************************************** +*/ +struct AmdExtRtHitTokenIn : AmdExtRtHitToken { }; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsRT structure when included in a Ray Tracing payload will indicate to the driver +* that the dwords must be patched into the payload after traversal. 
The application can store this
+* data in a buffer which can then be used for hit group sorting so shading divergence can be avoided.
+***********************************************************************************************************************
+*/
+struct AmdExtRtHitTokenOut : AmdExtRtHitToken { };
+
+/**
+***********************************************************************************************************************
+* @brief
+*    Group shared memory reserved for temporary storage of hit tokens. Not intended to be touched by the app shader.
+*    Application shader must only use the extension functions defined below to access the hit tokens
+*
+***********************************************************************************************************************
+*/
+groupshared AmdExtRtHitToken AmdHitToken;
+
+/**
+***********************************************************************************************************************
+* @brief
+*    Accessor function to obtain the hit tokens from the last call to TraceRays(). The data returned by this
+*    function only guarantees valid values for the last call to TraceRays() prior to calling this function.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdGetLastHitToken()
+{
+    return uint2(AmdHitToken.dword[0], AmdHitToken.dword[1]);
+}
+
+/**
+***********************************************************************************************************************
+* @brief
+*    This function initialises hit tokens for subsequent TraceRays() call. Note, any TraceRay() that intends to use
+*    these hit tokens must include this function call in the same basic block. Applications can use a convenience macro
+*    defined below to enforce that.
+* +*********************************************************************************************************************** +*/ +void AmdSetHitToken(uint2 token) +{ + AmdHitToken.dword[0] = token.x; + AmdHitToken.dword[1] = token.y; +} + +/** +*********************************************************************************************************************** +* @brief +* Convenience macro for calling TraceRays that uses the hit token +* +*********************************************************************************************************************** +*/ +#define AmdTraceRay(accelStruct, \ + rayFlags, \ + instanceInclusionMask, \ + rayContributionToHitGroupIndex, \ + geometryMultiplier, \ + missShaderIndex, \ + ray, \ + payload, \ + token) \ +AmdSetHitToken(token); \ +TraceRay(accelStruct, \ + rayFlags, \ + instanceInclusionMask, \ + rayContributionToHitGroupIndex, \ + geometryMultiplier, \ + missShaderIndex, \ + ray, \ + payload); \ + +#endif // AGS_RAY_HIT_TOKEN + +#endif // _AMDEXTD3DSHADERINTRINICS_HLSL diff --git a/Source/ThirdParty/AGS/amd_ags.h b/Source/ThirdParty/AGS/amd_ags.h new file mode 100644 index 000000000..ad0f5c4bc --- /dev/null +++ b/Source/ThirdParty/AGS/amd_ags.h @@ -0,0 +1,1394 @@ +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +/// \file +/// \mainpage +/// AGS Library Overview +/// -------------------- +/// This document provides an overview of the AGS (AMD GPU Services) library. The AGS library provides software developers with the ability to query +/// AMD GPU software and hardware state information that is not normally available through standard operating systems or graphic APIs. +/// +/// The latest version of the API is publicly hosted here: https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK/. +/// It is also worth checking http://gpuopen.com/gaming-product/amd-gpu-services-ags-library/ for any updates and articles on AGS. +/// \internal +/// Online documentation is publicly hosted here: http://gpuopen-librariesandsdks.github.io/ags/ +/// \endinternal +/// +/// --------------------------------------- +/// What's new in AGS 6.3 since version 6.2 +/// --------------------------------------- +/// AGS 6.3 includes the following updates: +/// * RDNA4 detection +/// * More robust driver version and GPU specifications (eg. numCUs, teraFlops) detection. Prior AGS versions may return empty values for these fields in certain cases like after dirty driver install. +/// * agsGetGPUInfo function to fill in a \ref AGSGPUInfo structure at any point after calling \ref agsInitialize. +/// * \ref agsSetDisplayMode is marked as deprecated. 
Please use DXGI for HDR10 and Freesync HDR +/// +/// --------------------------------------- +/// What's new in AGS 6.2 since version 6.1 +/// --------------------------------------- +/// AGS 6.2 includes the following updates: +/// * Shader clock intrinsics +/// * Minor improvements and fixes +/// +/// --------------------------------------- +/// What's new in AGS 6.1 since version 6.0 +/// --------------------------------------- +/// AGS 6.1 includes the following updates: +/// * RDNA3 detection +/// +/// --------------------------------------- +/// What's new in AGS 6.0 since version 5.4.2 +/// --------------------------------------- +/// AGS 6.0 includes the following updates: +/// * DX12 ray tracing hit token for RDNA2 hardware. +/// * Shader intrinsic that exposes ReadLaneAt in DX12. +/// * Shader intrinsics that expose explicit float conversions in DX12. +/// * Refactored and revised API to minimize user error. +/// * Added agsGetVersionNumber. +/// * Detection for external GPUs. +/// * Detection of RDNA2 architecture. +/// * Grouped the more established intrinsics together into per year support. +/// * Function pointer typedefs for the API +/// +/// --------------------------------------- +/// What's new in AGS 5.4.2 since version 5.4.1 +/// --------------------------------------- +/// AGS 5.4.2 includes the following updates: +/// * sharedMemoryInBytes has been reinstated. +/// * Clock speed returned for APUs. +/// +/// --------------------------------------- +/// What's new in AGS 5.4.1 since version 5.4.0 +/// --------------------------------------- +/// AGS 5.4.1 includes the following updates: +/// * AsicFamily_Count to help with code maintenance. +/// * Visual Studio 2019 support. +/// * x86 support +/// * BaseInstance and BaseVertex intrinsics along with corresponding caps bits. +/// * GetWaveSize intrinsic along with corresponding caps bits. 
+/// +/// --------------------------------------- +/// What's new in AGS 5.4 since version 5.3 +/// --------------------------------------- +/// AGS 5.4 includes the following updates: +/// * A more detailed description of the GPU architecture, now including RDNA GPUs. +/// * Radeon 7 core and memory speeds returned. +/// * Draw index and Atomic U64 intrinsics for both DX11 and DX12. +/// +/// --------------------------------------- +/// What's new in AGS 5.3 since version 5.2 +/// --------------------------------------- +/// AGS 5.3 includes the following updates: +/// * DX11 deferred context support for Multi Draw Indirect and UAV Overlap extensions. +/// * A Radeon Software Version helper to determine whether the installed driver meets your game's minimum driver version requirements. +/// * Freesync HDR Gamma 2.2 mode which uses a 1010102 swapchain and can be considered as an alternative to using the 64 bit swapchain required for Freesync HDR scRGB. +/// +/// Using the AGS library +/// --------------------- +/// It is recommended to take a look at the source code for the samples that come with the AGS SDK: +/// * AGSSample +/// * CrossfireSample +/// * EyefinitySample +/// The AGSSample application is the simplest of the three examples and demonstrates the code required to initialize AGS and use it to query the GPU and Eyefinity state. +/// The CrossfireSample application demonstrates the use of the new API to transfer resources on GPUs in Crossfire mode. Lastly, the EyefinitySample application provides a more +/// extensive example of Eyefinity setup than the basic example provided in AGSSample. +/// There are other samples on Github that demonstrate the DirectX shader extensions, such as the Barycentrics11 and Barycentrics12 samples. +/// +/// To add AGS support to an existing project, follow these steps: +/// * Link your project against the correct import library. Choose from either the 32 bit or 64 bit version. 
+/// * Copy the AGS dll into the same directory as your game executable. +/// * Include the amd_ags.h header file from your source code. +/// * Include the AGS hlsl files if you are using the shader intrinsics. +/// * Declare a pointer to an AGSContext and make this available for all subsequent calls to AGS. +/// * On game initialization, call \ref agsInitialize passing in the address of the context. On success, this function will return a valid context pointer. +/// +/// Don't forget to cleanup AGS by calling \ref agsDeInitialize when the app exits, after the device has been destroyed. + +#ifndef AMD_AGS_H +#define AMD_AGS_H + +#define AMD_AGS_VERSION_MAJOR 6 ///< AGS major version +#define AMD_AGS_VERSION_MINOR 3 ///< AGS minor version +#define AMD_AGS_VERSION_PATCH 0 ///< AGS patch version + +#ifdef __cplusplus +extern "C" { +#endif + +/// \defgroup Defines AGS defines +/// @{ +#if defined (AGS_GCC) +#define AMD_AGS_API +#else +#define AMD_AGS_API __declspec(dllexport) ///< AGS exported functions +#endif + +#define AGS_MAKE_VERSION( major, minor, patch ) ( ( major << 22 ) | ( minor << 12 ) | patch ) ///< Macro to create the app and engine versions for the fields in \ref AGSDX12ExtensionParams and \ref AGSDX11ExtensionParams and the Radeon Software Version +#define AGS_UNSPECIFIED_VERSION 0xFFFFAD00 ///< Use this to specify no version +#define AGS_CURRENT_VERSION AGS_MAKE_VERSION( AMD_AGS_VERSION_MAJOR, AMD_AGS_VERSION_MINOR, AMD_AGS_VERSION_PATCH ) ///< Macro to return the current AGS version as defined by the AGS header file +/// @} + +#if !defined (AGS_EXCLUDE_DIRECTX_TYPES) +// Forward declaration of D3D and DXGI types +typedef struct IDXGIAdapter IDXGIAdapter; +typedef struct IDXGISwapChain IDXGISwapChain; +typedef struct DXGI_SWAP_CHAIN_DESC DXGI_SWAP_CHAIN_DESC; +enum D3D_DRIVER_TYPE; +enum D3D_FEATURE_LEVEL; +enum D3D_PRIMITIVE_TOPOLOGY; + +#if !defined (AGS_EXCLUDE_DIRECTX_11) +// Forward declaration of D3D11 types +typedef struct ID3D11Device 
ID3D11Device; +typedef struct ID3D11DeviceContext ID3D11DeviceContext; +typedef struct ID3D11Resource ID3D11Resource; +typedef struct ID3D11Buffer ID3D11Buffer; +typedef struct ID3D11Texture1D ID3D11Texture1D; +typedef struct ID3D11Texture2D ID3D11Texture2D; +typedef struct ID3D11Texture3D ID3D11Texture3D; +typedef struct D3D11_BUFFER_DESC D3D11_BUFFER_DESC; +typedef struct D3D11_TEXTURE1D_DESC D3D11_TEXTURE1D_DESC; +typedef struct D3D11_TEXTURE2D_DESC D3D11_TEXTURE2D_DESC; +typedef struct D3D11_TEXTURE3D_DESC D3D11_TEXTURE3D_DESC; +typedef struct D3D11_SUBRESOURCE_DATA D3D11_SUBRESOURCE_DATA; +typedef struct tagRECT tagRECT; +typedef struct tagRECT D3D11_RECT; ///< typedef this ourselves so we don't have to drag d3d11.h in +#endif +#if !defined (AGS_EXCLUDE_DIRECTX_12) +// Forward declaration of D3D12 types +typedef struct ID3D12Device ID3D12Device; +typedef struct ID3D12GraphicsCommandList ID3D12GraphicsCommandList; +#endif +#endif + +/// \defgroup enums General enumerations +/// @{ + +/// The return codes +typedef enum AGSReturnCode +{ + AGS_SUCCESS, ///< Successful function call + AGS_FAILURE, ///< Failed to complete call for some unspecified reason + AGS_INVALID_ARGS, ///< Invalid arguments into the function + AGS_OUT_OF_MEMORY, ///< Out of memory when allocating space internally + AGS_MISSING_D3D_DLL, ///< Returned when a D3D dll fails to load + AGS_LEGACY_DRIVER, ///< Returned if a feature is not present in the installed driver + AGS_NO_AMD_DRIVER_INSTALLED, ///< Returned if the AMD GPU driver does not appear to be installed + AGS_EXTENSION_NOT_SUPPORTED, ///< Returned if the driver does not support the requested driver extension + AGS_ADL_FAILURE, ///< Failure in ADL (the AMD Display Library) + AGS_DX_FAILURE, ///< Failure from DirectX runtime + AGS_D3DDEVICE_NOT_CREATED ///< Failure due to not creating the D3D device successfully via AGS. 
+} AGSReturnCode; + +/// @} + +typedef struct AGSContext AGSContext; ///< All function calls in AGS require a pointer to a context. This is generated via \ref agsInitialize + +/// The rectangle struct used by AGS. +typedef struct AGSRect +{ + int offsetX; ///< Offset on X axis + int offsetY; ///< Offset on Y axis + int width; ///< Width of rectangle + int height; ///< Height of rectangle +} AGSRect; + +/// The display info struct used to describe a display enumerated by AGS +typedef struct AGSDisplayInfo +{ + char name[ 256 ]; ///< The name of the display + char displayDeviceName[ 32 ]; ///< The display device name, i.e. DISPLAY_DEVICE::DeviceName + + unsigned int isPrimaryDisplay : 1; ///< Whether this display is marked as the primary display + unsigned int HDR10 : 1; ///< HDR10 is supported on this display + unsigned int dolbyVision : 1; ///< Dolby Vision is supported on this display + unsigned int freesync : 1; ///< Freesync is supported on this display + unsigned int freesyncHDR : 1; ///< Freesync HDR is supported on this display + unsigned int eyefinityInGroup : 1; ///< The display is part of the Eyefinity group + unsigned int eyefinityPreferredDisplay : 1; ///< The display is the preferred display in the Eyefinity group for displaying the UI + unsigned int eyefinityInPortraitMode : 1; ///< The display is in the Eyefinity group but in portrait mode + unsigned int reservedPadding : 24; ///< Reserved for future use + + int maxResolutionX; ///< The maximum supported resolution of the unrotated display + int maxResolutionY; ///< The maximum supported resolution of the unrotated display + float maxRefreshRate; ///< The maximum supported refresh rate of the display + + AGSRect currentResolution; ///< The current resolution and position in the desktop, ignoring Eyefinity bezel compensation + AGSRect visibleResolution; ///< The visible resolution and position. 
When Eyefinity bezel compensation is enabled this will + ///< be the sub region in the Eyefinity single large surface (SLS) + float currentRefreshRate; ///< The current refresh rate + + int eyefinityGridCoordX; ///< The X coordinate in the Eyefinity grid. -1 if not in an Eyefinity group + int eyefinityGridCoordY; ///< The Y coordinate in the Eyefinity grid. -1 if not in an Eyefinity group + + double chromaticityRedX; ///< Red display primary X coord + double chromaticityRedY; ///< Red display primary Y coord + + double chromaticityGreenX; ///< Green display primary X coord + double chromaticityGreenY; ///< Green display primary Y coord + + double chromaticityBlueX; ///< Blue display primary X coord + double chromaticityBlueY; ///< Blue display primary Y coord + + double chromaticityWhitePointX; ///< White point X coord + double chromaticityWhitePointY; ///< White point Y coord + + double screenDiffuseReflectance; ///< Percentage expressed between 0 - 1 + double screenSpecularReflectance; ///< Percentage expressed between 0 - 1 + + double minLuminance; ///< The minimum luminance of the display in nits + double maxLuminance; ///< The maximum luminance of the display in nits + double avgLuminance; ///< The average luminance of the display in nits + + int logicalDisplayIndex; ///< The internally used index of this display + int adlAdapterIndex; ///< The internally used ADL adapter index + int reserved; ///< reserved field +} AGSDisplayInfo; + +/// The ASIC family +typedef enum AGSAsicFamily +{ + AGSAsicFamily_Unknown, ///< Unknown architecture, potentially from another IHV. Check \ref AGSDeviceInfo::vendorId + AGSAsicFamily_PreGCN, ///< Pre GCN architecture. + AGSAsicFamily_GCN1, ///< AMD GCN 1 architecture: Oland, Cape Verde, Pitcairn & Tahiti. + AGSAsicFamily_GCN2, ///< AMD GCN 2 architecture: Hawaii & Bonaire. This also includes APUs Kaveri and Carrizo. + AGSAsicFamily_GCN3, ///< AMD GCN 3 architecture: Tonga & Fiji. 
+ AGSAsicFamily_GCN4, ///< AMD GCN 4 architecture: Polaris. + AGSAsicFamily_Vega, ///< AMD Vega architecture, including Raven Ridge (ie AMD Ryzen CPU + AMD Vega GPU). + AGSAsicFamily_RDNA, ///< AMD RDNA architecture + AGSAsicFamily_RDNA2, ///< AMD RDNA2 architecture + AGSAsicFamily_RDNA3, ///< AMD RDNA3 architecture + AGSAsicFamily_RDNA4, ///< AMD RDNA4 architecture + + AGSAsicFamily_Count ///< Number of enumerated ASIC families +} AGSAsicFamily; + +/// The device info struct used to describe a physical GPU enumerated by AGS +typedef struct AGSDeviceInfo +{ + const char* adapterString; ///< The adapter name string + AGSAsicFamily asicFamily; ///< Set to Unknown if not AMD hardware + unsigned int isAPU : 1; ///< Whether this device is an APU + unsigned int isPrimaryDevice : 1; ///< Whether this device is marked as the primary device + unsigned int isExternal :1; ///< Whether this device is a detachable, external device + unsigned int reservedPadding : 29; ///< Reserved for future use + + int vendorId; ///< The vendor id + int deviceId; ///< The device id + int revisionId; ///< The revision id + + int numCUs; ///< Number of compute units + int numWGPs; ///< Number of RDNA Work Group Processors. Only valid if ASIC is RDNA onwards. + + int numROPs; ///< Number of ROPs + int coreClock; ///< Core clock speed at 100% power in MHz + int memoryClock; ///< Memory clock speed at 100% power in MHz + int memoryBandwidth; ///< Memory bandwidth in MB/s + float teraFlops; ///< Teraflops of GPU. Zero if not GCN onwards. Calculated from iCoreClock * iNumCUs * 64 Pixels/clk * 2 instructions/MAD + + unsigned long long localMemoryInBytes; ///< The size of local memory in bytes. 0 for non AMD hardware. + unsigned long long sharedMemoryInBytes; ///< The size of system memory available to the GPU in bytes. It is important to factor this into your VRAM budget for APUs + ///< as the reported local memory will only be a small fraction of the total memory available to the GPU. 
+ + int numDisplays; ///< The number of active displays found to be attached to this adapter. + AGSDisplayInfo* displays; ///< List of displays allocated by AGS to be numDisplays in length. + + int eyefinityEnabled; ///< Indicates if Eyefinity is active + int eyefinityGridWidth; ///< Contains width of the multi-monitor grid that makes up the Eyefinity Single Large Surface. + int eyefinityGridHeight; ///< Contains height of the multi-monitor grid that makes up the Eyefinity Single Large Surface. + int eyefinityResolutionX; ///< Contains width in pixels of the multi-monitor Single Large Surface. + int eyefinityResolutionY; ///< Contains height in pixels of the multi-monitor Single Large Surface. + int eyefinityBezelCompensated; ///< Indicates if bezel compensation is used for the current SLS display area. 1 if enabled, and 0 if disabled. + + int adlAdapterIndex; ///< Internally used index into the ADL list of adapters + int reserved; ///< reserved field +} AGSDeviceInfo; + +/// \defgroup general General API functions +/// API for initialization, cleanup and HDR display modes. +/// @{ + +typedef void* (__stdcall *AGS_ALLOC_CALLBACK)( size_t allocationSize ); ///< AGS user defined allocation prototype +typedef void (__stdcall *AGS_FREE_CALLBACK)( void* allocationPtr ); ///< AGS user defined free prototype + +/// The configuration options that can be passed in to \ref agsInitialize +typedef struct AGSConfiguration +{ + AGS_ALLOC_CALLBACK allocCallback; ///< Optional memory allocation callback. If not supplied, malloc() is used + AGS_FREE_CALLBACK freeCallback; ///< Optional memory freeing callback. 
If not supplied, free() is used +} AGSConfiguration; + +/// The top level GPU information returned from \ref agsInitialize +typedef struct AGSGPUInfo +{ + const char* driverVersion; ///< The AMD internal driver version + const char* radeonSoftwareVersion; ///< The Radeon Software Version + + int numDevices; ///< Number of GPUs in the system + AGSDeviceInfo* devices; ///< List of GPUs in the system +} AGSGPUInfo; + +/// The display mode +typedef enum AGSDisplayMode +{ + AGSDisplayMode_SDR, ///< SDR mode + AGSDisplayMode_HDR10_PQ, ///< HDR10 PQ encoding, requiring a 1010102 UNORM swapchain and PQ encoding in the output shader. + AGSDisplayMode_HDR10_scRGB, ///< HDR10 scRGB, requiring an FP16 swapchain. Values of 1.0 == 80 nits, 125.0 == 10000 nits. + AGSDisplayMode_FreesyncHDR_scRGB, ///< Freesync HDR scRGB, requiring an FP16 swapchain. A value of 1.0 == 80 nits. + AGSDisplayMode_FreesyncHDR_Gamma22, ///< Freesync HDR Gamma 2.2, requiring a 1010102 UNORM swapchain. The output needs to be encoded to gamma 2.2. + AGSDisplayMode_DolbyVision, ///< Dolby Vision, requiring an 8888 UNORM swapchain + + Mode_Count ///< Number of enumerated display modes +} AGSDisplayMode; + +/// The struct to specify the display settings to the driver. 
+typedef struct AGSDisplaySettings
+{
+    AGSDisplayMode mode;                            ///< The display mode to set the display into
+
+    double chromaticityRedX;                        ///< Red display primary X coord
+    double chromaticityRedY;                        ///< Red display primary Y coord
+
+    double chromaticityGreenX;                      ///< Green display primary X coord
+    double chromaticityGreenY;                      ///< Green display primary Y coord
+
+    double chromaticityBlueX;                       ///< Blue display primary X coord
+    double chromaticityBlueY;                       ///< Blue display primary Y coord
+
+    double chromaticityWhitePointX;                 ///< White point X coord
+    double chromaticityWhitePointY;                 ///< White point Y coord
+
+    double minLuminance;                            ///< The minimum scene luminance in nits
+    double maxLuminance;                            ///< The maximum scene luminance in nits
+
+    double maxContentLightLevel;                    ///< The maximum content light level in nits (MaxCLL)
+    double maxFrameAverageLightLevel;               ///< The maximum frame average light level in nits (MaxFALL)
+
+    unsigned int disableLocalDimming : 1;           ///< Disables local dimming if possible
+    unsigned int reservedPadding : 31;              ///< Reserved
+} AGSDisplaySettings;
+
+
+/// The result returned from \ref agsCheckDriverVersion
+typedef enum AGSDriverVersionResult
+{
+    AGS_SOFTWAREVERSIONCHECK_OK,                        ///< The reported Radeon Software Version is newer or the same as the required version
+    AGS_SOFTWAREVERSIONCHECK_OLDER,                     ///< The reported Radeon Software Version is older than the required version
+    AGS_SOFTWAREVERSIONCHECK_UNDEFINED                  ///< The check could not determine a result. This could be because it is a private or custom driver or just invalid arguments.
+} AGSDriverVersionResult;
+
+///
+/// Helper function to check the installed software version against the required software version.
+///
+/// \param [in] radeonSoftwareVersionReported       The Radeon Software Version returned from \ref AGSGPUInfo::radeonSoftwareVersion.
+/// \param [in] radeonSoftwareVersionRequired       The Radeon Software Version to check against. This is specified using \ref AGS_MAKE_VERSION.
+/// \return The result of the check. +/// +AMD_AGS_API AGSDriverVersionResult agsCheckDriverVersion( const char* radeonSoftwareVersionReported, unsigned int radeonSoftwareVersionRequired ); + +/// +/// Function to return the AGS version number. +/// +/// \return The version number made using AGS_MAKE_VERSION( AMD_AGS_VERSION_MAJOR, AMD_AGS_VERSION_MINOR, AMD_AGS_VERSION_PATCH ). +/// +AMD_AGS_API int agsGetVersionNumber(); + +/// +/// Function used to initialize the AGS library. +/// agsVersion must be specified as AGS_CURRENT_VERSION or the call will return \ref AGS_INVALID_ARGS. +/// Must be called prior to any of the subsequent AGS API calls. +/// Must be called prior to ID3D11Device or ID3D12Device creation. +/// \note The caller of this function should handle the possibility of the call failing in the cases below. One option is to do a vendor id check and only call \ref agsInitialize if there is an AMD GPU present. +/// \note This function will fail with \ref AGS_NO_AMD_DRIVER_INSTALLED if there is no AMD driver found on the system. +/// \note This function will fail with \ref AGS_LEGACY_DRIVER in Catalyst versions before 12.20. +/// +/// \param [in] agsVersion The API version specified using the \ref AGS_CURRENT_VERSION macro. If this does not match the version in the binary this initialization call will fail. +/// \param [in] config Optional pointer to a AGSConfiguration struct to override the default library configuration. +/// \param [out] context Address of a pointer to a context. This function allocates a context on the heap which is then required for all subsequent API calls. +/// \param [out] gpuInfo Optional pointer to a AGSGPUInfo struct which will get filled in for all the GPUs in the system. +/// +AMD_AGS_API AGSReturnCode agsInitialize( int agsVersion, const AGSConfiguration* config, AGSContext** context, AGSGPUInfo* gpuInfo ); + +/// +/// Function used to clean up the AGS library. +/// +/// \param [in] context Pointer to a context. 
This function will deallocate the context from the heap. +/// +AMD_AGS_API AGSReturnCode agsDeInitialize( AGSContext* context ); + +/// +/// Function used to fill out a \ref AGSGPUInfo structure. +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [out] gpuInfo Pointer to a \ref AGSGPUInfo struct which will get filled in for all the GPUs in the system. +/// +AMD_AGS_API AGSReturnCode agsGetGPUInfo( AGSContext* context, AGSGPUInfo* gpuInfo ); + +/// +/// Function used to set a specific display into HDR mode +/// **DEPRECATED FUNCTION - Please use DXGI for HDR10 and Freesync HDR** +/// \note Setting all of the values apart from color space and transfer function to zero will cause the display to use defaults. +/// \note Call this function after each mode change (switch to fullscreen, any change in swapchain etc). +/// \note HDR10 PQ mode requires a 1010102 swapchain. +/// \note HDR10 scRGB mode requires an FP16 swapchain. +/// \note Freesync HDR scRGB mode requires an FP16 swapchain. +/// \note Freesync HDR Gamma 2.2 mode requires a 1010102 swapchain. +/// \note Dolby Vision requires a 8888 UNORM swapchain. +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [in] deviceIndex The index of the device listed in \ref AGSGPUInfo::devices. +/// \param [in] displayIndex The index of the display listed in \ref AGSDeviceInfo::displays. +/// \param [in] settings Pointer to the display settings to use. +/// +AMD_AGS_API AGSReturnCode agsSetDisplayMode( AGSContext* context, int deviceIndex, int displayIndex, const AGSDisplaySettings* settings ); + +/// @} + +/// \defgroup dxappreg App Registration +/// @{ +/// This extension allows an application to voluntarily register itself with the driver, providing a more robust app detection solution and avoid the issue of the driver relying on exe names to match the app to a driver profile. 
+/// It is available when creating the device for both DirectX11 and DirectX12 via \ref agsDriverExtensionsDX11_CreateDevice and \ref agsDriverExtensionsDX12_CreateDevice respectively. +/// This feature is supported in Radeon Software Version 17.9.2 onwards. +/// Rules: +/// * AppName or EngineName must be set, but both are not required. Engine profiles will be used only if app specific profiles do not exist. +/// * In an engine, the EngineName should be set, so a default profile can be built. If an app modifies the engine, the AppName should be set, to allow a profile for the specific app. +/// * Version number is not mandatory, but recommended. The use of which can prevent the use of profiles for incompatible versions (for instance engine versions that introduce or change features), and can help prevent older profiles from being used (and introducing new bugs) before the profile is tested with new app builds. +/// * If Version numbers are used and a new version is introduced, a new profile will not be enabled until an AMD engineer has been able to update a previous profile, or make a new one. +/// +/// The cases for profile selection are as follows: +/// +/// |Case|Profile Applied| +/// |----|---------------| +/// | App or Engine Version has profile | The profile is used. | +/// | App or Engine Version num < profile version num | The closest profile > the version number is used. | +/// | App or Engine Version num > profile version num | No profile selected/The previous method is used. | +/// | App and Engine Version have profile | The App's profile is used. | +/// | App and Engine Version num < profile version | The closest App profile > the version number is used. | +/// | App and Engine Version, no App profile found | The Engine profile will be used. | +/// | App/Engine name but no Version, has profile | The latest profile is used. | +/// | No name or version, or no profile | The previous app detection method is used. 
| +/// +/// As shown above, if an App name is given, and a profile is found for that app, that will be prioritized. The Engine name and profile will be used only if no app name is given, or no viable profile is found for the app name. +/// In the case that App nor Engine have a profile, the previous app detection methods will be used. If given a version number that is larger than any profile version number, no profile will be selected. +/// This is specifically to prevent cases where an update to an engine or app will cause catastrophic breaks in the profile, allowing an engineer to test the profile before clearing it for public use with the new engine/app update. +/// +/// @} + +#if !defined (AGS_EXCLUDE_DIRECTX_12) + +/// \defgroup dx12 DirectX12 Extensions +/// DirectX12 driver extensions +/// @{ + +/// \defgroup dx12init Device and device object creation and cleanup +/// It is now mandatory to call \ref agsDriverExtensionsDX12_CreateDevice when creating a device if the user wants to access any future DX12 AMD extensions. +/// The corresponding \ref agsDriverExtensionsDX12_DestroyDevice call must be called to release the device and free up the internal resources allocated by the create call. +/// @{ + +/// The struct to specify the DX12 device creation parameters +typedef struct AGSDX12DeviceCreationParams +{ + IDXGIAdapter* pAdapter; ///< Pointer to the adapter to use when creating the device. This may be null. + IID iid; ///< The interface ID for the type of device to be created. + D3D_FEATURE_LEVEL FeatureLevel; ///< The minimum feature level to create the device with. 
+} AGSDX12DeviceCreationParams; + +/// The struct to specify DX12 additional device creation parameters +typedef struct AGSDX12ExtensionParams +{ + const WCHAR* pAppName; ///< Application name + const WCHAR* pEngineName; ///< Engine name + unsigned int appVersion; ///< Application version + unsigned int engineVersion; ///< Engine version + unsigned int uavSlot; ///< The UAV slot reserved for intrinsic support. Refer to the \ref agsDriverExtensionsDX12_CreateDevice documentation for more details. +} AGSDX12ExtensionParams; + +/// Extensions for DX12 +typedef struct AGSDX12ExtensionsSupported +{ + unsigned int intrinsics16 : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. ReadFirstLane, ReadLane, LaneID, Swizzle, Ballot, MBCount, Med3, Barycentrics + unsigned int intrinsics17 : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. WaveReduce, WaveScan + unsigned int userMarkers : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. + unsigned int appRegistration : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. + unsigned int UAVBindSlot : 1; ///< Supported in Radeon Software Version 19.5.1 onwards. + unsigned int intrinsics19 : 1; ///< Supported in Radeon Software Version 19.12.2 onwards. DrawIndex, AtomicU64 + unsigned int baseVertex : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int baseInstance : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int getWaveSize : 1; ///< Supported in Radeon Software Version 20.5.1 onwards. + unsigned int floatConversion : 1; ///< Supported in Radeon Software Version 20.5.1 onwards. + unsigned int readLaneAt : 1; ///< Supported in Radeon Software Version 20.11.2 onwards. + unsigned int rayHitToken : 1; ///< Supported in Radeon Software Version 20.11.2 onwards. + unsigned int shaderClock : 1; ///< Supported in Radeon Software Version 23.1.1 onwards. 
+ unsigned int padding : 19; ///< Reserved +} AGSDX12ExtensionsSupported; + +/// The struct to hold all the returned parameters from the device creation call +typedef struct AGSDX12ReturnedParams +{ + ID3D12Device* pDevice; ///< The newly created device + AGSDX12ExtensionsSupported extensionsSupported; ///< List of supported extensions +} AGSDX12ReturnedParams; + +/// The space id for DirectX12 intrinsic support +const unsigned int AGS_DX12_SHADER_INTRINSICS_SPACE_ID = 0x7FFF0ADE; // 2147420894 + +/// +/// Function used to create a D3D12 device with additional AMD-specific initialization parameters. +/// +/// When using the HLSL shader extensions please note: +/// * The shader compiler should not use the D3DCOMPILE_SKIP_OPTIMIZATION (/Od) option or /O0, otherwise it will not work. +/// * The shader compiler needs D3DCOMPILE_ENABLE_STRICTNESS (/Ges) enabled. +/// * The intrinsic instructions require a 5.1 shader model. +/// * The Root Signature will need to reserve an extra UAV resource slot. This is not a real resource that requires allocating, it is just used to encode the intrinsic instructions. +/// +/// The easiest way to set up the reserved UAV slot is to specify it at u0. The register space id will automatically be assumed to be \ref AGS_DX12_SHADER_INTRINSICS_SPACE_ID. +/// The HLSL expects this as default and the set up code would look similar to this: +/// \code{.cpp} +/// CD3DX12_DESCRIPTOR_RANGE range[]; +/// ... +/// range[ 0 ].Init( D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, AGS_DX12_SHADER_INTRINSICS_SPACE_ID ); // u0 at driver-reserved space id +/// \endcode +/// +/// Newer drivers also support a user-specified slot in which case the register space id is assumed to be 0. It is important that the \ref AGSDX12ExtensionsSupported::UAVBindSlot bit is set. +/// to ensure the driver can support this. If not, then u0 and \ref AGS_DX12_SHADER_INTRINSICS_SPACE_ID must be used. 
+/// If the driver does support this feature and a non zero slot is required, then the HLSL must also define AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE as the matching slot value. +/// +/// The AGS context pointer is added to the user data of the D3D device using the SetPrivateData API. The GUID used is {d5a2a91b-7003-4f12-89de-209beb51fb94}: +/// \code{.cpp} +/// static const GUID IID_AGSContextData = {0xd5a2a91b, 0x7003, 0x4f12, {0x89, 0xde, 0x20, 0x9b, 0xeb, 0x51, 0xfb, 0x94}}; +/// \endcode +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [in] creationParams Pointer to the struct to specify the existing DX12 device creation parameters. +/// \param [in] extensionParams Optional pointer to the struct to specify DX12 additional device creation parameters. +/// \param [out] returnedParams Pointer to struct to hold all the returned parameters from the call. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_CreateDevice( AGSContext* context, const AGSDX12DeviceCreationParams* creationParams, const AGSDX12ExtensionParams* extensionParams, AGSDX12ReturnedParams* returnedParams ); + +/// +/// Function to destroy the D3D12 device. +/// This call will also cleanup any AMD-specific driver extensions for D3D12. +/// +/// \param [in] context Pointer to a context. +/// \param [in] device Pointer to the D3D12 device. +/// \param [out] deviceReferences Optional pointer to an unsigned int that will be set to the value returned from device->Release(). +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_DestroyDevice( AGSContext* context, ID3D12Device* device, unsigned int* deviceReferences ); + +/// @} + +/// \defgroup dx12usermarkers User Markers +/// @{ + +/// +/// Function used to push an AMD user marker onto the command list. +/// This is only has an effect if \ref AGSDX12ExtensionsSupported::userMarkers is present. +/// Supported in Radeon Software Version 17.9.1 onwards. 
+/// +/// \param [in] context Pointer to a context. +/// \param [in] commandList Pointer to the command list. +/// \param [in] data The UTF-8 marker string. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_PushMarker( AGSContext* context, ID3D12GraphicsCommandList* commandList, const char* data ); + +/// +/// Function used to pop an AMD user marker on the command list. +/// Supported in Radeon Software Version 17.9.1 onwards. +/// +/// \param [in] context Pointer to a context. +/// \param [in] commandList Pointer to the command list. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_PopMarker( AGSContext* context, ID3D12GraphicsCommandList* commandList ); + +/// +/// Function used to insert an single event AMD user marker onto the command list. +/// Supported in Radeon Software Version 17.9.1 onwards. +/// +/// \param [in] context Pointer to a context. +/// \param [in] commandList Pointer to the command list. +/// \param [in] data The UTF-8 marker string. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_SetMarker( AGSContext* context, ID3D12GraphicsCommandList* commandList, const char* data ); + +/// @} +/// @} + +#endif // AGS_EXCLUDE_DIRECTX_12 + +#if !defined (AGS_EXCLUDE_DIRECTX_11) + +/// \defgroup dx11 DirectX11 Extensions +/// DirectX11 driver extensions +/// @{ + +/// \defgroup dx11init Device creation and cleanup +/// It is now mandatory to call \ref agsDriverExtensionsDX11_CreateDevice when creating a device if the user wants to access any DX11 AMD extensions. +/// The corresponding \ref agsDriverExtensionsDX11_DestroyDevice call must be called to release the device and free up the internal resources allocated by the create call. +/// @{ + +/// The different modes to control Crossfire behavior. +typedef enum AGSCrossfireMode +{ + AGS_CROSSFIRE_MODE_DRIVER_AFR = 0, ///< Use the default driver-based AFR rendering. 
If this mode is specified, do NOT use the agsDriverExtensionsDX11_Create*() APIs to create resources + AGS_CROSSFIRE_MODE_EXPLICIT_AFR, ///< Use the AGS Crossfire API functions to perform explicit AFR rendering without requiring a CF driver profile + AGS_CROSSFIRE_MODE_DISABLE ///< Completely disable AFR rendering +} AGSCrossfireMode; + +/// The struct to specify the existing DX11 device creation parameters +typedef struct AGSDX11DeviceCreationParams +{ + IDXGIAdapter* pAdapter; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + D3D_DRIVER_TYPE DriverType; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + HMODULE Software; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + UINT Flags; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + const D3D_FEATURE_LEVEL* pFeatureLevels; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + UINT FeatureLevels; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + UINT SDKVersion; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + const DXGI_SWAP_CHAIN_DESC* pSwapChainDesc; ///< Optional swapchain description. Specify this to invoke D3D11CreateDeviceAndSwapChain instead of D3D11CreateDevice. +} AGSDX11DeviceCreationParams; + +/// The struct to specify DX11 additional device creation parameters +typedef struct AGSDX11ExtensionParams +{ + const WCHAR* pAppName; ///< Application name + const WCHAR* pEngineName; ///< Engine name + unsigned int appVersion; ///< Application version + unsigned int engineVersion; ///< Engine version + unsigned int numBreadcrumbMarkers; ///< The number of breadcrumb markers to allocate. Each marker is a uint64 (ie 8 bytes). If 0, the system is disabled. + unsigned int uavSlot; ///< The UAV slot reserved for intrinsic support. This must match the slot defined in the HLSL, i.e. "#define AmdDxExtShaderIntrinsicsUAVSlot". 
+ /// The default slot is 7, but the caller is free to use an alternative slot. + /// If 0 is specified, then the default of 7 will be used. + AGSCrossfireMode crossfireMode; ///< Desired Crossfire mode +} AGSDX11ExtensionParams; + +/// Extensions for DX11 +typedef struct AGSDX11ExtensionsSupported +{ + unsigned int quadList : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int screenRectList : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int uavOverlap : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int depthBoundsTest : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int multiDrawIndirect : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int multiDrawIndirectCountIndirect : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int crossfireAPI : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int createShaderControls : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int intrinsics16 : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. ReadFirstLane, ReadLane, LaneID, Swizzle, Ballot, MBCount, Med3, Barycentrics + unsigned int multiView : 1; ///< Supported in Radeon Software Version 16.12.1 onwards. + unsigned int intrinsics17 : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. WaveReduce, WaveScan + unsigned int appRegistration : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. + unsigned int breadcrumbMarkers : 1; ///< Supported in Radeon Software Version 17.11.1 onwards. + unsigned int MDIDeferredContexts : 1; ///< Supported in Radeon Software Version 18.8.1 onwards. + unsigned int UAVOverlapDeferredContexts : 1; ///< Supported in Radeon Software Version 18.8.1 onwards. + unsigned int depthBoundsDeferredContexts : 1; ///< Supported in Radeon Software Version 18.8.1 onwards. 
+ unsigned int intrinsics19 : 1; ///< Supported in Radeon Software Version 19.12.2 onwards. DrawIndex, AtomicU64 + unsigned int getWaveSize : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int baseVertex : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int baseInstance : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int padding : 12; ///< Reserved +} AGSDX11ExtensionsSupported; + +/// The struct to hold all the returned parameters from the device creation call +typedef struct AGSDX11ReturnedParams +{ + ID3D11Device* pDevice; ///< The newly created device + ID3D11DeviceContext* pImmediateContext; ///< The newly created immediate device context + IDXGISwapChain* pSwapChain; ///< The newly created swap chain. This is only created if a valid pSwapChainDesc is supplied in AGSDX11DeviceCreationParams. + D3D_FEATURE_LEVEL featureLevel; ///< The feature level supported by the newly created device + AGSDX11ExtensionsSupported extensionsSupported; ///< List of supported extensions + unsigned int crossfireGPUCount; ///< The number of GPUs that are active for this app + void* breadcrumbBuffer; ///< The CPU buffer returned if the initialization of the breadcrumb was successful +} AGSDX11ReturnedParams; + +/// +/// Function used to create a D3D11 device with additional AMD-specific initialization parameters. +/// +/// When using the HLSL shader extensions please note: +/// * The shader compiler should not use the D3DCOMPILE_SKIP_OPTIMIZATION (/Od) option, otherwise it will not work. +/// * The shader compiler needs D3DCOMPILE_ENABLE_STRICTNESS (/Ges) enabled. +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [in] creationParams Pointer to the struct to specify the existing DX11 device creation parameters. +/// \param [in] extensionParams Optional pointer to the struct to specify DX11 additional device creation parameters. 
+/// \param [out] returnedParams Pointer to struct to hold all the returned parameters from the call. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateDevice( AGSContext* context, const AGSDX11DeviceCreationParams* creationParams, const AGSDX11ExtensionParams* extensionParams, AGSDX11ReturnedParams* returnedParams ); + +/// +/// Function to destroy the D3D11 device and its immediate context. +/// This call will also cleanup any AMD-specific driver extensions for D3D11. +/// +/// \param [in] context Pointer to a context. +/// \param [in] device Pointer to the D3D11 device. +/// \param [out] deviceReferences Optional pointer to an unsigned int that will be set to the value returned from device->Release(). +/// \param [in] immediateContext Pointer to the D3D11 immediate device context. +/// \param [out] immediateContextReferences Optional pointer to an unsigned int that will be set to the value returned from immediateContext->Release(). +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_DestroyDevice( AGSContext* context, ID3D11Device* device, unsigned int* deviceReferences, ID3D11DeviceContext* immediateContext, unsigned int* immediateContextReferences ); + +/// @} + +/// \defgroup breadcrumbs Breadcrumb API +/// API for writing top-of-pipe and bottom-of-pipe markers to help track down GPU hangs. +/// +/// The API is available if the \ref AGSDX11ExtensionsSupported::breadcrumbMarkers is present. +/// +/// To use the API, a non zero value needs to be specified in \ref AGSDX11ExtensionParams::numBreadcrumbMarkers. This enables the API (if available) and allocates a system memory buffer +/// which is returned to the user in \ref AGSDX11ReturnedParams::breadcrumbBuffer. +/// +/// The user can now write markers before and after draw calls using \ref agsDriverExtensionsDX11_WriteBreadcrumb. +/// +/// \section background Background +/// +/// A top-of-pipe (TOP) command is scheduled for execution as soon as the command processor (CP) reaches the command. 
+/// A bottom-of-pipe (BOP) command is scheduled for execution once the previous rendering commands (draw and dispatch) finish execution. +/// TOP and BOP commands do not block CP. i.e. the CP schedules the command for execution then proceeds to the next command without waiting. +/// To effectively use TOP and BOP commands, it is important to understand how they interact with rendering commands: +/// +/// When the CP encounters a rendering command it queues it for execution and moves to the next command. The queued rendering commands are issued in order. +/// There can be multiple rendering commands running in parallel. When a rendering command is issued we say it is at the top of the pipe. When a rendering command +/// finishes execution we say it has reached the bottom of the pipe. +/// +/// A BOP command remains in a waiting queue and is executed once prior rendering commands finish. The queue of BOP commands is limited to 64 entries in GCN generation 1, 2, 3, 4 and 5. +/// If the 64 limit is reached the CP will stop queueing BOP commands and also rendering commands. Developers should limit the number of BOP commands that write markers to avoid contention. +/// In general, developers should limit both TOP and BOP commands to avoid stalling the CP. 
+/// +/// \subsection eg1 Example 1: +/// +/// \code{.cpp} +/// // Start of a command buffer +/// WriteMarker(TopOfPipe, 1) +/// WriteMarker(BottomOfPipe, 2) +/// WriteMarker(BottomOfPipe, 3) +/// DrawX +/// WriteMarker(BottomOfPipe, 4) +/// WriteMarker(BottomOfPipe, 5) +/// WriteMarker(TopOfPipe, 6) +/// // End of command buffer +/// \endcode +/// +/// In the above example, the CP writes markers 1, 2 and 3 without waiting: +/// Marker 1 is TOP so it's independent from other commands +/// There's no wait for marker 2 and 3 because there are no draws preceding the BOP commands +/// Marker 4 is only written once DrawX finishes execution +/// Marker 5 doesn't wait for additional draws so it is written right after marker 4 +/// Marker 6 can be written as soon as the CP reaches the command. For instance, it is very possible that CP writes marker 6 while DrawX +/// is running and therefore marker 6 gets written before markers 4 and 5 +/// +/// \subsection eg2 Example 2: +/// +/// \code{.cpp} +/// WriteMarker(TopOfPipe, 1) +/// DrawX +/// WriteMarker(BottomOfPipe, 2) +/// WriteMarker(TopOfPipe, 3) +/// DrawY +/// WriteMarker(BottomOfPipe, 4) +/// \endcode +/// +/// In this example marker 1 is written before the start of DrawX +/// Marker 2 is written once DrawX finishes execution +/// Similarly marker 3 is written before the start of DrawY +/// Marker 4 is written once DrawY finishes execution +/// In case of a GPU hang, if markers 1 and 3 are written but markers 2 and 4 are missing we can conclude that: +/// The CP has reached both DrawX and DrawY commands since marker 1 and 3 are present +/// The fact that marker 2 and 4 are missing means that either DrawX is hanging while DrawY is at the top of the pipe or both DrawX and DrawY +/// started and both are simultaneously hanging +/// +/// \subsection eg3 Example 3: +/// +/// \code{.cpp} +/// // Start of a command buffer +/// WriteMarker(BottomOfPipe, 1) +/// DrawX +/// WriteMarker(BottomOfPipe, 2) +/// DrawY +/// 
WriteMarker(BottomOfPipe, 3) +/// DrawZ +/// WriteMarker(BottomOfPipe, 4) +/// // End of command buffer +/// \endcode +/// +/// In this example marker 1 is written before the start of DrawX +/// Marker 2 is written once DrawX finishes +/// Marker 3 is written once DrawY finishes +/// Marker 4 is written once DrawZ finishes +/// If the GPU hangs and only marker 1 is written we can conclude that the hang is happening in either DrawX, DrawY or DrawZ +/// If the GPU hangs and only marker 1 and 2 are written we can conclude that the hang is happening in DrawY or DrawZ +/// If the GPU hangs and only marker 4 is missing we can conclude that the hang is happening in DrawZ +/// +/// \subsection eg4 Example 4: +/// +/// \code{.cpp} +/// Start of a command buffer +/// WriteMarker(TopOfPipe, 1) +/// DrawX +/// WriteMarker(TopOfPipe, 2) +/// DrawY +/// WriteMarker(TopOfPipe, 3) +/// DrawZ +/// // End of command buffer +/// \endcode +/// +/// In this example, in case the GPU hangs and only marker 1 is written we can conclude that the hang is happening in DrawX +/// In case the GPU hangs and only marker 1 and 2 are written we can conclude that the hang is happening in DrawX or DrawY +/// In case the GPU hangs and all 3 markers are written we can conclude that the hang is happening in any of DrawX, DrawY or DrawZ +/// +/// \subsection eg5 Example 5: +/// +/// \code{.cpp} +/// DrawX +/// WriteMarker(TopOfPipe, 1) +/// WriteMarker(BottomOfPipe, 2) +/// DrawY +/// WriteMarker(TopOfPipe, 3) +/// WriteMarker(BottomOfPipe, 4) +/// \endcode +/// +/// Marker 1 is written right after DrawX is queued for execution. +/// Marker 2 is only written once DrawX finishes execution. +/// Marker 3 is written right after DrawY is queued for execution. +/// Marker 4 is only written once DrawY finishes execution +/// If marker 1 is written we would know that the CP has reached the command DrawX (DrawX at the top of the pipe). 
+/// If marker 2 is written we can say that DrawX has finished execution (DrawX at the bottom of the pipe). +/// In case the GPU hangs and only marker 1 and 3 are written we can conclude that the hang is happening in DrawX or DrawY +/// In case the GPU hangs and only marker 1 is written we can conclude that the hang is happening in DrawX +/// In case the GPU hangs and only marker 4 is missing we can conclude that the hang is happening in DrawY +/// +/// \section data Retrieving GPU Data +/// +/// In the event of a GPU hang, the user can inspect the system memory buffer to determine which draw has caused the hang. +/// For example: +/// \code{.cpp} +/// // Force the work to be flushed to prevent CPU ahead of GPU +/// g_pImmediateContext->Flush(); +/// +/// // Present the information rendered to the back buffer to the front buffer (the screen) +/// HRESULT hr = g_pSwapChain->Present( 0, 0 ); +/// +/// // Read the marker data buffer once detect device lost +/// if ( hr != S_OK ) +/// { +/// for (UINT i = 0; i < g_NumMarkerWritten; i++) +/// { +/// UINT64* pTempData; +/// pTempData = static_cast(pMarkerBuffer); +/// +/// // Write the marker data to file +/// ofs << i << "\r\n"; +/// ofs << std::hex << *(pTempData + i * 2) << "\r\n"; +/// ofs << std::hex << *(pTempData + (i * 2 + 1)) << "\r\n"; +/// +/// WCHAR s1[256]; +/// setlocale(LC_NUMERIC, "en_US.iso88591"); +/// +/// // Output the marker data to console +/// swprintf(s1, 256, L" The Draw count is %d; The Top maker is % 016llX and the Bottom marker is % 016llX \r\n", i, *(pTempData + i * 2), *(pTempData + (i * 2 + 1))); +/// +/// OutputDebugStringW(s1); +/// } +/// } +/// \endcode +/// +/// The console output would resemble something like: +/// \code{.cpp} +/// D3D11: Removing Device. +/// D3D11 ERROR: ID3D11Device::RemoveDevice: Device removal has been triggered for the following reason (DXGI_ERROR_DEVICE_HUNG: The Device took an unreasonable amount of time to execute its commands, or the hardware crashed/hung. 
As a result, the TDR (Timeout Detection and Recovery) mechanism has been triggered. The current Device Context was executing commands when the hang occurred. The application may want to respawn and fallback to less aggressive use of the display hardware). [ EXECUTION ERROR #378: DEVICE_REMOVAL_PROCESS_AT_FAULT] +/// The Draw count is 0; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 1; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 2; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 3; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 4; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 5; The Top maker is CDCDCDCDCDCDCDCD and the Bottom marker is CDCDCDCDCDCDCDCD +/// The Draw count is 6; The Top maker is CDCDCDCDCDCDCDCD and the Bottom marker is CDCDCDCDCDCDCDCD +/// The Draw count is 7; The Top maker is CDCDCDCDCDCDCDCD and the Bottom marker is CDCDCDCDCDCDCDCD +/// \endcode +/// +/// @{ + +/// The breadcrumb marker type +typedef enum AGSBreadcrumbMarkerType +{ + AGSTopOfPipe = 0, ///< Top-of-pipe marker + AGSBottomOfPipe = 1 ///< Bottom-of-pipe marker +} AGSBreadcrumbMarkerType; + +/// The breadcrumb marker struct used by \ref agsDriverExtensionsDX11_WriteBreadcrumb +typedef struct AGSBreadcrumbMarker +{ + unsigned long long markerData; ///< The user data to write. + AGSBreadcrumbMarkerType type; ///< Whether this marker is top or bottom of pipe. + unsigned int index; ///< The index of the marker. This should be less than the value specified in \ref AGSDX11ExtensionParams::numBreadcrumbMarkers +} AGSBreadcrumbMarker; + +/// +/// Function to write a breadcrumb marker. +/// +/// This method inserts a write marker operation in the GPU command stream. 
In the case where the GPU is hanging the write +/// command will never be reached and the marker will never get written to memory. +/// +/// In order to use this function, \ref AGSDX11ExtensionParams::numBreadcrumbMarkers must be set to a non zero value. +/// +/// \param [in] context Pointer to a context. +/// \param [in] marker Pointer to a marker. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_WriteBreadcrumb( AGSContext* context, const AGSBreadcrumbMarker* marker ); + +/// @} + +/// \defgroup dx11Topology Extended Topology +/// API for primitive topologies +/// @{ + +/// Additional topologies supported via extensions +typedef enum AGSPrimitiveTopologyDX11 +{ + AGS_PRIMITIVE_TOPOLOGY_QUADLIST = 7, ///< Quad list + AGS_PRIMITIVE_TOPOLOGY_SCREENRECTLIST = 9 ///< Screen rect list +} AGSPrimitiveTopologyDX11; + +/// +/// Function used to set the primitive topology. If you are using any of the extended topology types, then this function should +/// be called to set ALL topology types. +/// +/// The Quad List extension is a convenient way to submit quads without using an index buffer. Note that this still submits two triangles at the driver level. +/// In order to use this function, AGS must already be initialized and agsDriverExtensionsDX11_Init must have been called successfully. +/// +/// The Screen Rect extension, which is only available on GCN hardware, allows the user to pass in three of the four corners of a rectangle. +/// The hardware then uses the bounding box of the vertices to rasterize the rectangle primitive (i.e. as a rectangle rather than two triangles). +/// \note Note that this will not return valid interpolated values, only valid SV_Position values. +/// \note If either the Quad List or Screen Rect extension are used, then agsDriverExtensionsDX11_IASetPrimitiveTopology should be called in place of the native DirectX11 equivalent all the time. +/// +/// \param [in] context Pointer to a context. 
+/// \param [in] topology The topology to set on the D3D11 device. This can be either an AGS-defined topology such as AGS_PRIMITIVE_TOPOLOGY_QUADLIST +/// or a standard D3D-defined topology such as D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP. +/// NB. the AGS-defined types will require casting to a D3D_PRIMITIVE_TOPOLOGY type. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_IASetPrimitiveTopology( AGSContext* context, D3D_PRIMITIVE_TOPOLOGY topology ); + +/// @} + +/// \defgroup dx11UAVOverlap UAV Overlap +/// API for enabling overlapping UAV writes +/// +/// The AMD DX11 driver will automatically track resource usage and insert barriers as necessary to clear read-after-write (RAW) and write-after-write (WAW) +/// hazards. The UAV overlap extension allows applications to indicate to the driver it can skip inserting barriers for UAV resources used in +/// dispatches and draws within the \ref agsDriverExtensionsDX11_BeginUAVOverlap/ \ref agsDriverExtensionsDX11_EndUAVOverlap calls. This can be useful for applications to allow +/// multiple back-to-back dispatches or draws in flight even if they are accessing the same UAV resource but the data written or read does not overlap within the resource. +/// +/// Usage would be as follows: +/// \code{.cpp} +/// m_device->Dispatch( ... ); // First call that writes to the UAV +/// +/// // Disable automatic WAW syncs +/// agsDriverExtensionsDX11_BeginUAVOverlap( m_agsContext ); +/// +/// // Submit other dispatches that write to the same UAV concurrently +/// m_device->Dispatch( ... ); +/// m_device->Dispatch( ... ); +/// m_device->Dispatch( ... ); +/// +/// // Reenable automatic WAW syncs +/// agsDriverExtensionsDX11_EndUAVOverlap( m_agsContext ); +/// \endcode +/// @{ + +/// +/// Function used indicate to the driver the start of the overlap scope. +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. 
If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// with the AGS_DX11_EXTENSION_DEFERRED_CONTEXTS bit. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_BeginUAVOverlap( AGSContext* context, ID3D11DeviceContext* dxContext ); + +/// +/// Function used indicate to the driver the end of the overlap scope. +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// with the AGS_DX11_EXTENSION_DEFERRED_CONTEXTS bit. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_EndUAVOverlap( AGSContext* context, ID3D11DeviceContext* dxContext ); + +/// @} + +/// \defgroup dx11DepthBoundsTest Depth Bounds Test +/// API for enabling depth bounds testing +/// @{ + +/// +/// Function used to set the depth bounds test extension +/// +/// \param [in] context Pointer to a context +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] enabled Whether to enable or disable the depth bounds testing. If disabled, the next two args are ignored. +/// \param [in] minDepth The near depth range to clip against. +/// \param [in] maxDepth The far depth range to clip against. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetDepthBounds( AGSContext* context, ID3D11DeviceContext* dxContext, bool enabled, float minDepth, float maxDepth ); + +/// @} + +/// \defgroup mdi Multi Draw Indirect (MDI) +/// API for dispatching multiple instanced draw commands. +/// The multi draw indirect extensions allow multiple sets of DrawInstancedIndirect to be submitted in one API call. 
+/// The draw calls are issued on the GPU's command processor (CP), potentially saving the significant CPU overheads incurred by submitting the equivalent draw calls on the CPU. +/// +/// The extension allows the following code: +/// \code{.cpp} +/// // Submit n batches of DrawIndirect calls +/// for ( int i = 0; i < n; i++ ) +/// deviceContext->DrawIndexedInstancedIndirect( buffer, i * sizeof( cmd ) ); +/// \endcode +/// To be replaced by the following call: +/// \code{.cpp} +/// // Submit all n batches in one call +/// agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect( m_agsContext, deviceContext, n, buffer, 0, sizeof( cmd ) ); +/// \endcode +/// +/// The buffer used for the indirect args must be of the following formats: +/// \code{.cpp} +/// // Buffer layout for agsDriverExtensions_MultiDrawInstancedIndirect +/// struct DrawInstancedIndirectArgs +/// { +/// UINT VertexCountPerInstance; +/// UINT InstanceCount; +/// UINT StartVertexLocation; +/// UINT StartInstanceLocation; +/// }; +/// +/// // Buffer layout for agsDriverExtensions_MultiDrawIndexedInstancedIndirect +/// struct DrawIndexedInstancedIndirectArgs +/// { +/// UINT IndexCountPerInstance; +/// UINT InstanceCount; +/// UINT StartIndexLocation; +/// UINT BaseVertexLocation; +/// UINT StartInstanceLocation; +/// }; +/// \endcode +/// +/// Example usage can be seen in AMD's GeometryFX (https://github.com/GPUOpen-Effects/GeometryFX). In particular, in this file: https://github.com/GPUOpen-Effects/GeometryFX/blob/master/amd_geometryfx/src/AMD_GeometryFX_Filtering.cpp +/// +/// @{ + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] drawCount The number of draws. 
+/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. +/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawInstancedIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, unsigned int drawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] drawCount The number of draws. +/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. +/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, unsigned int drawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] pBufferForDrawCount The draw count buffer. +/// \param [in] alignedByteOffsetForDrawCount The offset into the draw count buffer. +/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. 
+/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawInstancedIndirectCountIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, ID3D11Buffer* pBufferForDrawCount, unsigned int alignedByteOffsetForDrawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] pBufferForDrawCount The draw count buffer. +/// \param [in] alignedByteOffsetForDrawCount The offset into the draw count buffer. +/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. +/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirectCountIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, ID3D11Buffer* pBufferForDrawCount, unsigned int alignedByteOffsetForDrawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// @} + +/// \defgroup shadercompiler Shader Compiler Controls +/// API for controlling DirectX11 shader compilation. +/// Check support for this feature using the AGS_DX11_EXTENSION_CREATE_SHADER_CONTROLS bit. +/// Supported in Radeon Software Version 16.9.2 (driver version 16.40.2311) onwards. +/// @{ + +/// +/// This method can be used to limit the maximum number of threads the driver uses for asynchronous shader compilation. 
+/// Setting it to 0 will disable asynchronous compilation completely and force the shaders to be compiled "inline" on the threads that call Create*Shader. +/// +/// This method can only be called before any shaders are created and being compiled by the driver. +/// If this method is called after shaders have been created the function will return AGS_FAILURE. +/// This function only sets an upper limit.The driver may create fewer threads than allowed by this function. +/// +/// \param [in] context Pointer to a context. +/// \param [in] numberOfThreads The maximum number of threads to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetMaxAsyncCompileThreadCount( AGSContext* context, unsigned int numberOfThreads ); + +/// +/// This method can be used to determine the total number of asynchronous shader compile jobs that are either +/// queued for waiting for compilation or being compiled by the driver's asynchronous compilation threads. +/// This method can be called at any during the lifetime of the driver. +/// +/// \param [in] context Pointer to a context. +/// \param [out] numberOfJobs Pointer to the number of jobs in flight currently. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NumPendingAsyncCompileJobs( AGSContext* context, unsigned int* numberOfJobs ); + +/// +/// This method can be used to enable or disable the disk based shader cache. +/// Enabling/disabling the disk cache is not supported if is it disabled explicitly via Radeon Settings or by an app profile. +/// Calling this method under these conditions will result in AGS_FAILURE being returned. +/// It is recommended that this method be called before any shaders are created by the application and being compiled by the driver. +/// Doing so at any other time may result in the cache being left in an inconsistent state. +/// +/// \param [in] context Pointer to a context. +/// \param [in] enable Whether to enable the disk cache. 0 to disable, 1 to enable. 
+/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetDiskShaderCacheEnabled( AGSContext* context, int enable ); + +/// @} + +/// \defgroup multiview Multiview +/// API for multiview broadcasting. +/// Check support for this feature using the AGS_DX11_EXTENSION_MULTIVIEW bit. +/// Supported in Radeon Software Version 16.12.1 (driver version 16.50.2001) onwards. +/// @{ + +/// +/// Function to control draw calls replication to multiple viewports and RT slices. +/// Setting any mask to 0 disables draw replication. +/// +/// \param [in] context Pointer to a context. +/// \param [in] vpMask Viewport control bit mask. +/// \param [in] rtSliceMask RT slice control bit mask. +/// \param [in] vpMaskPerRtSliceEnabled If 0, 16 lower bits of vpMask apply to all RT slices; if 1 each 16 bits of 64-bit mask apply to corresponding 4 RT slices. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetViewBroadcastMasks( AGSContext* context, unsigned long long vpMask, unsigned long long rtSliceMask, int vpMaskPerRtSliceEnabled ); + +/// +/// Function returns max number of supported clip rectangles. +/// +/// \param [in] context Pointer to a context. +/// \param [out] maxRectCount Returned max number of clip rectangles. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_GetMaxClipRects( AGSContext* context, unsigned int* maxRectCount ); + +/// The inclusion mode for the rect +typedef enum AGSClipRectMode +{ + AGSClipRectIncluded = 0, ///< Include the rect + AGSClipRectExcluded = 1 ///< Exclude the rect +} AGSClipRectMode; + +/// The clip rectangle struct used by \ref agsDriverExtensionsDX11_SetClipRects +typedef struct AGSClipRect +{ + AGSClipRectMode mode; ///< Include/exclude rect region + AGSRect rect; ///< The rect to include/exclude +} AGSClipRect; + +/// +/// Function sets clip rectangles. +/// +/// \param [in] context Pointer to a context. +/// \param [in] clipRectCount Number of specified clip rectangles. Use 0 to disable clip rectangles. 
+/// \param [in] clipRects Array of clip rectangles. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetClipRects( AGSContext* context, unsigned int clipRectCount, const AGSClipRect* clipRects ); + +/// @} + +/// \defgroup cfxapi Explicit Crossfire API +/// API for explicit control over Crossfire +/// @{ + +/// The Crossfire API transfer types +typedef enum AGSAfrTransferType +{ + AGS_AFR_TRANSFER_DEFAULT = 0, ///< Default Crossfire driver resource tracking + AGS_AFR_TRANSFER_DISABLE = 1, ///< Turn off driver resource tracking + AGS_AFR_TRANSFER_1STEP_P2P = 2, ///< App controlled GPU to next GPU transfer + AGS_AFR_TRANSFER_2STEP_NO_BROADCAST = 3, ///< App controlled GPU to next GPU transfer using intermediate system memory + AGS_AFR_TRANSFER_2STEP_WITH_BROADCAST = 4, ///< App controlled GPU to all render GPUs transfer using intermediate system memory +} AGSAfrTransferType; + +/// The Crossfire API transfer engines +typedef enum AGSAfrTransferEngine +{ + AGS_AFR_TRANSFERENGINE_DEFAULT = 0, ///< Use default engine for Crossfire API transfers + AGS_AFR_TRANSFERENGINE_3D_ENGINE = 1, ///< Use 3D engine for Crossfire API transfers + AGS_AFR_TRANSFERENGINE_COPY_ENGINE = 2, ///< Use Copy engine for Crossfire API transfers +} AGSAfrTransferEngine; + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. +/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] buffer Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. 
+/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateBuffer( AGSContext* context, const D3D11_BUFFER_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Buffer** buffer, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. +/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] texture1D Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateTexture1D( AGSContext* context, const D3D11_TEXTURE1D_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Texture1D** texture1D, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. +/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] texture2D Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateTexture2D( AGSContext* context, const D3D11_TEXTURE2D_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Texture2D** texture2D, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. 
+/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] texture3D Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateTexture3D( AGSContext* context, const D3D11_TEXTURE3D_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Texture3D** texture3D, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to notify the driver that we have finished writing to the resource this frame. +/// This will initiate a transfer for AGS_AFR_TRANSFER_1STEP_P2P, +/// AGS_AFR_TRANSFER_2STEP_NO_BROADCAST, and AGS_AFR_TRANSFER_2STEP_WITH_BROADCAST. +/// +/// \param [in] context Pointer to a context. +/// \param [in] resource Pointer to the resource. +/// \param [in] transferRegions An array of transfer regions (can be null to specify the whole area). +/// \param [in] subresourceArray An array of subresource indices (can be null to specify all subresources). +/// \param [in] numSubresources The number of subresources in subresourceArray OR number of transferRegions. Use 0 to specify ALL subresources and one transferRegion (which may be null if specifying the whole area). +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NotifyResourceEndWrites( AGSContext* context, ID3D11Resource* resource, const D3D11_RECT* transferRegions, const unsigned int* subresourceArray, unsigned int numSubresources ); + +/// +/// This will notify the driver that the app will begin read/write access to the resource. +/// +/// \param [in] context Pointer to a context. +/// \param [in] resource Pointer to the resource. 
+/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NotifyResourceBeginAllAccess( AGSContext* context, ID3D11Resource* resource ); + +/// +/// This is used for AGS_AFR_TRANSFER_1STEP_P2P to notify when it is safe to initiate a transfer. +/// This call in frame N-(NumGpus-1) allows a 1 step P2P in frame N to start. +/// This should be called after agsDriverExtensionsDX11_NotifyResourceEndWrites. +/// +/// \param [in] context Pointer to a context. +/// \param [in] resource Pointer to the resource. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NotifyResourceEndAllAccess( AGSContext* context, ID3D11Resource* resource ); + +/// @} +/// @} + +#endif // AGS_EXCLUDE_DIRECTX_11 + +/// \defgroup typedefs Function pointer typedefs +/// List of function pointer typedefs for the API +/// @{ + +typedef AMD_AGS_API AGSDriverVersionResult (*AGS_CHECKDRIVERVERSION)( const char*, unsigned int ); ///< \ref agsCheckDriverVersion +typedef AMD_AGS_API int (*AGS_GETVERSIONNUMBER)(); ///< \ref agsGetVersionNumber +typedef AMD_AGS_API AGSReturnCode (*AGS_INITIALIZE)( int, const AGSConfiguration*, AGSContext**, AGSGPUInfo* ); ///< \ref agsInitialize +typedef AMD_AGS_API AGSReturnCode (*AGS_DEINITIALIZE)( AGSContext* ); ///< \ref agsDeInitialize +typedef AMD_AGS_API AGSReturnCode (*AGS_GETGPUINFO)( AGSContext*, AGSGPUInfo* ); ///< \ref agsGetGPUInfo +typedef AMD_AGS_API AGSReturnCode (*AGS_SETDISPLAYMODE)( AGSContext*, int, int, const AGSDisplaySettings* ); ///< \ref agsSetDisplayMode +#if !defined (AGS_EXCLUDE_DIRECTX_12) +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_CREATEDEVICE)( AGSContext*, const AGSDX12DeviceCreationParams*, const AGSDX12ExtensionParams*, AGSDX12ReturnedParams* ); ///< \ref agsDriverExtensionsDX12_CreateDevice +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_DESTROYDEVICE)( AGSContext*, ID3D12Device*, unsigned int* ); ///< \ref agsDriverExtensionsDX12_DestroyDevice +typedef AMD_AGS_API AGSReturnCode 
(*AGS_DRIVEREXTENSIONSDX12_PUSHMARKER)( AGSContext*, ID3D12GraphicsCommandList*, const char* ); ///< \ref agsDriverExtensionsDX12_PushMarker +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_POPMARKER)( AGSContext*, ID3D12GraphicsCommandList* ); ///< \ref agsDriverExtensionsDX12_PopMarker +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_SETMARKER)( AGSContext*, ID3D12GraphicsCommandList*, const char* ); ///< \ref agsDriverExtensionsDX12_SetMarker +#endif +#if !defined (AGS_EXCLUDE_DIRECTX_11) +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATEDEVICE)( AGSContext*, const AGSDX11DeviceCreationParams*, const AGSDX11ExtensionParams*, AGSDX11ReturnedParams* ); ///< \ref agsDriverExtensionsDX11_CreateDevice +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_DESTROYDEVICE)( AGSContext*, ID3D11Device*, unsigned int*, ID3D11DeviceContext*, unsigned int* ); ///< \ref agsDriverExtensionsDX11_DestroyDevice +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_WRITEBREADCRUMB)( AGSContext*, const AGSBreadcrumbMarker* ); ///< \ref agsDriverExtensionsDX11_WriteBreadcrumb +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_IASETPRIMITIVETOPOLOGY)( AGSContext*, enum D3D_PRIMITIVE_TOPOLOGY ); ///< \ref agsDriverExtensionsDX11_IASetPrimitiveTopology +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_BEGINUAVOVERLAP)( AGSContext*, ID3D11DeviceContext* ); ///< \ref agsDriverExtensionsDX11_BeginUAVOverlap +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_ENDUAVOVERLAP)( AGSContext*, ID3D11DeviceContext* ); ///< \ref agsDriverExtensionsDX11_EndUAVOverlap +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETDEPTHBOUNDS)( AGSContext*, ID3D11DeviceContext*, bool, float, float ); ///< \ref agsDriverExtensionsDX11_SetDepthBounds +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINSTANCEDINDIRECT)( AGSContext*, ID3D11DeviceContext*, unsigned int, ID3D11Buffer*, 
unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawInstancedIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINDEXEDINSTANCEDINDIRECT)( AGSContext*, ID3D11DeviceContext*, unsigned int, ID3D11Buffer*, unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINSTANCEDINDIRECTCOUNTINDIRECT)( AGSContext*, ID3D11DeviceContext*, ID3D11Buffer*, unsigned int, ID3D11Buffer*, unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawInstancedIndirectCountIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINDEXEDINSTANCEDINDIRECTCOUNTINDIRECT)( AGSContext*, ID3D11DeviceContext*, ID3D11Buffer*, unsigned int, ID3D11Buffer*, unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirectCountIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETMAXASYNCCOMPILETHREADCOUNT)( AGSContext*, unsigned int ); ///< \ref agsDriverExtensionsDX11_SetMaxAsyncCompileThreadCount +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NUMPENDINGASYNCOMPILEJOBS)( AGSContext*, unsigned int* ); ///< \ref agsDriverExtensionsDX11_NumPendingAsyncCompileJobs +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETDISKSHADERCACHEENABLED)( AGSContext*, int ); ///< \ref agsDriverExtensionsDX11_SetDiskShaderCacheEnabled +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETVIEWBROADCASTMASKS)( AGSContext*, unsigned long long, unsigned long long, int ); ///< \ref agsDriverExtensionsDX11_SetViewBroadcastMasks +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_GETMAXCLIPRECTS)( AGSContext*, unsigned int* ); ///< \ref agsDriverExtensionsDX11_GetMaxClipRects +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETCLIPRECTS)( AGSContext*, unsigned int, const AGSClipRect* ); ///< \ref 
agsDriverExtensionsDX11_SetClipRects +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATEBUFFER)( AGSContext*, const D3D11_BUFFER_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Buffer**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateBuffer +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATETEXTURE1D)( AGSContext*, const D3D11_TEXTURE1D_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Texture1D**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateTexture1D +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATETEXTURE2D)( AGSContext*, const D3D11_TEXTURE2D_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Texture2D**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateTexture2D +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATETEXTURE3D)( AGSContext*, const D3D11_TEXTURE3D_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Texture3D**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateTexture3D +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NOTIFYRESOURCEENDWRITES)( AGSContext*, ID3D11Resource*, const D3D11_RECT*, const unsigned int*, unsigned int ); ///< \ref agsDriverExtensionsDX11_NotifyResourceEndWrites +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NOTIFYRESOURCEBEGINALLACCESS)( AGSContext*, ID3D11Resource* ); ///< \ref agsDriverExtensionsDX11_NotifyResourceBeginAllAccess +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NOTIFYRESOURCEENDALLACCESS)( AGSContext*, ID3D11Resource* ); ///< \ref agsDriverExtensionsDX11_NotifyResourceEndAllAccess +#endif +/// @} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // AMD_AGS_H diff --git a/Source/ThirdParty/nvapi/LICENSE.txt b/Source/ThirdParty/nvapi/License.txt similarity index 100% rename from Source/ThirdParty/nvapi/LICENSE.txt rename to Source/ThirdParty/nvapi/License.txt diff --git 
a/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs b/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs new file mode 100644 index 000000000..60be17f0b --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs @@ -0,0 +1,46 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using Flax.Build; +using System.IO; + +namespace Flax.Deps.Dependencies +{ + /// + /// AMD GPU Services (AGS) library + /// https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK + /// + /// + class AGS : Dependency + { + /// + public override TargetPlatform[] Platforms + { + get => new[] { TargetPlatform.Windows }; + } + + /// + public override void Build(BuildOptions options) + { + var root = options.IntermediateFolder; + var moduleFolder = Path.Combine(options.ThirdPartyFolder, "AGS"); + + // Get the source + CloneGitRepoFast(root, "https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK.git"); + + // Copy files + foreach (var platform in options.Platforms) + { + BuildStarted(platform); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + Utilities.FileCopy(Path.Combine(root, "ags_lib/lib/amd_ags_x64.lib"), Path.Combine(depsFolder, "amd_ags_x64.lib")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/lib/amd_ags_x64.dll"), Path.Combine(depsFolder, "amd_ags_x64.dll")); + } + + // Copy license and header files + Utilities.FileCopy(Path.Combine(root, "LICENSE.txt"), Path.Combine(moduleFolder, "LICENSE.txt")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/inc/amd_ags.h"), Path.Combine(moduleFolder, "amd_ags.h")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/hlsl/ags_shader_intrinsics_dx11.hlsl"), Path.Combine(moduleFolder, "ags_shader_intrinsics_dx11.hlsl")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/hlsl/ags_shader_intrinsics_dx12.hlsl"), Path.Combine(moduleFolder, "ags_shader_intrinsics_dx12.hlsl")); + } + } +} diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs index 
68ef2eaf7..d1d94b4c1 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs @@ -36,7 +36,7 @@ namespace Flax.Deps.Dependencies } // Copy license and header files - Utilities.FileCopy(Path.Combine(root, "License.txt"), Path.Combine(moduleFolder, "LICENSE.txt")); + Utilities.FileCopy(Path.Combine(root, "License.txt"), Path.Combine(moduleFolder, "License.txt")); var files = new[] { "nvHLSLExtns.h", From d109e5ca9fff153663350e924a5e2945c2ea48a4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 10 Aug 2025 17:16:36 +0200 Subject: [PATCH 180/211] Add `AGS` lib to D3D11 for efficient UAV writes overlaps on AMD GPUs --- .../DirectX/DX11/GPUContextDX11.cpp | 15 ++++++- .../DirectX/DX11/GPUDeviceDX11.cpp | 44 ++++++++++++++++++- .../DirectX/DX11/GraphicsDeviceDX11.Build.cs | 2 + Source/Engine/Utilities/StringConverter.h | 2 + 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 6d0957014..9b648bd51 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -18,6 +18,10 @@ #include extern bool EnableNvapi; #endif +#if COMPILE_WITH_AGS +#include +extern AGSContext* AgsContext; +#endif #define DX11_CLEAR_SR_ON_STAGE_DISABLE 0 @@ -920,7 +924,16 @@ void GPUContextDX11::OverlapUA(bool end) return; } #endif - // TODO: add support for AMD extensions to overlap UAV writes (agsDriverExtensionsDX11_BeginUAVOverlap/agsDriverExtensionsDX11_EndUAVOverlap) +#if COMPILE_WITH_AGS + if (AgsContext) + { + if (end) + agsDriverExtensionsDX11_EndUAVOverlap(AgsContext, _context); + else + agsDriverExtensionsDX11_BeginUAVOverlap(AgsContext, _context); + return; + } +#endif // TODO: add support for Intel extensions to overlap UAV writes (INTC_D3D11_BeginUAVOverlap/INTC_D3D11_EndUAVOverlap) } diff --git 
a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index ca479a7c0..b187ed3c7 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -24,6 +24,10 @@ #include bool EnableNvapi = false; #endif +#if COMPILE_WITH_AGS +#include +AGSContext* AgsContext = nullptr; +#endif #if !USE_EDITOR && PLATFORM_WINDOWS #include "Engine/Core/Config/PlatformSettings.h" #endif @@ -436,7 +440,7 @@ bool GPUDeviceDX11::Init() NvAPI_ShortString buildBranch(""); if (NvAPI_SYS_GetDriverAndBranchVersion(&driverVersion, buildBranch) == NVAPI_OK) { - LOG(Info, "NvApi driver version: {}, {}", driverVersion, StringAsUTF16<>(buildBranch).Get()); + LOG(Info, "NvApi driver version: {}, {}", driverVersion, TO_UTF16(buildBranch)); } } else @@ -447,6 +451,44 @@ bool GPUDeviceDX11::Init() } } #endif +#if COMPILE_WITH_AGS + if (_adapter->IsAMD()) + { + AGSGPUInfo gpuInfo = {}; + AGSConfiguration config = {}; + AGSReturnCode returnCode = agsInitialize(AGS_CURRENT_VERSION, &config, &AgsContext, &gpuInfo); + if (returnCode == AGS_SUCCESS) + { + LOG(Info, "AMD driver version: {}, Radeon Software Version {}", TO_UTF16(gpuInfo.driverVersion), TO_UTF16(gpuInfo.radeonSoftwareVersion)); + for (int32 i = 0; i < gpuInfo.numDevices; i++) + { + AGSDeviceInfo& deviceInfo = gpuInfo.devices[i]; + const Char* asicFamily[] = + { + TEXT("Unknown"), + TEXT("Pre GCN"), + TEXT("GCN Gen1"), + TEXT("GCN Gen2"), + TEXT("GCN Gen3"), + TEXT("GCN Gen4"), + TEXT("Vega"), + TEXT("RDNA"), + TEXT("RDNA2"), + TEXT("RDNA3"), + TEXT("RDNA4"), + }; + LOG(Info, " > GPU {}: {} ({})", i, TO_UTF16(deviceInfo.adapterString), asicFamily[deviceInfo.asicFamily <= AGSAsicFamily_RDNA4 ? 
deviceInfo.asicFamily : 0]); + LOG(Info, " CUs: {}, WGPs: {}, ROPs: {}", deviceInfo.numCUs, deviceInfo.numWGPs, deviceInfo.numROPs); + LOG(Info, " Core clock: {} MHz, Memory clock: {} MHz, {:.2f} Tflops", deviceInfo.coreClock, deviceInfo.memoryClock, deviceInfo.teraFlops); + LOG(Info, " Local memory: {} MB ({:.2f} GB/s), Shared memory: {} MB", (int32)(deviceInfo.localMemoryInBytes / (1024ull * 1024ull)), (float)deviceInfo.memoryBandwidth / 1024.0f, (int32)(deviceInfo.sharedMemoryInBytes / (1024ull * 1024ull))); + } + } + else + { + LOG(Warning, "agsInitialize failed with result {} ({})", (int32)returnCode); + } + } +#endif // Get DXGI adapter ComPtr adapter; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs b/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs index b8ffd2629..0e84fd95b 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs @@ -16,5 +16,7 @@ public class GraphicsDeviceDX11 : GraphicsDeviceBaseModule options.OutputFiles.Add("d3d11.lib"); if (nvapi.Use(options)) options.PrivateDependencies.Add("nvapi"); + if (AGS.Use(options)) + options.PrivateDependencies.Add("AGS"); } } diff --git a/Source/Engine/Utilities/StringConverter.h b/Source/Engine/Utilities/StringConverter.h index 7f1d07e06..9e6fa9569 100644 --- a/Source/Engine/Utilities/StringConverter.h +++ b/Source/Engine/Utilities/StringConverter.h @@ -133,6 +133,8 @@ public: } }; +#define TO_UTF16(str) StringAsUTF16<>(str).Get() + template class StringAsTerminated { From c946fa239ec6f27b1c513e58254f2df846f1a714 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 10 Aug 2025 22:54:37 +0200 Subject: [PATCH 181/211] Fix gpu particles crash from 0ea555b041f1df5bc8e1a9d0565ec6700e8af0c8 on first draw --- Source/Engine/Particles/Particles.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Particles/Particles.cpp 
b/Source/Engine/Particles/Particles.cpp index 46649f82a..bf529776f 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -1354,9 +1354,16 @@ void UpdateGPU(RenderTask* task, GPUContext* context) if (!emitter || !emitter->IsLoaded() || emitter->SimulationMode != ParticlesSimulationMode::GPU || instance.Emitters.Count() <= emitterIndex) continue; ParticleEmitterInstance& data = instance.Emitters[emitterIndex]; - if (!data.Buffer || !emitter->GPU.CanSim(emitter, data)) + if (!data.Buffer) continue; ASSERT(emitter->Capacity != 0 && emitter->Graph.Layout.Size != 0); + if (!emitter->GPU.CanSim(emitter, data)) + { + // Emitters that are culled still might need to clear the particle counter (used for indirect draws) + if (data.Buffer->GPU.PendingClear) + emitter->GPU.PreSim(context, emitter, effect, emitterIndex, data); + continue; + } sims.Add({ effect, emitter, emitterIndex, data }); } } From a18314c6693929b4dfdc0bf1eb26b72983bb1483 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 11 Aug 2025 13:23:46 +0200 Subject: [PATCH 182/211] Add various minor improvements --- Content/Shaders/PostProcessing.flax | 4 ++-- Source/Editor/Surface/VisjectSurface.Input.cs | 2 ++ .../Engine/Physics/Actors/WheeledVehicle.cpp | 2 +- Source/Engine/Physics/Actors/WheeledVehicle.h | 2 +- .../Physics/PhysX/PhysicsBackendPhysX.cpp | 4 +++- Source/Engine/Renderer/ColorGradingPass.cpp | 1 + Source/Engine/Renderer/PostProcessingPass.cpp | 20 +++++++++++-------- Source/Shaders/PostProcessing.shader | 2 +- 8 files changed, 23 insertions(+), 14 deletions(-) diff --git a/Content/Shaders/PostProcessing.flax b/Content/Shaders/PostProcessing.flax index 931c0b436..e34c5c185 100644 --- a/Content/Shaders/PostProcessing.flax +++ b/Content/Shaders/PostProcessing.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e477eae82ac3a988f70c59913e5eeac39438edb8191b81fe6ca970b87754cb93 -size 22689 +oid 
sha256:e442c2d6607e40da68e3aa9414390386d44cc7bc8c677a1f5a5e4a536857b906 +size 22688 diff --git a/Source/Editor/Surface/VisjectSurface.Input.cs b/Source/Editor/Surface/VisjectSurface.Input.cs index 09df195eb..891750fd7 100644 --- a/Source/Editor/Surface/VisjectSurface.Input.cs +++ b/Source/Editor/Surface/VisjectSurface.Input.cs @@ -120,6 +120,8 @@ namespace FlaxEditor.Surface private void UpdateSelectionRectangle() { + if (Root == null) + return; var p1 = _rootControl.PointFromParent(ref _leftMouseDownPos); var p2 = _rootControl.PointFromParent(ref _mousePos); var selectionRect = Rectangle.FromPoints(p1, p2); diff --git a/Source/Engine/Physics/Actors/WheeledVehicle.cpp b/Source/Engine/Physics/Actors/WheeledVehicle.cpp index 28808be17..248bffc20 100644 --- a/Source/Engine/Physics/Actors/WheeledVehicle.cpp +++ b/Source/Engine/Physics/Actors/WheeledVehicle.cpp @@ -191,7 +191,7 @@ void WheeledVehicle::SetThrottle(float value) _throttle = Math::Clamp(value, -1.0f, 1.0f); } -float WheeledVehicle::GetThrottle() +float WheeledVehicle::GetThrottle() const { return _throttle; } diff --git a/Source/Engine/Physics/Actors/WheeledVehicle.h b/Source/Engine/Physics/Actors/WheeledVehicle.h index 037157158..87424fe72 100644 --- a/Source/Engine/Physics/Actors/WheeledVehicle.h +++ b/Source/Engine/Physics/Actors/WheeledVehicle.h @@ -545,7 +545,7 @@ public: /// Get the vehicle throttle. It is the analog accelerator pedal value in range (0,1) where 1 represents the pedal fully pressed and 0 represents the pedal in its rest state. /// /// The vehicle throttle. - API_FUNCTION() float GetThrottle(); + API_FUNCTION() float GetThrottle() const; /// /// Sets the input for vehicle steering. Steer is the analog steer value in range (-1,1) where -1 represents the steering wheel at left lock and +1 represents the steering wheel at right lock. 
diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index bacd976a1..5805301ff 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -496,7 +496,9 @@ protected: #define PxHitFlagEmpty (PxHitFlags)0 #define SCENE_QUERY_FLAGS (PxHitFlag::ePOSITION | PxHitFlag::eNORMAL | PxHitFlag::eFACE_INDEX | PxHitFlag::eUV) -#define SCENE_QUERY_SETUP(blockSingle) auto scenePhysX = (ScenePhysX*)scene; if (scene == nullptr) return false; \ +#define SCENE_QUERY_SETUP(blockSingle) PROFILE_CPU(); \ + auto scenePhysX = (ScenePhysX*)scene; \ + if (scene == nullptr) return false; \ PxQueryFilterData filterData; \ filterData.flags |= PxQueryFlag::ePREFILTER; \ filterData.data.word0 = layerMask; \ diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp b/Source/Engine/Renderer/ColorGradingPass.cpp index caa5ee44c..bbb45ef4c 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -194,6 +194,7 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) // Check if LUT parameter hasn't been changed since the last time if (Platform::MemoryCompare(&colorGradingBuffer.CachedData , &data, sizeof(Data)) == 0 && colorGradingBuffer.Mode == toneMapping.Mode && + Engine::FrameCount > 30 && // Skip caching when engine is starting TODO: find why this hack is needed colorGradingBuffer.LutTexture == lutTexture) { // Resue existing texture diff --git a/Source/Engine/Renderer/PostProcessingPass.cpp b/Source/Engine/Renderer/PostProcessingPass.cpp index 0945e74de..a9eba14d2 100644 --- a/Source/Engine/Renderer/PostProcessingPass.cpp +++ b/Source/Engine/Renderer/PostProcessingPass.cpp @@ -367,15 +367,19 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, // Bloom auto tempDesc = GPUTextureDescription::New2D(w2, h2, bloomMipCount, output->Format(), 
GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews); - auto bloomBuffer1 = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(bloomBuffer1, "PostProcessing.Bloom"); - auto bloomBuffer2 = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(bloomBuffer2, "PostProcessing.Bloom"); - - for (int32 mip = 0; mip < bloomMipCount; mip++) + GPUTexture* bloomBuffer1 = nullptr, *bloomBuffer2 = nullptr; + if (useBloom || useLensFlares) { - context->Clear(bloomBuffer1->View(0, mip), Color::Transparent); - context->Clear(bloomBuffer2->View(0, mip), Color::Transparent); + bloomBuffer1 = RenderTargetPool::Get(tempDesc); + bloomBuffer2 = RenderTargetPool::Get(tempDesc); + RENDER_TARGET_POOL_SET_NAME(bloomBuffer1, "PostProcessing.Bloom"); + RENDER_TARGET_POOL_SET_NAME(bloomBuffer2, "PostProcessing.Bloom"); + + for (int32 mip = 0; mip < bloomMipCount; mip++) + { + context->Clear(bloomBuffer1->View(0, mip), Color::Transparent); + context->Clear(bloomBuffer2->View(0, mip), Color::Transparent); + } } if (useBloom) diff --git a/Source/Shaders/PostProcessing.shader b/Source/Shaders/PostProcessing.shader index 851d31e38..aa70833d2 100644 --- a/Source/Shaders/PostProcessing.shader +++ b/Source/Shaders/PostProcessing.shader @@ -650,7 +650,7 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target } else { - color = Input0.Sample(SamplerLinearClamp, uv); + color = Input0.Sample(SamplerPointClamp, uv); } // Lens Flares From 9f14bb72790ec688940a46b2540c70db27519424 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 11 Aug 2025 13:24:11 +0200 Subject: [PATCH 183/211] Optimize GPU particles simulation by scheduling larger sims first to overlap more work --- Source/Engine/Particles/Particles.cpp | 41 ++++++++++++++++++--------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index bf529776f..54b3d1dd8 100644 --- 
a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -1333,7 +1333,19 @@ void UpdateGPU(RenderTask* task, GPUContext* context) ParticleEffect* Effect; ParticleEmitter* Emitter; int32 EmitterIndex; - ParticleEmitterInstance& Data; + ParticleEmitterInstance* Data; + + bool operator<(const GPUSim& other) const + { + // Sort by particle count (larger effects start first) + if (Data->Buffer->GPU.ParticlesCountMax != other.Data->Buffer->GPU.ParticlesCountMax) + return Data->Buffer->GPU.ParticlesCountMax > other.Data->Buffer->GPU.ParticlesCountMax; + if (Emitter->Capacity != other.Emitter->Capacity) + return Emitter->Capacity > other.Emitter->Capacity; + + // Merge emitters together (compute pipeline switches) + return (uintptr)Emitter < (uintptr)other.Emitter; + } }; Array sims; sims.EnsureCapacity(Math::AlignUp(GpuUpdateList.Count(), 64)); // Preallocate with some slack @@ -1364,11 +1376,14 @@ void UpdateGPU(RenderTask* task, GPUContext* context) emitter->GPU.PreSim(context, emitter, effect, emitterIndex, data); continue; } - sims.Add({ effect, emitter, emitterIndex, data }); + sims.Add({ effect, emitter, emitterIndex, &data }); } } GpuUpdateList.Clear(); + // Sort particles by emitter type to reduce compute pipeline switches + Sorting::QuickSort(sims); + // Pre-pass with buffers setup { PROFILE_CPU_NAMED("PreSim"); @@ -1376,14 +1391,14 @@ void UpdateGPU(RenderTask* task, GPUContext* context) GPUMemoryPass pass(context); for (GPUSim& sim : sims) { - if (sim.Data.Buffer->GPU.PendingClear) - pass.Transition(sim.Data.Buffer->GPU.Buffer, GPUResourceAccess::CopyWrite); - pass.Transition(sim.Data.Buffer->GPU.BufferSecondary, GPUResourceAccess::CopyWrite); + if (sim.Data->Buffer->GPU.PendingClear) + pass.Transition(sim.Data->Buffer->GPU.Buffer, GPUResourceAccess::CopyWrite); + pass.Transition(sim.Data->Buffer->GPU.BufferSecondary, GPUResourceAccess::CopyWrite); } for (GPUSim& sim : sims) { - sim.Emitter->GPU.PreSim(context, sim.Emitter, 
sim.Effect, sim.EmitterIndex, sim.Data); + sim.Emitter->GPU.PreSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, *sim.Data); } } @@ -1394,13 +1409,13 @@ void UpdateGPU(RenderTask* task, GPUContext* context) GPUComputePass pass(context); for (GPUSim& sim : sims) { - pass.Transition(sim.Data.Buffer->GPU.Buffer, GPUResourceAccess::ShaderReadCompute); - pass.Transition(sim.Data.Buffer->GPU.BufferSecondary, GPUResourceAccess::UnorderedAccess); + pass.Transition(sim.Data->Buffer->GPU.Buffer, GPUResourceAccess::ShaderReadCompute); + pass.Transition(sim.Data->Buffer->GPU.BufferSecondary, GPUResourceAccess::UnorderedAccess); } for (GPUSim& sim : sims) { - sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, *sim.Data); } } @@ -1411,16 +1426,16 @@ void UpdateGPU(RenderTask* task, GPUContext* context) GPUMemoryPass pass(context); for (GPUSim& sim : sims) { - if (sim.Data.CustomData.HasItems()) + if (sim.Data->CustomData.HasItems()) { - pass.Transition(sim.Data.Buffer->GPU.BufferSecondary, GPUResourceAccess::CopyRead); - pass.Transition(sim.Data.Buffer->GPU.Buffer, GPUResourceAccess::CopyWrite); + pass.Transition(sim.Data->Buffer->GPU.BufferSecondary, GPUResourceAccess::CopyRead); + pass.Transition(sim.Data->Buffer->GPU.Buffer, GPUResourceAccess::CopyWrite); } } for (GPUSim& sim : sims) { - sim.Emitter->GPU.PostSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + sim.Emitter->GPU.PostSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, *sim.Data); } } From 99707b65862ba4a01e1ba398a7bb526c10e9ddca Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 11 Aug 2025 18:15:47 +0200 Subject: [PATCH 184/211] Add various profiler events for more insights --- Source/Engine/Audio/AudioClip.cpp | 10 ++++++++++ Source/Engine/Scripting/Runtime/DotNet.cpp | 6 ++++-- Source/Engine/Tools/AudioTool/AudioTool.cpp | 5 +++++ .../Tools/AudioTool/OggVorbisDecoder.cpp | 
18 ++++++++++++++++++ .../Engine/Tools/AudioTool/OggVorbisDecoder.h | 17 ++--------------- 5 files changed, 39 insertions(+), 17 deletions(-) diff --git a/Source/Engine/Audio/AudioClip.cpp b/Source/Engine/Audio/AudioClip.cpp index 2ca7f3512..0b8e40c81 100644 --- a/Source/Engine/Audio/AudioClip.cpp +++ b/Source/Engine/Audio/AudioClip.cpp @@ -7,6 +7,7 @@ #include "Engine/Core/Log.h" #include "Engine/Content/Upgraders/AudioClipUpgrader.h" #include "Engine/Content/Factories/BinaryAssetFactory.h" +#include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Scripting/ManagedCLR/MUtils.h" #include "Engine/Streaming/StreamingGroup.h" #include "Engine/Serialization/MemoryReadStream.h" @@ -19,10 +20,15 @@ REGISTER_BINARY_ASSET_WITH_UPGRADER(AudioClip, "FlaxEngine.AudioClip", AudioClip bool AudioClip::StreamingTask::Run() { + PROFILE_CPU_NAMED("AudioStreaming"); PROFILE_MEM(Audio); AssetReference ref = _asset.Get(); if (ref == nullptr || AudioBackend::Instance == nullptr) return true; +#if TRACY_ENABLE + const StringView name(ref->GetPath()); + ZoneName(*name, name.Length()); +#endif ScopeLock lock(ref->Locker); const auto& queue = ref->StreamingQueue; if (queue.Count() == 0) @@ -421,6 +427,8 @@ bool AudioClip::WriteBuffer(int32 chunkIndex) // Ensure audio backend exists if (AudioBackend::Instance == nullptr) return true; + PROFILE_CPU(); + PROFILE_MEM(Audio); const auto chunk = GetChunk(chunkIndex); if (chunk == nullptr || chunk->IsMissing()) @@ -432,6 +440,7 @@ bool AudioClip::WriteBuffer(int32 chunkIndex) Array tmp1, tmp2; AudioDataInfo info = AudioHeader.Info; const uint32 bytesPerSample = info.BitDepth / 8; + ZoneValue(chunk->Size() / 1024); // Audio data size (in kB) // Get raw data or decompress it switch (Format()) @@ -439,6 +448,7 @@ bool AudioClip::WriteBuffer(int32 chunkIndex) case AudioFormat::Vorbis: { #if COMPILE_WITH_OGG_VORBIS + PROFILE_CPU_NAMED("OggVorbisDecode"); OggVorbisDecoder decoder; MemoryReadStream stream(chunk->Get(), chunk->Size()); AudioDataInfo 
tmpInfo; diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 3f3e33135..7e717d185 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -535,7 +535,8 @@ void MCore::GC::Collect(int32 generation) void MCore::GC::Collect(int32 generation, MGCCollectionMode collectionMode, bool blocking, bool compacting) { - PROFILE_CPU(); + PROFILE_CPU_NAMED("GC Collect"); + ZoneColor(0xe3c349); static void* GCCollectPtr = GetStaticMethodPointer(TEXT("GCCollect")); CallStaticMethod(GCCollectPtr, generation, (int)collectionMode, blocking, compacting); } @@ -554,7 +555,8 @@ void MCore::GC::MemoryInfo(int64& totalCommitted, int64& heapSize) void MCore::GC::WaitForPendingFinalizers() { - PROFILE_CPU(); + PROFILE_CPU_NAMED("GC WaitForPendingFinalizers"); + ZoneColor(TracyWaitZoneColor); static void* GCWaitForPendingFinalizersPtr = GetStaticMethodPointer(TEXT("GCWaitForPendingFinalizers")); CallStaticMethod(GCWaitForPendingFinalizersPtr); } diff --git a/Source/Engine/Tools/AudioTool/AudioTool.cpp b/Source/Engine/Tools/AudioTool/AudioTool.cpp index 90af5b14a..d456d7aed 100644 --- a/Source/Engine/Tools/AudioTool/AudioTool.cpp +++ b/Source/Engine/Tools/AudioTool/AudioTool.cpp @@ -6,6 +6,7 @@ #include "Engine/Core/Core.h" #include "Engine/Core/Math/Math.h" #include "Engine/Core/Memory/Allocation.h" +#include "Engine/Profiler/ProfilerCPU.h" #if USE_EDITOR #include "Engine/Serialization/Serialization.h" #include "Engine/Scripting/Enums.h" @@ -181,6 +182,7 @@ void Convert32To24Bits(const int32* input, uint8* output, uint32 numSamples) void AudioTool::ConvertToMono(const byte* input, byte* output, uint32 bitDepth, uint32 numSamples, uint32 numChannels) { + PROFILE_CPU(); switch (bitDepth) { case 8: @@ -203,6 +205,7 @@ void AudioTool::ConvertToMono(const byte* input, byte* output, uint32 bitDepth, void AudioTool::ConvertBitDepth(const byte* input, uint32 inBitDepth, byte* output, uint32 
outBitDepth, uint32 numSamples) { + PROFILE_CPU(); int32* srcBuffer = nullptr; const bool needTempBuffer = inBitDepth != 32; @@ -262,6 +265,7 @@ void AudioTool::ConvertBitDepth(const byte* input, uint32 inBitDepth, byte* outp void AudioTool::ConvertToFloat(const byte* input, uint32 inBitDepth, float* output, uint32 numSamples) { + PROFILE_CPU(); if (inBitDepth == 8) { for (uint32 i = 0; i < numSamples; i++) @@ -306,6 +310,7 @@ void AudioTool::ConvertToFloat(const byte* input, uint32 inBitDepth, float* outp void AudioTool::ConvertFromFloat(const float* input, int32* output, uint32 numSamples) { + PROFILE_CPU(); for (uint32 i = 0; i < numSamples; i++) { float sample = *(float*)input; diff --git a/Source/Engine/Tools/AudioTool/OggVorbisDecoder.cpp b/Source/Engine/Tools/AudioTool/OggVorbisDecoder.cpp index cde9afbc5..f8fd284df 100644 --- a/Source/Engine/Tools/AudioTool/OggVorbisDecoder.cpp +++ b/Source/Engine/Tools/AudioTool/OggVorbisDecoder.cpp @@ -5,6 +5,7 @@ #include "OggVorbisDecoder.h" #include "Engine/Core/Log.h" #include "Engine/Core/Math/Math.h" +#include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Serialization/MemoryReadStream.h" #include @@ -42,10 +43,25 @@ long oggTell(void* data) return static_cast(decoderData->Stream->GetPosition() - decoderData->Offset); } +OggVorbisDecoder::OggVorbisDecoder() +{ + Stream = nullptr; + Offset = 0; + ChannelCount = 0; + OggVorbisFile.datasource = nullptr; +} + +OggVorbisDecoder::~OggVorbisDecoder() +{ + if (OggVorbisFile.datasource != nullptr) + ov_clear(&OggVorbisFile); +} + bool OggVorbisDecoder::Open(ReadStream* stream, AudioDataInfo& info, uint32 offset) { if (stream == nullptr) return false; + PROFILE_CPU(); stream->SetPosition(offset); Stream = stream; @@ -71,11 +87,13 @@ bool OggVorbisDecoder::Open(ReadStream* stream, AudioDataInfo& info, uint32 offs void OggVorbisDecoder::Seek(uint32 offset) { + PROFILE_CPU(); ov_pcm_seek(&OggVorbisFile, offset / ChannelCount); } void OggVorbisDecoder::Read(byte* samples, 
uint32 numSamples) { + PROFILE_CPU(); uint32 numReadSamples = 0; while (numReadSamples < numSamples) { diff --git a/Source/Engine/Tools/AudioTool/OggVorbisDecoder.h b/Source/Engine/Tools/AudioTool/OggVorbisDecoder.h index 23dce6cbb..32b41c872 100644 --- a/Source/Engine/Tools/AudioTool/OggVorbisDecoder.h +++ b/Source/Engine/Tools/AudioTool/OggVorbisDecoder.h @@ -15,36 +15,23 @@ class OggVorbisDecoder : public AudioDecoder { public: - ReadStream* Stream; uint32 Offset; uint32 ChannelCount; OggVorbis_File OggVorbisFile; public: - /// /// Initializes a new instance of the class. /// - OggVorbisDecoder() - { - Stream = nullptr; - Offset = 0; - ChannelCount = 0; - OggVorbisFile.datasource = nullptr; - } + OggVorbisDecoder(); /// /// Finalizes an instance of the class. /// - ~OggVorbisDecoder() - { - if (OggVorbisFile.datasource != nullptr) - ov_clear(&OggVorbisFile); - } + ~OggVorbisDecoder(); public: - // [AudioDecoder] bool Open(ReadStream* stream, AudioDataInfo& info, uint32 offset = 0) override; void Seek(uint32 offset) override; From 80d19a002f1e458161509688d61188a2efe20dd7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 11 Aug 2025 18:42:55 +0200 Subject: [PATCH 185/211] Optimize Audio streaming to reduce lock contention --- Source/Engine/Audio/AudioClip.cpp | 54 ++++++++++++++++++------------- Source/Engine/Audio/AudioClip.h | 15 ++------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/Source/Engine/Audio/AudioClip.cpp b/Source/Engine/Audio/AudioClip.cpp index 0b8e40c81..336ddb7bd 100644 --- a/Source/Engine/Audio/AudioClip.cpp +++ b/Source/Engine/Audio/AudioClip.cpp @@ -18,47 +18,56 @@ REGISTER_BINARY_ASSET_WITH_UPGRADER(AudioClip, "FlaxEngine.AudioClip", AudioClipUpgrader, false); +AudioClip::StreamingTask::StreamingTask(AudioClip* asset) + : _asset(asset) + , _dataLock(asset->Storage->Lock()) +{ +} + +bool AudioClip::StreamingTask::HasReference(Object* resource) const +{ + return _asset == resource; +} + bool 
AudioClip::StreamingTask::Run() { PROFILE_CPU_NAMED("AudioStreaming"); PROFILE_MEM(Audio); - AssetReference ref = _asset.Get(); - if (ref == nullptr || AudioBackend::Instance == nullptr) + AssetReference clip = _asset.Get(); + if (clip == nullptr || AudioBackend::Instance == nullptr) return true; #if TRACY_ENABLE - const StringView name(ref->GetPath()); + const StringView name(clip->GetPath()); ZoneName(*name, name.Length()); #endif - ScopeLock lock(ref->Locker); - const auto& queue = ref->StreamingQueue; - if (queue.Count() == 0) - return false; - auto clip = ref.Get(); - // Update the buffers + // Process the loading queue (hold the asset lock) + clip->Locker.Lock(); + const auto& queue = clip->StreamingQueue; + Array> loadQueue; for (int32 i = 0; i < queue.Count(); i++) { - const auto idx = queue[i]; + const int32 idx = queue[i]; uint32& bufferID = clip->Buffers[idx]; if (bufferID == 0) { - bufferID = AudioBackend::Buffer::Create(); + // Load buffers outside the asset lock to prevent lock contention + loadQueue.Add(idx); } else { - // Release unused data + // Release unused buffer AudioBackend::Buffer::Delete(bufferID); bufferID = 0; } } + clip->Locker.Unlock(); // Load missing buffers data (from asset chunks) - for (int32 i = 0; i < queue.Count(); i++) + for (int32 i = 0; i < loadQueue.Count(); i++) { - if (clip->WriteBuffer(queue[i])) - { + if (clip->WriteBuffer(loadQueue[i])) return true; - } } // Update the sources @@ -419,11 +428,6 @@ void AudioClip::unload(bool isReloading) bool AudioClip::WriteBuffer(int32 chunkIndex) { - // Ignore if buffer is not created - const uint32 bufferID = Buffers[chunkIndex]; - if (bufferID == 0) - return false; - // Ensure audio backend exists if (AudioBackend::Instance == nullptr) return true; @@ -485,7 +489,13 @@ bool AudioClip::WriteBuffer(int32 chunkIndex) data = Span(tmp2.Get(), tmp2.Count()); } - // Write samples to the audio buffer + // Write samples to the audio buffer (create one if missing) + Locker.Lock(); // 
StreamingTask loads buffers without lock so do it here + uint32& bufferID = Buffers[chunkIndex]; + if (bufferID == 0) + bufferID = AudioBackend::Buffer::Create(); AudioBackend::Buffer::Write(bufferID, data.Get(), info); + Locker.Unlock(); + return false; } diff --git a/Source/Engine/Audio/AudioClip.h b/Source/Engine/Audio/AudioClip.h index 667a47178..d35bd18fc 100644 --- a/Source/Engine/Audio/AudioClip.h +++ b/Source/Engine/Audio/AudioClip.h @@ -44,22 +44,11 @@ public: FlaxStorage::LockData _dataLock; public: - /// - /// Init - /// - /// Parent asset - StreamingTask(AudioClip* asset) - : _asset(asset) - , _dataLock(asset->Storage->Lock()) - { - } + StreamingTask(AudioClip* asset); public: // [ThreadPoolTask] - bool HasReference(Object* resource) const override - { - return _asset == resource; - } + bool HasReference(Object* resource) const override; protected: // [ThreadPoolTask] From 1bedfd3adf0baf97d6cd32af9910ce5707208f53 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 11 Aug 2025 19:00:38 +0200 Subject: [PATCH 186/211] Disable auto-GC every 60 ticks in game --- Source/Engine/Scripting/Scripting.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index addd3bf5f..d1203a1f3 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -245,7 +245,7 @@ void ScriptingService::Update() PROFILE_CPU_NAMED("Scripting::Update"); INVOKE_EVENT(Update); -#ifdef USE_NETCORE +#if defined(USE_NETCORE) && !USE_EDITOR // Force GC to run in background periodically to avoid large blocking collections causing hitches if (Time::Update.TicksCount % 60 == 0) { From 7fcf6f9c977eca35d561cd3c42cb34c8e40fa46f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 11 Aug 2025 23:47:48 +0200 Subject: [PATCH 187/211] Add various improvements --- Source/Engine/Animations/Animations.cpp | 2 ++ .../Content/Storage/ContentStorageManager.cpp | 15 
++++++++------- Source/Engine/Core/ObjectsRemovalService.cpp | 3 ++- Source/Engine/Level/Scene/SceneRendering.cpp | 4 ++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Animations/Animations.cpp b/Source/Engine/Animations/Animations.cpp index cc3be45b0..d2708edad 100644 --- a/Source/Engine/Animations/Animations.cpp +++ b/Source/Engine/Animations/Animations.cpp @@ -171,10 +171,12 @@ void AnimationsSystem::PostExecute(TaskGraph* graph) void Animations::AddToUpdate(AnimatedModel* obj) { + ConcurrentSystemLocker::WriteScope lock(SystemLocker, true); AnimationManagerInstance.UpdateList.Add(obj); } void Animations::RemoveFromUpdate(AnimatedModel* obj) { + ConcurrentSystemLocker::WriteScope lock(SystemLocker, true); AnimationManagerInstance.UpdateList.Remove(obj); } diff --git a/Source/Engine/Content/Storage/ContentStorageManager.cpp b/Source/Engine/Content/Storage/ContentStorageManager.cpp index f94235664..80744ae45 100644 --- a/Source/Engine/Content/Storage/ContentStorageManager.cpp +++ b/Source/Engine/Content/Storage/ContentStorageManager.cpp @@ -15,14 +15,9 @@ namespace { CriticalSection Locker; -#if USE_EDITOR - Array Files(1024); - Array Packages; -#else Array Files; - Array Packages(64); -#endif - Dictionary StorageMap(2048); + Array Packages; + Dictionary StorageMap; } class ContentStorageService : public EngineService @@ -231,6 +226,12 @@ void ContentStorageManager::GetStorage(Array& result) bool ContentStorageService::Init() { +#if USE_EDITOR + Files.EnsureCapacity(1024); +#else + Packages.EnsureCapacity(64); +#endif + StorageMap.EnsureCapacity(2048); System = New(); Engine::UpdateGraph->AddSystem(System); return false; diff --git a/Source/Engine/Core/ObjectsRemovalService.cpp b/Source/Engine/Core/ObjectsRemovalService.cpp index 88f054a30..f8eb1d78b 100644 --- a/Source/Engine/Core/ObjectsRemovalService.cpp +++ b/Source/Engine/Core/ObjectsRemovalService.cpp @@ -19,7 +19,7 @@ namespace CriticalSection PoolLocker; double LastUpdate; 
float LastUpdateGameTime; - Dictionary Pool(8192); + Dictionary Pool; uint64 PoolCounter = 0; } @@ -114,6 +114,7 @@ void ObjectsRemovalService::Flush(float dt, float gameDelta) bool ObjectsRemoval::Init() { + Pool.EnsureCapacity(8192); LastUpdate = Platform::GetTimeSeconds(); LastUpdateGameTime = 0; return false; diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index 18f631833..01ac689dc 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -191,7 +191,7 @@ void SceneRendering::UpdateActor(Actor* a, int32& key, ISceneRenderingListener:: const int32 category = a->_drawCategory; ConcurrentSystemLocker::ReadScope lock(Locker); // Read-access only as list doesn't get resized (like Add/Remove do) so allow updating actors from different threads at once auto& list = Actors[category]; - if (list.Count() <= key) // Ignore invalid key softly + if (list.Count() <= key || key < 0) // Ignore invalid key softly return; auto& e = list[key]; if (e.Actor == a) @@ -211,7 +211,7 @@ void SceneRendering::RemoveActor(Actor* a, int32& key) const int32 category = a->_drawCategory; ConcurrentSystemLocker::WriteScope lock(Locker, true); auto& list = Actors[category]; - if (list.Count() > key) // Ignore invalid key softly (eg. list after batch clear during scene unload) + if (list.Count() > key || key < 0) // Ignore invalid key softly (eg. 
list after batch clear during scene unload) { auto& e = list.Get()[key]; if (e.Actor == a) From 9cf9fae453272eab44257805ca7b1ad9b4dbc70c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 12 Aug 2025 00:09:54 +0200 Subject: [PATCH 188/211] Add Vulkan device cache customization per-platform and reuse code --- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 92 +++++++++---------- .../Vulkan/VulkanPlatformBase.h | 3 + 2 files changed, 44 insertions(+), 51 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 2820d5777..8c91501d7 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1498,17 +1498,28 @@ PixelFormat GPUDeviceVulkan::GetClosestSupportedPixelFormat(PixelFormat format, return format; } -#if VULKAN_USE_PIPELINE_CACHE - -void GetPipelineCachePath(String& path) +bool VulkanPlatformBase::LoadCache(const String& folder, const Char* filename, Array& data) { -#if USE_EDITOR - path = Globals::ProjectCacheFolder / TEXT("VulkanPipeline.cache"); -#else - path = Globals::ProductLocalFolder / TEXT("VulkanPipeline.cache"); -#endif + String path = folder / filename; + if (FileSystem::FileExists(path)) + { + LOG(Info, "Loading Vulkan cache from file '{}'", path); + return File::ReadAllBytes(path, data); + } + return false; } +bool VulkanPlatformBase::SaveCache(const String& folder, const Char* filename, const Array& data) +{ + String path = folder / filename; + LOG(Info, "Saving Vulkan cache to file '{}' ({} kB)", path, data.Count() / 1024); + return File::WriteAllBytes(path, data); +} + +#if USE_EDITOR +#define CACHE_FOLDER Globals::ProjectCacheFolder +#else +#define CACHE_FOLDER Globals::ProductLocalFolder #endif bool GPUDeviceVulkan::SavePipelineCache() @@ -1516,6 +1527,8 @@ bool GPUDeviceVulkan::SavePipelineCache() #if VULKAN_USE_PIPELINE_CACHE if (PipelineCache == VK_NULL_HANDLE || 
!vkGetPipelineCacheData) return false; + PROFILE_CPU(); + PROFILE_MEM(Graphics); // Query data size size_t dataSize = 0; @@ -1531,9 +1544,7 @@ bool GPUDeviceVulkan::SavePipelineCache() LOG_VULKAN_RESULT_WITH_RETURN(result); // Save data - String path; - GetPipelineCachePath(path); - return File::WriteAllBytes(path, data); + return VulkanPlatform::SaveCache(CACHE_FOLDER, TEXT("VulkanPipeline.cache"), data); #else return false; #endif @@ -1541,19 +1552,12 @@ bool GPUDeviceVulkan::SavePipelineCache() #if VULKAN_USE_VALIDATION_CACHE -void GetValidationCachePath(String& path) -{ -#if USE_EDITOR - path = Globals::ProjectCacheFolder / TEXT("VulkanValidation.cache"); -#else - path = Globals::ProductLocalFolder / TEXT("VulkanValidation.cache"); -#endif -} - bool GPUDeviceVulkan::SaveValidationCache() { if (ValidationCache == VK_NULL_HANDLE || !vkGetValidationCacheDataEXT) return false; + PROFILE_CPU(); + PROFILE_MEM(Graphics); // Query data size size_t dataSize = 0; @@ -1569,9 +1573,7 @@ bool GPUDeviceVulkan::SaveValidationCache() LOG_VULKAN_RESULT_WITH_RETURN(result); // Save data - String path; - GetValidationCachePath(path); - return File::WriteAllBytes(path, data); + return VulkanPlatform::SaveCache(CACHE_FOLDER, TEXT("VulkanValidation.cache"), data); } #endif @@ -1987,13 +1989,7 @@ bool GPUDeviceVulkan::Init() if (vkCreatePipelineCache) { Array data; - String path; - GetPipelineCachePath(path); - if (FileSystem::FileExists(path)) - { - LOG(Info, "Trying to load Vulkan pipeline cache file {0}", path); - File::ReadAllBytes(path, data); - } + VulkanPlatform::LoadCache(CACHE_FOLDER, TEXT("VulkanPipeline.cache"), data); VkPipelineCacheCreateInfo pipelineCacheCreateInfo; RenderToolsVulkan::ZeroStruct(pipelineCacheCreateInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); pipelineCacheCreateInfo.initialDataSize = data.Count(); @@ -2006,36 +2002,30 @@ bool GPUDeviceVulkan::Init() if (OptionalDeviceExtensions.HasEXTValidationCache && vkCreateValidationCacheEXT && 
vkDestroyValidationCacheEXT) { Array data; - String path; - GetValidationCachePath(path); - if (FileSystem::FileExists(path)) + VulkanPlatform::LoadCache(CACHE_FOLDER, TEXT("VulkanValidation.cache"), data); + if (data.HasItems()) { - LOG(Info, "Trying to load Vulkan validation cache file {0}", path); - File::ReadAllBytes(path, data); - if (data.HasItems()) + int32* dataPtr = (int32*)data.Get(); + if (*dataPtr > 0) { - int32* dataPtr = (int32*)data.Get(); - if (*dataPtr > 0) + const int32 cacheSize = *dataPtr++; + const int32 cacheVersion = *dataPtr++; + const int32 cacheVersionExpected = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; + if (cacheVersion == cacheVersionExpected) { - const int32 cacheSize = *dataPtr++; - const int32 cacheVersion = *dataPtr++; - const int32 cacheVersionExpected = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; - if (cacheVersion == cacheVersionExpected) - { - dataPtr += VK_UUID_SIZE / sizeof(int32); - } - else - { - LOG(Warning, "Bad validation cache file, version: {0}, expected: {1}", cacheVersion, cacheVersionExpected); - data.Clear(); - } + dataPtr += VK_UUID_SIZE / sizeof(int32); } else { - LOG(Warning, "Bad validation cache file, header size: {0}", *dataPtr); + LOG(Warning, "Bad validation cache file, version: {0}, expected: {1}", cacheVersion, cacheVersionExpected); data.Clear(); } } + else + { + LOG(Warning, "Bad validation cache file, header size: {0}", *dataPtr); + data.Clear(); + } } VkValidationCacheCreateInfoEXT validationCreateInfo; RenderToolsVulkan::ZeroStruct(validationCreateInfo, VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT); diff --git a/Source/Engine/GraphicsDevice/Vulkan/VulkanPlatformBase.h b/Source/Engine/GraphicsDevice/Vulkan/VulkanPlatformBase.h index 4cfbd2716..2122b3709 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/VulkanPlatformBase.h +++ b/Source/Engine/GraphicsDevice/Vulkan/VulkanPlatformBase.h @@ -49,6 +49,9 @@ public: featuresToEnable.sparseResidency8Samples = VK_FALSE; featuresToEnable.sparseResidencyAliased = 
VK_FALSE; } + + static bool LoadCache(const String& folder, const Char* filename, Array& data); + static bool SaveCache(const String& folder, const Char* filename, const Array& data); }; #endif From b4d501cd6a8c82a70f8e0bc217952a559338bb9e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 12 Aug 2025 10:21:51 +0200 Subject: [PATCH 189/211] Add `OcclusionQueryPools` to Vulkan and move code to cpp file --- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 19 +++++++++++++ .../GraphicsDevice/Vulkan/GPUDeviceVulkan.h | 28 ++----------------- .../Vulkan/GPUTimerQueryVulkan.cpp | 2 +- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 8c91501d7..93819cfb5 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1349,6 +1349,24 @@ GPUDeviceVulkan::~GPUDeviceVulkan() GPUDeviceVulkan::Dispose(); } +BufferedQueryPoolVulkan* GPUDeviceVulkan::FindAvailableQueryPool(VkQueryType queryType) +{ + auto& pools = queryType == VK_QUERY_TYPE_OCCLUSION ? OcclusionQueryPools : TimestampQueryPools; + + // Try to use pool with available space inside + for (int32 i = 0; i < pools.Count(); i++) + { + auto pool = pools.Get()[i]; + if (pool->HasRoom()) + return pool; + } + + // Create new pool + const auto pool = New(this, queryType == VK_QUERY_TYPE_OCCLUSION ? 
4096 : 1024, queryType); + pools.Add(pool); + return pool; +} + RenderPassVulkan* GPUDeviceVulkan::GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout) { RenderPassVulkan* renderPass; @@ -2075,6 +2093,7 @@ void GPUDeviceVulkan::Dispose() HelperResources.Dispose(); StagingManager.Dispose(); TimestampQueryPools.ClearDelete(); + OcclusionQueryPools.ClearDelete(); SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader); Delete(DescriptorPoolsManager); SAFE_DELETE(MainContext); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h index 5e7fa6aa1..b593315d8 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h @@ -531,37 +531,13 @@ public: VkPhysicalDeviceFeatures PhysicalDeviceFeatures; Array TimestampQueryPools; + Array OcclusionQueryPools; #if VULKAN_RESET_QUERY_POOLS Array QueriesToReset; #endif - inline BufferedQueryPoolVulkan* FindAvailableQueryPool(Array& pools, VkQueryType queryType) - { - // Try to use pool with available space inside - for (int32 i = 0; i < pools.Count(); i++) - { - auto pool = pools.Get()[i]; - if (pool->HasRoom()) - return pool; - } - - // Create new pool - enum - { - NUM_OCCLUSION_QUERIES_PER_POOL = 4096, - NUM_TIMESTAMP_QUERIES_PER_POOL = 1024, - }; - const auto pool = New(this, queryType == VK_QUERY_TYPE_OCCLUSION ? 
NUM_OCCLUSION_QUERIES_PER_POOL : NUM_TIMESTAMP_QUERIES_PER_POOL, queryType); - pools.Add(pool); - return pool; - } - - inline BufferedQueryPoolVulkan* FindAvailableTimestampQueryPool() - { - return FindAvailableQueryPool(TimestampQueryPools, VK_QUERY_TYPE_TIMESTAMP); - } - + BufferedQueryPoolVulkan* FindAvailableQueryPool(VkQueryType queryType); RenderPassVulkan* GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout); FramebufferVulkan* GetOrCreateFramebuffer(FramebufferVulkan::Key& key, VkExtent2D& extent, uint32 layers); PipelineLayoutVulkan* GetOrCreateLayout(DescriptorSetLayoutInfoVulkan& key); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp index 19c90b447..2dd3b07d5 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp @@ -58,7 +58,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query) void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const { - auto pool = _device->FindAvailableTimestampQueryPool(); + auto pool = _device->FindAvailableQueryPool(VK_QUERY_TYPE_TIMESTAMP); uint32 index; if (pool->AcquireQuery(cmdBuffer, index)) { From cdff7708fb8e5b47060a36775b7075fdcf4ee9a0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 12 Aug 2025 10:25:02 +0200 Subject: [PATCH 190/211] Add automatic periodic Vulkan Pipeline State Cache serialization --- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 62 ++++++++++++++----- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.h | 9 ++- .../Vulkan/GPUPipelineStateVulkan.cpp | 2 + 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 93819cfb5..9ff9ab77b 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ 
b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -36,6 +36,7 @@ #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" +#include "Engine/Threading/ThreadPoolTask.h" #include "Engine/Scripting/Enums.h" #if !USE_EDITOR && (PLATFORM_WINDOWS || PLATFORM_LINUX) @@ -1540,29 +1541,50 @@ bool VulkanPlatformBase::SaveCache(const String& folder, const Char* filename, c #define CACHE_FOLDER Globals::ProductLocalFolder #endif -bool GPUDeviceVulkan::SavePipelineCache() +#if VULKAN_USE_PIPELINE_CACHE +bool SavePipelineCacheAsync() +{ + PROFILE_CPU(); + ((GPUDeviceVulkan*)GPUDevice::Instance)->SavePipelineCache(false, true); + return false; +} +#endif + +bool GPUDeviceVulkan::SavePipelineCache(bool async, bool cached) { #if VULKAN_USE_PIPELINE_CACHE - if (PipelineCache == VK_NULL_HANDLE || !vkGetPipelineCacheData) + if (PipelineCache == VK_NULL_HANDLE || !vkGetPipelineCacheData || PipelineCacheUsage == 0) return false; PROFILE_CPU(); PROFILE_MEM(Graphics); - // Query data size - size_t dataSize = 0; - VkResult result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, nullptr); - LOG_VULKAN_RESULT_WITH_RETURN(result); - if (dataSize <= 0) - return false; + if (!cached) + { + // Query data size + size_t dataSize = 0; + VkResult result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, nullptr); + LOG_VULKAN_RESULT_WITH_RETURN(result); + if (dataSize <= 0) + return false; - // Query data - Array data; - data.Resize((int32)dataSize); - result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, data.Get()); - LOG_VULKAN_RESULT_WITH_RETURN(result); + // Query data + PipelineCacheSaveData.Resize((int32)dataSize); + result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, PipelineCacheSaveData.Get()); + LOG_VULKAN_RESULT_WITH_RETURN(result); + } + + if (async) + { + // Kick off the async job that will save the cached bytes + Function action(SavePipelineCacheAsync); + return 
Task::StartNew(action) != nullptr; + } + + // Reset usage counter + PipelineCacheUsage = 0; // Save data - return VulkanPlatform::SaveCache(CACHE_FOLDER, TEXT("VulkanPipeline.cache"), data); + return VulkanPlatform::SaveCache(CACHE_FOLDER, TEXT("VulkanPipeline.cache"), PipelineCacheSaveData); #else return false; #endif @@ -2014,6 +2036,7 @@ bool GPUDeviceVulkan::Init() pipelineCacheCreateInfo.pInitialData = data.Count() > 0 ? data.Get() : nullptr; const VkResult result = vkCreatePipelineCache(Device, &pipelineCacheCreateInfo, nullptr, &PipelineCache); LOG_VULKAN_RESULT(result); + PipelineCacheSaveTime = Platform::GetTimeSeconds(); } #endif #if VULKAN_USE_VALIDATION_CACHE @@ -2067,6 +2090,17 @@ void GPUDeviceVulkan::DrawBegin() DeferredDeletionQueue.ReleaseResources(); StagingManager.ProcessPendingFree(); DescriptorPoolsManager->GC(); + +#if VULKAN_USE_PIPELINE_CACHE + // Serialize pipeline cache periodically for less PSO hitches on next app run + const double time = Platform::GetTimeSeconds(); + const double saveTimeFrequency = Engine::FrameCount < 60 * Math::Clamp(Engine::GetFramesPerSecond(), 30, 60) ? 10 : 180; // More frequent saves during the first 1min of gameplay + if (Engine::HasFocus && time - PipelineCacheSaveTime >= saveTimeFrequency) + { + SavePipelineCache(true); + PipelineCacheSaveTime = time; + } +#endif } void GPUDeviceVulkan::Dispose() diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h index b593315d8..a30dbda1c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h @@ -502,6 +502,11 @@ public: /// The pipeline cache. /// VkPipelineCache PipelineCache = VK_NULL_HANDLE; +#if VULKAN_USE_PIPELINE_CACHE + uint32 PipelineCacheUsage = 0; + double PipelineCacheSaveTime = 0.0f; + Array PipelineCacheSaveData; +#endif #if VULKAN_USE_VALIDATION_CACHE /// @@ -562,7 +567,9 @@ public: /// /// Saves the pipeline cache. 
/// - bool SavePipelineCache(); + /// Enables async writing to file to reduce stuttering of main thread. + /// Uses cached results from the last call to vkGetPipelineCacheData, used to just save cached data when running in async. + bool SavePipelineCache(bool async = false, bool cached = false); #if VK_EXT_validation_cache /// diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp index 927e4fbca..ef7520f68 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp @@ -113,6 +113,7 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState() // Create pipeline object VkPipeline pipeline; VkResult result = vkCreateComputePipelines(_device->Device, _device->PipelineCache, 1, &desc, nullptr, &pipeline); + _device->PipelineCacheUsage++; LOG_VULKAN_RESULT(result); if (result != VK_SUCCESS) return nullptr; @@ -313,6 +314,7 @@ VkPipeline GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer auto depthWrite = _descDepthStencil.depthWriteEnable; _descDepthStencil.depthWriteEnable &= renderPass->CanDepthWrite ? 
1 : 0; const VkResult result = vkCreateGraphicsPipelines(_device->Device, _device->PipelineCache, 1, &_desc, nullptr, &pipeline); + _device->PipelineCacheUsage++; _descDepthStencil.depthWriteEnable = depthWrite; LOG_VULKAN_RESULT(result); if (result != VK_SUCCESS) From 278dead0bd345226c660a64d9dd0dac2dc982c6f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 12 Aug 2025 12:34:32 +0200 Subject: [PATCH 191/211] Add `PLATFORM_CONSOLE` define and fix desktop to be as Windows/UWP, not Win32 --- Source/Engine/Graphics/Graphics.cpp | 2 +- Source/Engine/Platform/Defines.h | 3 +++ Source/Engine/Platform/UWP/UWPDefines.h | 1 + Source/Engine/Platform/Win32/Win32Defines.h | 1 - Source/Engine/Platform/Windows/WindowsDefines.h | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/Graphics.cpp b/Source/Engine/Graphics/Graphics.cpp index ec9cd5067..dd3344f5d 100644 --- a/Source/Engine/Graphics/Graphics.cpp +++ b/Source/Engine/Graphics/Graphics.cpp @@ -202,7 +202,7 @@ bool GraphicsService::Init() #endif ) { -#if !USE_EDITOR && BUILD_RELEASE && !PLATFORM_LINUX // IsDebugToolAttached seams to be enabled on many Linux machines via VK_EXT_tooling_info +#if !USE_EDITOR && BUILD_RELEASE && !PLATFORM_LINUX && !PLATFORM_CONSOLE // IsDebugToolAttached seams to be enabled on many Linux machines via VK_EXT_tooling_info // Block graphics debugging to protect contents Platform::Fatal(TEXT("Graphics debugger attached.")); #endif diff --git a/Source/Engine/Platform/Defines.h b/Source/Engine/Platform/Defines.h index 29f64052d..de56fdcb6 100644 --- a/Source/Engine/Platform/Defines.h +++ b/Source/Engine/Platform/Defines.h @@ -174,6 +174,9 @@ API_ENUM() enum class ArchitectureType #ifndef PLATFORM_DESKTOP #define PLATFORM_DESKTOP 0 #endif +#ifndef PLATFORM_CONSOLE +#define PLATFORM_CONSOLE 0 +#endif #ifndef PLATFORM_ARCH_X64 #define PLATFORM_ARCH_X64 0 #endif diff --git a/Source/Engine/Platform/UWP/UWPDefines.h b/Source/Engine/Platform/UWP/UWPDefines.h index 
a989bcaac..a5a6cabc8 100644 --- a/Source/Engine/Platform/UWP/UWPDefines.h +++ b/Source/Engine/Platform/UWP/UWPDefines.h @@ -8,6 +8,7 @@ // Platform description #define PLATFORM_TYPE PlatformType::UWP +#define PLATFORM_DESKTOP 1 // Use AOT for Mono #define USE_MONO_AOT 1 diff --git a/Source/Engine/Platform/Win32/Win32Defines.h b/Source/Engine/Platform/Win32/Win32Defines.h index c6a0438fa..758cfde20 100644 --- a/Source/Engine/Platform/Win32/Win32Defines.h +++ b/Source/Engine/Platform/Win32/Win32Defines.h @@ -5,7 +5,6 @@ #if PLATFORM_WIN32 // Platform description -#define PLATFORM_DESKTOP 1 #if defined(WIN64) && defined(_M_X64) #define PLATFORM_64BITS 1 #define PLATFORM_ARCH_X64 1 diff --git a/Source/Engine/Platform/Windows/WindowsDefines.h b/Source/Engine/Platform/Windows/WindowsDefines.h index 35180ba37..7db9538c4 100644 --- a/Source/Engine/Platform/Windows/WindowsDefines.h +++ b/Source/Engine/Platform/Windows/WindowsDefines.h @@ -9,5 +9,6 @@ // Platform description #define PLATFORM_TYPE PlatformType::Windows #define PLATFORM_HAS_HEADLESS_MODE 1 +#define PLATFORM_DESKTOP 1 #endif From cdb09847ecad7ba8c5a5be828ab7ac6b20b979a5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 13 Aug 2025 21:30:18 +0200 Subject: [PATCH 192/211] Fix draw calls batching to ignore distance in opaque passes for better draw calls instancing #2271 --- Source/Engine/Renderer/RenderList.cpp | 95 +++++++++++++++++---------- Source/Engine/Renderer/RenderList.h | 7 +- Source/Engine/Renderer/Renderer.cpp | 2 +- 3 files changed, 66 insertions(+), 38 deletions(-) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index c3989c253..e2477355e 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -514,45 +514,61 @@ void RenderList::Clear() Memory.Free(); } -struct PackedSortKey +// Sorting order: By Sort Order -> By Material -> By Geometry -> By Distance +PACK_STRUCT(struct PackedSortKey { - union - { - uint64 Data; 
+ uint32 DistanceKey; + uint8 DrawKey; + uint16 BatchKey; + uint8 SortKey; +}); - PACK_BEGIN() +// Sorting order: By Sort Order -> By Material -> By Geometry -> By Distance +PACK_STRUCT(struct PackedSortKeyDistance +{ + uint8 DrawKey; + uint16 BatchKey; + uint32 DistanceKey; + uint8 SortKey; +}); - struct - { - // Sorting order: By Sort Order -> By Distance -> By Material -> By Geometry - uint8 DrawKey; - uint16 BatchKey; - uint32 DistanceKey; - uint8 SortKey; - } PACK_END(); - }; -}; +static_assert(sizeof(PackedSortKey) == sizeof(uint64), "Invalid sort key size"); +static_assert(sizeof(PackedSortKeyDistance) == sizeof(uint64), "Invalid sort key size"); -FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall, int8 sortOrder) +FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall, DrawPass drawModes, int8 sortOrder) { const Float3 planeNormal = renderContext.View.Direction; const float planePoint = -Float3::Dot(planeNormal, renderContext.View.Position); const float distance = Float3::Dot(planeNormal, drawCall.ObjectPosition) - planePoint; - PackedSortKey key; - key.DistanceKey = RenderTools::ComputeDistanceSortKey(distance); + uint32 distanceKey = RenderTools::ComputeDistanceSortKey(distance); uint32 batchKey = GetHash(drawCall.Material); IMaterial::InstancingHandler handler; if (drawCall.Material->CanUseInstancing(handler)) handler.GetHash(drawCall, batchKey); - key.BatchKey = (uint16)batchKey; uint32 drawKey = (uint32)(471 * drawCall.WorldDeterminantSign); drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]); drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]); drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[2]); drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.IndexBuffer); - key.DrawKey = (uint8)drawKey; - key.SortKey = (uint8)(sortOrder - MIN_int8); - drawCall.SortKey = key.Data; + if ((drawModes & DrawPass::Forward) != 
DrawPass::None) + { + // Distance takes precedence over batching efficiency + PackedSortKeyDistance key; + key.BatchKey = (uint16)batchKey; + key.DistanceKey = distanceKey; + key.DrawKey = (uint8)drawKey; + key.SortKey = (uint8)(sortOrder - MIN_int8); + drawCall.SortKey = *(uint64*)&key; + } + else + { + PackedSortKey key; + key.BatchKey = (uint16)batchKey; + key.DistanceKey = distanceKey; + key.DrawKey = (uint8)drawKey; + key.SortKey = (uint8)(sortOrder - MIN_int8); + drawCall.SortKey = *(uint64*)&key; + } } void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals, int8 sortOrder) @@ -564,7 +580,7 @@ void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawMo #endif // Append draw call data - CalculateSortKey(renderContext, drawCall, sortOrder); + CalculateSortKey(renderContext, drawCall, drawModes, sortOrder); const int32 index = DrawCalls.Add(drawCall); // Add draw call to proper draw lists @@ -603,7 +619,7 @@ void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawP const RenderContext& mainRenderContext = renderContextBatch.Contexts.Get()[0]; // Append draw call data - CalculateSortKey(mainRenderContext, drawCall, sortOrder); + CalculateSortKey(mainRenderContext, drawCall, drawModes, sortOrder); const int32 index = DrawCalls.Add(drawCall); // Add draw call to proper draw lists @@ -678,7 +694,7 @@ void RenderList::BuildObjectsBuffer() ZoneValue(ObjectBuffer.Data.Count() / 1024); // Objects Buffer size in kB } -void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawPass pass, bool stable) +void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawCallsListType listType, DrawPass pass) { PROFILE_CPU(); PROFILE_MEM(GraphicsCommands); @@ -698,14 
+714,27 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD // Setup sort keys if (reverseDistance) { - for (int32 i = 0; i < listSize; i++) + if (listType == DrawCallsListType::Forward) // Transparency uses distance over batching for correct draw order { - const DrawCall& drawCall = drawCallsData[listData[i]]; - PackedSortKey key; - key.Data = drawCall.SortKey; - key.DistanceKey ^= MAX_uint32; // Reverse depth - key.SortKey ^= MAX_uint8; // Reverse sort order - sortedKeys[i] = key.Data; + for (int32 i = 0; i < listSize; i++) + { + const DrawCall& drawCall = drawCallsData[listData[i]]; + PackedSortKeyDistance key = *(PackedSortKeyDistance*)&drawCall.SortKey; + key.DistanceKey ^= MAX_uint32; // Reverse depth + key.SortKey ^= MAX_uint8; // Reverse sort order + sortedKeys[i] = *(uint64*)&key; + } + } + else + { + for (int32 i = 0; i < listSize; i++) + { + const DrawCall& drawCall = drawCallsData[listData[i]]; + PackedSortKey key = *(PackedSortKey*)&drawCall.SortKey; + key.DistanceKey ^= MAX_uint32; // Reverse depth + key.SortKey ^= MAX_uint8; // Reverse sort order + sortedKeys[i] = *(uint64*)&key; + } } } else @@ -762,7 +791,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD } // When using depth buffer draw calls are already almost ideally sorted by Radix Sort but transparency needs more stability to prevent flickering - if (stable) + if (listType == DrawCallsListType::Forward) { // Sort draw calls batches by depth Array sortingBatches; diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index ae02b36a9..af1e1f44a 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -569,8 +569,7 @@ public: /// The draw pass (optional). 
API_FUNCTION() FORCE_INLINE void SortDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, bool reverseDistance, DrawCallsListType listType, DrawPass pass = DrawPass::All) { - const bool stable = listType == DrawCallsListType::Forward; - SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls, pass, stable); + SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls, listType); } /// @@ -580,9 +579,9 @@ public: /// If set to true reverse draw call distance to the view. Results in back to front sorting. /// The collected draw calls indices list. /// The collected draw calls list. + /// The hint about draw calls list type (optional). /// The draw pass (optional). - /// If set to true draw batches will be additionally sorted to prevent any flickering, otherwise Depth Buffer will smooth out any non-stability in sorting. - void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawPass pass = DrawPass::All, bool stable = false); + void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer& drawCalls, DrawCallsListType listType = DrawCallsListType::GBuffer, DrawPass pass = DrawPass::All); /// /// Executes the collected draw calls. 
diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index c775b50e1..866377641 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -507,7 +507,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Shadow context sorting auto& shadowContext = RenderContextBatch.Contexts[index - ARRAY_COUNT(MainContextSorting)]; shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth); - shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth); + shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawCallsListType::Depth, DrawPass::Depth); } } } processor = { renderContextBatch }; From 26f4bcbc25aef5d1664452d2a40a84fb52d67fc3 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 25 Aug 2025 10:09:25 +0200 Subject: [PATCH 193/211] Fix UI Control Reference picker to support base/inherited types --- Source/Editor/CustomEditors/Editors/ControlReferenceEditor.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Editor/CustomEditors/Editors/ControlReferenceEditor.cs b/Source/Editor/CustomEditors/Editors/ControlReferenceEditor.cs index b31b40e53..e742c8b02 100644 --- a/Source/Editor/CustomEditors/Editors/ControlReferenceEditor.cs +++ b/Source/Editor/CustomEditors/Editors/ControlReferenceEditor.cs @@ -22,7 +22,7 @@ internal class UIControlRefPickerControl : FlaxObjectRefPickerControl /// protected override bool IsValid(Object obj) { - return obj == null || (obj is UIControl control && control.Control.GetType() == ControlType); + return obj == null || (obj is UIControl control && ControlType.IsAssignableFrom(control.Control.GetType())); } } From 5a2555f845c9c56285c9234372fe54b104ea4429 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 2 Sep 
2025 22:30:50 +0200 Subject: [PATCH 194/211] Improve cdb09847ecad7ba8c5a5be828ab7ac6b20b979a5 and 499ef51875b9febee667d8c4d9bc52e13d9a89bd to swap key bits during sorting #2271 --- Source/Engine/Renderer/RenderList.cpp | 46 +++++++++++---------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index e2477355e..6bd53ec87 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -535,7 +535,7 @@ PACK_STRUCT(struct PackedSortKeyDistance static_assert(sizeof(PackedSortKey) == sizeof(uint64), "Invalid sort key size"); static_assert(sizeof(PackedSortKeyDistance) == sizeof(uint64), "Invalid sort key size"); -FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall, DrawPass drawModes, int8 sortOrder) +FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall, int8 sortOrder) { const Float3 planeNormal = renderContext.View.Direction; const float planePoint = -Float3::Dot(planeNormal, renderContext.View.Position); @@ -550,25 +550,13 @@ FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]); drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[2]); drawKey = (drawKey * 397) ^ GetHash(drawCall.Geometry.IndexBuffer); - if ((drawModes & DrawPass::Forward) != DrawPass::None) - { - // Distance takes precedence over batching efficiency - PackedSortKeyDistance key; - key.BatchKey = (uint16)batchKey; - key.DistanceKey = distanceKey; - key.DrawKey = (uint8)drawKey; - key.SortKey = (uint8)(sortOrder - MIN_int8); - drawCall.SortKey = *(uint64*)&key; - } - else - { - PackedSortKey key; - key.BatchKey = (uint16)batchKey; - key.DistanceKey = distanceKey; - key.DrawKey = (uint8)drawKey; - key.SortKey = (uint8)(sortOrder - MIN_int8); - drawCall.SortKey = *(uint64*)&key; - } + + 
PackedSortKey key; + key.BatchKey = (uint16)batchKey; + key.DistanceKey = distanceKey; + key.DrawKey = (uint8)drawKey; + key.SortKey = (uint8)(sortOrder - MIN_int8); + drawCall.SortKey = *(uint64*)&key; } void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals, int8 sortOrder) @@ -580,7 +568,7 @@ void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawMo #endif // Append draw call data - CalculateSortKey(renderContext, drawCall, drawModes, sortOrder); + CalculateSortKey(renderContext, drawCall, sortOrder); const int32 index = DrawCalls.Add(drawCall); // Add draw call to proper draw lists @@ -619,7 +607,7 @@ void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawP const RenderContext& mainRenderContext = renderContextBatch.Contexts.Get()[0]; // Append draw call data - CalculateSortKey(mainRenderContext, drawCall, drawModes, sortOrder); + CalculateSortKey(mainRenderContext, drawCall, sortOrder); const int32 index = DrawCalls.Add(drawCall); // Add draw call to proper draw lists @@ -714,15 +702,19 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD // Setup sort keys if (reverseDistance) { - if (listType == DrawCallsListType::Forward) // Transparency uses distance over batching for correct draw order + if (listType == DrawCallsListType::Forward) { + // Transparency uses distance to take precedence over batching efficiency for correct draw order for (int32 i = 0; i < listSize; i++) { const DrawCall& drawCall = drawCallsData[listData[i]]; - PackedSortKeyDistance key = *(PackedSortKeyDistance*)&drawCall.SortKey; - key.DistanceKey ^= MAX_uint32; // Reverse depth - key.SortKey ^= MAX_uint8; // Reverse sort order - sortedKeys[i] = *(uint64*)&key; + PackedSortKey key = *(PackedSortKey*)&drawCall.SortKey; + PackedSortKeyDistance forwardKey; + forwardKey.BatchKey = key.BatchKey; + forwardKey.DistanceKey = 
key.DistanceKey ^ MAX_uint32; // Reverse depth + forwardKey.DrawKey = key.DrawKey; + forwardKey.SortKey = key.SortKey ^ MAX_uint8; // Reverse sort order + sortedKeys[i] = *(uint64*)&forwardKey; } } else From 212b0de29ba5d3f746088ce7a9183bf9230e07f4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Sep 2025 13:00:28 +0200 Subject: [PATCH 195/211] Add `RenderListAlloc` to simplify rendering memory allocations --- Source/Engine/Particles/Particles.cpp | 31 +++++---------------------- Source/Engine/Renderer/RenderList.cpp | 31 +++++++++++++++++++++------ Source/Engine/Renderer/RenderList.h | 24 +++++++++++++++++++++ 3 files changed, 53 insertions(+), 33 deletions(-) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 54b3d1dd8..88fa2cd88 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -210,31 +210,10 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff const int32 stride = buffer->Stride; const int32 listSize = buffer->CPU.Count; const int32 indicesByteSize = listSize * buffer->GPU.SortedIndices->GetStride(); - Array sortingKeysList[4]; - Array sortingIndicesList[2]; - uint32* sortingKeys[2]; - void* sortingIndices[2]; - if (listSize < 500) - { - // Use fast stack allocator from RenderList - auto& memory = renderContextBatch.GetMainContext().List->Memory; - sortingKeys[0] = memory.Allocate(listSize); - sortingKeys[1] = memory.Allocate(listSize); - sortingIndices[0] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); - sortingIndices[1] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); - } - else - { - // Use shared pooled memory from RendererAllocation - sortingKeysList[0].Resize(listSize); - sortingKeysList[1].Resize(listSize); - sortingIndicesList[0].Resize(indicesByteSize); - sortingIndicesList[1].Resize(indicesByteSize); - sortingKeys[0] = sortingKeysList[0].Get(); - sortingKeys[1] = sortingKeysList[1].Get(); - 
sortingIndices[0] = sortingIndicesList[0].Get(); - sortingIndices[1] = sortingIndicesList[1].Get(); - } + RenderListAlloc sortingAllocs[4]; + auto* renderList = renderContextBatch.GetMainContext().List; + uint32* sortingKeys[2] = { sortingAllocs[0].Init(renderList, listSize), sortingAllocs[1].Init(renderList, listSize) }; + void* sortingIndices[2] = { sortingAllocs[2].Init(renderList, indicesByteSize, GPU_SHADER_DATA_ALIGNMENT), sortingAllocs[3].Init(renderList, indicesByteSize, GPU_SHADER_DATA_ALIGNMENT) }; uint32* sortedKeys = sortingKeys[0]; const uint32 sortKeyXor = sortMode != ParticleSortMode::CustomAscending ? MAX_uint32 : 0; switch (sortMode) @@ -321,7 +300,7 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff { case PixelFormat::R16_UInt: for (int32 i = 0; i < listSize; i++) - ((uint16*)sortedIndices)[i] = i; + ((uint16*)sortedIndices)[i] = (uint16)i; break; case PixelFormat::R32_UInt: for (int32 i = 0; i < listSize; i++) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 6bd53ec87..2c62ebdd6 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -461,6 +461,25 @@ bool DrawCallsList::IsEmpty() const return Indices.Count() + PreBatchedDrawCalls.Count() == 0; } +RenderListAlloc::~RenderListAlloc() +{ + if (!List && Data) // Render List memory doesn't need free (arena allocator) + RendererAllocation::Free(Data, Size); +} + +void* RenderListAlloc::Init(RenderList* list, uintptr size, uintptr alignment) +{ + ASSERT_LOW_LAYER(!Data); + Size = size; + bool useList = alignment <= 16 && size < 1024; + List = useList ? 
list : nullptr; + if (useList) + Data = list->Memory.Allocate(size, alignment); + else + Data = RendererAllocation::Allocate(size); + return Data; +} + RenderList::RenderList(const SpawnParams& params) : ScriptingObject(params) , Memory(4 * 1024 * 1024, RendererAllocation::Allocate, RendererAllocation::Free) // 4MB pages, use page pooling via RendererAllocation @@ -692,12 +711,10 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD ZoneValue(listSize); // Use shared memory from renderer allocator - Array SortingKeys[2]; - Array SortingIndices; - SortingKeys[0].Resize(listSize); - SortingKeys[1].Resize(listSize); - SortingIndices.Resize(listSize); - uint64* sortedKeys = SortingKeys[0].Get(); + RenderListAlloc allocs[3]; + uint64* sortedKeys = allocs[0].Init(this, listSize); + uint64* tempKeys = allocs[1].Init(this, listSize); + int32* tempIndices = allocs[2].Init(this, listSize); // Setup sort keys if (reverseDistance) @@ -740,7 +757,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD // Sort draw calls indices int32* resultIndices = list.Indices.Get(); - Sorting::RadixSort(sortedKeys, resultIndices, SortingKeys[1].Get(), SortingIndices.Get(), listSize); + Sorting::RadixSort(sortedKeys, resultIndices, tempKeys, tempIndices, listSize); if (resultIndices != list.Indices.Get()) Platform::MemoryCopy(list.Indices.Get(), resultIndices, sizeof(int32) * listSize); diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index af1e1f44a..ce73f1dcd 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -278,6 +278,30 @@ struct DrawCallsList bool IsEmpty() const; }; +// Small utility for allocating memory from RenderList arena pool with automatic fallback to shared RendererAllocation for larger memory blocks. 
+struct RenderListAlloc +{ + RenderList* List; + void* Data = nullptr; + uintptr Size; + + ~RenderListAlloc(); + + void* Init(RenderList* list, uintptr size, uintptr alignment = 1); + + template + FORCE_INLINE T* Init(RenderList* list, int32 count, uintptr alignment = 1) + { + return (T*)Init(list, count * sizeof(T), alignment); + } + + template + FORCE_INLINE T* Get() + { + return (T*)Data; + } +}; + /// /// Rendering cache container object for the draw calls collecting, sorting and executing. /// From f1c4fd464afeaf4be955d5391e3a9e79acc923e0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Sep 2025 22:11:26 +0200 Subject: [PATCH 196/211] Optimize GPU textures and buffers uploads with a batched memory barrier --- Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp | 4 ++++ Source/Engine/Render2D/Render2D.cpp | 8 ++++++-- Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp | 2 ++ Source/Engine/Renderer/Renderer.cpp | 2 ++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp index d2418357d..18bf65ca3 100644 --- a/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp +++ b/Source/Engine/Graphics/Async/DefaultGPUTasksExecutor.cpp @@ -5,6 +5,7 @@ #include "GPUTask.h" #include "GPUTasksManager.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Profiler/ProfilerCPU.h" DefaultGPUTasksExecutor::DefaultGPUTasksExecutor() @@ -30,6 +31,9 @@ void DefaultGPUTasksExecutor::FrameBegin() // Default implementation performs async operations on start of the frame which is synchronized with a rendering thread GPUTask* buffer[32]; const int32 count = GPUDevice::Instance->GetTasksManager()->RequestWork(buffer, 32); + if (count == 0) + return; + GPUMemoryPass pass(_context->GPU); for (int32 i = 0; i < count; i++) { _context->Run(buffer[i]); diff --git a/Source/Engine/Render2D/Render2D.cpp 
b/Source/Engine/Render2D/Render2D.cpp index 251781c99..5ae034bc5 100644 --- a/Source/Engine/Render2D/Render2D.cpp +++ b/Source/Engine/Render2D/Render2D.cpp @@ -14,6 +14,7 @@ #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUPipelineState.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/DynamicBuffer.h" @@ -741,8 +742,11 @@ void Render2D::End() } // Flush geometry buffers - VB.Flush(Context); - IB.Flush(Context); + { + GPUMemoryPass pass(Context); + VB.Flush(Context); + IB.Flush(Context); + } // Set output Context->ResetSR(); diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index bdb542208..1c1b9e4e3 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -15,6 +15,7 @@ #include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderTargetPool.h" @@ -939,6 +940,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Send objects data to the GPU { PROFILE_GPU_CPU_NAMED("Update Objects"); + GPUMemoryPass pass(context); surfaceAtlasData.ObjectsBuffer.Flush(context); surfaceAtlasData.ObjectsListBuffer.Flush(context); } diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 866377641..4c884cc1b 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -2,6 +2,7 @@ #include "Renderer.h" #include "Engine/Graphics/GPUContext.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderBuffers.h" #include 
"Engine/Graphics/RenderTask.h" @@ -523,6 +524,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont JobSystem::Wait(buildObjectsBufferJob); { PROFILE_CPU_NAMED("FlushObjectsBuffer"); + GPUMemoryPass pass(context); for (auto& e : renderContextBatch.Contexts) e.List->ObjectBuffer.Flush(context); } From f1ecbf828ec78b0297be8233ed58aa52f99687bc Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Sep 2025 22:11:48 +0200 Subject: [PATCH 197/211] Fix debug check on Vulkan backbuffer texture usage --- Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h index bc8f397a9..999e0f602 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTextureVulkan.h @@ -62,8 +62,8 @@ public: void DescriptorAsImage(GPUContextVulkan* context, VkImageView& imageView, VkImageLayout& layout) override; void DescriptorAsStorageImage(GPUContextVulkan* context, VkImageView& imageView, VkImageLayout& layout) override; #if !BUILD_RELEASE - bool HasSRV() const override { return ((GPUTexture*)_parent)->IsShaderResource(); } - bool HasUAV() const override { return ((GPUTexture*)_parent)->IsUnorderedAccess(); } + bool HasSRV() const override { return !_parent || ((GPUTexture*)_parent)->IsShaderResource(); } + bool HasUAV() const override { return !_parent || ((GPUTexture*)_parent)->IsUnorderedAccess(); } #endif }; From 80de56f469b4dcae7917dbf20b3bfb68d72364cf Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Sep 2025 23:12:55 +0200 Subject: [PATCH 198/211] Optimize Global SDF dispatches with UAV write overlaps 2-3x faster rasterization --- .../Renderer/GlobalSignDistanceFieldPass.cpp | 169 +++++++++--------- 1 file changed, 83 insertions(+), 86 deletions(-) diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp 
b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index c8c997180..85502cc97 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -10,6 +10,7 @@ #include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" @@ -841,9 +842,10 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex context->BindCB(1, _cb1); constexpr int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE; int32 chunkDispatches = 0; - if (!reset) + if (!reset && cascade.NonEmptyChunks.HasItems()) { PROFILE_GPU_CPU_NAMED("Clear Chunks"); + GPUComputePass pass(context); for (auto it = cascade.NonEmptyChunks.Begin(); it.IsNotEnd(); ++it) { auto& key = it->Item; @@ -856,7 +858,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex context->UpdateCB(_cb1, &data); context->Dispatch(_csClearChunk, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); chunkDispatches++; - // TODO: don't stall with UAV barrier on D3D12/Vulkan if UAVs don't change between dispatches } ZoneValue(chunkDispatches); } @@ -877,106 +878,102 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex context->BindSR(0, _objectsBuffer->GetBuffer() ? 
_objectsBuffer->GetBuffer()->View() : nullptr); // Rasterize non-empty chunks (first layer so can override existing chunk data) - for (const auto& e : cascade.Chunks) + uint32 maxLayer = 0; { - if (e.Key.Layer != 0) - continue; - auto& chunk = e.Value; - cascade.NonEmptyChunks.Add(e.Key); - - for (int32 i = 0; i < chunk.ModelsCount; i++) + GPUComputePass pass(context); + for (const auto& e : cascade.Chunks) { - auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]); - data.Objects[i] = objectIndex; - context->BindSR(i + 1, objectsTextures[objectIndex]); - } - for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) - context->UnBindSR(i + 1); - data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; - data.ObjectsCount = chunk.ModelsCount; - context->UpdateCB(_cb1, &data); - auto cs = data.ObjectsCount != 0 ? _csRasterizeModel0 : _csClearChunk; // Terrain-only chunk can be quickly cleared - context->Dispatch(cs, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); - chunkDispatches++; - // TODO: don't stall with UAV barrier on D3D12/Vulkan if UAVs don't change between dispatches (maybe cache per-shader write/read flags for all UAVs?) 
+ if (e.Key.Layer != 0) + continue; + auto& chunk = e.Value; + data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + cascade.NonEmptyChunks.Add(e.Key); - if (chunk.HeightfieldsCount != 0) - { - // Inject heightfield (additive) - for (int32 i = 0; i < chunk.HeightfieldsCount; i++) - { - auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]); - data.Objects[i] = objectIndex; - context->BindSR(i + 1, objectsTextures[objectIndex]); - } - for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) - context->UnBindSR(i + 1); - data.ObjectsCount = chunk.HeightfieldsCount; - context->UpdateCB(_cb1, &data); - context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); - chunkDispatches++; - } - -#if GLOBAL_SDF_DEBUG_CHUNKS - // Debug draw chunk bounds in world space with number of models in it - if (cascadeIndex + 1 == GLOBAL_SDF_DEBUG_CHUNKS) - { - int32 count = chunk.ModelsCount + chunk.HeightfieldsCount; - RasterizeChunkKey tmp = e.Key; - tmp.NextLayer(); - while (cascade.Chunks.ContainsKey(tmp)) - { - count += cascade.Chunks[tmp].ModelsCount + cascade.Chunks[tmp].HeightfieldsCount; - tmp.NextLayer(); - } - Float3 chunkMin = cascade.Bounds.Minimum + Float3(e.Key.Coord) * cascade.ChunkSize; - BoundingBox chunkBounds(chunkMin, chunkMin + cascade.ChunkSize); - DebugDraw::DrawWireBox(chunkBounds, Color::Red, 0, false); - DebugDraw::DrawText(StringUtils::ToString(count), chunkBounds.GetCenter(), Color::Red); - } -#endif - } - - // Rasterize non-empty chunks (additive layers so need combine with existing chunk data) - for (const auto& e : cascade.Chunks) - { - if (e.Key.Layer == 0) - continue; - auto& chunk = e.Value; - data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; - - if (chunk.ModelsCount != 0) - { - // Inject models (additive) for (int32 i = 0; i < chunk.ModelsCount; i++) { auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]); 
data.Objects[i] = objectIndex; context->BindSR(i + 1, objectsTextures[objectIndex]); } - for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) + for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT; i++) context->UnBindSR(i + 1); data.ObjectsCount = chunk.ModelsCount; context->UpdateCB(_cb1, &data); - context->Dispatch(_csRasterizeModel1, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); + auto cs = data.ObjectsCount != 0 ? _csRasterizeModel0 : _csClearChunk; // Terrain-only chunk can be quickly cleared + context->Dispatch(cs, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); chunkDispatches++; - } - if (chunk.HeightfieldsCount != 0) - { - // Inject heightfields (additive) - for (int32 i = 0; i < chunk.HeightfieldsCount; i++) +#if GLOBAL_SDF_DEBUG_CHUNKS + // Debug draw chunk bounds in world space with number of models in it + if (cascadeIndex + 1 == GLOBAL_SDF_DEBUG_CHUNKS) { - auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]); - data.Objects[i] = objectIndex; - context->BindSR(i + 1, objectsTextures[objectIndex]); + int32 count = chunk.ModelsCount + chunk.HeightfieldsCount; + RasterizeChunkKey tmp = e.Key; + tmp.NextLayer(); + while (cascade.Chunks.ContainsKey(tmp)) + { + count += cascade.Chunks[tmp].ModelsCount + cascade.Chunks[tmp].HeightfieldsCount; + tmp.NextLayer(); + } + Float3 chunkMin = cascade.Bounds.Minimum + Float3(e.Key.Coord) * cascade.ChunkSize; + BoundingBox chunkBounds(chunkMin, chunkMin + cascade.ChunkSize); + DebugDraw::DrawWireBox(chunkBounds, Color::Red, 0, false); + DebugDraw::DrawText(StringUtils::ToString(count), chunkBounds.GetCenter(), Color::Red); + } +#endif + } + } + +#if PLATFORM_WINDOWS + // Hack to fix D3D11 bug that doesn't insert UAV barrier after overlap region ends (between two GPUComputePass) + if (context->GetDevice()->GetRendererType() == RendererType::DirectX11) + context->Dispatch(_csRasterizeModel0, chunkDispatchGroups, 
chunkDispatchGroups, chunkDispatchGroups); +#endif + + // Rasterize non-empty chunks (additive layers so need combine with existing chunk data) + for (uint32 layer = 0; layer <= maxLayer; layer++) + { + GPUComputePass pass(context); + for (const auto& e : cascade.Chunks) + { + if (e.Key.Layer != layer) + continue; + auto& chunk = e.Value; + data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + + if (chunk.ModelsCount != 0 && layer != 0) // Models from layer 0 has been already written + { + // Inject models (additive) + for (int32 i = 0; i < chunk.ModelsCount; i++) + { + auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]); + data.Objects[i] = objectIndex; + context->BindSR(i + 1, objectsTextures[objectIndex]); + } + for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT; i++) + context->UnBindSR(i + 1); + data.ObjectsCount = chunk.ModelsCount; + context->UpdateCB(_cb1, &data); + context->Dispatch(_csRasterizeModel1, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); + chunkDispatches++; + } + + if (chunk.HeightfieldsCount != 0) + { + // Inject heightfields (additive) + for (int32 i = 0; i < chunk.HeightfieldsCount; i++) + { + auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]); + data.Objects[i] = objectIndex; + context->BindSR(i + 1, objectsTextures[objectIndex]); + } + for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) + context->UnBindSR(i + 1); + data.ObjectsCount = chunk.HeightfieldsCount; + context->UpdateCB(_cb1, &data); + context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); + chunkDispatches++; } - for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++) - context->UnBindSR(i + 1); - data.ObjectsCount = chunk.HeightfieldsCount; - context->UpdateCB(_cb1, &data); - context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, 
chunkDispatchGroups); - chunkDispatches++; } } From c0c9df49dc8c1864538f83d04c58c9bdac36d051 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Wed, 3 Sep 2025 23:21:45 +0200 Subject: [PATCH 199/211] Optimize DDGI probes update to overlap both irradiance and distance data at once --- Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 782f15260..a954cf31f 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -15,6 +15,7 @@ #include "Engine/Debug/DebugDraw.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/Graphics.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" @@ -618,6 +619,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes irradiance and distance textures (one thread-group per probe) { PROFILE_GPU_CPU_NAMED("Update Probes"); + GPUComputePass pass(context); // Distance context->BindSR(0, ddgiData.Result.ProbesData); From c44d939c083710171b058d20fe06e74d420efd85 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 14:46:56 +0200 Subject: [PATCH 200/211] Add `ReadWriteLock` to platform api --- Source/Engine/Platform/ReadWriteLock.h | 15 ++++ Source/Engine/Platform/Types.h | 22 ++++++ .../Engine/Platform/Unix/UnixReadWriteLock.h | 71 +++++++++++++++++++ .../Platform/Win32/Win32ReadWriteLock.h | 69 ++++++++++++++++++ Source/Engine/Platform/Win32/WindowsMinimal.h | 11 +++ 5 files changed, 188 insertions(+) create mode 100644 Source/Engine/Platform/ReadWriteLock.h create mode 100644 Source/Engine/Platform/Unix/UnixReadWriteLock.h create mode 100644 
Source/Engine/Platform/Win32/Win32ReadWriteLock.h diff --git a/Source/Engine/Platform/ReadWriteLock.h b/Source/Engine/Platform/ReadWriteLock.h new file mode 100644 index 000000000..5386e6ab3 --- /dev/null +++ b/Source/Engine/Platform/ReadWriteLock.h @@ -0,0 +1,15 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#if PLATFORM_WINDOWS || PLATFORM_UWP || PLATFORM_XBOX_ONE || PLATFORM_XBOX_SCARLETT +#include "Win32/Win32ReadWriteLock.h" +#elif PLATFORM_LINUX || PLATFORM_ANDROID || PLATFORM_PS4 || PLATFORM_PS5 || PLATFORM_MAC || PLATFORM_IOS +#include "Unix/UnixReadWriteLock.h" +#elif PLATFORM_SWITCH +#include "Platforms/Switch/Engine/Platform/SwitchReadWriteLock.h" +#else +#error Missing Read Write Lock implementation! +#endif + +#include "Types.h" diff --git a/Source/Engine/Platform/Types.h b/Source/Engine/Platform/Types.h index 50af4279c..79fe02efc 100644 --- a/Source/Engine/Platform/Types.h +++ b/Source/Engine/Platform/Types.h @@ -8,6 +8,8 @@ class WindowsClipboard; typedef WindowsClipboard Clipboard; class Win32CriticalSection; typedef Win32CriticalSection CriticalSection; +class Win32ReadWriteLock; +typedef Win32ReadWriteLock ReadWriteLock; class Win32ConditionVariable; typedef Win32ConditionVariable ConditionVariable; class WindowsFileSystem; @@ -33,6 +35,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; class Win32CriticalSection; typedef Win32CriticalSection CriticalSection; +class Win32ReadWriteLock; +typedef Win32ReadWriteLock ReadWriteLock; class Win32ConditionVariable; typedef Win32ConditionVariable ConditionVariable; class UWPFileSystem; @@ -58,6 +62,8 @@ class LinuxClipboard; typedef LinuxClipboard Clipboard; class UnixCriticalSection; typedef UnixCriticalSection CriticalSection; +class UnixReadWriteLock; +typedef UnixReadWriteLock ReadWriteLock; class UnixConditionVariable; typedef UnixConditionVariable ConditionVariable; class LinuxFileSystem; @@ -83,6 +89,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; 
class UnixCriticalSection; typedef UnixCriticalSection CriticalSection; +class UnixReadWriteLock; +typedef UnixReadWriteLock ReadWriteLock; class UnixConditionVariable; typedef UnixConditionVariable ConditionVariable; class PS4FileSystem; @@ -108,6 +116,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; class UnixCriticalSection; typedef UnixCriticalSection CriticalSection; +class UnixReadWriteLock; +typedef UnixReadWriteLock ReadWriteLock; class UnixConditionVariable; typedef UnixConditionVariable ConditionVariable; class PS5FileSystem; @@ -133,6 +143,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; class Win32CriticalSection; typedef Win32CriticalSection CriticalSection; +class Win32ReadWriteLock; +typedef Win32ReadWriteLock ReadWriteLock; class Win32ConditionVariable; typedef Win32ConditionVariable ConditionVariable; class XboxOneFileSystem; @@ -158,6 +170,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; class Win32CriticalSection; typedef Win32CriticalSection CriticalSection; +class Win32ReadWriteLock; +typedef Win32ReadWriteLock ReadWriteLock; class Win32ConditionVariable; typedef Win32ConditionVariable ConditionVariable; class XboxScarlettFileSystem; @@ -183,6 +197,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; class UnixCriticalSection; typedef UnixCriticalSection CriticalSection; +class UnixReadWriteLock; +typedef UnixReadWriteLock ReadWriteLock; class UnixConditionVariable; typedef UnixConditionVariable ConditionVariable; class AndroidFileSystem; @@ -208,6 +224,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; class SwitchCriticalSection; typedef SwitchCriticalSection CriticalSection; +class SwitchReadWriteLock; +typedef SwitchReadWriteLock ReadWriteLock; class SwitchConditionVariable; typedef SwitchConditionVariable ConditionVariable; class SwitchFileSystem; @@ -233,6 +251,8 @@ class MacClipboard; typedef MacClipboard Clipboard; class UnixCriticalSection; typedef UnixCriticalSection CriticalSection; +class 
UnixReadWriteLock; +typedef UnixReadWriteLock ReadWriteLock; class UnixConditionVariable; typedef UnixConditionVariable ConditionVariable; class MacFileSystem; @@ -258,6 +278,8 @@ class ClipboardBase; typedef ClipboardBase Clipboard; class UnixCriticalSection; typedef UnixCriticalSection CriticalSection; +class UnixReadWriteLock; +typedef UnixReadWriteLock ReadWriteLock; class UnixConditionVariable; typedef UnixConditionVariable ConditionVariable; class iOSFileSystem; diff --git a/Source/Engine/Platform/Unix/UnixReadWriteLock.h b/Source/Engine/Platform/Unix/UnixReadWriteLock.h new file mode 100644 index 000000000..368e814dc --- /dev/null +++ b/Source/Engine/Platform/Unix/UnixReadWriteLock.h @@ -0,0 +1,71 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#if PLATFORM_UNIX + +#include "Engine/Platform/Platform.h" +#include + +/// +/// Unix implementation of a read/write lock that allows for shared reading by multiple threads and exclusive writing by a single thread. +/// +class FLAXENGINE_API UnixReadWriteLock +{ +private: + mutable pthread_rwlock_t _lock; + +private: + NON_COPYABLE(UnixReadWriteLock); + +public: + /// + /// Initializes a new instance of the class. + /// + UnixReadWriteLock() + { + pthread_rwlock_init(&_lock, nullptr); + } + + /// + /// Finalizes an instance of the class. + /// + ~UnixReadWriteLock() + { + pthread_rwlock_destroy(&_lock); + } + +public: + /// + /// Locks for shared reading. + /// + void ReadLock() const + { + pthread_rwlock_rdlock(&_lock); + } + + /// + /// Releases the lock after shared reading. + /// + void ReadUnlock() const + { + pthread_rwlock_unlock(&_lock); + } + /// + /// Locks for exclusive writing. + /// + void WriteLock() const + { + pthread_rwlock_wrlock(&_lock); + } + + /// + /// Releases the lock after exclusive writing. 
+ /// + void WriteUnlock() const + { + pthread_rwlock_unlock(&_lock); + } +}; + +#endif diff --git a/Source/Engine/Platform/Win32/Win32ReadWriteLock.h b/Source/Engine/Platform/Win32/Win32ReadWriteLock.h new file mode 100644 index 000000000..94bcdd5e3 --- /dev/null +++ b/Source/Engine/Platform/Win32/Win32ReadWriteLock.h @@ -0,0 +1,69 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#if PLATFORM_WIN32 + +#include "WindowsMinimal.h" + +/// +/// Win32 implementation of a read/write lock that allows for shared reading by multiple threads and exclusive writing by a single thread. +/// +class FLAXENGINE_API Win32ReadWriteLock +{ +private: + mutable Windows::SRWLOCK _lock; + +private: + NON_COPYABLE(Win32ReadWriteLock); + +public: + /// + /// Initializes a new instance of the class. + /// + Win32ReadWriteLock() + { + Windows::InitializeSRWLock(&_lock); + } + + /// + /// Finalizes an instance of the class. + /// + ~Win32ReadWriteLock() + { + } + +public: + /// + /// Locks for shared reading. + /// + __forceinline void ReadLock() const + { + Windows::AcquireSRWLockShared(&_lock); + } + + /// + /// Releases the lock after shared reading. + /// + __forceinline void ReadUnlock() const + { + Windows::ReleaseSRWLockShared(&_lock); + } + /// + /// Locks for exclusive writing. + /// + __forceinline void WriteLock() const + { + Windows::AcquireSRWLockExclusive(&_lock); + } + + /// + /// Releases the lock after exclusive writing. 
+ /// + __forceinline void WriteUnlock() const + { + Windows::ReleaseSRWLockExclusive(&_lock); + } +}; + +#endif diff --git a/Source/Engine/Platform/Win32/WindowsMinimal.h b/Source/Engine/Platform/Win32/WindowsMinimal.h index 04a595e72..137b6195e 100644 --- a/Source/Engine/Platform/Win32/WindowsMinimal.h +++ b/Source/Engine/Platform/Win32/WindowsMinimal.h @@ -65,6 +65,11 @@ namespace Windows void* Ptr; }; + struct SRWLOCK + { + void* Ptr; + }; + struct OVERLAPPED { void* Data1[3]; @@ -96,6 +101,12 @@ namespace Windows WIN_API void WIN_API_CALLCONV WakeConditionVariable(CONDITION_VARIABLE* ConditionVariable); WIN_API void WIN_API_CALLCONV WakeAllConditionVariable(CONDITION_VARIABLE* ConditionVariable); + WIN_API void WIN_API_CALLCONV InitializeSRWLock(SRWLOCK* SRWLock); + WIN_API void WIN_API_CALLCONV AcquireSRWLockShared(SRWLOCK* SRWLock); + WIN_API void WIN_API_CALLCONV ReleaseSRWLockShared(SRWLOCK* SRWLock); + WIN_API void WIN_API_CALLCONV AcquireSRWLockExclusive(SRWLOCK* SRWLock); + WIN_API void WIN_API_CALLCONV ReleaseSRWLockExclusive(SRWLOCK* SRWLock); + class IDataObject; typedef GUID IID; From 3e363c82754057281adf04c650ad610d362266d2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 14:48:52 +0200 Subject: [PATCH 201/211] Remove `ConcurrentSystemLocker` and use `ReadWriteLock` instead of better threading synchronization --- Source/Engine/Animations/Animations.cpp | 10 +-- Source/Engine/Animations/Animations.h | 3 +- Source/Engine/Content/Asset.h | 1 - Source/Engine/Content/Assets/Animation.cpp | 4 +- .../Engine/Content/Assets/AnimationGraph.cpp | 8 +- .../Content/Assets/AnimationGraphFunction.cpp | 6 +- Source/Engine/Level/Scene/SceneRendering.cpp | 21 +++--- Source/Engine/Level/Scene/SceneRendering.h | 4 +- Source/Engine/Particles/ParticleEmitter.cpp | 13 ++-- .../Particles/ParticleEmitterFunction.cpp | 6 +- Source/Engine/Particles/Particles.cpp | 14 ++-- Source/Engine/Particles/Particles.h | 5 +- .../Renderer/GlobalSignDistanceFieldPass.cpp 
| 16 ++-- .../Threading/ConcurrentSystemLocker.cpp | 75 ------------------- .../Engine/Threading/ConcurrentSystemLocker.h | 47 ------------ Source/Engine/Threading/Threading.h | 68 +++++++++++++---- 16 files changed, 107 insertions(+), 194 deletions(-) delete mode 100644 Source/Engine/Threading/ConcurrentSystemLocker.cpp delete mode 100644 Source/Engine/Threading/ConcurrentSystemLocker.h diff --git a/Source/Engine/Animations/Animations.cpp b/Source/Engine/Animations/Animations.cpp index d2708edad..f20f41a58 100644 --- a/Source/Engine/Animations/Animations.cpp +++ b/Source/Engine/Animations/Animations.cpp @@ -53,7 +53,7 @@ namespace AnimationsService AnimationManagerInstance; TaskGraphSystem* Animations::System = nullptr; -ConcurrentSystemLocker Animations::SystemLocker; +ReadWriteLock Animations::SystemLocker; #if USE_EDITOR Delegate Animations::DebugFlow; #endif @@ -124,7 +124,7 @@ void AnimationsSystem::Execute(TaskGraph* graph) Active = true; // Ensure no animation assets can be reloaded/modified during async update - Animations::SystemLocker.Begin(false); + Animations::SystemLocker.ReadLock(); // Setup data for async update const auto& tickData = Time::Update; @@ -165,18 +165,18 @@ void AnimationsSystem::PostExecute(TaskGraph* graph) // Cleanup AnimationManagerInstance.UpdateList.Clear(); - Animations::SystemLocker.End(false); + Animations::SystemLocker.ReadUnlock(); Active = false; } void Animations::AddToUpdate(AnimatedModel* obj) { - ConcurrentSystemLocker::WriteScope lock(SystemLocker, true); + ScopeWriteLock lock(SystemLocker); AnimationManagerInstance.UpdateList.Add(obj); } void Animations::RemoveFromUpdate(AnimatedModel* obj) { - ConcurrentSystemLocker::WriteScope lock(SystemLocker, true); + ScopeWriteLock lock(SystemLocker); AnimationManagerInstance.UpdateList.Remove(obj); } diff --git a/Source/Engine/Animations/Animations.h b/Source/Engine/Animations/Animations.h index ac30640dc..f8aa28965 100644 --- a/Source/Engine/Animations/Animations.h +++ 
b/Source/Engine/Animations/Animations.h @@ -4,7 +4,6 @@ #include "Engine/Scripting/ScriptingType.h" #include "Engine/Core/Delegate.h" -#include "Engine/Threading/ConcurrentSystemLocker.h" class TaskGraphSystem; class AnimatedModel; @@ -23,7 +22,7 @@ API_CLASS(Static) class FLAXENGINE_API Animations API_FIELD(ReadOnly) static TaskGraphSystem* System; // Data access locker for animations data. - static ConcurrentSystemLocker SystemLocker; + static ReadWriteLock SystemLocker; #if USE_EDITOR // Data wrapper for the debug flow information. diff --git a/Source/Engine/Content/Asset.h b/Source/Engine/Content/Asset.h index 32f2a5e31..e6607e62c 100644 --- a/Source/Engine/Content/Asset.h +++ b/Source/Engine/Content/Asset.h @@ -7,7 +7,6 @@ #include "Engine/Core/Types/String.h" #include "Engine/Platform/CriticalSection.h" #include "Engine/Scripting/ScriptingObject.h" -#include "Engine/Threading/ConcurrentSystemLocker.h" #include "Config.h" #include "Types.h" diff --git a/Source/Engine/Content/Assets/Animation.cpp b/Source/Engine/Content/Assets/Animation.cpp index 93397397a..015f09e4e 100644 --- a/Source/Engine/Content/Assets/Animation.cpp +++ b/Source/Engine/Content/Assets/Animation.cpp @@ -600,7 +600,7 @@ void Animation::OnScriptingDispose() Asset::LoadResult Animation::load() { PROFILE_MEM(AnimationsData); - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); // Get stream with animations data const auto dataChunk = GetChunk(0); @@ -732,7 +732,7 @@ Asset::LoadResult Animation::load() void Animation::unload(bool isReloading) { - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); #if USE_EDITOR if (_registeredForScriptingReload) { diff --git a/Source/Engine/Content/Assets/AnimationGraph.cpp b/Source/Engine/Content/Assets/AnimationGraph.cpp index acab48b2f..ad6353196 100644 --- 
a/Source/Engine/Content/Assets/AnimationGraph.cpp +++ b/Source/Engine/Content/Assets/AnimationGraph.cpp @@ -27,7 +27,7 @@ AnimationGraph::AnimationGraph(const SpawnParams& params, const AssetInfo* info) Asset::LoadResult AnimationGraph::load() { PROFILE_MEM(AnimationsData); - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); // Get stream with graph data const auto surfaceChunk = GetChunk(0); @@ -53,7 +53,7 @@ Asset::LoadResult AnimationGraph::load() void AnimationGraph::unload(bool isReloading) { - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); Graph.Clear(); } @@ -86,7 +86,7 @@ bool AnimationGraph::InitAsAnimation(SkinnedModel* baseModel, Animation* anim, b return true; } PROFILE_MEM(AnimationsData); - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); // Create Graph data MemoryWriteStream writeStream(512); @@ -172,7 +172,7 @@ bool AnimationGraph::SaveSurface(const BytesContainer& data) { if (OnCheckSave()) return true; - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); ScopeLock lock(Locker); if (IsVirtual()) diff --git a/Source/Engine/Content/Assets/AnimationGraphFunction.cpp b/Source/Engine/Content/Assets/AnimationGraphFunction.cpp index 3e8ce62e8..812a8f090 100644 --- a/Source/Engine/Content/Assets/AnimationGraphFunction.cpp +++ b/Source/Engine/Content/Assets/AnimationGraphFunction.cpp @@ -22,7 +22,7 @@ AnimationGraphFunction::AnimationGraphFunction(const SpawnParams& params, const Asset::LoadResult AnimationGraphFunction::load() { PROFILE_MEM(AnimationsData); - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); // Get graph data from chunk const auto surfaceChunk 
= GetChunk(0); @@ -49,7 +49,7 @@ Asset::LoadResult AnimationGraphFunction::load() void AnimationGraphFunction::unload(bool isReloading) { - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); GraphData.Release(); Inputs.Clear(); Outputs.Clear(); @@ -98,7 +98,7 @@ bool AnimationGraphFunction::SaveSurface(const BytesContainer& data) const { if (OnCheckSave()) return true; - ConcurrentSystemLocker::WriteScope systemScope(Animations::SystemLocker); + ScopeWriteLock systemScope(Animations::SystemLocker); ScopeLock lock(Locker); // Set Visject Surface data diff --git a/Source/Engine/Level/Scene/SceneRendering.cpp b/Source/Engine/Level/Scene/SceneRendering.cpp index 1fffa79c8..fac756af8 100644 --- a/Source/Engine/Level/Scene/SceneRendering.cpp +++ b/Source/Engine/Level/Scene/SceneRendering.cpp @@ -19,7 +19,7 @@ #define CHECK_SCENE_EDIT_ACCESS() #else #define CHECK_SCENE_EDIT_ACCESS() \ - if (Locker.HasLock(false) && IsInMainThread() && GPUDevice::Instance && GPUDevice::Instance->IsRendering()) \ + if (_isRendering && IsInMainThread() && GPUDevice::Instance && GPUDevice::Instance->IsRendering()) \ { \ LOG(Error, "Adding/removing actors during rendering is not supported ({}, '{}').", a->ToString(), a->GetNamePath()); \ return; \ @@ -58,20 +58,21 @@ FORCE_INLINE bool FrustumsListCull(const BoundingSphere& bounds, const ArrayScenes.Add(this); - - // Add additional lock during scene rendering (prevents any Actors cache modifications on content streaming threads - eg. 
when model residency changes) - Locker.Begin(false); } else if (category == PostRender) { // Release additional lock - Locker.End(false); + _isRendering = false; + Locker.ReadUnlock(); } auto& view = renderContextBatch.GetMainContext().View; auto& list = Actors[(int32)category]; @@ -142,7 +143,7 @@ void SceneRendering::CollectPostFxVolumes(RenderContext& renderContext) void SceneRendering::Clear() { - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); for (auto* listener : _listeners) { listener->OnSceneRenderingClear(this); @@ -165,7 +166,7 @@ void SceneRendering::AddActor(Actor* a, int32& key) PROFILE_MEM(Graphics); CHECK_SCENE_EDIT_ACCESS(); const int32 category = a->_drawCategory; - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); auto& list = Actors[category]; if (FreeActors[category].HasItems()) { @@ -190,7 +191,7 @@ void SceneRendering::AddActor(Actor* a, int32& key) void SceneRendering::UpdateActor(Actor* a, int32& key, ISceneRenderingListener::UpdateFlags flags) { const int32 category = a->_drawCategory; - ConcurrentSystemLocker::ReadScope lock(Locker); // Read-access only as list doesn't get resized (like Add/Remove do) so allow updating actors from different threads at once + ScopeReadLock lock(Locker); // Read-access only as list doesn't get resized (like Add/Remove do) so allow updating actors from different threads at once auto& list = Actors[category]; if (list.Count() <= key || key < 0) // Ignore invalid key softly return; @@ -210,7 +211,7 @@ void SceneRendering::RemoveActor(Actor* a, int32& key) { CHECK_SCENE_EDIT_ACCESS(); const int32 category = a->_drawCategory; - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); auto& list = Actors[category]; if (list.Count() > key || key < 0) // Ignore invalid key softly (eg. 
list after batch clear during scene unload) { diff --git a/Source/Engine/Level/Scene/SceneRendering.h b/Source/Engine/Level/Scene/SceneRendering.h index 293927ea8..c11c94de8 100644 --- a/Source/Engine/Level/Scene/SceneRendering.h +++ b/Source/Engine/Level/Scene/SceneRendering.h @@ -7,7 +7,6 @@ #include "Engine/Core/Math/BoundingSphere.h" #include "Engine/Core/Math/BoundingFrustum.h" #include "Engine/Level/Actor.h" -#include "Engine/Threading/ConcurrentSystemLocker.h" class SceneRenderTask; class SceneRendering; @@ -102,9 +101,10 @@ public: Array Actors[MAX]; Array FreeActors[MAX]; Array PostFxProviders; - ConcurrentSystemLocker Locker; + ReadWriteLock Locker; private: + bool _isRendering = false; #if USE_EDITOR Array PhysicsDebug; Array LightsDebug; diff --git a/Source/Engine/Particles/ParticleEmitter.cpp b/Source/Engine/Particles/ParticleEmitter.cpp index 9991692c2..80738e65d 100644 --- a/Source/Engine/Particles/ParticleEmitter.cpp +++ b/Source/Engine/Particles/ParticleEmitter.cpp @@ -106,7 +106,7 @@ namespace Asset::LoadResult ParticleEmitter::load() { PROFILE_MEM(Particles); - ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); + ScopeWriteLock systemScope(Particles::SystemLocker); // Load the graph const auto surfaceChunk = GetChunk(SHADER_FILE_CHUNK_VISJECT_SURFACE); @@ -330,6 +330,7 @@ Asset::LoadResult ParticleEmitter::load() // Wait for resources used by the emitter to be loaded // eg. texture used to place particles on spawn needs to be available + // Free Particles::SystemLocker when waiting on asset load to prevent lock-contention. 
bool waitForAsset = false; for (const auto& node : Graph.Nodes) { @@ -341,7 +342,7 @@ Asset::LoadResult ParticleEmitter::load() if (!waitForAsset) { waitForAsset = true; - Particles::SystemLocker.End(true); + Particles::SystemLocker.WriteUnlock(); } WaitForAsset(texture); } @@ -354,20 +355,20 @@ Asset::LoadResult ParticleEmitter::load() if (!waitForAsset) { waitForAsset = true; - Particles::SystemLocker.End(true); + Particles::SystemLocker.WriteUnlock(); } WaitForAsset((Asset*)parameter.Value); } } if (waitForAsset) - Particles::SystemLocker.Begin(true); + Particles::SystemLocker.WriteLock(); return LoadResult::Ok; } void ParticleEmitter::unload(bool isReloading) { - ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); + ScopeWriteLock systemScope(Particles::SystemLocker); #if COMPILE_WITH_SHADER_COMPILER UnregisterForShaderReloads(this); #endif @@ -458,7 +459,7 @@ bool ParticleEmitter::SaveSurface(const BytesContainer& data) { if (OnCheckSave()) return true; - ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); + ScopeWriteLock systemScope(Particles::SystemLocker); ScopeLock lock(Locker); // Release all chunks diff --git a/Source/Engine/Particles/ParticleEmitterFunction.cpp b/Source/Engine/Particles/ParticleEmitterFunction.cpp index f8aa5c62a..37e879172 100644 --- a/Source/Engine/Particles/ParticleEmitterFunction.cpp +++ b/Source/Engine/Particles/ParticleEmitterFunction.cpp @@ -43,7 +43,7 @@ ParticleEmitterFunction::ParticleEmitterFunction(const SpawnParams& params, cons Asset::LoadResult ParticleEmitterFunction::load() { PROFILE_MEM(Particles); - ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); + ScopeWriteLock systemScope(Particles::SystemLocker); // Load graph const auto surfaceChunk = GetChunk(0); @@ -96,7 +96,7 @@ Asset::LoadResult ParticleEmitterFunction::load() void ParticleEmitterFunction::unload(bool isReloading) { - ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); + 
ScopeWriteLock systemScope(Particles::SystemLocker); Graph.Clear(); #if COMPILE_WITH_PARTICLE_GPU_GRAPH GraphGPU.Clear(); @@ -189,7 +189,7 @@ bool ParticleEmitterFunction::SaveSurface(const BytesContainer& data) const { if (OnCheckSave()) return true; - ConcurrentSystemLocker::WriteScope systemScope(Particles::SystemLocker); + ScopeWriteLock systemScope(Particles::SystemLocker); ScopeLock lock(Locker); // Set Visject Surface data diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 88fa2cd88..ac02e46d3 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -134,7 +134,7 @@ namespace ParticleManagerImpl using namespace ParticleManagerImpl; TaskGraphSystem* Particles::System = nullptr; -ConcurrentSystemLocker Particles::SystemLocker; +ReadWriteLock Particles::SystemLocker; bool Particles::EnableParticleBufferPooling = true; float Particles::ParticleBufferRecycleTimeout = 10.0f; @@ -680,7 +680,7 @@ void CleanupGPUParticlesSorting() void DrawEmittersGPU(RenderContextBatch& renderContextBatch) { PROFILE_GPU_CPU_NAMED("DrawEmittersGPU"); - ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker); + ScopeReadLock systemScope(Particles::SystemLocker); GPUContext* context = GPUDevice::Instance->GetMainContext(); // Count draws and sorting passes needed for resources allocation @@ -1135,7 +1135,7 @@ void Particles::DrawParticles(RenderContextBatch& renderContextBatch, ParticleEf viewsDrawModes &= effect->DrawModes; // Setup - ConcurrentSystemLocker::ReadScope systemScope(SystemLocker); + ScopeReadLock systemScope(SystemLocker); Matrix worlds[2]; Matrix::Translation(-viewOrigin, worlds[0]); // World renderContextBatch.GetMainContext().View.GetWorldMatrix(effect->GetTransform(), worlds[1]); // Local @@ -1277,7 +1277,7 @@ void Particles::DrawParticles(RenderContextBatch& renderContextBatch, ParticleEf void Particles::DebugDraw(ParticleEffect* effect) { 
PROFILE_CPU_NAMED("Particles.DrawDebug"); - ConcurrentSystemLocker::ReadScope systemScope(SystemLocker); + ScopeReadLock systemScope(SystemLocker); // Draw all emitters for (auto& emitterData : effect->Instance.Emitters) @@ -1304,7 +1304,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context) PROFILE_CPU_NAMED("GPUParticles"); PROFILE_GPU("GPU Particles"); PROFILE_MEM(Particles); - ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker); + ScopeReadLock systemScope(Particles::SystemLocker); // Collect valid emitter tracks to update struct GPUSim @@ -1728,7 +1728,7 @@ void ParticlesSystem::Execute(TaskGraph* graph) Active = true; // Ensure no particle assets can be reloaded/modified during async update - Particles::SystemLocker.Begin(false); + Particles::SystemLocker.ReadLock(); // Setup data for async update const auto& tickData = Time::Update; @@ -1751,7 +1751,7 @@ void ParticlesSystem::PostExecute(TaskGraph* graph) PROFILE_MEM(Particles); // Cleanup - Particles::SystemLocker.End(false); + Particles::SystemLocker.ReadUnlock(); Active = false; UpdateList.Clear(); diff --git a/Source/Engine/Particles/Particles.h b/Source/Engine/Particles/Particles.h index 38ec39d2d..2007a3d8d 100644 --- a/Source/Engine/Particles/Particles.h +++ b/Source/Engine/Particles/Particles.h @@ -3,7 +3,6 @@ #pragma once #include "Engine/Scripting/ScriptingType.h" -#include "Engine/Threading/ConcurrentSystemLocker.h" class TaskGraphSystem; struct RenderContextBatch; @@ -28,8 +27,8 @@ API_CLASS(Static) class FLAXENGINE_API Particles /// API_FIELD(ReadOnly) static TaskGraphSystem* System; - // Data access locker for animations data. - static ConcurrentSystemLocker SystemLocker; + // Data access locker for particles data. 
+ static ReadWriteLock SystemLocker; public: /// diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 85502cc97..1240f148c 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -198,7 +198,7 @@ public: GPUTexture* Texture = nullptr; GPUTexture* TextureMip = nullptr; Vector3 Origin = Vector3::Zero; - ConcurrentSystemLocker Locker; + ReadWriteLock Locker; Array> Cascades; HashSet ObjectTypes; HashSet SDFTextures; @@ -238,7 +238,7 @@ public: OnSDFTextureDeleted(texture); // Clear static chunks cache - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); for (auto& cascade : Cascades) cascade.StaticChunks.Clear(); } @@ -398,7 +398,7 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); OnSceneRenderingDirty(a->GetBox()); } } @@ -407,7 +407,7 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); OnSceneRenderingDirty(BoundingBox::FromSphere(prevBounds)); OnSceneRenderingDirty(a->GetBox()); } @@ -417,14 +417,14 @@ public: { if (GLOBAL_SDF_ACTOR_IS_STATIC(a) && ObjectTypes.Contains(a->GetTypeHandle())) { - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); OnSceneRenderingDirty(a->GetBox()); } } void OnSceneRenderingClear(SceneRendering* scene) override { - ConcurrentSystemLocker::WriteScope lock(Locker, true); + ScopeWriteLock lock(Locker); for (auto& cascade : Cascades) cascade.StaticChunks.Clear(); } @@ -583,7 +583,7 @@ void GlobalSignDistanceFieldCustomBuffer::DrawCascadeJob(int32 cascadeIndex) if (!cascade.Dirty) return; PROFILE_CPU(); - ConcurrentSystemLocker::ReadScope lock(Locker); + 
ScopeReadLock lock(Locker); CurrentCascade.Set(&cascade); DrawCascadeActors(cascade); UpdateCascadeChunks(cascade); @@ -798,7 +798,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex Current = &sdfData; sdfData.StartDrawing(renderContext, false, reset); // (ignored if not started earlier this frame) sdfData.WaitForDrawing(); - ConcurrentSystemLocker::WriteScope lock(sdfData.Locker); + ScopeWriteLock lock(sdfData.Locker); // Rasterize world geometry into Global SDF bool anyDraw = false; diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.cpp b/Source/Engine/Threading/ConcurrentSystemLocker.cpp deleted file mode 100644 index 4b64a12ac..000000000 --- a/Source/Engine/Threading/ConcurrentSystemLocker.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) Wojciech Figat. All rights reserved. - -#include "ConcurrentSystemLocker.h" -#include "Engine/Platform/Platform.h" -#if !BUILD_RELEASE -#include "Engine/Core/Log.h" -#endif - -ConcurrentSystemLocker::ConcurrentSystemLocker() -{ - _counters[0] = _counters[1] = 0; -} - -void ConcurrentSystemLocker::Begin(bool write, bool exclusively) -{ - volatile int64* thisCounter = &_counters[write]; - volatile int64* otherCounter = &_counters[!write]; - -#if !BUILD_RELEASE - int32 retries = 0; - double startTime = Platform::GetTimeSeconds(); -#endif -RETRY: -#if !BUILD_RELEASE - retries++; - if (retries > 1000) - { - double endTime = Platform::GetTimeSeconds(); - if (endTime - startTime > 0.5f) - { - LOG(Error, "Deadlock detected in ConcurrentSystemLocker! 
Thread 0x{0:x} waits for {1} ms...", Platform::GetCurrentThreadID(), (int32)((endTime - startTime) * 1000.0)); - retries = 0; - } - } -#endif - - // Check if we can enter (cannot read while someone else is writing and vice versa) - if (Platform::AtomicRead(otherCounter) != 0) - { - // Someone else is doing opposite operation so wait for it's end - // TODO: use ConditionVariable+CriticalSection to prevent active-waiting - Platform::Yield(); - goto RETRY; - } - - // Writers might want to check themselves for a single writer at the same time - just like a mutex - if (exclusively && Platform::AtomicRead(thisCounter) != 0) - { - // Someone else is doing opposite operation so wait for it's end - Platform::Yield(); - goto RETRY; - } - - // Mark that we entered this section - Platform::InterlockedIncrement(thisCounter); - - // Double-check if we're safe to go - if (Platform::InterlockedCompareExchange(otherCounter, 0, 0)) - { - // Someone else is doing opposite operation while this thread was doing counter increment so retry - Platform::InterlockedDecrement(thisCounter); - goto RETRY; - } -} - -void ConcurrentSystemLocker::End(bool write) -{ - // Mark that we left this section - Platform::InterlockedDecrement(&_counters[write]); -} - -bool ConcurrentSystemLocker::HasLock(bool write) const -{ - return Platform::AtomicRead(&_counters[write]) != 0; -} diff --git a/Source/Engine/Threading/ConcurrentSystemLocker.h b/Source/Engine/Threading/ConcurrentSystemLocker.h deleted file mode 100644 index 0b46a64f5..000000000 --- a/Source/Engine/Threading/ConcurrentSystemLocker.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) Wojciech Figat. All rights reserved. - -#pragma once - -#include "Engine/Core/Core.h" -#include "Engine/Core/Types/BaseTypes.h" - -/// -/// Utility for guarding system data access from different threads depending on the resources usage (eg. block read on write). 
-/// -struct ConcurrentSystemLocker -{ -private: - volatile int64 _counters[2]; - -public: - NON_COPYABLE(ConcurrentSystemLocker); - ConcurrentSystemLocker(); - - void Begin(bool write, bool exclusively = false); - void End(bool write); - bool HasLock(bool write) const; - -public: - template - struct Scope - { - NON_COPYABLE(Scope); - - Scope(ConcurrentSystemLocker& locker, bool exclusively = false) - : _locker(locker) - { - _locker.Begin(Write, exclusively); - } - - ~Scope() - { - _locker.End(Write); - } - - private: - ConcurrentSystemLocker& _locker; - }; - - typedef Scope ReadScope; - typedef Scope WriteScope; -}; diff --git a/Source/Engine/Threading/Threading.h b/Source/Engine/Threading/Threading.h index 101d058d0..a075351ea 100644 --- a/Source/Engine/Threading/Threading.h +++ b/Source/Engine/Threading/Threading.h @@ -3,6 +3,7 @@ #pragma once #include "Engine/Platform/CriticalSection.h" +#include "Engine/Platform/ReadWriteLock.h" /// /// Checks if current execution in on the main thread. @@ -10,35 +11,70 @@ FLAXENGINE_API bool IsInMainThread(); /// -/// Scope locker for critical section. +/// Scope lock for critical section (mutex). Ensures no other thread can enter scope. /// class ScopeLock { private: - const CriticalSection* _section; - - ScopeLock() = default; - ScopeLock(const ScopeLock&) = delete; - ScopeLock& operator=(const ScopeLock&) = delete; + ScopeLock() = delete; + NON_COPYABLE(ScopeLock); public: - - /// - /// Init, enters critical section. - /// - /// The synchronization object to lock. - ScopeLock(const CriticalSection& section) + FORCE_INLINE ScopeLock(const CriticalSection& section) : _section(§ion) { _section->Lock(); } - /// - /// Destructor, releases critical section. - /// - ~ScopeLock() + FORCE_INLINE ~ScopeLock() { _section->Unlock(); } }; + +/// +/// Scope lock for read/write lock that allows for shared reading by multiple threads (no writers allowed). 
+/// +class ScopeReadLock +{ +private: + const ReadWriteLock* _lock; + ScopeReadLock() = delete; + NON_COPYABLE(ScopeReadLock); + +public: + FORCE_INLINE ScopeReadLock(const ReadWriteLock& lock) + : _lock(&lock) + { + _lock->ReadLock(); + } + + FORCE_INLINE ~ScopeReadLock() + { + _lock->ReadUnlock(); + } +}; + +/// +/// Scope lock for read/write lock that allows for exclusive writing by a single thread (no readers allowed). +/// +class ScopeWriteLock +{ +private: + const ReadWriteLock* _lock; + ScopeWriteLock() = delete; + NON_COPYABLE(ScopeWriteLock); + +public: + FORCE_INLINE ScopeWriteLock(const ReadWriteLock& lock) + : _lock(&lock) + { + _lock->WriteLock(); + } + + FORCE_INLINE ~ScopeWriteLock() + { + _lock->WriteUnlock(); + } +}; From cd22cd059d033603821bfc68faa1f3259e74beab Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 15:56:33 +0200 Subject: [PATCH 202/211] Various small fixes and improvements --- .../DebugMaterials/DDGIDebugProbes.flax | 2 +- .../DirectX/DX11/GPUContextDX11.cpp | 62 +++++++++---------- .../DirectX/DX11/GPUContextDX11.h | 2 + .../DirectX/DX12/GPUBufferDX12.cpp | 2 +- .../Platform/Win32/Win32ConditionVariable.h | 10 +-- .../Platform/Win32/Win32CriticalSection.h | 10 +-- .../Renderer/GlobalSignDistanceFieldPass.cpp | 6 -- 7 files changed, 43 insertions(+), 51 deletions(-) diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index 4289244c8..39677f815 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:740621fb235edae990ffa259a833b12001eb5027bc6036af0aa34ebca4bcec64 +oid sha256:d317dc7b2fc2700b28e4a2581c567b888ea1ebb62c5da84f826d9b29c510ff17 size 40805 diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 9b648bd51..2a50fbfd8 100644 --- 
a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -95,6 +95,7 @@ void GPUContextDX11::FrameBegin() GPUContext::FrameBegin(); // Setup + _flushOnDispatch = false; _omDirtyFlag = false; _uaDirtyFlag = false; _cbDirtyFlag = false; @@ -497,50 +498,19 @@ void GPUContextDX11::UpdateCB(GPUConstantBuffer* cb, const void* data) void GPUContextDX11::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ) { - CurrentCS = (GPUShaderProgramCSDX11*)shader; - - // Flush - flushCBs(); - flushSRVs(); - flushUAVs(); - flushOM(); - - // Dispatch - auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle(); - if (_currentCompute != compute) - { - _currentCompute = compute; - _context->CSSetShader(compute, nullptr, 0); - } + onDispatch(shader); _context->Dispatch(threadGroupCountX, threadGroupCountY, threadGroupCountZ); RENDER_STAT_DISPATCH_CALL(); - CurrentCS = nullptr; } void GPUContextDX11::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs) { ASSERT(bufferForArgs && EnumHasAnyFlags(bufferForArgs->GetFlags(), GPUBufferFlags::Argument)); - CurrentCS = (GPUShaderProgramCSDX11*)shader; - auto bufferForArgsDX11 = (GPUBufferDX11*)bufferForArgs; - - // Flush - flushCBs(); - flushSRVs(); - flushUAVs(); - flushOM(); - - // Dispatch - auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle(); - if (_currentCompute != compute) - { - _currentCompute = compute; - _context->CSSetShader(compute, nullptr, 0); - } + onDispatch(shader); _context->DispatchIndirect(bufferForArgsDX11->GetBuffer(), offsetForArgs); RENDER_STAT_DISPATCH_CALL(); - CurrentCS = nullptr; } @@ -921,6 +891,7 @@ void GPUContextDX11::OverlapUA(bool end) NvAPI_D3D11_EndUAVOverlap(_context); else NvAPI_D3D11_BeginUAVOverlap(_context); + _flushOnDispatch |= end; return; } #endif @@ -931,6 +902,7 @@ void GPUContextDX11::OverlapUA(bool end) 
agsDriverExtensionsDX11_EndUAVOverlap(AgsContext, _context); else agsDriverExtensionsDX11_BeginUAVOverlap(AgsContext, _context); + _flushOnDispatch |= end; return; } #endif @@ -1046,6 +1018,7 @@ void GPUContextDX11::flushIA() void GPUContextDX11::onDrawCall() { + _flushOnDispatch = false; flushCBs(); flushSRVs(); flushUAVs(); @@ -1053,4 +1026,27 @@ void GPUContextDX11::onDrawCall() flushOM(); } +void GPUContextDX11::onDispatch(GPUShaderProgramCS* shader) +{ + CurrentCS = (GPUShaderProgramCSDX11*)shader; + + flushCBs(); + flushSRVs(); + flushUAVs(); + flushOM(); + + if (_flushOnDispatch) + { + _flushOnDispatch = false; + _context->Flush(); + } + + auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle(); + if (_currentCompute != compute) + { + _currentCompute = compute; + _context->CSSetShader(compute, nullptr, 0); + } +} + #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index ccdac0d70..7dc693019 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -30,6 +30,7 @@ private: byte _tracyZone[TracyD3D11ZoneSize]; #endif int32 _maxUASlots; + bool _flushOnDispatch; // Output Merger bool _omDirtyFlag; @@ -111,6 +112,7 @@ private: void flushOM(); void flushIA(); void onDrawCall(); + void onDispatch(GPUShaderProgramCS* shader); public: diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp index 673146148..bb1eaacdb 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp @@ -38,7 +38,7 @@ void* GPUBufferDX12::Map(GPUResourceMapMode mode) { D3D12_RANGE readRange; D3D12_RANGE* readRangePtr; - switch (mode) + switch (mode & GPUResourceMapMode::ReadWrite) { case GPUResourceMapMode::Read: readRangePtr = nullptr; diff --git 
a/Source/Engine/Platform/Win32/Win32ConditionVariable.h b/Source/Engine/Platform/Win32/Win32ConditionVariable.h index fa9c04ce4..5b8c23c05 100644 --- a/Source/Engine/Platform/Win32/Win32ConditionVariable.h +++ b/Source/Engine/Platform/Win32/Win32ConditionVariable.h @@ -26,7 +26,7 @@ public: /// /// Initializes a new instance of the class. /// - Win32ConditionVariable() + __forceinline Win32ConditionVariable() { Windows::InitializeConditionVariable(&_cond); } @@ -44,7 +44,7 @@ public: /// Blocks the current thread execution until the condition variable is woken up. /// /// The critical section locked by the current thread. - void Wait(const Win32CriticalSection& lock) + __forceinline void Wait(const Win32CriticalSection& lock) { Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, 0xFFFFFFFF); } @@ -55,7 +55,7 @@ public: /// The critical section locked by the current thread. /// The time-out interval, in milliseconds. If the time-out interval elapses, the function re-acquires the critical section and returns zero. If timeout is zero, the function tests the states of the specified objects and returns immediately. If timeout is INFINITE, the function's time-out interval never elapses. /// If the function succeeds, the return value is true, otherwise, if the function fails or the time-out interval elapses, the return value is false. - bool Wait(const Win32CriticalSection& lock, const int32 timeout) + __forceinline bool Wait(const Win32CriticalSection& lock, const int32 timeout) { return !!Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, timeout); } @@ -63,7 +63,7 @@ public: /// /// Notifies one waiting thread. /// - void NotifyOne() + __forceinline void NotifyOne() { Windows::WakeConditionVariable(&_cond); } @@ -71,7 +71,7 @@ public: /// /// Notifies all waiting threads. 
/// - void NotifyAll() + __forceinline void NotifyAll() { Windows::WakeAllConditionVariable(&_cond); } diff --git a/Source/Engine/Platform/Win32/Win32CriticalSection.h b/Source/Engine/Platform/Win32/Win32CriticalSection.h index e6375c3fc..748840530 100644 --- a/Source/Engine/Platform/Win32/Win32CriticalSection.h +++ b/Source/Engine/Platform/Win32/Win32CriticalSection.h @@ -26,7 +26,7 @@ public: /// /// Initializes a new instance of the class. /// - Win32CriticalSection() + __forceinline Win32CriticalSection() { Windows::InitializeCriticalSectionEx(&_criticalSection, 4000, 0x01000000); } @@ -34,7 +34,7 @@ public: /// /// Finalizes an instance of the class. /// - ~Win32CriticalSection() + __forceinline ~Win32CriticalSection() { Windows::DeleteCriticalSection(&_criticalSection); } @@ -43,7 +43,7 @@ public: /// /// Locks the critical section. /// - void Lock() const + __forceinline void Lock() const { Windows::EnterCriticalSection(&_criticalSection); } @@ -52,7 +52,7 @@ public: /// Attempts to enter a critical section without blocking. If the call is successful, the calling thread takes ownership of the critical section. /// /// True if calling thread took ownership of the critical section. - bool TryLock() const + __forceinline bool TryLock() const { return Windows::TryEnterCriticalSection(&_criticalSection) != 0; } @@ -60,7 +60,7 @@ public: /// /// Releases the lock on the critical section. 
/// - void Unlock() const + __forceinline void Unlock() const { Windows::LeaveCriticalSection(&_criticalSection); } diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index 1240f148c..e4ed7d74f 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -924,12 +924,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } } -#if PLATFORM_WINDOWS - // Hack to fix D3D11 bug that doesn't insert UAV barrier after overlap region ends (between two GPUComputePass) - if (context->GetDevice()->GetRendererType() == RendererType::DirectX11) - context->Dispatch(_csRasterizeModel0, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); -#endif - // Rasterize non-empty chunks (additive layers so need combine with existing chunk data) for (uint32 layer = 0; layer <= maxLayer; layer++) { From 831fb0f4425aebd3bd6b2713fd08faa66a699681 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 21:38:07 +0200 Subject: [PATCH 203/211] Optimize textures/buffers uploading on Vulkan with page allocator --- .../DirectX/DX12/GPUContextDX12.cpp | 8 +- .../DirectX/DX12/GPUDeviceDX12.cpp | 9 +- .../DirectX/DX12/GPUDeviceDX12.h | 7 +- .../DirectX/DX12/UploadBufferDX12.cpp | 154 ++++++------ .../DirectX/DX12/UploadBufferDX12.h | 222 ++++-------------- .../Vulkan/GPUContextVulkan.cpp | 17 +- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 133 +---------- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.h | 44 +--- .../Vulkan/UploadBufferVulkan.cpp | 195 +++++++++++++++ .../Vulkan/UploadBufferVulkan.h | 79 +++++++ 10 files changed, 427 insertions(+), 441 deletions(-) create mode 100644 Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp create mode 100644 Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp 
b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index c132c7343..c68e7f262 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -1119,7 +1119,7 @@ void GPUContextDX12::UpdateCB(GPUConstantBuffer* cb, const void* data) return; // Allocate bytes for the buffer - DynamicAllocation allocation = _device->UploadBuffer->Allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + auto allocation = _device->UploadBuffer.Allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); // Copy data Platform::MemoryCopy(allocation.CPUAddress, data, allocation.Size); @@ -1343,7 +1343,7 @@ void GPUContextDX12::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 si SetResourceState(bufferDX12, D3D12_RESOURCE_STATE_COPY_DEST); flushRBs(); - _device->UploadBuffer->UploadBuffer(this, bufferDX12->GetResource(), offset, data, size); + _device->UploadBuffer.UploadBuffer(GetCommandList(), bufferDX12->GetResource(), offset, data, size); } void GPUContextDX12::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset) @@ -1369,7 +1369,7 @@ void GPUContextDX12::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int32 SetResourceState(textureDX12, D3D12_RESOURCE_STATE_COPY_DEST); flushRBs(); - _device->UploadBuffer->UploadTexture(this, textureDX12->GetResource(), data, rowPitch, slicePitch, mipIndex, arrayIndex); + _device->UploadBuffer.UploadTexture(GetCommandList(), textureDX12->GetResource(), data, rowPitch, slicePitch, mipIndex, arrayIndex); } void GPUContextDX12::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource) @@ -1424,7 +1424,7 @@ void GPUContextDX12::ResetCounter(GPUBuffer* buffer) flushRBs(); uint32 value = 0; - _device->UploadBuffer->UploadBuffer(this, counter->GetResource(), 0, &value, 4); + 
_device->UploadBuffer.UploadBuffer(GetCommandList(), counter->GetResource(), 0, &value, 4); SetResourceState(counter, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index e176d199c..e140da37e 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -244,7 +244,7 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter) , _rootSignature(nullptr) , _commandQueue(nullptr) , _mainContext(nullptr) - , UploadBuffer(nullptr) + , UploadBuffer(this) , TimestampQueryHeap(this, D3D12_QUERY_HEAP_TYPE_TIMESTAMP, DX12_BACK_BUFFER_COUNT * 1024) , Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false) , Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false) @@ -701,9 +701,6 @@ bool GPUDeviceDX12::Init() VALIDATE_DIRECTX_CALL(_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&_rootSignature))); } - // Upload buffer - UploadBuffer = New(this); - if (TimestampQueryHeap.Init()) return true; @@ -740,7 +737,7 @@ void GPUDeviceDX12::DrawBegin() GPUDeviceDX::DrawBegin(); updateRes2Dispose(); - UploadBuffer->BeginGeneration(Engine::FrameCount); + UploadBuffer.BeginGeneration(Engine::FrameCount); } void GPUDeviceDX12::RenderEnd() @@ -811,7 +808,7 @@ void GPUDeviceDX12::Dispose() Heap_Sampler.ReleaseGPU(); RingHeap_CBV_SRV_UAV.ReleaseGPU(); RingHeap_Sampler.ReleaseGPU(); - SAFE_DELETE(UploadBuffer); + UploadBuffer.ReleaseGPU(); SAFE_DELETE(DrawIndirectCommandSignature); SAFE_DELETE(_mainContext); SAFE_DELETE(_commandQueue); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h index d2ddeb1a6..064ed9a01 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h +++ 
b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h @@ -8,6 +8,7 @@ #include "Engine/Graphics/GPUResource.h" #include "../IncludeDirectXHeaders.h" #include "ResourceOwnerDX12.h" +#include "UploadBufferDX12.h" #include "QueryHeapDX12.h" #include "DescriptorHeapDX12.h" @@ -26,7 +27,6 @@ class Engine; class WindowsWindow; class GPUContextDX12; class GPUSwapChainDX12; -class UploadBufferDX12; class CommandQueueDX12; class CommandSignatureDX12; @@ -70,11 +70,10 @@ public: ~GPUDeviceDX12(); public: - /// - /// Upload buffer for general purpose + /// Data uploading utility via pages. /// - UploadBufferDX12* UploadBuffer; + UploadBufferDX12 UploadBuffer; /// /// The timestamp queries heap. diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp index 8fdfd5ac3..a0ae79f51 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp @@ -4,10 +4,57 @@ #include "UploadBufferDX12.h" #include "GPUTextureDX12.h" -#include "GPUContextDX12.h" #include "../RenderToolsDX.h" +#include "Engine/Graphics/GPUResource.h" #include "Engine/Profiler/ProfilerMemory.h" +/// +/// Single page for the upload buffer +/// +class UploadBufferPageDX12 : public GPUResourceBase, public ResourceOwnerDX12 +{ +public: + UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size); + +public: + /// + /// Last generation that has been using that page + /// + uint64 LastGen; + + /// + /// CPU memory address of the page + /// + void* CPUAddress; + + /// + /// GPU memory address of the page + /// + D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; + + /// + /// Page size in bytes + /// + uint64 Size; + +public: + // [GPUResourceDX12] + GPUResourceType GetResourceType() const final override + { + return GPUResourceType::Buffer; + } + + // [ResourceOwnerDX12] + GPUResource* AsGPUResource() const override + { + return (GPUResource*)this; + } + +protected: 
+ // [GPUResourceDX12] + void OnReleaseGPU() final override; +}; + UploadBufferDX12::UploadBufferDX12(GPUDeviceDX12* device) : _device(device) , _currentPage(nullptr) @@ -16,24 +63,11 @@ UploadBufferDX12::UploadBufferDX12(GPUDeviceDX12* device) { } -UploadBufferDX12::~UploadBufferDX12() -{ - _freePages.Add(_usedPages); - for (auto page : _freePages) - { - page->ReleaseGPU(); - Delete(page); - } -} - -DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align) +UploadBufferDX12::Allocation UploadBufferDX12::Allocate(uint64 size, uint64 align) { const uint64 alignmentMask = align - 1; - ASSERT((alignmentMask & align) == 0); - - // Check if use default or bigger page - const bool useDefaultSize = size <= DX12_DEFAULT_UPLOAD_PAGE_SIZE; - const uint64 pageSize = useDefaultSize ? DX12_DEFAULT_UPLOAD_PAGE_SIZE : size; + ASSERT_LOW_LAYER((alignmentMask & align) == 0); + const uint64 pageSize = Math::Max(size, DX12_DEFAULT_UPLOAD_PAGE_SIZE); const uint64 alignedSize = Math::AlignUpWithMask(size, alignmentMask); // Align the allocation @@ -41,14 +75,26 @@ DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align) // Check if there is enough space for that chunk of the data in the current page if (_currentPage && _currentOffset + alignedSize > _currentPage->Size) - { _currentPage = nullptr; - } // Check if need to get new page if (_currentPage == nullptr) { - _currentPage = requestPage(pageSize); + // Try reusing existing page + for (int32 i = 0; i < _freePages.Count(); i++) + { + UploadBufferPageDX12* page = _freePages.Get()[i]; + if (page->Size == pageSize) + { + _freePages.RemoveAt(i); + _currentPage = page; + break; + } + } + if (_currentPage == nullptr) + _currentPage = New(_device, pageSize); + _usedPages.Add(_currentPage); + ASSERT_LOW_LAYER(_currentPage->GetResource()); _currentOffset = 0; } @@ -56,32 +102,27 @@ DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align) _currentPage->LastGen = _currentGeneration; // Create 
allocation result - const DynamicAllocation result(static_cast(_currentPage->CPUAddress) + _currentOffset, _currentOffset, size, _currentPage->GPUAddress + _currentOffset, _currentPage, _currentGeneration); + const Allocation result { (byte*)_currentPage->CPUAddress + _currentOffset, _currentOffset, size, _currentPage->GPUAddress + _currentOffset, _currentPage->GetResource(), _currentGeneration }; - // Move in the page + // Move within a page _currentOffset += size; - ASSERT(_currentPage->GetResource()); return result; } -bool UploadBufferDX12::UploadBuffer(GPUContextDX12* context, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size) +void UploadBufferDX12::UploadBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size) { // Allocate data - const DynamicAllocation allocation = Allocate(size, 4); - if (allocation.IsInvalid()) - return true; + const auto allocation = Allocate(size, GPU_SHADER_DATA_ALIGNMENT); // Copy data - Platform::MemoryCopy(allocation.CPUAddress, data, static_cast(size)); + Platform::MemoryCopy(allocation.CPUAddress, data, size); // Copy buffer region - context->GetCommandList()->CopyBufferRegion(buffer, bufferOffset, allocation.Page->GetResource(), allocation.Offset, size); - - return false; + commandList->CopyBufferRegion(buffer, bufferOffset, allocation.Resource, allocation.Offset, size); } -bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex) +void UploadBufferDX12::UploadTexture(ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex) { D3D12_RESOURCE_DESC resourceDesc = texture->GetDesc(); const UINT subresourceIndex = RenderToolsDX::CalcSubresourceIndex(mipIndex, arrayIndex, resourceDesc.MipLevels); @@ -95,9 +136,7 @@ 
bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* te const uint64 sliceSizeAligned = numSlices * mipSizeAligned; // Allocate data - const DynamicAllocation allocation = Allocate(sliceSizeAligned, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); - if (allocation.Size != sliceSizeAligned) - return true; + const auto allocation = Allocate(sliceSizeAligned, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); byte* ptr = (byte*)srcData; ASSERT(srcSlicePitch <= sliceSizeAligned); @@ -128,15 +167,13 @@ bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* te // Source buffer copy location description D3D12_TEXTURE_COPY_LOCATION srcLocation; - srcLocation.pResource = allocation.Page->GetResource(); + srcLocation.pResource = allocation.Resource; srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; srcLocation.PlacedFootprint.Offset = allocation.Offset; srcLocation.PlacedFootprint.Footprint = footprint.Footprint; // Copy texture region - context->GetCommandList()->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); - - return false; + commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); } void UploadBufferDX12::BeginGeneration(uint64 generation) @@ -170,41 +207,18 @@ void UploadBufferDX12::BeginGeneration(uint64 generation) _currentGeneration = generation; } -UploadBufferPageDX12* UploadBufferDX12::requestPage(uint64 size) +void UploadBufferDX12::ReleaseGPU() { - // Try to find valid page - int32 freePageIndex = -1; - for (int32 i = 0; i < _freePages.Count(); i++) + _freePages.Add(_usedPages); + for (auto page : _freePages) { - if (_freePages[i]->Size == size) - { - freePageIndex = i; - break; - } + page->ReleaseGPU(); + Delete(page); } - - // Check if create a new page - UploadBufferPageDX12* page; - if (freePageIndex == -1) - { - // Get a new page to use - page = New(_device, size); - } - else - { - // Remove from free pages - page = _freePages[freePageIndex]; - 
_freePages.RemoveAt(freePageIndex); - } - - // Mark page as used - _usedPages.Add(page); - - return page; } UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size) - : GPUResourceDX12(device, TEXT("Upload Buffer Page")) + : GPUResourceBase(device, TEXT("Upload Buffer Page")) , LastGen(0) , CPUAddress(nullptr) , GPUAddress(0) @@ -234,7 +248,7 @@ UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size) // Set state initResource(resource, D3D12_RESOURCE_STATE_GENERIC_READ, 1); - DX_SET_DEBUG_NAME(_resource, GPUResourceDX12::GetName()); + DX_SET_DEBUG_NAME(_resource, GetName()); _memoryUsage = size; PROFILE_MEM_INC(GraphicsCommands, _memoryUsage); GPUAddress = _resource->GetGPUVirtualAddress(); @@ -249,9 +263,7 @@ void UploadBufferPageDX12::OnReleaseGPU() // Unmap if (_resource && CPUAddress) - { _resource->Unmap(0, nullptr); - } GPUAddress = 0; CPUAddress = nullptr; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h index a2b18a7a5..a43b25a01 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h @@ -2,11 +2,15 @@ #pragma once -#include "GPUDeviceDX12.h" +#include "Engine/Graphics/GPUDevice.h" #include "ResourceOwnerDX12.h" #if GRAPHICS_API_DIRECTX12 +class GPUDeviceDX12; +class UploadBufferPageDX12; + +// Upload buffer page size #define DX12_DEFAULT_UPLOAD_PAGE_SIZE (4 * 1014 * 1024) // 4 MB // Upload buffer generations timeout to dispose @@ -15,223 +19,93 @@ // Upload buffer pages that are not used for a few frames are disposed #define DX12_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT 60 -class GPUTextureDX12; - -/// -/// Single page for the upload buffer -/// -class UploadBufferPageDX12 : public GPUResourceDX12, public ResourceOwnerDX12 -{ -public: - - /// - /// Init - /// - /// Graphics Device - /// Page size - UploadBufferPageDX12(GPUDeviceDX12* device, uint64 
size); - -public: - - /// - /// Last generation that has been using that page - /// - uint64 LastGen; - - /// - /// CPU memory address of the page - /// - void* CPUAddress; - - /// - /// GPU memory address of the page - /// - D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; - - /// - /// Page size in bytes - /// - uint64 Size; - -public: - - // [GPUResourceDX12] - GPUResourceType GetResourceType() const final override - { - return GPUResourceType::Buffer; - } - - // [ResourceOwnerDX12] - GPUResource* AsGPUResource() const override - { - return (GPUResource*)this; - } - -protected: - - // [GPUResourceDX12] - void OnReleaseGPU() final override; -}; - -/// -/// Upload buffer allocation -/// -struct DynamicAllocation -{ - /// - /// CPU memory address of the allocation start. - /// - void* CPUAddress; - - /// - /// Allocation offset in bytes (from the start of the heap buffer). - /// - uint64 Offset; - - /// - /// Allocation size in bytes - /// - uint64 Size; - - /// - /// GPU virtual memory address of the allocation start. - /// - D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; - - /// - /// Upload buffer page that owns that allocation - /// - UploadBufferPageDX12* Page; - - /// - /// Generation number of that allocation (generally allocation is invalid after one or two generations) - /// - uint64 Generation; - - /// - /// Init - /// - DynamicAllocation() - : CPUAddress(nullptr) - , Offset(0) - , Size(0) - , GPUAddress(0) - , Page(nullptr) - , Generation(0) - { - } - - /// - /// Init - /// - /// CPU memory address - /// Offset in byes - /// Size in byes - /// GPU memory address - /// Parent page - /// Generation - DynamicAllocation(void* address, uint64 offset, uint64 size, D3D12_GPU_VIRTUAL_ADDRESS gpuAddress, UploadBufferPageDX12* page, uint64 generation) - : CPUAddress(address) - , Offset(offset) - , Size(size) - , GPUAddress(gpuAddress) - , Page(page) - , Generation(generation) - { - } - - /// - /// Returns true if allocation is invalid. 
- /// - bool IsInvalid() const - { - return CPUAddress == nullptr || Size == 0 || Page == nullptr; - } -}; - /// /// Uploading data to GPU buffer utility /// class UploadBufferDX12 { -private: +public: + /// + /// Upload buffer allocation + /// + struct Allocation + { + /// + /// CPU memory address of the allocation start. + /// + void* CPUAddress; + /// + /// Allocation offset in bytes (from the start of the heap buffer). + /// + uint64 Offset; + + /// + /// Allocation size in bytes + /// + uint64 Size; + + /// + /// GPU virtual memory address of the allocation start. + /// + D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; + + /// + /// Upload buffer page resource that owns that allocation + /// + ID3D12Resource* Resource; + + /// + /// Generation number of that allocation (generally allocation is invalid after one or two generations) + /// + uint64 Generation; + }; + +private: GPUDeviceDX12* _device; UploadBufferPageDX12* _currentPage; uint64 _currentOffset; uint64 _currentGeneration; - Array> _freePages; Array> _usedPages; public: - - /// - /// Init - /// - /// Graphics Device UploadBufferDX12(GPUDeviceDX12* device); - /// - /// Destructor - /// - ~UploadBufferDX12(); - public: - - /// - /// Gets the current generation number. - /// - FORCE_INLINE uint64 GetCurrentGeneration() const - { - return _currentGeneration; - } - -public: - /// /// Allocates memory for custom data in the buffer. /// /// Size of the data in bytes /// Data alignment in buffer in bytes /// Dynamic location - DynamicAllocation Allocate(uint64 size, uint64 align); + Allocation Allocate(uint64 size, uint64 align); /// /// Uploads data to the buffer. /// - /// GPU context to record upload command to it + /// GPU command list to record upload command to it /// Destination buffer /// Destination buffer offset in bytes. /// Data to allocate /// Size of the data in bytes - /// True if cannot upload data, otherwise false. 
- bool UploadBuffer(GPUContextDX12* context, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size); + void UploadBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size); /// /// Uploads data to the texture. /// - /// GPU context to record upload command to it + /// GPU command list to record upload command to it /// Destination texture /// Data to allocate /// Source data row pitch value to upload. /// Source data slice pitch value to upload. /// Mip map to stream index /// Texture array index - /// True if cannot upload data, otherwise false. - bool UploadTexture(GPUContextDX12* context, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex); + void UploadTexture(ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex); public: - - /// - /// Begins new generation. - /// - /// The generation ID to begin. 
void BeginGeneration(uint64 generation); - -private: - - UploadBufferPageDX12* requestPage(uint64 size); + void ReleaseGPU(); }; #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 34ad63a66..45dccf467 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -1386,16 +1386,13 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 } else { - auto staging = _device->StagingManager.AcquireBuffer(size, GPUResourceUsage::StagingUpload); - staging->SetData(data, size); + auto allocation = _device->UploadBuffer.Upload(data, size, 4); VkBufferCopy region; region.size = size; - region.srcOffset = 0; + region.srcOffset = allocation.Offset; region.dstOffset = offset; - vkCmdCopyBuffer(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)staging)->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), 1, ®ion); - - _device->StagingManager.ReleaseBuffer(cmdBuffer, staging); + vkCmdCopyBuffer(cmdBuffer->GetHandle(), allocation.Buffer, ((GPUBufferVulkan*)buffer)->GetHandle(), 1, ®ion); } // Memory transfer barrier to ensure buffer is ready to read (eg. 
by Draw or Dispatch) @@ -1444,14 +1441,14 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3 AddImageBarrier(textureVulkan, mipIndex, arrayIndex, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); FlushBarriers(); - auto buffer = _device->StagingManager.AcquireBuffer(slicePitch, GPUResourceUsage::StagingUpload); - buffer->SetData(data, slicePitch); + auto allocation = _device->UploadBuffer.Upload(data, slicePitch, 512); // Setup buffer copy region int32 mipWidth, mipHeight, mipDepth; texture->GetMipSize(mipIndex, mipWidth, mipHeight, mipDepth); VkBufferImageCopy bufferCopyRegion; Platform::MemoryClear(&bufferCopyRegion, sizeof(bufferCopyRegion)); + bufferCopyRegion.bufferOffset = allocation.Offset; bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; bufferCopyRegion.imageSubresource.mipLevel = mipIndex; bufferCopyRegion.imageSubresource.baseArrayLayer = arrayIndex; @@ -1461,9 +1458,7 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3 bufferCopyRegion.imageExtent.depth = static_cast(mipDepth); // Copy mip level from staging buffer - vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion); - - _device->StagingManager.ReleaseBuffer(cmdBuffer, buffer); + vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), allocation.Buffer, textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion); } void GPUContextVulkan::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 9ff9ab77b..dfed9bbb8 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -972,133 +972,6 @@ 
void HelperResourcesVulkan::Dispose() } } -StagingManagerVulkan::StagingManagerVulkan(GPUDeviceVulkan* device) - : _device(device) -{ -} - -GPUBuffer* StagingManagerVulkan::AcquireBuffer(uint32 size, GPUResourceUsage usage) -{ - // Try reuse free buffer - { - ScopeLock lock(_locker); - - for (int32 i = 0; i < _freeBuffers.Count(); i++) - { - auto& freeBuffer = _freeBuffers[i]; - if (freeBuffer.Buffer->GetSize() == size && freeBuffer.Buffer->GetDescription().Usage == usage) - { - const auto buffer = freeBuffer.Buffer; - _freeBuffers.RemoveAt(i); - return buffer; - } - } - } - - // Allocate new buffer - auto buffer = _device->CreateBuffer(TEXT("Pooled Staging")); - if (buffer->Init(GPUBufferDescription::Buffer(size, GPUBufferFlags::None, PixelFormat::Unknown, nullptr, 0, usage))) - { - LOG(Warning, "Failed to create pooled staging buffer."); - return nullptr; - } - - // Cache buffer - { - ScopeLock lock(_locker); - - _allBuffers.Add(buffer); -#if !BUILD_RELEASE - _allBuffersAllocSize += size; - _allBuffersTotalSize += size; - _allBuffersPeekSize = Math::Max(_allBuffersTotalSize, _allBuffersPeekSize); -#endif - } - - return buffer; -} - -void StagingManagerVulkan::ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer) -{ - ScopeLock lock(_locker); - - if (cmdBuffer) - { - // Return to pending pool (need to wait until command buffer will be executed and buffer will be reusable) - auto& item = _pendingBuffers.AddOne(); - item.Buffer = buffer; - item.CmdBuffer = cmdBuffer; - item.FenceCounter = cmdBuffer->GetFenceSignaledCounter(); - } - else - { - // Return to pool - _freeBuffers.Add({ buffer, Engine::FrameCount }); - } - - // Clear reference - buffer = nullptr; -} - -void StagingManagerVulkan::ProcessPendingFree() -{ - ScopeLock lock(_locker); - - // Find staging buffers that has been processed by the GPU and can be reused - for (int32 i = _pendingBuffers.Count() - 1; i >= 0; i--) - { - auto& e = _pendingBuffers[i]; - if (e.FenceCounter < 
e.CmdBuffer->GetFenceSignaledCounter()) - { - // Return to pool - _freeBuffers.Add({ e.Buffer, Engine::FrameCount }); - _pendingBuffers.RemoveAt(i); - } - } - - // Free staging buffers that has not been used for a few frames - for (int32 i = _freeBuffers.Count() - 1; i >= 0; i--) - { - auto& e = _freeBuffers.Get()[i]; - if (e.FrameNumber + VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT < Engine::FrameCount) - { - auto buffer = e.Buffer; - - // Remove buffer from lists - _allBuffers.Remove(buffer); - _freeBuffers.RemoveAt(i); - -#if !BUILD_RELEASE - // Update stats - _allBuffersFreeSize += buffer->GetSize(); - _allBuffersTotalSize -= buffer->GetSize(); -#endif - - // Release memory - buffer->ReleaseGPU(); - Delete(buffer); - } - } -} - -void StagingManagerVulkan::Dispose() -{ - ScopeLock lock(_locker); - -#if BUILD_DEBUG - LOG(Info, "Vulkan staging buffers peek memory usage: {0}, allocs: {1}, frees: {2}", Utilities::BytesToText(_allBuffersPeekSize), Utilities::BytesToText(_allBuffersAllocSize), Utilities::BytesToText(_allBuffersFreeSize)); -#endif - - // Release buffers and clear memory - for (auto buffer : _allBuffers) - { - buffer->ReleaseGPU(); - Delete(buffer); - } - _allBuffers.Resize(0); - _pendingBuffers.Resize(0); -} - GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan* adapter) : GPUDevice(RendererType::Vulkan, shaderProfile) , _renderPasses(512) @@ -1106,7 +979,7 @@ GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan* , _layouts(4096) , Adapter(adapter) , DeferredDeletionQueue(this) - , StagingManager(this) + , UploadBuffer(this) , HelperResources(this) { } @@ -2088,8 +1961,8 @@ void GPUDeviceVulkan::DrawBegin() // Flush resources DeferredDeletionQueue.ReleaseResources(); - StagingManager.ProcessPendingFree(); DescriptorPoolsManager->GC(); + UploadBuffer.BeginGeneration(Engine::FrameCount); #if VULKAN_USE_PIPELINE_CACHE // Serialize pipeline cache periodically for less PSO hitches on next app run @@ -2125,7 
+1998,7 @@ void GPUDeviceVulkan::Dispose() _renderPasses.ClearDelete(); _layouts.ClearDelete(); HelperResources.Dispose(); - StagingManager.Dispose(); + UploadBuffer.Dispose(); TimestampQueryPools.ClearDelete(); OcclusionQueryPools.ClearDelete(); SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h index a30dbda1c..ae265100c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h @@ -7,6 +7,7 @@ #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUResource.h" #include "DescriptorSetVulkan.h" +#include "UploadBufferVulkan.h" #include "IncludeVulkanHeaders.h" #include "Config.h" @@ -326,45 +327,6 @@ public: void Dispose(); }; -/// -/// Vulkan staging buffers manager. -/// -class StagingManagerVulkan -{ -private: - struct PendingEntry - { - GPUBuffer* Buffer; - CmdBufferVulkan* CmdBuffer; - uint64 FenceCounter; - }; - - struct FreeEntry - { - GPUBuffer* Buffer; - uint64 FrameNumber; - }; - - GPUDeviceVulkan* _device; - CriticalSection _locker; - Array _allBuffers; - Array _freeBuffers; - Array _pendingBuffers; -#if !BUILD_RELEASE - uint64 _allBuffersTotalSize = 0; - uint64 _allBuffersPeekSize = 0; - uint64 _allBuffersAllocSize = 0; - uint64 _allBuffersFreeSize = 0; -#endif - -public: - StagingManagerVulkan(GPUDeviceVulkan* device); - GPUBuffer* AcquireBuffer(uint32 size, GPUResourceUsage usage); - void ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer); - void ProcessPendingFree(); - void Dispose(); -}; - /// /// Implementation of Graphics Device for Vulkan backend. /// @@ -464,9 +426,9 @@ public: DeferredDeletionQueueVulkan DeferredDeletionQueue; /// - /// The staging buffers manager. + /// Data uploading utility via pages. /// - StagingManagerVulkan StagingManager; + UploadBufferVulkan UploadBuffer; /// /// The helper device resources manager. 
diff --git a/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp new file mode 100644 index 000000000..15b58a8b4 --- /dev/null +++ b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp @@ -0,0 +1,195 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#if GRAPHICS_API_VULKAN + +#include "UploadBufferVulkan.h" +#include "GPUDeviceVulkan.h" +#include "RenderToolsVulkan.h" +#include "Engine/Graphics/GPUResource.h" +#include "Engine/Profiler/ProfilerMemory.h" + +/// +/// Single page for the upload buffer +/// +class UploadBufferPageVulkan : public GPUResourceBase, public ResourceOwnerVulkan +{ +public: + UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size); + +public: + /// + /// Last generation that has been using that page + /// + uint64 LastGen; + + /// + /// Page size in bytes + /// + uint64 Size; + + /// + /// CPU memory address of the page + /// + void* Mapped; + + /// + /// Buffer that stored the page data + /// + VkBuffer Buffer; + + /// + /// Buffer memory allocation + /// + VmaAllocation Allocation; + +public: + // [GPUResourceVulkan] + GPUResourceType GetResourceType() const final override + { + return GPUResourceType::Buffer; + } + + // [ResourceOwnerVulkan] + GPUResource* AsGPUResource() const override + { + return (GPUResource*)this; + } + +protected: + // [GPUResourceVulkan] + void OnReleaseGPU() final override; +}; + +UploadBufferVulkan::UploadBufferVulkan(GPUDeviceVulkan* device) + : _device(device) + , _currentPage(nullptr) + , _currentOffset(0) + , _currentGeneration(0) +{ +} + +UploadBufferVulkan::Allocation UploadBufferVulkan::Allocate(uint64 size, uint64 align) +{ + const uint64 alignmentMask = align - 1; + ASSERT_LOW_LAYER((alignmentMask & align) == 0); + const uint64 pageSize = Math::Max(size, VULKAN_DEFAULT_UPLOAD_PAGE_SIZE); + const uint64 alignedSize = Math::AlignUpWithMask(size, alignmentMask); + + // Align the allocation + _currentOffset = 
Math::AlignUpWithMask(_currentOffset, alignmentMask); + + // Check if there is enough space for that chunk of the data in the current page + if (_currentPage && _currentOffset + alignedSize > _currentPage->Size) + _currentPage = nullptr; + + // Check if need to get new page + if (_currentPage == nullptr) + { + // Try reusing existing page + for (int32 i = 0; i < _freePages.Count(); i++) + { + UploadBufferPageVulkan* page = _freePages.Get()[i]; + if (page->Size == pageSize) + { + _freePages.RemoveAt(i); + _currentPage = page; + break; + } + } + if (_currentPage == nullptr) + _currentPage = New(_device, pageSize); + _usedPages.Add(_currentPage); + ASSERT_LOW_LAYER(_currentPage->Buffer); + _currentOffset = 0; + } + + // Mark page as used in this generation + _currentPage->LastGen = _currentGeneration; + + // Create allocation result + const Allocation result{ (byte*)_currentPage->Mapped + _currentOffset, _currentOffset, size, _currentPage->Buffer, _currentGeneration }; + + // Move within a page + _currentOffset += size; + + return result; +} + +UploadBufferVulkan::Allocation UploadBufferVulkan::Upload(const void* data, uint64 size, uint64 align) +{ + auto allocation = Allocate(size, align); + Platform::MemoryCopy(allocation.Mapped, data, size); + return allocation; +} + +void UploadBufferVulkan::BeginGeneration(uint64 generation) +{ + // Restore ready pages to be reused + for (int32 i = 0; _usedPages.HasItems() && i < _usedPages.Count(); i++) + { + auto page = _usedPages[i]; + if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT < generation) + { + _usedPages.RemoveAt(i); + i--; + _freePages.Add(page); + } + } + + // Remove old pages + for (int32 i = _freePages.Count() - 1; i >= 0 && _freePages.HasItems(); i--) + { + auto page = _freePages[i]; + if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT + VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT < generation) + { + _freePages.RemoveAt(i); + // NOTE: no extra i-- here; the reverse loop's own decrement already moves to the next candidate + page->ReleaseGPU(); + Delete(page); + } + } + + // Set new generation + 
_currentGeneration = generation; +} + +void UploadBufferVulkan::Dispose() +{ + _freePages.Add(_usedPages); + for (auto page : _freePages) + { + page->ReleaseGPU(); + Delete(page); + } +} + +UploadBufferPageVulkan::UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size) + : GPUResourceBase(device, TEXT("Upload Buffer Page")) + , LastGen(0) + , Size(size) +{ + VkBufferCreateInfo bufferInfo; + RenderToolsVulkan::ZeroStruct(bufferInfo, VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + bufferInfo.size = size; + bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo allocInfo; + vmaCreateBuffer(_device->Allocator, &bufferInfo, &allocCreateInfo, &Buffer, &Allocation, &allocInfo); + Mapped = allocInfo.pMappedData; + ASSERT_LOW_LAYER(Mapped); + _memoryUsage = size; + PROFILE_MEM_INC(GraphicsCommands, _memoryUsage); +} + +void UploadBufferPageVulkan::OnReleaseGPU() +{ + PROFILE_MEM_DEC(GraphicsCommands, _memoryUsage); + vmaDestroyBuffer(_device->Allocator, Buffer, Allocation); + Buffer = VK_NULL_HANDLE; + Allocation = VK_NULL_HANDLE; + Mapped = nullptr; +} + +#endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h new file mode 100644 index 000000000..bf13340d1 --- /dev/null +++ b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h @@ -0,0 +1,79 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+ +#pragma once + +#include "Engine/Graphics/GPUDevice.h" +#include "ResourceOwnerVulkan.h" + +#if GRAPHICS_API_VULKAN + +class GPUDeviceVulkan; +class UploadBufferPageVulkan; + +// Upload buffer page size +#define VULKAN_DEFAULT_UPLOAD_PAGE_SIZE (4 * 1024 * 1024) // 4 MB + +// Upload buffer generations timeout to dispose +#define VULKAN_UPLOAD_PAGE_GEN_TIMEOUT 3 + +// Upload buffer pages that are not used for a few frames are disposed +#define VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT 60 + +/// +/// Uploading data to GPU buffer utility +/// +class UploadBufferVulkan +{ +public: + /// + /// Upload buffer allocation + /// + struct Allocation + { + /// + /// CPU memory address of the allocation start. + /// + void* Mapped; + + /// + /// Allocation offset in bytes (from the start of the heap buffer). + /// + uint64 Offset; + + /// + /// Allocation size in bytes + /// + uint64 Size; + + /// + /// Upload buffer page resource that owns that allocation + /// + VkBuffer Buffer; + + /// + /// Generation number of that allocation (generally allocation is invalid after one or two generations) + /// + uint64 Generation; + }; + +private: + GPUDeviceVulkan* _device; + UploadBufferPageVulkan* _currentPage; + uint64 _currentOffset; + uint64 _currentGeneration; + Array> _freePages; + Array> _usedPages; + +public: + UploadBufferVulkan(GPUDeviceVulkan* device); + +public: + Allocation Allocate(uint64 size, uint64 align); + Allocation Upload(const void* data, uint64 size, uint64 align); + +public: + void BeginGeneration(uint64 generation); + void Dispose(); +}; + +#endif From 3b19e1b40c1fcb7fa152d45efea378649c89fd08 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 21:47:51 +0200 Subject: [PATCH 204/211] Fix collision data memory preview text --- .../Windows/Assets/CollisionDataWindow.cs | 26 +++++++------------ 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/Source/Editor/Windows/Assets/CollisionDataWindow.cs
b/Source/Editor/Windows/Assets/CollisionDataWindow.cs index fc8a5659d..e781447df 100644 --- a/Source/Editor/Windows/Assets/CollisionDataWindow.cs +++ b/Source/Editor/Windows/Assets/CollisionDataWindow.cs @@ -181,15 +181,8 @@ namespace FlaxEditor.Windows.Assets private class CollisionDataPreview : ModelBasePreview { - public bool ShowCollisionData = false; - private int _verticesCount = 0; - private int _trianglesCount = 0; - - public void SetVerticesAndTriangleCount(int verticesCount, int triangleCount) - { - _verticesCount = verticesCount; - _trianglesCount = triangleCount; - } + public bool ShowInfo; + public string Info; /// public CollisionDataPreview(bool useWidgets) @@ -204,13 +197,12 @@ namespace FlaxEditor.Windows.Assets { base.Draw(); - if (ShowCollisionData) + if (ShowInfo) { - var text = string.Format("\nTriangles: {0:N0}\nVertices: {1:N0}\nMemory Size: {2}", _trianglesCount, _verticesCount, Utilities.Utils.FormatBytesCount(Asset.MemoryUsage)); var font = Style.Current.FontMedium; var pos = new Float2(10, 50); - Render2D.DrawText(font, text, new Rectangle(pos + Float2.One, Size), Color.Black); - Render2D.DrawText(font, text, new Rectangle(pos, Size), Color.White); + Render2D.DrawText(font, Info, new Rectangle(pos + Float2.One, Size), Color.Black); + Render2D.DrawText(font, Info, new Rectangle(pos, Size), Color.White); } } } @@ -222,11 +214,11 @@ namespace FlaxEditor.Windows.Assets // Toolstrip _toolstrip.AddSeparator(); _toolstrip.AddButton(editor.Icons.CenterView64, () => _preview.ResetCamera()).LinkTooltip("Show whole collision"); - var infoButton = (ToolStripButton)_toolstrip.AddButton(editor.Icons.Info64).LinkTooltip("Show Collision Data"); + var infoButton = (ToolStripButton)_toolstrip.AddButton(editor.Icons.Info64).LinkTooltip("Show Collision Data info"); infoButton.Clicked += () => { - _preview.ShowCollisionData = !_preview.ShowCollisionData; - infoButton.Checked = _preview.ShowCollisionData; + _preview.ShowInfo = !_preview.ShowInfo; + 
infoButton.Checked = _preview.ShowInfo; }; _toolstrip.AddButton(editor.Icons.Docs64, () => Platform.OpenUrl(Utilities.Constants.DocsUrl + "manual/physics/colliders/collision-data.html")).LinkTooltip("See documentation to learn more"); @@ -293,7 +285,7 @@ namespace FlaxEditor.Windows.Assets } _collisionWiresShowActor.Model = _collisionWiresModel; _collisionWiresShowActor.SetMaterial(0, FlaxEngine.Content.LoadAsyncInternal(EditorAssets.WiresDebugMaterial)); - _preview.SetVerticesAndTriangleCount(triangleCount, indicesCount / 3); + _preview.Info = string.Format("\nTriangles: {0:N0}\nVertices: {1:N0}\nMemory Size: {2}", triangleCount, indicesCount / 3, Utilities.Utils.FormatBytesCount(Asset.MemoryUsage)); _preview.Asset = FlaxEngine.Content.LoadAsync(_asset.Options.Model); } From c5d06b2c8bdbe9d74661e343f6f0c5d7519f860d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 22:05:31 +0200 Subject: [PATCH 205/211] Fix crash regression from #3588 when editing active particle emitters --- Source/Engine/Particles/ParticleEffect.cpp | 27 ++++++++++++++++++---- Source/Engine/Particles/ParticleEffect.h | 8 +++++-- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index f30f6a495..6e94594b0 100644 --- a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -543,6 +543,9 @@ void ParticleEffect::ApplyModifiedParameters() void ParticleEffect::OnAssetChanged(Asset* asset, void* caller) { +#if USE_EDITOR + OnParticleEmittersClear(); +#endif Instance.ClearState(); _parameters.Resize(0); _parametersVersion = 0; @@ -553,19 +556,30 @@ void ParticleEffect::OnAssetLoaded(Asset* asset, void* caller) ApplyModifiedParameters(); #if USE_EDITOR // When one of the emitters gets edited, cached parameters need to be applied - auto& emitters = ParticleSystem.Get()->Emitters; - for (auto& emitter : emitters) - { - emitter.Loaded.BindUnique(this); 
- } + OnParticleEmittersClear(); + _cachedEmitters = ParticleSystem.Get()->Emitters; + for (auto& emitter : _cachedEmitters) + emitter.Loaded.Bind(this); + #endif } +#if USE_EDITOR + +void ParticleEffect::OnParticleEmittersClear() +{ + for (auto& emitter : _cachedEmitters) + emitter.Loaded.Unbind(this); + _cachedEmitters.Clear(); +} + void ParticleEffect::OnParticleEmitterLoaded() { ApplyModifiedParameters(); } +#endif + void ParticleEffect::OnAssetUnloaded(Asset* asset, void* caller) { } @@ -807,6 +821,9 @@ void ParticleEffect::Deserialize(DeserializeStream& stream, ISerializeModifier* void ParticleEffect::EndPlay() { +#if USE_EDITOR + OnParticleEmittersClear(); +#endif CacheModifiedParameters(); Particles::OnEffectDestroy(this); Instance.ClearState(); diff --git a/Source/Engine/Particles/ParticleEffect.h b/Source/Engine/Particles/ParticleEffect.h index c54fa5639..4de88f1d5 100644 --- a/Source/Engine/Particles/ParticleEffect.h +++ b/Source/Engine/Particles/ParticleEffect.h @@ -170,6 +170,9 @@ private: Array _parametersOverrides; // Cached parameter modifications to be applied to the parameters bool _isPlaying = false; bool _isStopped = false; +#if USE_EDITOR + Array> _cachedEmitters; +#endif public: /// @@ -392,9 +395,10 @@ private: #endif void CacheModifiedParameters(); void ApplyModifiedParameters(); - void OnParticleSystemModified(); - void OnParticleSystemLoaded(); +#if USE_EDITOR + void OnParticleEmittersClear(); void OnParticleEmitterLoaded(); +#endif // [IAssetReference] void OnAssetChanged(Asset* asset, void* caller) override; From 67220d3f80c1a6db9c32bef45f2276751de82d75 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 22:28:53 +0200 Subject: [PATCH 206/211] Add support for large 16kb page sizes on Android --- Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs 
b/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs index 7180ced43..d88cdbd51 100644 --- a/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs +++ b/Source/Tools/Flax.Build/Platforms/Android/AndroidToolchain.cs @@ -137,6 +137,9 @@ namespace Flax.Build.Platforms args.Add("-Wl,--build-id=sha1"); args.Add("-Wl,-gc-sections"); + // Support 16kb pages + args.Add("-Wl,-z,max-page-size=16384"); + if (options.LinkEnv.Output == LinkerOutput.Executable) { // Prevent linker from stripping the entry point for the Android Native Activity From 1f6d83711719fcf8c8d99d93f7581d1737801815 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 5 Sep 2025 08:40:13 +0200 Subject: [PATCH 207/211] Fix regression from 6d05bf16b15c5021a3b1177561b90f846c58849d --- Source/Engine/Content/Asset.cpp | 2 +- Source/Engine/Content/JsonAsset.cpp | 2 +- Source/Engine/Graphics/Models/MeshBase.cpp | 2 +- Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Content/Asset.cpp b/Source/Engine/Content/Asset.cpp index 1ba0039bb..cb711013b 100644 --- a/Source/Engine/Content/Asset.cpp +++ b/Source/Engine/Content/Asset.cpp @@ -279,7 +279,7 @@ void Asset::OnDeleteObject() const bool wasMarkedToDelete = _deleteFileOnUnload != 0; #if USE_EDITOR - const String path = wasMarkedToDelete ? GetPath() : StringView::Empty; + const String path = wasMarkedToDelete ? String(GetPath()) : String::Empty; #endif const Guid id = GetID(); diff --git a/Source/Engine/Content/JsonAsset.cpp b/Source/Engine/Content/JsonAsset.cpp index 4b5ee7b63..1474933b6 100644 --- a/Source/Engine/Content/JsonAsset.cpp +++ b/Source/Engine/Content/JsonAsset.cpp @@ -180,7 +180,7 @@ bool JsonAssetBase::Save(const StringView& path) _isResaving = false; // Save json to file - if (File::WriteAllBytes(path.HasChars() ? path : StringView(GetPath()), (byte*)buffer.GetString(), (int32)buffer.GetSize())) + if (File::WriteAllBytes(path.HasChars() ? 
path : GetPath(), (byte*)buffer.GetString(), (int32)buffer.GetSize())) { LOG(Error, "Cannot save \'{0}\'", ToString()); return true; diff --git a/Source/Engine/Graphics/Models/MeshBase.cpp b/Source/Engine/Graphics/Models/MeshBase.cpp index c62820748..3434cd91a 100644 --- a/Source/Engine/Graphics/Models/MeshBase.cpp +++ b/Source/Engine/Graphics/Models/MeshBase.cpp @@ -444,7 +444,7 @@ bool MeshBase::Init(uint32 vertices, uint32 triangles, const ArrayGetPath(); + const String modelPath = _model->GetPath(); #define MESH_BUFFER_NAME(postfix) modelPath + TEXT(postfix) #else #define MESH_BUFFER_NAME(postfix) String::Empty diff --git a/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp b/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp index 813ec9a0c..eae1f9882 100644 --- a/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp +++ b/Source/Engine/Graphics/Shaders/Cache/ShaderAssetBase.cpp @@ -217,7 +217,7 @@ bool ShaderAssetBase::LoadShaderCache(ShaderCacheResult& result) && parent->HasChunk(SHADER_FILE_CHUNK_SOURCE)) { result.Data.Release(); - const String parentPath = parent->GetPath(); + const StringView parentPath = parent->GetPath(); const Guid parentID = parent->GetID(); LOG(Info, "Compiling shader '{0}':{1}...", parentPath, parentID); From 857b0c5ac3d4587228e2b67b7bc1e430cb1a109c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 6 Sep 2025 23:32:54 +0200 Subject: [PATCH 208/211] Fix various graphics issues detected by Vulkan Validation Layers on Android --- Source/Engine/Graphics/Materials/MaterialShader.h | 2 +- Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h | 2 +- Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp | 3 +++ Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 1 + .../Tools/MaterialGenerator/MaterialGenerator.Material.cpp | 2 +- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 
aedf2e870..117246671 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials shader version. /// -#define MATERIAL_GRAPH_VERSION 174 +#define MATERIAL_GRAPH_VERSION 175 class Material; class GPUShader; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h index bba79c498..8ed541089 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h @@ -58,7 +58,7 @@ struct PipelineBarrierVulkan FORCE_INLINE bool HasBarrier() const { - return ImageBarriers.Count() + BufferBarriers.Count() + MemoryBarriers.Count() != 0; + return ImageBarriers.Count() + BufferBarriers.Count() + MemoryBarriers.Count() != 0 || SourceStage + DestStage != 0; } void Execute(const CmdBufferVulkan* cmdBuffer); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp index ad99d8850..2ce634207 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp @@ -44,6 +44,9 @@ static const char* GInstanceExtensions[] = #endif #if defined(VK_KHR_display) && 0 VK_KHR_DISPLAY_EXTENSION_NAME, +#endif +#if GPU_ENABLE_TRACY && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, // Required by VK_EXT_host_query_reset (unless using Vulkan 1.1 or newer) #endif nullptr }; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index dfed9bbb8..7c54be973 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -143,6 +143,7 @@ static VKAPI_ATTR VkBool32 VKAPI_PTR 
DebugUtilsCallback(VkDebugUtilsMessageSever case 3: // Attachment 2 not written by fragment shader case 5: // SPIR-V module not valid: MemoryBarrier: Vulkan specification requires Memory Semantics to have one of the following bits set: Acquire, Release, AcquireRelease or SequentiallyConsistent case -1666394502: // After query pool creation, each query must be reset before it is used. Queries must also be reset between uses. + case 1203141749: case 602160055: // Attachment 4 not written by fragment shader; undefined values will be written to attachment. TODO: investigate it for PS_GBuffer shader from Deferred material with USE_LIGHTMAP=1 case 7060244: // Image Operand Offset can only be used with OpImage*Gather operations case -1539028524: // SortedIndices is null so Vulkan backend sets it to default R32_SFLOAT format which is not good for UINT format of the buffer diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp index 9105fcd2e..cdf5d4b35 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp @@ -263,7 +263,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) // Sample scene depth buffer auto sceneDepthTexture = findOrAddSceneTexture(MaterialSceneTextures::SceneDepth); - auto depthSample = writeLocal(VariantType::Float, String::Format(TEXT("{0}.SampleLevel(SamplerLinearClamp, {1}, 0).x"), sceneDepthTexture.ShaderName, screenUVs.Value), node); + auto depthSample = writeLocal(VariantType::Float, String::Format(TEXT("{0}.SampleLevel(SamplerPointClamp, {1}, 0).x"), sceneDepthTexture.ShaderName, screenUVs.Value), node); // Linearize raw device depth Value sceneDepth; From 44e70692a28ac49a713c3bbe8d7e0b5780526a53 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 6 Sep 2025 23:36:54 +0200 Subject: [PATCH 209/211] Fix render memory 
alignment issues to prevent crashes on Android --- Source/Engine/Core/Memory/Allocation.cpp | 11 ++++++++++- Source/Engine/Core/Memory/ArenaAllocation.h | 12 ++++++------ Source/Engine/Core/Memory/CrtAllocator.h | 3 +-- Source/Engine/Platform/Defines.h | 3 +++ Source/Engine/Renderer/RenderList.cpp | 16 ++++++++-------- Source/Engine/Renderer/RenderList.h | 8 ++++---- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp index 239228b67..7fb8e8c7a 100644 --- a/Source/Engine/Core/Memory/Allocation.cpp +++ b/Source/Engine/Core/Memory/Allocation.cpp @@ -24,6 +24,11 @@ void ArenaAllocator::Free() void* ArenaAllocator::Allocate(uint64 size, uint64 alignment) { + if (size == 0) + return nullptr; + if (alignment < PLATFORM_MEMORY_ALIGNMENT) + alignment = PLATFORM_MEMORY_ALIGNMENT; + // Find the first page that has some space left Page* page = _first; while (page && page->Offset + size + alignment > page->Size) @@ -79,6 +84,10 @@ void ConcurrentArenaAllocator::Free() void* ConcurrentArenaAllocator::Allocate(uint64 size, uint64 alignment) { + if (size == 0) + return nullptr; + if (alignment < PLATFORM_MEMORY_ALIGNMENT) + alignment = PLATFORM_MEMORY_ALIGNMENT; RETRY: // Check if the current page has some space left @@ -120,6 +129,6 @@ RETRY: _locker.Unlock(); - // Use a single cde for allocation + // Use a single code for allocation goto RETRY; } diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h index c519ffab3..4b3084085 100644 --- a/Source/Engine/Core/Memory/ArenaAllocation.h +++ b/Source/Engine/Core/Memory/ArenaAllocation.h @@ -79,7 +79,6 @@ private: int64 Size; }; - int32 _pageSize; volatile int64 _first = 0; volatile int64 _totalBytes = 0; void*(*_allocate1)(uint64 size, uint64 alignment) = nullptr; @@ -87,19 +86,20 @@ private: void*(*_allocate2)(uint64 size) = nullptr; void(*_free2)(void* ptr, uint64 size) = nullptr; 
CriticalSection _locker; + int32 _pageSize; public: ConcurrentArenaAllocator(int32 pageSizeBytes, void* (*customAllocate)(uint64 size, uint64 alignment), void(*customFree)(void* ptr)) - : _pageSize(pageSizeBytes) - , _allocate1(customAllocate) + : _allocate1(customAllocate) , _free1(customFree) + , _pageSize(pageSizeBytes) { } ConcurrentArenaAllocator(int32 pageSizeBytes, void* (*customAllocate)(uint64 size), void(*customFree)(void* ptr, uint64 size)) - : _pageSize(pageSizeBytes) - , _allocate2(customAllocate) + : _allocate2(customAllocate) , _free2(customFree) + , _pageSize(pageSizeBytes) { } @@ -120,7 +120,7 @@ public: } // Allocates a chunk of unitialized memory. - void* Allocate(uint64 size, uint64 alignment = 1); + void* Allocate(uint64 size, uint64 alignment = PLATFORM_MEMORY_ALIGNMENT); // Frees all memory allocations within allocator. void Free(); diff --git a/Source/Engine/Core/Memory/CrtAllocator.h b/Source/Engine/Core/Memory/CrtAllocator.h index b8a71a970..d990ce57d 100644 --- a/Source/Engine/Core/Memory/CrtAllocator.h +++ b/Source/Engine/Core/Memory/CrtAllocator.h @@ -11,14 +11,13 @@ class CrtAllocator { public: - /// /// Allocates memory on a specified alignment boundary. /// /// The size of the allocation (in bytes). /// The memory alignment (in bytes). Must be an integer power of 2. /// The pointer to the allocated chunk of the memory. The pointer is a multiple of alignment. 
- FORCE_INLINE static void* Allocate(uint64 size, uint64 alignment = 16) + FORCE_INLINE static void* Allocate(uint64 size, uint64 alignment = PLATFORM_MEMORY_ALIGNMENT) { return Platform::Allocate(size, alignment); } diff --git a/Source/Engine/Platform/Defines.h b/Source/Engine/Platform/Defines.h index de56fdcb6..9958545fd 100644 --- a/Source/Engine/Platform/Defines.h +++ b/Source/Engine/Platform/Defines.h @@ -201,6 +201,9 @@ API_ENUM() enum class ArchitectureType #ifndef PLATFORM_THREADS_LIMIT #define PLATFORM_THREADS_LIMIT 64 #endif +#ifndef PLATFORM_MEMORY_ALIGNMENT +#define PLATFORM_MEMORY_ALIGNMENT 16 +#endif #define PLATFORM_32BITS (!PLATFORM_64BITS) // Platform family defines diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 2c62ebdd6..50bf00f57 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -463,20 +463,20 @@ bool DrawCallsList::IsEmpty() const RenderListAlloc::~RenderListAlloc() { - if (!List && Data) // Render List memory doesn't need free (arena allocator) + if (NeedFree && Data) // Render List memory doesn't need free (arena allocator) RendererAllocation::Free(Data, Size); } -void* RenderListAlloc::Init(RenderList* list, uintptr size, uintptr alignment) +void* RenderListAlloc::Init(RenderList* list, uint32 size, uint32 alignment) { ASSERT_LOW_LAYER(!Data); Size = size; - bool useList = alignment <= 16 && size < 1024; - List = useList ? 
list : nullptr; - if (useList) - Data = list->Memory.Allocate(size, alignment); - else - Data = RendererAllocation::Allocate(size); + if (size == 0) + return nullptr; + if (size < 1024 || (alignment != 16 && alignment != 8 && alignment != 4 && alignment != 1)) + return (Data = list->Memory.Allocate(size, alignment)); + NeedFree = true; + Data = RendererAllocation::Allocate(size); return Data; } diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index ce73f1dcd..24262162d 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -281,16 +281,16 @@ struct DrawCallsList // Small utility for allocating memory from RenderList arena pool with automatic fallback to shared RendererAllocation for larger memory blocks. struct RenderListAlloc { - RenderList* List; void* Data = nullptr; - uintptr Size; + uint32 Size = 0; + bool NeedFree = false; ~RenderListAlloc(); - void* Init(RenderList* list, uintptr size, uintptr alignment = 1); + void* Init(RenderList* list, uint32 size, uint32 alignment = 1); template - FORCE_INLINE T* Init(RenderList* list, int32 count, uintptr alignment = 1) + FORCE_INLINE T* Init(RenderList* list, int32 count, uint32 alignment = 1) { return (T*)Init(list, count * sizeof(T), alignment); } From 576b0710e0f3356784e64d709df09b4185122fc4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 6 Sep 2025 23:37:24 +0200 Subject: [PATCH 210/211] Fix GPU particles issues with missing sorting keys data when shader is not yet loaded --- .../Materials/ParticleMaterialShader.cpp | 3 +- Source/Engine/Particles/Particles.cpp | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp b/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp index 6b2e15505..6c780cc55 100644 --- a/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp +++ b/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp @@ 
-49,7 +49,6 @@ void ParticleMaterialShader::Bind(BindParameters& params) auto context = params.GPUContext; auto& view = params.RenderContext.View; auto& drawCall = *params.DrawCall; - const uint32 sortedIndicesOffset = drawCall.Particle.Module->SortedIndicesOffset; Span cb(_cbData.Get(), _cbData.Count()); ASSERT_LOW_LAYER(cb.Length() >= sizeof(ParticleMaterialShaderData)); auto materialData = reinterpret_cast(cb.Get()); @@ -103,7 +102,7 @@ void ParticleMaterialShader::Bind(BindParameters& params) static StringView ParticleModelFacingModeOffset(TEXT("ModelFacingMode")); materialData->WorldMatrix.SetMatrixTranspose(drawCall.World); - materialData->SortedIndicesOffset = drawCall.Particle.Particles->GPU.SortedIndices && params.RenderContext.View.Pass != DrawPass::Depth ? sortedIndicesOffset : 0xFFFFFFFF; + materialData->SortedIndicesOffset = drawCall.Particle.Particles->GPU.SortedIndices && params.RenderContext.View.Pass != DrawPass::Depth ? drawCall.Particle.Module->SortedIndicesOffset : 0xFFFFFFFF; materialData->PerInstanceRandom = drawCall.PerInstanceRandom; materialData->ParticleStride = drawCall.Particle.Particles->Stride; materialData->PositionOffset = drawCall.Particle.Particles->Layout->FindAttributeOffset(ParticlePosition, ParticleAttribute::ValueTypes::Float3); diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index ac02e46d3..bfbf35f29 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -970,6 +970,35 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch) } } } + else + { + // Initialize with identity sort indices in case the buffer has been allocated + for (const GPUEmitterDraw& draw : GPUEmitterDraws) + { + if (!draw.Sorting || !draw.Buffer->GPU.SortedIndices) + continue; + const int32 capacity = draw.Buffer->Capacity; + const int32 capacityBytes = capacity * draw.Buffer->GPU.SortedIndices->GetStride(); + const int32 indicesBytes = 
draw.Buffer->GPU.SortedIndices->GetSize(); + RenderListAlloc sortedIndicesAlloc; + auto* renderList = renderContextBatch.GetMainContext().List; + void* indices = sortedIndicesAlloc.Init(renderList, indicesBytes, GPU_SHADER_DATA_ALIGNMENT); + switch (draw.Buffer->GPU.SortedIndices->GetFormat()) + { + case PixelFormat::R16_UInt: + for (int32 i = 0; i < capacity; i++) + ((uint16*)indices)[i] = (uint16)i; + break; + case PixelFormat::R32_UInt: + for (int32 i = 0; i < capacity; i++) + ((uint32*)indices)[i] = i; + break; + } + for (int32 i = 1; i < draw.Buffer->Emitter->Graph.SortModules.Count(); i++) + Platform::MemoryCopy((byte*)indices + i * capacityBytes, indices, capacityBytes); + context->UpdateBuffer(draw.Buffer->GPU.SortedIndices, indices, indicesBytes, 0); + } + } // TODO: transition here SortedIndices into ShaderReadNonPixel and Buffer into ShaderReadGraphics to reduce barriers during particles rendering From 42b542d19040ebb531bc56b4e828c4bfcf517fe9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 7 Sep 2025 00:17:11 +0200 Subject: [PATCH 211/211] Improve recent changes to memory barriers on Vulkan --- Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 45dccf467..2db4e202a 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -345,7 +345,7 @@ void GPUContextVulkan::AddMemoryBarrier() memoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; memoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; _barriers.SourceStage |= VK_PIPELINE_STAGE_TRANSFER_BIT; - _barriers.DestStage |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + _barriers.DestStage |= VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; #if !VK_ENABLE_BARRIERS_BATCHING // Auto-flush 
without batching @@ -1375,6 +1375,8 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 const auto bufferVulkan = static_cast(buffer); // Transition resource + if (_pass == 0) + AddMemoryBarrier(); AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT); FlushBarriers();