diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.dll b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.dll new file mode 100644 index 000000000..acbf0f736 --- /dev/null +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27b070ca39dc37984fb3dde0187515d36094e72cb881d7a99bd1055befd8da2 +size 179408 diff --git a/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.lib b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.lib new file mode 100644 index 000000000..166cc964a --- /dev/null +++ b/Source/Platforms/Windows/Binaries/ThirdParty/x64/amd_ags_x64.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b10497284289dcef89117fbcc93cff4ad53f9d9fdd80a31e3427ab62636001e +size 13506 diff --git a/Source/ThirdParty/AGS/AGS.Build.cs b/Source/ThirdParty/AGS/AGS.Build.cs new file mode 100644 index 000000000..5ed6a2800 --- /dev/null +++ b/Source/ThirdParty/AGS/AGS.Build.cs @@ -0,0 +1,51 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+
+using System.IO;
+using Flax.Build;
+using Flax.Build.NativeCpp;
+
+/// <summary>
+/// AMD GPU Services (AGS) SDK third-party module.
+/// https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK
+/// </summary>
+public class AGS : DepsModule
+{
+    /// <summary>
+    /// Checks whether the given build targets Windows on x64.
+    /// </summary>
+    /// <param name="options">The build options to inspect.</param>
+    /// <returns>True when the target platform is Windows and the architecture is x64; otherwise false.</returns>
+    public static bool Use(BuildOptions options)
+    {
+        if (options.Platform.Target != TargetPlatform.Windows)
+        {
+            return false;
+        }
+
+        return options.Architecture == TargetArchitecture.x64;
+    }
+
+    /// <inheritdoc />
+    public override void Init()
+    {
+        base.Init();
+
+        // License metadata of the module
+        LicenseType = LicenseTypes.MIT;
+        LicenseFilePath = "LICENSE.txt";
+
+        // Third-party modules are merged into the engine binary
+        BinaryModuleName = "FlaxEngine";
+    }
+
+    /// <inheritdoc />
+    public override void Setup(BuildOptions options)
+    {
+        base.Setup(options);
+
+        var binariesFolder = options.DepsFolder;
+        options.PublicDefinitions.Add("COMPILE_WITH_AGS");
+
+        // Only the Windows x64 binaries are provided; reject anything else
+        if (options.Platform.Target != TargetPlatform.Windows)
+        {
+            throw new InvalidPlatformException(options.Platform.Target);
+        }
+        if (options.Architecture != TargetArchitecture.x64)
+        {
+            throw new InvalidArchitectureException(options.Architecture);
+        }
+
+        options.OutputFiles.Add(Path.Combine(binariesFolder, "amd_ags_x64.lib"));
+        options.OptionalDependencyFiles.Add(Path.Combine(binariesFolder, "amd_ags_x64.dll"));
+    }
+}
diff --git a/Source/ThirdParty/AGS/LICENSE.txt b/Source/ThirdParty/AGS/LICENSE.txt
new file mode 100644
index 000000000..d4e81a71c
--- /dev/null
+++ b/Source/ThirdParty/AGS/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/Source/ThirdParty/AGS/ags_shader_intrinsics_dx11.hlsl b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx11.hlsl new file mode 100644 index 000000000..e3c7ad358 --- /dev/null +++ b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx11.hlsl @@ -0,0 +1,3599 @@ +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +/** +************************************************************************************************************* +* @file ags_shader_intrinsics_dx11.hlsl +* +* @brief +* AMD D3D Shader Intrinsics API hlsl file. +* This include file contains the shader intrinsics definitions (structures, enums, constant) +* and HLSL shader intrinsics functions. +* +* @version 2.3 +* +************************************************************************************************************* +*/ + +#ifndef _AMDDXEXTSHADERINTRINSICS_HLSL_ +#define _AMDDXEXTSHADERINTRINSICS_HLSL_ + +/** +************************************************************************************************************* +* Definitions to construct the intrinsic instruction composed of an opcode and optional immediate data. 
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsics_MagicCodeShift 28 // shift/mask pairs: one pair per field of the instruction word
+#define AmdDxExtShaderIntrinsics_MagicCodeMask 0xf
+#define AmdDxExtShaderIntrinsics_OpcodePhaseShift 24
+#define AmdDxExtShaderIntrinsics_OpcodePhaseMask 0x3
+#define AmdDxExtShaderIntrinsics_DataShift 8
+#define AmdDxExtShaderIntrinsics_DataMask 0xffff
+#define AmdDxExtShaderIntrinsics_OpcodeShift 0
+#define AmdDxExtShaderIntrinsics_OpcodeMask 0xff
+
+#define AmdDxExtShaderIntrinsics_MagicCode 0x5 // constant value shifted into the magic-code field of every instruction
+
+
+/**
+*************************************************************************************************************
+* Intrinsic opcodes. Values go into the opcode field of the instruction word.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsOpcode_Readfirstlane 0x01
+#define AmdDxExtShaderIntrinsicsOpcode_Readlane 0x02
+#define AmdDxExtShaderIntrinsicsOpcode_LaneId 0x03
+#define AmdDxExtShaderIntrinsicsOpcode_Swizzle 0x04
+#define AmdDxExtShaderIntrinsicsOpcode_Ballot 0x05
+#define AmdDxExtShaderIntrinsicsOpcode_MBCnt 0x06
+#define AmdDxExtShaderIntrinsicsOpcode_Min3U 0x08 // note: no opcode is defined for 0x07 in this list
+#define AmdDxExtShaderIntrinsicsOpcode_Min3F 0x09
+#define AmdDxExtShaderIntrinsicsOpcode_Med3U 0x0a
+#define AmdDxExtShaderIntrinsicsOpcode_Med3F 0x0b
+#define AmdDxExtShaderIntrinsicsOpcode_Max3U 0x0c
+#define AmdDxExtShaderIntrinsicsOpcode_Max3F 0x0d
+#define AmdDxExtShaderIntrinsicsOpcode_BaryCoord 0x0e
+#define AmdDxExtShaderIntrinsicsOpcode_VtxParam 0x0f
+#define AmdDxExtShaderIntrinsicsOpcode_ViewportIndex 0x10
+#define AmdDxExtShaderIntrinsicsOpcode_RtArraySlice 0x11
+#define AmdDxExtShaderIntrinsicsOpcode_WaveReduce 0x12
+#define AmdDxExtShaderIntrinsicsOpcode_WaveScan 0x13
+#define AmdDxExtShaderIntrinsicsOpcode_Reserved1 0x14
+#define AmdDxExtShaderIntrinsicsOpcode_Reserved2 0x15
+#define AmdDxExtShaderIntrinsicsOpcode_Reserved3 0x16
+#define 
AmdDxExtShaderIntrinsicsOpcode_DrawIndex 0x17
+#define AmdDxExtShaderIntrinsicsOpcode_AtomicU64 0x18
+#define AmdDxExtShaderIntrinsicsOpcode_GetWaveSize 0x19
+#define AmdDxExtShaderIntrinsicsOpcode_BaseInstance 0x1a
+#define AmdDxExtShaderIntrinsicsOpcode_BaseVertex 0x1b
+
+
+/**
+*************************************************************************************************************
+* Intrinsic opcode phases. Used by intrinsics that are issued as more than one instruction.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsOpcodePhase_0 0x0
+#define AmdDxExtShaderIntrinsicsOpcodePhase_1 0x1
+#define AmdDxExtShaderIntrinsicsOpcodePhase_2 0x2
+#define AmdDxExtShaderIntrinsicsOpcodePhase_3 0x3
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsicsSwizzle defines for common swizzles. Can be used as the operation parameter for
+* the AmdDxExtShaderIntrinsics_SwizzleF and AmdDxExtShaderIntrinsics_SwizzleU intrinsics.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsSwizzle_SwapX1 0x041f
+#define AmdDxExtShaderIntrinsicsSwizzle_SwapX2 0x081f
+#define AmdDxExtShaderIntrinsicsSwizzle_SwapX4 0x101f
+#define AmdDxExtShaderIntrinsicsSwizzle_SwapX8 0x201f
+#define AmdDxExtShaderIntrinsicsSwizzle_SwapX16 0x401f
+#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX2 0x041f // same encoding as AmdDxExtShaderIntrinsicsSwizzle_SwapX1
+#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX4 0x0c1f
+#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX8 0x1c1f
+#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX16 0x3c1f
+#define AmdDxExtShaderIntrinsicsSwizzle_ReverseX32 0x7c1f
+#define AmdDxExtShaderIntrinsicsSwizzle_BCastX2 0x003e
+#define AmdDxExtShaderIntrinsicsSwizzle_BCastX4 0x003c
+#define AmdDxExtShaderIntrinsicsSwizzle_BCastX8 0x0038
+#define AmdDxExtShaderIntrinsicsSwizzle_BCastX16 0x0030
+#define AmdDxExtShaderIntrinsicsSwizzle_BCastX32 0x0020
+
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsicsBarycentric defines for barycentric interpolation mode. To be used with
+* AmdDxExtShaderIntrinsics_IjBarycentricCoords to specify the interpolation mode.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsBarycentric_LinearCenter 0x1
+#define AmdDxExtShaderIntrinsicsBarycentric_LinearCentroid 0x2
+#define AmdDxExtShaderIntrinsicsBarycentric_LinearSample 0x3
+#define AmdDxExtShaderIntrinsicsBarycentric_PerspCenter 0x4
+#define AmdDxExtShaderIntrinsicsBarycentric_PerspCentroid 0x5
+#define AmdDxExtShaderIntrinsicsBarycentric_PerspSample 0x6
+#define AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel 0x7
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsicsBarycentric defines for specifying vertex and parameter indices. To be used as
+* the inputs to the AmdDxExtShaderIntrinsics_VertexParameter function.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsBarycentric_Vertex0 0x0
+#define AmdDxExtShaderIntrinsicsBarycentric_Vertex1 0x1
+#define AmdDxExtShaderIntrinsicsBarycentric_Vertex2 0x2
+
+#define AmdDxExtShaderIntrinsicsBarycentric_Param0 0x00
+#define AmdDxExtShaderIntrinsicsBarycentric_Param1 0x01
+#define AmdDxExtShaderIntrinsicsBarycentric_Param2 0x02
+#define AmdDxExtShaderIntrinsicsBarycentric_Param3 0x03
+#define AmdDxExtShaderIntrinsicsBarycentric_Param4 0x04
+#define AmdDxExtShaderIntrinsicsBarycentric_Param5 0x05
+#define AmdDxExtShaderIntrinsicsBarycentric_Param6 0x06
+#define AmdDxExtShaderIntrinsicsBarycentric_Param7 0x07
+#define AmdDxExtShaderIntrinsicsBarycentric_Param8 0x08
+#define AmdDxExtShaderIntrinsicsBarycentric_Param9 0x09
+#define AmdDxExtShaderIntrinsicsBarycentric_Param10 0x0a
+#define AmdDxExtShaderIntrinsicsBarycentric_Param11 0x0b
+#define AmdDxExtShaderIntrinsicsBarycentric_Param12 0x0c
+#define AmdDxExtShaderIntrinsicsBarycentric_Param13 0x0d
+#define AmdDxExtShaderIntrinsicsBarycentric_Param14 
0x0e
+#define AmdDxExtShaderIntrinsicsBarycentric_Param15 0x0f
+#define AmdDxExtShaderIntrinsicsBarycentric_Param16 0x10
+#define AmdDxExtShaderIntrinsicsBarycentric_Param17 0x11
+#define AmdDxExtShaderIntrinsicsBarycentric_Param18 0x12
+#define AmdDxExtShaderIntrinsicsBarycentric_Param19 0x13
+#define AmdDxExtShaderIntrinsicsBarycentric_Param20 0x14
+#define AmdDxExtShaderIntrinsicsBarycentric_Param21 0x15
+#define AmdDxExtShaderIntrinsicsBarycentric_Param22 0x16
+#define AmdDxExtShaderIntrinsicsBarycentric_Param23 0x17
+#define AmdDxExtShaderIntrinsicsBarycentric_Param24 0x18
+#define AmdDxExtShaderIntrinsicsBarycentric_Param25 0x19
+#define AmdDxExtShaderIntrinsicsBarycentric_Param26 0x1a
+#define AmdDxExtShaderIntrinsicsBarycentric_Param27 0x1b
+#define AmdDxExtShaderIntrinsicsBarycentric_Param28 0x1c
+#define AmdDxExtShaderIntrinsicsBarycentric_Param29 0x1d
+#define AmdDxExtShaderIntrinsicsBarycentric_Param30 0x1e
+#define AmdDxExtShaderIntrinsicsBarycentric_Param31 0x1f
+
+#define AmdDxExtShaderIntrinsicsBarycentric_ComponentX 0x0
+#define AmdDxExtShaderIntrinsicsBarycentric_ComponentY 0x1
+#define AmdDxExtShaderIntrinsicsBarycentric_ComponentZ 0x2
+#define AmdDxExtShaderIntrinsicsBarycentric_ComponentW 0x3
+
+#define AmdDxExtShaderIntrinsicsBarycentric_ParamShift 0 // shift/mask pairs for the parameter, vertex and component indices
+#define AmdDxExtShaderIntrinsicsBarycentric_ParamMask 0x1f
+#define AmdDxExtShaderIntrinsicsBarycentric_VtxShift 0x5
+#define AmdDxExtShaderIntrinsicsBarycentric_VtxMask 0x3
+#define AmdDxExtShaderIntrinsicsBarycentric_ComponentShift 0x7
+#define AmdDxExtShaderIntrinsicsBarycentric_ComponentMask 0x3
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsicsWaveOp defines for supported operations. Can be used as the parameter for the
+* AmdDxExtShaderIntrinsicsOpcode_WaveReduce and AmdDxExtShaderIntrinsicsOpcode_WaveScan intrinsics.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsWaveOp_AddF 0x01
+#define AmdDxExtShaderIntrinsicsWaveOp_AddI 0x02
+#define AmdDxExtShaderIntrinsicsWaveOp_AddU 0x03
+#define AmdDxExtShaderIntrinsicsWaveOp_MulF 0x04
+#define AmdDxExtShaderIntrinsicsWaveOp_MulI 0x05
+#define AmdDxExtShaderIntrinsicsWaveOp_MulU 0x06
+#define AmdDxExtShaderIntrinsicsWaveOp_MinF 0x07
+#define AmdDxExtShaderIntrinsicsWaveOp_MinI 0x08
+#define AmdDxExtShaderIntrinsicsWaveOp_MinU 0x09
+#define AmdDxExtShaderIntrinsicsWaveOp_MaxF 0x0a
+#define AmdDxExtShaderIntrinsicsWaveOp_MaxI 0x0b
+#define AmdDxExtShaderIntrinsicsWaveOp_MaxU 0x0c
+#define AmdDxExtShaderIntrinsicsWaveOp_And 0x0d // Reduction only
+#define AmdDxExtShaderIntrinsicsWaveOp_Or 0x0e // Reduction only
+#define AmdDxExtShaderIntrinsicsWaveOp_Xor 0x0f // Reduction only
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsicsWaveOp masks and shifts for the operation code and the flags.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift 0
+#define AmdDxExtShaderIntrinsicsWaveOp_OpcodeMask 0xff
+#define AmdDxExtShaderIntrinsicsWaveOp_FlagShift 8
+#define AmdDxExtShaderIntrinsicsWaveOp_FlagMask 0xff
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsicsWaveOp flags for use with AmdDxExtShaderIntrinsicsOpcode_WaveScan.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsWaveOp_Inclusive 0x01
+#define AmdDxExtShaderIntrinsicsWaveOp_Exclusive 0x02
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsicsAtomicOp defines for supported operations. Can be used as the parameter for the
+* AmdDxExtShaderIntrinsicsOpcode_AtomicU64 intrinsic.
+*************************************************************************************************************
+*/
+#define AmdDxExtShaderIntrinsicsAtomicOp_MinU64 0x01
+#define AmdDxExtShaderIntrinsicsAtomicOp_MaxU64 0x02
+#define AmdDxExtShaderIntrinsicsAtomicOp_AndU64 0x03
+#define AmdDxExtShaderIntrinsicsAtomicOp_OrU64 0x04
+#define AmdDxExtShaderIntrinsicsAtomicOp_XorU64 0x05
+#define AmdDxExtShaderIntrinsicsAtomicOp_AddU64 0x06
+#define AmdDxExtShaderIntrinsicsAtomicOp_XchgU64 0x07
+#define AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64 0x08
+
+
+/**
+*************************************************************************************************************
+* Resource slots for intrinsics using imm_atomic_cmp_exch.
+*************************************************************************************************************
+*/
+#ifndef AmdDxExtShaderIntrinsicsUAVSlot
+#define AmdDxExtShaderIntrinsicsUAVSlot u7 // default slot; define AmdDxExtShaderIntrinsicsUAVSlot before this point to override
+#endif
+
+RWByteAddressBuffer AmdDxExtShaderIntrinsicsUAV : register(AmdDxExtShaderIntrinsicsUAVSlot);
+
+/**
+*************************************************************************************************************
+* Resource and sampler slots for intrinsics using sample_l.
+*************************************************************************************************************
+*/
+#ifndef AmdDxExtShaderIntrinsicsResSlot
+#define AmdDxExtShaderIntrinsicsResSlot t127 // default slot; can be overridden by defining it beforehand
+#endif
+
+#ifndef AmdDxExtShaderIntrinsicsSamplerSlot
+#define AmdDxExtShaderIntrinsicsSamplerSlot s15 // default slot; can be overridden by defining it beforehand
+#endif
+
+SamplerState AmdDxExtShaderIntrinsicsSamplerState : register (AmdDxExtShaderIntrinsicsSamplerSlot);
+Texture3D AmdDxExtShaderIntrinsicsResource : register (AmdDxExtShaderIntrinsicsResSlot);
+
+/**
+*************************************************************************************************************
+* MakeAmdShaderIntrinsicsInstruction
+*
+* Creates instruction from supplied opcode, opcode phase and immediate data, combined with the magic code.
+* NOTE: This is an internal function and should not be called by the source HLSL shader directly.
+* NOTE: The arguments are only shifted and OR-ed together; none of the *Mask defines is applied here.
+*************************************************************************************************************
+*/
+uint MakeAmdShaderIntrinsicsInstruction(uint opcode, uint opcodePhase, uint immediateData)
+{
+    return ((AmdDxExtShaderIntrinsics_MagicCode << AmdDxExtShaderIntrinsics_MagicCodeShift) |
+            (immediateData << AmdDxExtShaderIntrinsics_DataShift) |
+            (opcodePhase << AmdDxExtShaderIntrinsics_OpcodePhaseShift) |
+            (opcode << AmdDxExtShaderIntrinsics_OpcodeShift));
+}
+
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_ReadfirstlaneF
+*
+* Returns the value of float src for the first active lane of the wavefront.
+*
+*************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_ReadfirstlaneF(float src)
+{
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readfirstlane,
+                                                          0, 0);
+
+    uint retVal;
+    // The encoded instruction is passed as the dest operand and the bit pattern of src as the compare operand
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal);
+    return asfloat(retVal);
+}
+
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_ReadfirstlaneU
+*
+* Returns the value of unsigned integer src for the first active lane of the wavefront.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_ReadfirstlaneU(uint src)
+{
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readfirstlane,
+                                                          0, 0);
+
+    uint retVal;
+    // Same call shape as the float variant, without the asuint/asfloat bit casts
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal);
+    return retVal;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_ReadlaneF
+*
+* Returns the value of float src for the lane within the wavefront specified by laneId.
+*
+*************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_ReadlaneF(float src, uint laneId)
+{
+    // laneId is encoded into the immediate-data field of the instruction word
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readlane, 0,
+                                                          laneId);
+
+    uint retVal;
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal);
+    return asfloat(retVal);
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_ReadlaneU
+*
+* Returns the value of unsigned integer src for the lane within the wavefront specified by laneId.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_ReadlaneU(uint src, uint laneId)
+{
+    // laneId is encoded into the immediate-data field of the instruction word
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Readlane, 0,
+                                                          laneId);
+
+    uint retVal;
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal);
+    return retVal;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_LaneId
+*
+* Returns the current lane id for the thread within the wavefront.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_LaneId()
+{
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_LaneId, 0, 0);
+
+    uint retVal;
+    // No source operand is needed: both the compare and the value operands are zero
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal);
+    return retVal;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_GetWaveSize
+*
+* Returns the wave size for the current shader, including active, inactive and helper lanes.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_GetWaveSize()
+{
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_GetWaveSize, 0, 0);
+
+    uint retVal;
+    // No source operand is needed: both the compare and the value operands are zero
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal);
+    return retVal;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_SwizzleF
+*
+* Generic instruction to shuffle the float src value among different lanes as specified by the
+* operation (for example one of the AmdDxExtShaderIntrinsicsSwizzle_* defines).
+* Note that the operation parameter must be an immediately specified value not a value from a variable.
+*
+*************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_SwizzleF(float src, uint operation)
+{
+    // operation is encoded into the immediate-data field of the instruction word
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Swizzle, 0,
+                                                          operation);
+
+    uint retVal;
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal);
+    return asfloat(retVal);
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_SwizzleU
+*
+* Generic instruction to shuffle the unsigned integer src value among different lanes as specified by the
+* operation.
+* Note that the operation parameter must be an immediately specified value not a value from a variable.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_SwizzleU(uint src, uint operation)
+{
+    // operation is encoded into the immediate-data field of the instruction word
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Swizzle, 0,
+                                                          operation);
+
+    uint retVal;
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src, 0, retVal);
+    return retVal;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_Ballot
+*
+* Given an input predicate returns a bit mask indicating for which lanes the predicate is true.
+* Inactive or non-existent lanes will always return 0. The number of existent lanes is the
+* wavefront size.
+* The mask is returned as two 32-bit halves (x from phase 0, y from phase 1).
+*************************************************************************************************************
+*/
+uint2 AmdDxExtShaderIntrinsics_Ballot(bool predicate)
+{
+    uint instruction;
+
+    // Phase 0 yields the first half of the mask
+    uint retVal1;
+    instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Ballot,
+                                                     AmdDxExtShaderIntrinsicsOpcodePhase_0, 0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, predicate, 0, retVal1);
+
+    // Phase 1 yields the second half of the mask
+    uint retVal2;
+    instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Ballot,
+                                                     AmdDxExtShaderIntrinsicsOpcodePhase_1, 0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, predicate, 0, retVal2);
+
+    return uint2(retVal1, retVal2);
+}
+
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_BallotAny
+*
+* Convenience routine that uses Ballot and returns true if for any of the active lanes the predicate
+* is true.
+*
+*************************************************************************************************************
+*/
+bool AmdDxExtShaderIntrinsics_BallotAny(bool predicate)
+{
+    uint2 retVal = AmdDxExtShaderIntrinsics_Ballot(predicate);
+
+    // Any set bit in either half of the ballot mask means the predicate held for some lane
+    return ((retVal.x | retVal.y) != 0 ? true : false);
+}
+
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_BallotAll
+*
+* Convenience routine that uses Ballot and returns true if for all of the active lanes the predicate
+* is true.
+*
+*************************************************************************************************************
+*/
+bool AmdDxExtShaderIntrinsics_BallotAll(bool predicate)
+{
+    uint2 ballot = AmdDxExtShaderIntrinsics_Ballot(predicate);
+
+    // Ballot(true) gives the mask of lanes that take part in the ballot at all
+    uint2 execMask = AmdDxExtShaderIntrinsics_Ballot(true);
+
+    return ((ballot.x == execMask.x) && (ballot.y == execMask.y));
+}
+
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_MBCnt
+*
+* Returns the masked bit count of the source register for this thread within all the active threads
+* within a wavefront.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_MBCnt(uint2 src)
+{
+    uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_MBCnt, 0, 0);
+
+    uint retVal;
+
+    // Both halves of src are passed in one call: src.x as the compare operand, src.y as the value operand
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, src.x, src.y, retVal);
+
+    return retVal;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_Min3F
+*
+* Returns the minimum value of the three floating point source arguments.
+*
+*************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_Min3F(float src0, float src1, float src2)
+{
+    uint minimum;
+
+    // Phase 0 takes src0 and src1
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3F,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_0,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), minimum);
+
+    // Phase 1 takes src2 together with the phase-0 result, so the two calls must stay in this order
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3F,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_1,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), minimum, minimum);
+
+    return asfloat(minimum);
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_Min3U
+*
+* Returns the minimum value of the three unsigned integer source arguments.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_Min3U(uint src0, uint src1, uint src2)
+{
+    uint minimum;
+
+    // Phase 0 takes src0 and src1
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3U,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_0,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, minimum);
+
+    // Phase 1 takes src2 together with the phase-0 result
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Min3U,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_1,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, minimum, minimum);
+
+    return minimum;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_Med3F
+*
+* Returns the median value of the three floating point source arguments.
+*
+*************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_Med3F(float src0, float src1, float src2)
+{
+    uint median;
+
+    // Phase 0 takes src0 and src1
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3F,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_0,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), median);
+
+    // Phase 1 takes src2 together with the phase-0 result, so the two calls must stay in this order
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3F,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_1,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), median, median);
+
+    return asfloat(median);
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_Med3U
+*
+* Returns the median value of the three unsigned integer source arguments.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_Med3U(uint src0, uint src1, uint src2)
+{
+    uint median;
+
+    // Phase 0 takes src0 and src1
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3U,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_0,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, median);
+
+    // Phase 1 takes src2 together with the phase-0 result
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Med3U,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_1,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, median, median);
+
+    return median;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_Max3F
+*
+* Returns the maximum value of the three floating point source arguments.
+*
+*************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_Max3F(float src0, float src1, float src2)
+{
+    uint maximum;
+
+    // Phase 0 takes src0 and src1
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3F,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_0,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, asuint(src0), asuint(src1), maximum);
+
+    // Phase 1 takes src2 together with the phase-0 result, so the two calls must stay in this order
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3F,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_1,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, asuint(src2), maximum, maximum);
+
+    return asfloat(maximum);
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_Max3U
+*
+* Returns the maximum value of the three unsigned integer source arguments.
+*
+*************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_Max3U(uint src0, uint src1, uint src2)
+{
+    uint maximum;
+
+    // Phase 0 takes src0 and src1
+    uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3U,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_0,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, src0, src1, maximum);
+
+    // Phase 1 takes src2 together with the phase-0 result
+    uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_Max3U,
+                                                           AmdDxExtShaderIntrinsicsOpcodePhase_1,
+                                                           0);
+    AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, src2, maximum, maximum);
+
+    return maximum;
+}
+
+/**
+*************************************************************************************************************
+* AmdDxExtShaderIntrinsics_IjBarycentricCoords
+*
+* Returns the (i, j) barycentric coordinate pair for this shader invocation with the specified
+* interpolation mode at the specified pixel 
location. Should not be used for "pull-model" interpolation, +* PullModelBarycentricCoords should be used instead +* +* Can only be used in pixel shader stages. +* +************************************************************************************************************* +*/ +float2 AmdDxExtShaderIntrinsics_IjBarycentricCoords(uint interpMode) +{ + uint2 retVal; + + /* interpMode is passed as the third argument of MakeAmdShaderIntrinsicsInstruction in both phases; phase 0 has no data operands. */ + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + interpMode); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, 0, 0, retVal.x); + + /* Phase 1 is fed the phase-0 output (retVal.x) and writes the second coordinate to retVal.y. */ + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + interpMode); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, retVal.x, 0, retVal.y); + + /* Both outputs are uint bit patterns reinterpreted as floats. */ + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_PullModelBarycentricCoords +* +* Returns the (1/W,1/I,1/J) coordinates at the pixel center which can be used for custom interpolation at +* any location in the pixel. +* +* Can only be used in pixel shader stages.
+* +************************************************************************************************************* +*/ +float3 AmdDxExtShaderIntrinsics_PullModelBarycentricCoords() +{ + uint3 retVal; + + /* Three chained phases, all tagged with AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel: each later phase is fed the previous phase's output. */ + uint instruction1 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction1, 0, 0, retVal.x); + + uint instruction2 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_1, + AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction2, retVal.x, 0, retVal.y); + + uint instruction3 = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaryCoord, + AmdDxExtShaderIntrinsicsOpcodePhase_2, + AmdDxExtShaderIntrinsicsBarycentric_PerspPullModel); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction3, retVal.y, 0, retVal.z); + + /* The three outputs are uint bit patterns reinterpreted as floats. */ + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_VertexParameter +* +* Returns the triangle's parameter information at the specified triangle vertex. +* The vertex and parameter indices must be specified as immediate values. +* +* Only available in pixel shader stages.
+* +************************************************************************************************************* +*/ +float4 AmdDxExtShaderIntrinsics_VertexParameter(uint vertexIdx, uint parameterIdx) +{ + uint4 retVal; + uint4 instruction; + + /* One phase-0 instruction per component: each packs vertexIdx, parameterIdx and a fixed component selector (X, Y, Z, W) into the third argument and is issued with zero data operands. */ + instruction.x = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentX << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.x, 0, 0, retVal.x); + + instruction.y = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentY << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.y, 0, 0, retVal.y); + + instruction.z = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentZ << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.z, 0, 0, retVal.z); + + instruction.w = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (AmdDxExtShaderIntrinsicsBarycentric_ComponentW << 
AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction.w, 0, 0, retVal.w); + + /* The four outputs are uint bit patterns reinterpreted as floats. */ + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_VertexParameterComponent +* +* Returns the triangle's parameter information at the specified triangle vertex and component. +* The vertex, parameter and component indices must be specified as immediate values. +* +* Issues a single phase-0 VtxParam instruction whose third argument packs vertexIdx, parameterIdx +* and componentIdx using the Barycentric_VtxShift, Barycentric_ParamShift and +* Barycentric_ComponentShift constants; the output bits are reinterpreted as a float. +* +* Only available in pixel shader stages. +* +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_VertexParameterComponent(uint vertexIdx, uint parameterIdx, uint componentIdx) +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_VtxParam, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + ((vertexIdx << AmdDxExtShaderIntrinsicsBarycentric_VtxShift) | + (parameterIdx << AmdDxExtShaderIntrinsicsBarycentric_ParamShift) | + (componentIdx << AmdDxExtShaderIntrinsicsBarycentric_ComponentShift))); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetViewportIndex +* +* Returns current viewport index for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Only available in vertex/geometry/domain shader stages.
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetViewportIndex() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_ViewportIndex, 0, 0); + + /* The instruction word is passed bit-for-bit as the LOD argument of SampleLevel on AmdDxExtShaderIntrinsicsResource; the x channel of the sample is reinterpreted as the uint result. */ + retVal = asuint(AmdDxExtShaderIntrinsicsResource.SampleLevel(AmdDxExtShaderIntrinsicsSamplerState, + float3(0, 0, 0), + asfloat(instruction)).x); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetViewportIndexPsOnly +* +* Returns current viewport index for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Builds the same ViewportIndex instruction as AmdDxExtShaderIntrinsics_GetViewportIndex, but issues it +* through AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange with zero operands instead of a +* SampleLevel call. +* +* Only available in pixel shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetViewportIndexPsOnly() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_ViewportIndex, 0, 0); + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetRTArraySlice +* +* Returns current RT array slice for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Only available in vertex/geometry/domain shader stages.
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetRTArraySlice() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_RtArraySlice, 0, 0); + + /* The instruction word is passed bit-for-bit as the LOD argument of SampleLevel on AmdDxExtShaderIntrinsicsResource; the x channel of the sample is reinterpreted as the uint result. */ + retVal = asuint(AmdDxExtShaderIntrinsicsResource.SampleLevel(AmdDxExtShaderIntrinsicsSamplerState, + float3(0, 0, 0), + asfloat(instruction)).x); + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetRTArraySlicePsOnly +* +* Returns current RT array slice for replicated draws when MultiView extension is enabled (broadcast masks +* are set). +* +* Builds the same RtArraySlice instruction as AmdDxExtShaderIntrinsics_GetRTArraySlice, but issues it +* through AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange with zero operands instead of a +* SampleLevel call. +* +* Only available in pixel shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetRTArraySlicePsOnly() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_RtArraySlice, 0, 0); + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce +* +* The following functions perform the specified reduction operation across a wavefront. +* +* Available in all shader stages.
+* +************************************************************************************************************* +*/ + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float +* +* waveOp is shifted by AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift into the instruction's third argument; +* the raw bits of src are the only data operand and the output bits are reinterpreted as float. +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float2 +* +* The same instruction word is issued once per component of src. +************************************************************************************************************* +*/ +float2 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint2 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float3 +* +* The same instruction word is issued once per component of src. +************************************************************************************************************* +*/ +float3 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float3 src) +{ + uint 
instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint3 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : float4 +* +* The same instruction word is issued once per component of src. +************************************************************************************************************* +*/ +float4 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, float4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint4 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int +* +* The output is held in a uint local and converted to the int return type implicitly on return. +************************************************************************************************************* +*/ +int AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int src) +{ + 
uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int2 +* +* The same instruction word is issued once per component of src; the uint2 output is converted to +* the int2 return type implicitly. +************************************************************************************************************* +*/ +int2 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint2 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int3 +* +* The same instruction word is issued once per component of src; the uint3 output is converted to +* the int3 return type implicitly. +************************************************************************************************************* +*/ +int3 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int3 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint3 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return retVal; +} +
+/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveReduce : int4 +* +* waveOp is shifted by AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift into the instruction's third argument. +* The same instruction word is issued once per component of src; the uint4 output is converted to +* the int4 return type implicitly. +************************************************************************************************************* +*/ +int4 AmdDxExtShaderIntrinsics_WaveReduce(uint waveOp, int4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveReduce, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift)); + uint4 retVal; + + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return retVal; +} + + + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan +* +* The following functions perform the specified scan operation across a wavefront. +* +* Available in all shader stages.
+* +************************************************************************************************************* +*/ + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float +* +* The instruction's third argument combines waveOp (shifted by AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) +* with flags (shifted by AmdDxExtShaderIntrinsicsWaveOp_FlagShift). The raw bits of src are the only data +* operand and the output bits are reinterpreted as float. +************************************************************************************************************* +*/ +float AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src), 0, retVal); + + return asfloat(retVal); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float2 +* +* The same instruction word is issued once per component of src. +************************************************************************************************************* +*/ +float2 AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float2 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint2 retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + + return float2(asfloat(retVal.x), asfloat(retVal.y)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float3
+************************************************************************************************************* +*/ +float3 AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float3 src) +{ + /* waveOp and flags are shifted into the instruction's third argument; the same word is reused for every component. */ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint3 retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + + return float3(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z)); +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_WaveScan : float4 +* +* The same instruction word is issued once per component of src. +************************************************************************************************************* +*/ +float4 AmdDxExtShaderIntrinsics_WaveScan(uint waveOp, uint flags, float4 src) +{ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_WaveScan, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + (waveOp << AmdDxExtShaderIntrinsicsWaveOp_OpcodeShift) | + (flags << AmdDxExtShaderIntrinsicsWaveOp_FlagShift)); + uint4 retVal; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.x), 0, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.y), 0, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.z), 0, retVal.z); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, asuint(src.w), 0, retVal.w); + + return float4(asfloat(retVal.x), asfloat(retVal.y), asfloat(retVal.z), asfloat(retVal.w)); +} + +/**
+************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetDrawIndex +* +* Returns the 0-based draw index in an indirect draw. Always returns 0 for direct draws. +* +* Only available in vertex shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetDrawIndex() +{ + uint retVal; + /* Single phase-0 DrawIndex instruction issued with zero data operands; the output is returned as-is. */ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_DrawIndex, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetBaseInstance +* +* Returns the StartInstanceLocation parameter passed to direct or indirect drawing commands. +* +* Issues a single phase-0 BaseInstance instruction with zero data operands and returns the output as-is. +* +* Only available in vertex shader stage. +* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetBaseInstance() +{ + uint retVal; + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaseInstance, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +************************************************************************************************************* +* AmdDxExtShaderIntrinsics_GetBaseVertex +* +* For non-indexed draw commands, returns the StartVertexLocation parameter. For indexed draw commands, +* returns the BaseVertexLocation parameter. +* +* Only available in vertex shader stage.
+* +************************************************************************************************************* +*/ +uint AmdDxExtShaderIntrinsics_GetBaseVertex() +{ + uint retVal; + /* Single phase-0 BaseVertex instruction issued with zero data operands; the output is returned as-is. */ + uint instruction = MakeAmdShaderIntrinsicsInstruction(AmdDxExtShaderIntrinsicsOpcode_BaseVertex, + AmdDxExtShaderIntrinsicsOpcodePhase_0, + 0); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instruction, 0, 0, retVal); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_MakeAtomicInstructions +* +* Creates uint4 with x/y/z/w components containing phase 0/1/2/3 for atomic instructions. +* All four words use AmdDxExtShaderIntrinsicsOpcode_AtomicU64 and carry op as the third argument. +* NOTE: This is an internal function and should not be called by the source HLSL shader directly. +* +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_MakeAtomicInstructions(uint op) +{ + uint4 instructions; + instructions.x = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_0, op); + instructions.y = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_1, op); + instructions.z = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_2, op); + instructions.w = MakeAmdShaderIntrinsicsInstruction( + AmdDxExtShaderIntrinsicsOpcode_AtomicU64, AmdDxExtShaderIntrinsicsOpcodePhase_3, op); + return instructions; +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicOp +* +* Creates intrinsic instructions for the specified atomic op. +* NOTE: These are internal functions and should not be called by the source HLSL shader directly.
+* +*********************************************************************************************************************** +*/ +/* Overloads without compare_value issue three exchanges: phase 0 carries address.x and address.y, phase 1 carries address.z and value.x, phase 2 carries value.y and the phase-1 output. instructions.w (phase 3) is not used by these overloads. NOTE(review): the access to uav between phases 1 and 2 presumably only identifies the target resource to the driver - confirm against the AGS documentation. */ +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWByteAddressBuffer uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav.Store(retVal.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +/* NOTE(review): the RWTexture1D/2D/3D parameters in this file carry no element type; confirm against the upstream AGS header whether a template argument was lost when this text was extracted. */ +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWTexture1D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[retVal.x] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWTexture2D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + /* retVal.x is used for both coordinates of the texture access. */ + uav[uint2(retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp(RWTexture3D uav, uint3 address, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = 
AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + /* retVal.x is used for all three coordinates of the texture access. */ + uav[uint3(retVal.x, retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, retVal.y, retVal.y); + + return retVal; +} + +/* Overloads with compare_value issue four exchanges: phases 0 and 1 as in the overloads above, phase 2 carries value.y and compare_value.x, phase 3 carries compare_value.y and the phase-2 output. */ +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWByteAddressBuffer uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav.Store(retVal.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWTexture1D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + uav[retVal.x] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWTexture2D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 
retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + /* retVal.x is used for both coordinates of the texture access. */ + uav[uint2(retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +uint2 AmdDxExtShaderIntrinsics_AtomicOp( + RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value, uint op) +{ + uint2 retVal; + + const uint4 instructions = AmdDxExtShaderIntrinsics_MakeAtomicInstructions(op); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.x, address.x, address.y, retVal.x); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.y, address.z, value.x, retVal.y); + /* retVal.x is used for all three coordinates of the texture access. */ + uav[uint3(retVal.x, retVal.x, retVal.x)] = retVal.y; + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.z, value.y, compare_value.x, retVal.y); + AmdDxExtShaderIntrinsicsUAV.InterlockedCompareExchange(instructions.w, compare_value.y, retVal.y, retVal.y); + + return retVal; +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicMinU64 +* +* Performs 64-bit atomic minimum of value with the UAV at address, returns the original value. +* +* Available in all shader stages.
+* +*********************************************************************************************************************** +*/ +/* All four overloads forward to AmdDxExtShaderIntrinsics_AtomicOp with AmdDxExtShaderIntrinsicsAtomicOp_MinU64; the address is widened to a uint3 with unused components set to 0. */ +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMinU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MinU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicMaxU64 +* +* Performs 64-bit atomic maximum of value with the UAV at address, returns the original value. +* +* Available in all shader stages.
+* +*********************************************************************************************************************** +*/ +/* All four overloads forward to AmdDxExtShaderIntrinsics_AtomicOp with AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; the address is widened to a uint3 with unused components set to 0. */ +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWByteAddressBuffer uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWTexture1D uav, uint address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address, 0, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWTexture2D uav, uint2 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, 0), value, op); +} + +uint2 AmdDxExtShaderIntrinsics_AtomicMaxU64(RWTexture3D uav, uint3 address, uint2 value) +{ + const uint op = AmdDxExtShaderIntrinsicsAtomicOp_MaxU64; + return AmdDxExtShaderIntrinsics_AtomicOp(uav, uint3(address.x, address.y, address.z), value, op); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_AtomicAndU64 +* +* Performs 64-bit atomic AND of value with the UAV at address, returns the original value. +* +* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+// RWByteAddressBuffer overload: address is widened to uint3(address, 0, 0) before the AtomicOp call.
+uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AndU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// NOTE(review): the texture overloads below declare the UAV without an element type; confirm this matches the
+// AmdDxExtShaderIntrinsics_AtomicOp overloads they forward to.
+// RWTexture1D overload: address is widened to uint3(address, 0, 0), exactly like the buffer overload.
+uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWTexture1D uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AndU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture2D overload: address.x and address.y are forwarded; the third coordinate is zero.
+uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AndU64;
+    const uint3 coord  = uint3(address.x, address.y, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture3D overload: all three address components are forwarded unchanged.
+uint2 AmdDxExtShaderIntrinsics_AtomicAndU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AndU64;
+    const uint3 coord  = uint3(address.x, address.y, address.z);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_AtomicOrU64
+*
+*   Performs 64-bit atomic OR of value with the UAV at address, returns the original value.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+// RWByteAddressBuffer overload: address is widened to uint3(address, 0, 0) before the AtomicOp call.
+uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_OrU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// NOTE(review): the texture overloads below declare the UAV without an element type; confirm this matches the
+// AmdDxExtShaderIntrinsics_AtomicOp overloads they forward to.
+// RWTexture1D overload: address is widened to uint3(address, 0, 0), exactly like the buffer overload.
+uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWTexture1D uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_OrU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture2D overload: address.x and address.y are forwarded; the third coordinate is zero.
+uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_OrU64;
+    const uint3 coord  = uint3(address.x, address.y, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture3D overload: all three address components are forwarded unchanged.
+uint2 AmdDxExtShaderIntrinsics_AtomicOrU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_OrU64;
+    const uint3 coord  = uint3(address.x, address.y, address.z);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_AtomicXorU64
+*
+*   Performs 64-bit atomic XOR of value with the UAV at address, returns the original value.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+// RWByteAddressBuffer overload: address is widened to uint3(address, 0, 0) before the AtomicOp call.
+uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XorU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// NOTE(review): the texture overloads below declare the UAV without an element type; confirm this matches the
+// AmdDxExtShaderIntrinsics_AtomicOp overloads they forward to.
+// RWTexture1D overload: address is widened to uint3(address, 0, 0), exactly like the buffer overload.
+uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWTexture1D uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XorU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture2D overload: address.x and address.y are forwarded; the third coordinate is zero.
+uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XorU64;
+    const uint3 coord  = uint3(address.x, address.y, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture3D overload: all three address components are forwarded unchanged.
+uint2 AmdDxExtShaderIntrinsics_AtomicXorU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XorU64;
+    const uint3 coord  = uint3(address.x, address.y, address.z);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_AtomicAddU64
+*
+*   Performs 64-bit atomic add of value with the UAV at address, returns the original value.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+// RWByteAddressBuffer overload: address is widened to uint3(address, 0, 0) before the AtomicOp call.
+uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AddU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// NOTE(review): the texture overloads below declare the UAV without an element type; confirm this matches the
+// AmdDxExtShaderIntrinsics_AtomicOp overloads they forward to.
+// RWTexture1D overload: address is widened to uint3(address, 0, 0), exactly like the buffer overload.
+uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWTexture1D uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AddU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture2D overload: address.x and address.y are forwarded; the third coordinate is zero.
+uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AddU64;
+    const uint3 coord  = uint3(address.x, address.y, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture3D overload: all three address components are forwarded unchanged.
+uint2 AmdDxExtShaderIntrinsics_AtomicAddU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_AddU64;
+    const uint3 coord  = uint3(address.x, address.y, address.z);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_AtomicXchgU64
+*
+*   Performs 64-bit atomic exchange of value with the UAV at address, returns the original value.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+// RWByteAddressBuffer overload: address is widened to uint3(address, 0, 0) before the AtomicOp call.
+uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// NOTE(review): the texture overloads below declare the UAV without an element type; confirm this matches the
+// AmdDxExtShaderIntrinsics_AtomicOp overloads they forward to.
+// RWTexture1D overload: address is widened to uint3(address, 0, 0), exactly like the buffer overload.
+uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWTexture1D uav, uint address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture2D overload: address.x and address.y are forwarded; the third coordinate is zero.
+uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64;
+    const uint3 coord  = uint3(address.x, address.y, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+// RWTexture3D overload: all three address components are forwarded unchanged.
+uint2 AmdDxExtShaderIntrinsics_AtomicXchgU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_XchgU64;
+    const uint3 coord  = uint3(address.x, address.y, address.z);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_AtomicCmpXchgU64
+*
+*   Performs 64-bit atomic compare of comparison value with UAV at address, stores value if values match,
+*   returns the original value.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+// RWByteAddressBuffer overload: address is widened to uint3(address, 0, 0); compare_value precedes value in the
+// forwarded AtomicOp call.
+uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64(
+    RWByteAddressBuffer uav, uint address, uint2 compare_value, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, compare_value, value, opcode);
+}
+
+// NOTE(review): the texture overloads below declare the UAV without an element type; confirm this matches the
+// AmdDxExtShaderIntrinsics_AtomicOp overloads they forward to.
+// RWTexture1D overload: address is widened to uint3(address, 0, 0), exactly like the buffer overload.
+uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64(
+    RWTexture1D uav, uint address, uint2 compare_value, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64;
+    const uint3 coord  = uint3(address, 0, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, compare_value, value, opcode);
+}
+
+// RWTexture2D overload: address.x and address.y are forwarded; the third coordinate is zero.
+uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64(
+    RWTexture2D uav, uint2 address, uint2 compare_value, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64;
+    const uint3 coord  = uint3(address.x, address.y, 0);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, compare_value, value, opcode);
+}
+
+// RWTexture3D overload: all three address components are forwarded unchanged.
+uint2 AmdDxExtShaderIntrinsics_AtomicCmpXchgU64(
+    RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value)
+{
+    const uint  opcode = AmdDxExtShaderIntrinsicsAtomicOp_CmpXchgU64;
+    const uint3 coord  = uint3(address.x, address.y, address.z);
+    return AmdDxExtShaderIntrinsics_AtomicOp(uav, coord, compare_value, value, opcode);
+}
+
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum
+*
+*   Performs reduction operation across a wave and returns the result of the reduction (sum of all threads in a wave)
+*   to all participating lanes.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_WaveActiveSum(float src)
+{
+    const float total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, float2 overload (AmdDxExtShaderIntrinsicsWaveOp_AddF reduction)
+***********************************************************************************************************************
+*/
+float2 AmdDxExtShaderIntrinsics_WaveActiveSum(float2 src)
+{
+    const float2 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, float3 overload (AmdDxExtShaderIntrinsicsWaveOp_AddF reduction)
+***********************************************************************************************************************
+*/
+float3 AmdDxExtShaderIntrinsics_WaveActiveSum(float3 src)
+{
+    const float3 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, float4 overload (AmdDxExtShaderIntrinsicsWaveOp_AddF reduction)
+***********************************************************************************************************************
+*/
+float4 AmdDxExtShaderIntrinsics_WaveActiveSum(float4 src)
+{
+    const float4 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddF, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, int overload (AmdDxExtShaderIntrinsicsWaveOp_AddI reduction)
+***********************************************************************************************************************
+*/
+int AmdDxExtShaderIntrinsics_WaveActiveSum(int src)
+{
+    const int total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, int2 overload (AmdDxExtShaderIntrinsicsWaveOp_AddI reduction)
+***********************************************************************************************************************
+*/
+int2 AmdDxExtShaderIntrinsics_WaveActiveSum(int2 src)
+{
+    const int2 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, int3 overload (AmdDxExtShaderIntrinsicsWaveOp_AddI reduction)
+***********************************************************************************************************************
+*/
+int3 AmdDxExtShaderIntrinsics_WaveActiveSum(int3 src)
+{
+    const int3 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, int4 overload (AmdDxExtShaderIntrinsicsWaveOp_AddI reduction)
+***********************************************************************************************************************
+*/
+int4 AmdDxExtShaderIntrinsics_WaveActiveSum(int4 src)
+{
+    const int4 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddI, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, uint overload (AmdDxExtShaderIntrinsicsWaveOp_AddU reduction)
+***********************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_WaveActiveSum(uint src)
+{
+    const uint total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, uint2 overload (AmdDxExtShaderIntrinsicsWaveOp_AddU reduction)
+***********************************************************************************************************************
+*/
+uint2 AmdDxExtShaderIntrinsics_WaveActiveSum(uint2 src)
+{
+    const uint2 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, uint3 overload (AmdDxExtShaderIntrinsicsWaveOp_AddU reduction)
+***********************************************************************************************************************
+*/
+uint3 AmdDxExtShaderIntrinsics_WaveActiveSum(uint3 src)
+{
+    const uint3 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveSum, uint4 overload (AmdDxExtShaderIntrinsicsWaveOp_AddU reduction)
+***********************************************************************************************************************
+*/
+uint4 AmdDxExtShaderIntrinsics_WaveActiveSum(uint4 src)
+{
+    const uint4 total = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_AddU, src);
+    return total;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct
+*
+*   Performs reduction operation across a wave and returns the result of the reduction (product of all threads in a
+*   wave) to all participating lanes.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_WaveActiveProduct(float src)
+{
+    const float product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, float2 overload (AmdDxExtShaderIntrinsicsWaveOp_MulF reduction)
+***********************************************************************************************************************
+*/
+float2 AmdDxExtShaderIntrinsics_WaveActiveProduct(float2 src)
+{
+    const float2 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, float3 overload (AmdDxExtShaderIntrinsicsWaveOp_MulF reduction)
+***********************************************************************************************************************
+*/
+float3 AmdDxExtShaderIntrinsics_WaveActiveProduct(float3 src)
+{
+    const float3 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, float4 overload (AmdDxExtShaderIntrinsicsWaveOp_MulF reduction)
+***********************************************************************************************************************
+*/
+float4 AmdDxExtShaderIntrinsics_WaveActiveProduct(float4 src)
+{
+    const float4 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulF, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, int overload (AmdDxExtShaderIntrinsicsWaveOp_MulI reduction)
+***********************************************************************************************************************
+*/
+int AmdDxExtShaderIntrinsics_WaveActiveProduct(int src)
+{
+    const int product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, int2 overload (AmdDxExtShaderIntrinsicsWaveOp_MulI reduction)
+***********************************************************************************************************************
+*/
+int2 AmdDxExtShaderIntrinsics_WaveActiveProduct(int2 src)
+{
+    const int2 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, int3 overload (AmdDxExtShaderIntrinsicsWaveOp_MulI reduction)
+***********************************************************************************************************************
+*/
+int3 AmdDxExtShaderIntrinsics_WaveActiveProduct(int3 src)
+{
+    const int3 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, int4 overload (AmdDxExtShaderIntrinsicsWaveOp_MulI reduction)
+***********************************************************************************************************************
+*/
+int4 AmdDxExtShaderIntrinsics_WaveActiveProduct(int4 src)
+{
+    const int4 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulI, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, uint overload (AmdDxExtShaderIntrinsicsWaveOp_MulU reduction)
+***********************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_WaveActiveProduct(uint src)
+{
+    const uint product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, uint2 overload (AmdDxExtShaderIntrinsicsWaveOp_MulU reduction)
+***********************************************************************************************************************
+*/
+uint2 AmdDxExtShaderIntrinsics_WaveActiveProduct(uint2 src)
+{
+    const uint2 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, uint3 overload (AmdDxExtShaderIntrinsicsWaveOp_MulU reduction)
+***********************************************************************************************************************
+*/
+uint3 AmdDxExtShaderIntrinsics_WaveActiveProduct(uint3 src)
+{
+    const uint3 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveProduct, uint4 overload (AmdDxExtShaderIntrinsicsWaveOp_MulU reduction)
+***********************************************************************************************************************
+*/
+uint4 AmdDxExtShaderIntrinsics_WaveActiveProduct(uint4 src)
+{
+    const uint4 product = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MulU, src);
+    return product;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin
+*
+*   Performs reduction operation across a wave and returns the result of the reduction (minimum of all threads in a
+*   wave) to all participating lanes.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_WaveActiveMin(float src)
+{
+    const float lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, float2 overload (AmdDxExtShaderIntrinsicsWaveOp_MinF reduction)
+***********************************************************************************************************************
+*/
+float2 AmdDxExtShaderIntrinsics_WaveActiveMin(float2 src)
+{
+    const float2 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, float3 overload (AmdDxExtShaderIntrinsicsWaveOp_MinF reduction)
+***********************************************************************************************************************
+*/
+float3 AmdDxExtShaderIntrinsics_WaveActiveMin(float3 src)
+{
+    const float3 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, float4 overload (AmdDxExtShaderIntrinsicsWaveOp_MinF reduction)
+***********************************************************************************************************************
+*/
+float4 AmdDxExtShaderIntrinsics_WaveActiveMin(float4 src)
+{
+    const float4 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinF, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, int overload (AmdDxExtShaderIntrinsicsWaveOp_MinI reduction)
+***********************************************************************************************************************
+*/
+int AmdDxExtShaderIntrinsics_WaveActiveMin(int src)
+{
+    const int lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, int2 overload (AmdDxExtShaderIntrinsicsWaveOp_MinI reduction)
+***********************************************************************************************************************
+*/
+int2 AmdDxExtShaderIntrinsics_WaveActiveMin(int2 src)
+{
+    const int2 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, int3 overload (AmdDxExtShaderIntrinsicsWaveOp_MinI reduction)
+***********************************************************************************************************************
+*/
+int3 AmdDxExtShaderIntrinsics_WaveActiveMin(int3 src)
+{
+    const int3 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, int4 overload (AmdDxExtShaderIntrinsicsWaveOp_MinI reduction)
+***********************************************************************************************************************
+*/
+int4 AmdDxExtShaderIntrinsics_WaveActiveMin(int4 src)
+{
+    const int4 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinI, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, uint overload (AmdDxExtShaderIntrinsicsWaveOp_MinU reduction)
+***********************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_WaveActiveMin(uint src)
+{
+    const uint lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, uint2 overload (AmdDxExtShaderIntrinsicsWaveOp_MinU reduction)
+***********************************************************************************************************************
+*/
+uint2 AmdDxExtShaderIntrinsics_WaveActiveMin(uint2 src)
+{
+    const uint2 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, uint3 overload (AmdDxExtShaderIntrinsicsWaveOp_MinU reduction)
+***********************************************************************************************************************
+*/
+uint3 AmdDxExtShaderIntrinsics_WaveActiveMin(uint3 src)
+{
+    const uint3 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMin, uint4 overload (AmdDxExtShaderIntrinsicsWaveOp_MinU reduction)
+***********************************************************************************************************************
+*/
+uint4 AmdDxExtShaderIntrinsics_WaveActiveMin(uint4 src)
+{
+    const uint4 lowest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MinU, src);
+    return lowest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax
+*
+*   Performs reduction operation across a wave and returns the result of the reduction (maximum of all threads in a
+*   wave) to all participating lanes.
+*
+*   Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdDxExtShaderIntrinsics_WaveActiveMax(float src)
+{
+    const float highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, float2 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxF reduction)
+***********************************************************************************************************************
+*/
+float2 AmdDxExtShaderIntrinsics_WaveActiveMax(float2 src)
+{
+    const float2 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, float3 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxF reduction)
+***********************************************************************************************************************
+*/
+float3 AmdDxExtShaderIntrinsics_WaveActiveMax(float3 src)
+{
+    const float3 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, float4 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxF reduction)
+***********************************************************************************************************************
+*/
+float4 AmdDxExtShaderIntrinsics_WaveActiveMax(float4 src)
+{
+    const float4 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxF, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, int overload (AmdDxExtShaderIntrinsicsWaveOp_MaxI reduction)
+***********************************************************************************************************************
+*/
+int AmdDxExtShaderIntrinsics_WaveActiveMax(int src)
+{
+    const int highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, int2 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxI reduction)
+***********************************************************************************************************************
+*/
+int2 AmdDxExtShaderIntrinsics_WaveActiveMax(int2 src)
+{
+    const int2 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, int3 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxI reduction)
+***********************************************************************************************************************
+*/
+int3 AmdDxExtShaderIntrinsics_WaveActiveMax(int3 src)
+{
+    const int3 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, int4 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxI reduction)
+***********************************************************************************************************************
+*/
+int4 AmdDxExtShaderIntrinsics_WaveActiveMax(int4 src)
+{
+    const int4 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxI, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, uint overload (AmdDxExtShaderIntrinsicsWaveOp_MaxU reduction)
+***********************************************************************************************************************
+*/
+uint AmdDxExtShaderIntrinsics_WaveActiveMax(uint src)
+{
+    const uint highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, uint2 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxU reduction)
+***********************************************************************************************************************
+*/
+uint2 AmdDxExtShaderIntrinsics_WaveActiveMax(uint2 src)
+{
+    const uint2 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, uint3 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxU reduction)
+***********************************************************************************************************************
+*/
+uint3 AmdDxExtShaderIntrinsics_WaveActiveMax(uint3 src)
+{
+    const uint3 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveMax, uint4 overload (AmdDxExtShaderIntrinsicsWaveOp_MaxU reduction)
+***********************************************************************************************************************
+*/
+uint4 AmdDxExtShaderIntrinsics_WaveActiveMax(uint4 src)
+{
+    const uint4 highest = AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_MaxU, src);
+    return highest;
+}
+
+/**
+***********************************************************************************************************************
+*   AmdDxExtShaderIntrinsics_WaveActiveBitAnd
+*
+*   Performs reduction operation across a wave and returns the result of the reduction (Bitwise AND of all threads in a
+*   wave) to all participating lanes.
+*
+*   Available in all shader stages.
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +* +* uint2 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_And and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +* +* uint3 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_And and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitAnd +* +* uint4 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_And and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveBitAnd(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* Performs reduction
operation across a wave and returns the result of the reduction (Bitwise OR of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* int overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveBitOr(int src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce( AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* int2 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveBitOr(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* int3 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveBitOr(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* int4 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveBitOr(int4 src) +{ +
return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* uint overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* uint2 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* uint3 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitOr +* +* uint4 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Or and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WaveActiveBitOr(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Or, src); +} + +/**
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* Performs reduction operation across a wave and returns the result of the reduction (Bitwise XOR of all threads in a +* wave) to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* int overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Xor and returns +* the result of that call. +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WaveActiveBitXor(int src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* int2 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Xor and returns +* the result of that call. +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WaveActiveBitXor(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* int3 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Xor and returns +* the result of that call. +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WaveActiveBitXor(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +*
AmdDxExtShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WaveActiveBitXor(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* uint overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Xor and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* uint2 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Xor and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* uint3 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Xor and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WaveActiveBitXor +* +* uint4 overload: passes src to AmdDxExtShaderIntrinsics_WaveReduce with AmdDxExtShaderIntrinsicsWaveOp_Xor and returns +* the result of that call. +*********************************************************************************************************************** +*/ +uint4
AmdDxExtShaderIntrinsics_WaveActiveBitXor(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveReduce(AmdDxExtShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* Performs a prefix (exclusive) scan operation across a wave and returns the resulting sum to all participating lanes. +* +* The float overload below passes AmdDxExtShaderIntrinsicsWaveOp_AddF and AmdDxExtShaderIntrinsicsWaveOp_Exclusive to +* AmdDxExtShaderIntrinsics_WaveScan and returns the result of that call. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixSum(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* float2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixSum(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* float3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixSum(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum
+*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixSum(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* int overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixSum(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan( + AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* int2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixSum(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* int3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixSum(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum
+*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixSum(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* uint overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixSum(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* uint2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixSum(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum +* +* uint3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_AddU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixSum(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixSum
+*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixSum(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* Performs a prefix (exclusive) scan operation across a wave and returns the resulting product to all participating +* lanes. +* +* The float overload below passes AmdDxExtShaderIntrinsicsWaveOp_MulF and AmdDxExtShaderIntrinsicsWaveOp_Exclusive to +* AmdDxExtShaderIntrinsics_WaveScan and returns the result of that call. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixProduct(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* float2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixProduct(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* float3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixProduct(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/**
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixProduct(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* int overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixProduct(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* int2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixProduct(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* int3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixProduct(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/**
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixProduct(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* uint overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixProduct(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* uint2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixProduct(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +* +* uint3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MulU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixProduct(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/**
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixProduct(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* Performs a prefix (exclusive) scan operation across a wave and returns the resulting minimum value to all +* participating lanes. +* +* The float overload below passes AmdDxExtShaderIntrinsicsWaveOp_MinF and AmdDxExtShaderIntrinsicsWaveOp_Exclusive to +* AmdDxExtShaderIntrinsics_WaveScan and returns the result of that call. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixMin(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* float2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixMin(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* float3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixMin(float3 src) +{ +
return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* float4 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixMin(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* int overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixMin(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* int2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixMin(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* int3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixMin(int3 src) +{ + return
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* int4 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixMin(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* uint overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixMin(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* uint2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixMin(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* uint3 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixMin(uint3 src) +{ + return
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMin +* +* uint4 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MinU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixMin(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* Performs a prefix (exclusive) scan operation across a wave and returns the resulting maximum value to all +* participating lanes. +* +* The float overload below passes AmdDxExtShaderIntrinsicsWaveOp_MaxF and AmdDxExtShaderIntrinsicsWaveOp_Exclusive to +* AmdDxExtShaderIntrinsics_WaveScan and returns the result of that call. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePrefixMax(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* float2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MaxF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePrefixMax(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax
+*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePrefixMax(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* float4 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MaxF and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePrefixMax(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* int overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MaxI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePrefixMax(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* int2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MaxI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePrefixMax(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax
+*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePrefixMax(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* int4 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MaxI and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePrefixMax(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* uint overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MaxU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePrefixMax(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +* +* uint2 overload: passes src to AmdDxExtShaderIntrinsics_WaveScan with AmdDxExtShaderIntrinsicsWaveOp_MaxU and +* AmdDxExtShaderIntrinsicsWaveOp_Exclusive, and returns the result of that call. +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePrefixMax(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax
+*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePrefixMax(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePrefixMax(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum +* +* Performs a Postfix (Inclusive) scan operation across a wave and returns the resulting sum to all participating lanes. +* +* Available in all shader stages. 
+* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixSum(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (float2 overload: AddF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixSum(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (float3 overload: AddF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixSum(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (float4 overload: AddF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixSum(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (int overload: AddI op, Inclusive flag) 
+*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixSum(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (int2 overload: AddI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixSum(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (int3 overload: AddI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixSum(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (int4 overload: AddI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixSum(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (uint overload: AddU op, Inclusive flag) 
+*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixSum(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (uint2 overload: AddU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixSum(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (uint3 overload: AddU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixSum(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixSum (uint4 overload: AddU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixSum(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_AddU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct +* +* Performs a Postfix (Inclusive) scan 
operation across a wave and returns the resulting product to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixProduct(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (float2 overload: MulF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixProduct(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (float3 overload: MulF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixProduct(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (float4 overload: MulF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixProduct(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (int overload: MulI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixProduct(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (int2 overload: MulI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixProduct(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (int3 overload: MulI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixProduct(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (int4 overload: MulI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixProduct(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (uint overload: MulU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixProduct(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (uint2 overload: MulU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixProduct(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (uint3 overload: MulU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixProduct(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixProduct (uint4 overload: MulU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixProduct(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MulU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin +* +* Performs a Postfix (Inclusive) scan operation across a wave and returns the resulting minimum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixMin(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (float2 overload: MinF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixMin(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (float3 overload: MinF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixMin(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (float4 overload: MinF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixMin(float4 src) +{ + 
return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (int overload: MinI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixMin(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (int2 overload: MinI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixMin(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (int3 overload: MinI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixMin(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (int4 overload: MinI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixMin(int4 src) +{ + return 
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (uint overload: MinU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixMin(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (uint2 overload: MinU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixMin(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (uint3 overload: MinU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixMin(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMin (uint4 overload: MinU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixMin(uint4 src) +{ + return 
AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MinU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax +* +* Performs a Postfix (Inclusive) scan operation across a wave and returns the resulting maximum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdDxExtShaderIntrinsics_WavePostfixMax(float src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (float2 overload: MaxF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float2 AmdDxExtShaderIntrinsics_WavePostfixMax(float2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (float3 overload: MaxF op, Inclusive flag) +*********************************************************************************************************************** +*/ +float3 AmdDxExtShaderIntrinsics_WavePostfixMax(float3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (float4 overload: MaxF op, Inclusive flag) 
+*********************************************************************************************************************** +*/ +float4 AmdDxExtShaderIntrinsics_WavePostfixMax(float4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxF, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (int overload: MaxI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int AmdDxExtShaderIntrinsics_WavePostfixMax(int src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (int2 overload: MaxI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int2 AmdDxExtShaderIntrinsics_WavePostfixMax(int2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (int3 overload: MaxI op, Inclusive flag) +*********************************************************************************************************************** +*/ +int3 AmdDxExtShaderIntrinsics_WavePostfixMax(int3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (int4 overload: MaxI op, Inclusive flag) 
+*********************************************************************************************************************** +*/ +int4 AmdDxExtShaderIntrinsics_WavePostfixMax(int4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxI, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (uint overload: MaxU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint AmdDxExtShaderIntrinsics_WavePostfixMax(uint src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (uint2 overload: MaxU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint2 AmdDxExtShaderIntrinsics_WavePostfixMax(uint2 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (uint3 overload: MaxU op, Inclusive flag) +*********************************************************************************************************************** +*/ +uint3 AmdDxExtShaderIntrinsics_WavePostfixMax(uint3 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdDxExtShaderIntrinsics_WavePostfixMax (uint4 overload: MaxU op, Inclusive flag) 
+*********************************************************************************************************************** +*/ +uint4 AmdDxExtShaderIntrinsics_WavePostfixMax(uint4 src) +{ + return AmdDxExtShaderIntrinsics_WaveScan(AmdDxExtShaderIntrinsicsWaveOp_MaxU, + AmdDxExtShaderIntrinsicsWaveOp_Inclusive, + src); +} + + +#endif // _AMDDXEXTSHADERINTRINSICS_HLSL_ diff --git a/Source/ThirdParty/AGS/ags_shader_intrinsics_dx12.hlsl b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx12.hlsl new file mode 100644 index 000000000..a46fb02ed --- /dev/null +++ b/Source/ThirdParty/AGS/ags_shader_intrinsics_dx12.hlsl @@ -0,0 +1,3958 @@ +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +/** +*********************************************************************************************************************** +* @file ags_shader_intrinsics_dx12.hlsl +* @brief +* AMD D3D Shader Intrinsics HLSL include file. +* This include file contains the Shader Intrinsics definitions used in shader code by the application. +* @note +* This does not work with immediate values or values that the compiler determines can produce denorms +* +*********************************************************************************************************************** +*/ + +#ifndef _AMDEXTD3DSHADERINTRINICS_HLSL +#define _AMDEXTD3DSHADERINTRINICS_HLSL + +// Default AMD shader intrinsics designated SpaceId. +#define AmdExtD3DShaderIntrinsicsSpaceId space2147420894 + +// Dummy UAV used to access shader intrinsics. Applications need to add a root signature entry for this resource in +// order to use shader extensions. Applications may specify an alternate UAV binding by defining AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE. +#ifdef AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE +RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE); +#else +RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderIntrinsicsSpaceId); +#endif + +/** +*********************************************************************************************************************** +* Definitions to construct the intrinsic instruction composed of an opcode and optional immediate data. Bit layout implied by the shift/mask pairs below: magic code [31:28], opcode phase [25:24], data [23:8], opcode [7:0]. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsics_MagicCodeShift 28 +#define AmdExtD3DShaderIntrinsics_MagicCodeMask 0xf +#define AmdExtD3DShaderIntrinsics_OpcodePhaseShift 24 +#define AmdExtD3DShaderIntrinsics_OpcodePhaseMask 0x3 +#define AmdExtD3DShaderIntrinsics_DataShift 8 +#define AmdExtD3DShaderIntrinsics_DataMask 0xffff +#define AmdExtD3DShaderIntrinsics_OpcodeShift 0 +#define AmdExtD3DShaderIntrinsics_OpcodeMask 0xff + +#define AmdExtD3DShaderIntrinsics_MagicCode 0x5 + + +/** +*********************************************************************************************************************** +* Intrinsic opcodes. +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsOpcode_Readfirstlane 0x01 +#define AmdExtD3DShaderIntrinsicsOpcode_Readlane 0x02 +#define AmdExtD3DShaderIntrinsicsOpcode_LaneId 0x03 +#define AmdExtD3DShaderIntrinsicsOpcode_Swizzle 0x04 +#define AmdExtD3DShaderIntrinsicsOpcode_Ballot 0x05 +#define AmdExtD3DShaderIntrinsicsOpcode_MBCnt 0x06 +#define AmdExtD3DShaderIntrinsicsOpcode_Min3U 0x07 +#define AmdExtD3DShaderIntrinsicsOpcode_Min3F 0x08 +#define AmdExtD3DShaderIntrinsicsOpcode_Med3U 0x09 +#define AmdExtD3DShaderIntrinsicsOpcode_Med3F 0x0a +#define AmdExtD3DShaderIntrinsicsOpcode_Max3U 0x0b +#define AmdExtD3DShaderIntrinsicsOpcode_Max3F 0x0c +#define AmdExtD3DShaderIntrinsicsOpcode_BaryCoord 0x0d +#define AmdExtD3DShaderIntrinsicsOpcode_VtxParam 0x0e +#define AmdExtD3DShaderIntrinsicsOpcode_Reserved1 0x0f +#define AmdExtD3DShaderIntrinsicsOpcode_Reserved2 0x10 +#define AmdExtD3DShaderIntrinsicsOpcode_Reserved3 0x11 +#define AmdExtD3DShaderIntrinsicsOpcode_WaveReduce 0x12 +#define AmdExtD3DShaderIntrinsicsOpcode_WaveScan 0x13 +#define AmdExtD3DShaderIntrinsicsOpcode_LoadDwAtAddr 0x14 +#define 
AmdExtD3DShaderIntrinsicsOpcode_DrawIndex 0x17 +#define AmdExtD3DShaderIntrinsicsOpcode_AtomicU64 0x18 +#define AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize 0x19 +#define AmdExtD3DShaderIntrinsicsOpcode_BaseInstance 0x1a +#define AmdExtD3DShaderIntrinsicsOpcode_BaseVertex 0x1b +#define AmdExtD3DShaderIntrinsicsOpcode_FloatConversion 0x1c +#define AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt 0x1d +#define AmdExtD3DShaderIntrinsicsOpcode_ShaderClock 0x1f +#define AmdExtD3DShaderIntrinsicsOpcode_ShaderRealtimeClock 0x20 + +/** +*********************************************************************************************************************** +* Intrinsic opcode phases. +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsOpcodePhase_0 0x0 +#define AmdExtD3DShaderIntrinsicsOpcodePhase_1 0x1 +#define AmdExtD3DShaderIntrinsicsOpcodePhase_2 0x2 +#define AmdExtD3DShaderIntrinsicsOpcodePhase_3 0x3 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsWaveOp defines for supported operations. Can be used as the parameter for the +* AmdExtD3DShaderIntrinsicsOpcode_WaveReduce and AmdExtD3DShaderIntrinsicsOpcode_WaveScan intrinsics. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsWaveOp_AddF 0x01 +#define AmdExtD3DShaderIntrinsicsWaveOp_AddI 0x02 +#define AmdExtD3DShaderIntrinsicsWaveOp_AddU 0x03 +#define AmdExtD3DShaderIntrinsicsWaveOp_MulF 0x04 +#define AmdExtD3DShaderIntrinsicsWaveOp_MulI 0x05 +#define AmdExtD3DShaderIntrinsicsWaveOp_MulU 0x06 +#define AmdExtD3DShaderIntrinsicsWaveOp_MinF 0x07 +#define AmdExtD3DShaderIntrinsicsWaveOp_MinI 0x08 +#define AmdExtD3DShaderIntrinsicsWaveOp_MinU 0x09 +#define AmdExtD3DShaderIntrinsicsWaveOp_MaxF 0x0a +#define AmdExtD3DShaderIntrinsicsWaveOp_MaxI 0x0b +#define AmdExtD3DShaderIntrinsicsWaveOp_MaxU 0x0c +#define AmdExtD3DShaderIntrinsicsWaveOp_And 0x0d // Reduction only +#define AmdExtD3DShaderIntrinsicsWaveOp_Or 0x0e // Reduction only +#define AmdExtD3DShaderIntrinsicsWaveOp_Xor 0x0f // Reduction only + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsWaveOp masks and shifts for opcode and flags +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift 0 +#define AmdExtD3DShaderIntrinsicsWaveOp_OpcodeMask 0xff +#define AmdExtD3DShaderIntrinsicsWaveOp_FlagShift 8 +#define AmdExtD3DShaderIntrinsicsWaveOp_FlagMask 0xff + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsWaveOp flags for use with AmdExtD3DShaderIntrinsicsOpcode_WaveScan. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsWaveOp_Inclusive 0x01 +#define AmdExtD3DShaderIntrinsicsWaveOp_Exclusive 0x02 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsSwizzle defines for common swizzles. Can be used as the operation parameter for the +* AmdExtD3DShaderIntrinsics_Swizzle intrinsic. +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX1 0x041f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX2 0x081f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX4 0x101f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX8 0x201f +#define AmdExtD3DShaderIntrinsicsSwizzle_SwapX16 0x401f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX2 0x041f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4 0x0c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX8 0x1c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX16 0x3c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_ReverseX32 0x7c1f +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX2 0x003e +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX4 0x003c +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX8 0x0038 +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX16 0x0030 +#define AmdExtD3DShaderIntrinsicsSwizzle_BCastX32 0x0020 + + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsBarycentric defines for barycentric interpolation mode. To be used with +* AmdExtD3DShaderIntrinsicsOpcode_BaryCoord to specify the interpolation mode. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsBarycentric_LinearCenter 0x1 +#define AmdExtD3DShaderIntrinsicsBarycentric_LinearCentroid 0x2 +#define AmdExtD3DShaderIntrinsicsBarycentric_LinearSample 0x3 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspCenter 0x4 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspCentroid 0x5 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspSample 0x6 +#define AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel 0x7 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsBarycentric defines for specifying vertex and parameter indices. To be used as inputs to +* the AmdExtD3DShaderIntrinsicsOpcode_VtxParam intrinsic +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsBarycentric_Vertex0 0x0 +#define AmdExtD3DShaderIntrinsicsBarycentric_Vertex1 0x1 +#define AmdExtD3DShaderIntrinsicsBarycentric_Vertex2 0x2 + +#define AmdExtD3DShaderIntrinsicsBarycentric_Param0 0x00 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param1 0x01 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param2 0x02 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param3 0x03 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param4 0x04 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param5 0x05 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param6 0x06 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param7 0x07 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param8 0x08 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param9 0x09 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param10 0x0a +#define AmdExtD3DShaderIntrinsicsBarycentric_Param11 0x0b +#define AmdExtD3DShaderIntrinsicsBarycentric_Param12 0x0c +#define AmdExtD3DShaderIntrinsicsBarycentric_Param13 0x0d 
+#define AmdExtD3DShaderIntrinsicsBarycentric_Param14 0x0e +#define AmdExtD3DShaderIntrinsicsBarycentric_Param15 0x0f +#define AmdExtD3DShaderIntrinsicsBarycentric_Param16 0x10 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param17 0x11 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param18 0x12 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param19 0x13 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param20 0x14 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param21 0x15 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param22 0x16 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param23 0x17 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param24 0x18 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param25 0x19 +#define AmdExtD3DShaderIntrinsicsBarycentric_Param26 0x1a +#define AmdExtD3DShaderIntrinsicsBarycentric_Param27 0x1b +#define AmdExtD3DShaderIntrinsicsBarycentric_Param28 0x1c +#define AmdExtD3DShaderIntrinsicsBarycentric_Param29 0x1d +#define AmdExtD3DShaderIntrinsicsBarycentric_Param30 0x1e +#define AmdExtD3DShaderIntrinsicsBarycentric_Param31 0x1f + +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentX 0x0 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentY 0x1 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentZ 0x2 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentW 0x3 + +#define AmdExtD3DShaderIntrinsicsBarycentric_ParamShift 0 +#define AmdExtD3DShaderIntrinsicsBarycentric_ParamMask 0x1f +#define AmdExtD3DShaderIntrinsicsBarycentric_VtxShift 0x5 +#define AmdExtD3DShaderIntrinsicsBarycentric_VtxMask 0x3 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift 0x7 +#define AmdExtD3DShaderIntrinsicsBarycentric_ComponentMask 0x3 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsAtomic defines for supported operations. Can be used as the parameter for the +* AmdExtD3DShaderIntrinsicsOpcode_AtomicU64 intrinsic. 
+*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsAtomicOp_MinU64 0x01 +#define AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64 0x02 +#define AmdExtD3DShaderIntrinsicsAtomicOp_AndU64 0x03 +#define AmdExtD3DShaderIntrinsicsAtomicOp_OrU64 0x04 +#define AmdExtD3DShaderIntrinsicsAtomicOp_XorU64 0x05 +#define AmdExtD3DShaderIntrinsicsAtomicOp_AddU64 0x06 +#define AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64 0x07 +#define AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64 0x08 + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsicsFloatConversion defines for supported rounding modes from float to float16 conversions. +* To be used as an input AmdExtD3DShaderIntrinsicsOpcode_FloatConversion instruction +*********************************************************************************************************************** +*/ +#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near 0x01 +#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf 0x02 +#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf 0x03 + + +/** +*********************************************************************************************************************** +* MakeAmdShaderIntrinsicsInstruction +* +* Creates instruction from supplied opcode and immediate data. +* NOTE: This is an internal function and should not be called by the source HLSL shader directly. 
+*
+***********************************************************************************************************************
+*/
+uint MakeAmdShaderIntrinsicsInstruction(uint opcode, uint opcodePhase, uint immediateData)
+{
+    // Shift every field to its position, then OR the four fields into a single instruction word.
+    uint magicField  = (AmdExtD3DShaderIntrinsics_MagicCode << AmdExtD3DShaderIntrinsics_MagicCodeShift);
+    uint dataField   = (immediateData << AmdExtD3DShaderIntrinsics_DataShift);
+    uint phaseField  = (opcodePhase << AmdExtD3DShaderIntrinsics_OpcodePhaseShift);
+    uint opcodeField = (opcode << AmdExtD3DShaderIntrinsics_OpcodeShift);
+
+    return (magicField | dataField | phaseField | opcodeField);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ReadfirstlaneF
+*
+* Reads float src from the first active lane of the wavefront and returns it.
+* The float is passed through the intrinsic as its raw bit pattern (asuint / asfloat).
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_ReadfirstlaneF(float src)
+{
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readfirstlane, 0, 0),
+        asuint(src),
+        0,
+        resultBits);
+
+    return asfloat(resultBits);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ReadfirstlaneU
+*
+* Returns the value of unsigned integer src for the first active lane of the wavefront.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_ReadfirstlaneU(uint src)
+{
+    uint result;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readfirstlane, 0, 0),
+        src,
+        0,
+        result);
+
+    return result;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ReadlaneF
+*
+* Reads float src from the lane of the wavefront selected by laneId and returns it.
+* laneId is carried in the immediate-data field of the encoded instruction.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_ReadlaneF(float src, uint laneId)
+{
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readlane, 0, laneId),
+        asuint(src),
+        0,
+        resultBits);
+
+    return asfloat(resultBits);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ReadlaneU
+*
+* Returns the value of unsigned integer src for the lane within the wavefront specified by laneId.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_ReadlaneU(uint src, uint laneId)
+{
+    // laneId is carried in the immediate-data field of the encoded instruction.
+    uint result;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Readlane, 0, laneId),
+        src,
+        0,
+        result);
+
+    return result;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_LaneId
+*
+* Returns the id of the lane that the current thread occupies within the wavefront.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_LaneId()
+{
+    // This opcode takes no source operands, so both data arguments are zero.
+    uint laneId;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_LaneId, 0, 0),
+        0,
+        0,
+        laneId);
+
+    return laneId;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_GetWaveSize
+*
+* Returns the wave size for the current shader, including active, inactive and helper lanes.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_GetWaveSize()
+{
+    // This opcode takes no source operands, so both data arguments are zero.
+    uint waveSize;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize, 0, 0),
+        0,
+        0,
+        waveSize);
+
+    return waveSize;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_SwizzleF
+*
+* Generic instruction that shuffles the float src value among different lanes as described by operation.
+* The operation parameter must be an immediately specified value, not a value read from a variable.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_SwizzleF(float src, uint operation)
+{
+    // The swizzle pattern is carried in the immediate-data field of the encoded instruction.
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Swizzle, 0, operation),
+        asuint(src),
+        0,
+        resultBits);
+
+    return asfloat(resultBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_SwizzleU
+*
+* Generic instruction to shuffle the unsigned integer src value among different lanes as specified by the operation.
+* Note that the operation parameter must be an immediately specified value not a value from a variable.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_SwizzleU(uint src, uint operation)
+{
+    // The swizzle pattern is carried in the immediate-data field of the encoded instruction.
+    uint result;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Swizzle, 0, operation),
+        src,
+        0,
+        result);
+
+    return result;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_Ballot
+*
+* Evaluates predicate on every lane and returns a bit mask telling for which lanes it is true.
+* Inactive or non-existent lanes always contribute 0. The number of existent lanes is the wavefront size.
+* The mask is fetched in two phases: phase 0 fills the .x component, phase 1 fills the .y component.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_Ballot(bool predicate)
+{
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Ballot,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     0);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Ballot,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     0);
+
+    uint maskX;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, predicate, 0, maskX);
+
+    uint maskY;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, predicate, 0, maskY);
+
+    return uint2(maskX, maskY);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_BallotAny
+*
+* Convenience routine that uses Ballot and returns true if for any of the active lanes the predicate is true.
+*
+***********************************************************************************************************************
+*/
+bool AmdExtD3DShaderIntrinsics_BallotAny(bool predicate)
+{
+    uint2 mask = AmdExtD3DShaderIntrinsics_Ballot(predicate);
+
+    // Any set bit in either half of the mask means at least one lane voted true.
+    return ((mask.x | mask.y) != 0);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_BallotAll
+*
+* Convenience routine built on Ballot: returns true when the predicate is true for all of the active lanes.
+* The predicate mask is compared against the mask produced by an always-true predicate.
+*
+***********************************************************************************************************************
+*/
+bool AmdExtD3DShaderIntrinsics_BallotAll(bool predicate)
+{
+    uint2 votes  = AmdExtD3DShaderIntrinsics_Ballot(predicate);
+    uint2 active = AmdExtD3DShaderIntrinsics_Ballot(true);
+
+    bool xMatches = (votes.x == active.x);
+    bool yMatches = (votes.y == active.y);
+
+    return (xMatches && yMatches);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_MBCnt
+*
+* Returns the masked bit count of the source register for this thread within all the active threads within a
+* wavefront. The two components of src are handed to the intrinsic as its two data operands.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_MBCnt(uint2 src)
+{
+    uint count;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_MBCnt, 0, 0),
+        src.x,
+        src.y,
+        count);
+
+    return count;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_Min3F
+*
+* Returns the minimum value of the three floating point source arguments.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_Min3F(float src0, float src1, float src2)
+{
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3F,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     0);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3F,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     0);
+
+    // Phase 0 receives src0 and src1; phase 1 receives src2 together with the phase-0 output and overwrites it.
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, asuint(src0), asuint(src1), resultBits);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, asuint(src2), resultBits, resultBits);
+
+    return asfloat(resultBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_Min3U
+*
+* Returns the smallest of the three unsigned integer source arguments.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_Min3U(uint src0, uint src1, uint src2)
+{
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3U,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     0);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Min3U,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     0);
+
+    // Phase 0 receives src0 and src1; phase 1 receives src2 together with the phase-0 output and overwrites it.
+    uint result;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, src0, src1, result);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, src2, result, result);
+
+    return result;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_Med3F
+*
+* Returns the median value of the three floating point source arguments.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_Med3F(float src0, float src1, float src2)
+{
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3F,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     0);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3F,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     0);
+
+    // Phase 0 receives src0 and src1; phase 1 receives src2 together with the phase-0 output and overwrites it.
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, asuint(src0), asuint(src1), resultBits);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, asuint(src2), resultBits, resultBits);
+
+    return asfloat(resultBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_Med3U
+*
+* Returns the middle value (median) of the three unsigned integer source arguments.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_Med3U(uint src0, uint src1, uint src2)
+{
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3U,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     0);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Med3U,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     0);
+
+    // Phase 0 receives src0 and src1; phase 1 receives src2 together with the phase-0 output and overwrites it.
+    uint result;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, src0, src1, result);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, src2, result, result);
+
+    return result;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_Max3F
+*
+* Returns the maximum value of the three floating point source arguments.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_Max3F(float src0, float src1, float src2)
+{
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3F,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     0);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3F,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     0);
+
+    // Phase 0 receives src0 and src1; phase 1 receives src2 together with the phase-0 output and overwrites it.
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, asuint(src0), asuint(src1), resultBits);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, asuint(src2), resultBits, resultBits);
+
+    return asfloat(resultBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_Max3U
+*
+* Returns the maximum value of the three unsigned integer source arguments.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_Max3U(uint src0, uint src1, uint src2)
+{
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3U,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     0);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_Max3U,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     0);
+
+    // Phase 0 receives src0 and src1; phase 1 receives src2 together with the phase-0 output and overwrites it.
+    uint result;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, src0, src1, result);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, src2, result, result);
+
+    return result;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_IjBarycentricCoords
+*
+* Returns the (i, j) barycentric coordinate pair for this shader invocation with the specified interpolation mode at
+* the specified pixel location. Should not be used for "pull-model" interpolation, PullModelBarycentricCoords should
+* be used instead
+*
+* Can only be used in pixel shader stages.
+*
+***********************************************************************************************************************
+*/
+float2 AmdExtD3DShaderIntrinsics_IjBarycentricCoords(uint interpMode)
+{
+    // interpMode is carried in the immediate-data field of both phase instructions.
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     interpMode);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     interpMode);
+
+    // Phase 0 yields the first coordinate; phase 1 is fed that value and yields the second coordinate.
+    uint firstBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, 0, 0, firstBits);
+
+    uint secondBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, firstBits, 0, secondBits);
+
+    return float2(asfloat(firstBits), asfloat(secondBits));
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_PullModelBarycentricCoords
+*
+* Returns the (1/W,1/I,1/J) coordinates at the pixel center which can be used for custom interpolation at any
+* location in the pixel.
+*
+* Can only be used in pixel shader stages.
+*
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_PullModelBarycentricCoords()
+{
+    // All three phases carry the pull-model interpolation mode in the immediate-data field.
+    uint phase0 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                     AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel);
+    uint phase1 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                     AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel);
+    uint phase2 = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaryCoord,
+                                                     AmdExtD3DShaderIntrinsicsOpcodePhase_2,
+                                                     AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel);
+
+    // Each phase after the first is fed the output of the phase before it.
+    uint firstBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0, 0, 0, firstBits);
+
+    uint secondBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1, firstBits, 0, secondBits);
+
+    uint thirdBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase2, secondBits, 0, thirdBits);
+
+    return float3(asfloat(firstBits), asfloat(secondBits), asfloat(thirdBits));
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_VertexParameter
+*
+* Returns the triangle's parameter information at the specified triangle vertex.
+* The vertex and parameter indices must be specified as immediate values.
+*
+* Only available in pixel shader stages.
+*
+***********************************************************************************************************************
+*/
+float4 AmdExtD3DShaderIntrinsics_VertexParameter(uint vertexIdx, uint parameterIdx)
+{
+    // Vertex and parameter selection bits are the same for all four component fetches.
+    uint selector = ((vertexIdx << AmdExtD3DShaderIntrinsicsBarycentric_VtxShift) |
+                     (parameterIdx << AmdExtD3DShaderIntrinsicsBarycentric_ParamShift));
+
+    // One phase-0 instruction per component; only the component bits differ between them.
+    uint fetchX = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_VtxParam,
+        AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+        (selector |
+         (AmdExtD3DShaderIntrinsicsBarycentric_ComponentX << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift)));
+    uint fetchY = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_VtxParam,
+        AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+        (selector |
+         (AmdExtD3DShaderIntrinsicsBarycentric_ComponentY << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift)));
+    uint fetchZ = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_VtxParam,
+        AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+        (selector |
+         (AmdExtD3DShaderIntrinsicsBarycentric_ComponentZ << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift)));
+    uint fetchW = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_VtxParam,
+        AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+        (selector |
+         (AmdExtD3DShaderIntrinsicsBarycentric_ComponentW << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift)));
+
+    uint4 bits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(fetchX, 0, 0, bits.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(fetchY, 0, 0, bits.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(fetchZ, 0, 0, bits.z);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(fetchW, 0, 0, bits.w);
+
+    return float4(asfloat(bits.x), asfloat(bits.y), asfloat(bits.z), asfloat(bits.w));
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_VertexParameterComponent
+*
+* Returns a single component of the triangle's parameter information at the specified triangle vertex.
+* The vertex, parameter and component indices must be specified as immediate values.
+*
+* Only available in pixel shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_VertexParameterComponent(uint vertexIdx, uint parameterIdx, uint componentIdx)
+{
+    // Vertex, parameter and component selections are packed together into the immediate-data field.
+    uint selector = ((vertexIdx << AmdExtD3DShaderIntrinsicsBarycentric_VtxShift) |
+                     (parameterIdx << AmdExtD3DShaderIntrinsicsBarycentric_ParamShift) |
+                     (componentIdx << AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift));
+
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_VtxParam,
+                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                           selector),
+        0,
+        0,
+        resultBits);
+
+    return asfloat(resultBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce
+*
+* Performs reduction operation on wavefront (thread group) data.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : float
+*
+* Scalar float reduction; waveOp selects the operation and is carried in the immediate-data field.
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float src)
+{
+    uint resultBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(
+        MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce,
+                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                           (waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift)),
+        asuint(src),
+        0,
+        resultBits);
+
+    return asfloat(resultBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : float2
+*
+* Reduces each component independently, in x, y order, through the scalar float overload.
+***********************************************************************************************************************
+*/
+float2 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float2 src)
+{
+    float2 reduced;
+    reduced.x = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.x);
+    reduced.y = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.y);
+
+    return reduced;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : float3
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float3 src)
+{
+    // Reduce each component independently, in x, y, z order, through the scalar float overload.
+    float3 reduced;
+    reduced.x = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.x);
+    reduced.y = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.y);
+    reduced.z = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.z);
+
+    return reduced;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : float4
+*
+* Reduces each component independently, in x, y, z, w order, through the scalar float overload.
+***********************************************************************************************************************
+*/
+float4 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, float4 src)
+{
+    float4 reduced;
+    reduced.x = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.x);
+    reduced.y = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.y);
+    reduced.z = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.z);
+    reduced.w = AmdExtD3DShaderIntrinsics_WaveReduce(waveOp, src.w);
+
+    return reduced;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : int
+***********************************************************************************************************************
+*/
+int AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int src)
+{
+    const uint opField = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            opField);
+
+    int reduced;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src, 0, reduced);
+
+    return reduced;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : int2
+*
+* Integer variant: src components are handed to the exchange directly, one call per component in x, y order.
+***********************************************************************************************************************
+*/
+int2 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int2 src)
+{
+    const uint opField = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            opField);
+
+    int2 reduced;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.x, 0, reduced.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.y, 0, reduced.y);
+
+    return reduced;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : int3
+*
+* Integer variant: one exchange per component in x, y, z order.
+***********************************************************************************************************************
+*/
+int3 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int3 src)
+{
+    const uint opField = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            opField);
+
+    int3 reduced;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.x, 0, reduced.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.y, 0, reduced.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.z, 0, reduced.z);
+
+    return reduced;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveReduce : int4
+*
+* Integer variant: one exchange per component in x, y, z, w order.
+***********************************************************************************************************************
+*/
+int4 AmdExtD3DShaderIntrinsics_WaveReduce(uint waveOp, int4 src)
+{
+    const uint opField = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveReduce,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            opField);
+
+    int4 reduced;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.x, 0, reduced.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.y, 0, reduced.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.z, 0, reduced.z);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.w, 0, reduced.w);
+
+    return reduced;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan
+*
+* Performs scan operation on wavefront (thread group) data.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : float
+*
+* Packs waveOp and flags into the immediate of a phase-0 WaveScan instruction token, passes the raw bits of
+* src through AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange and returns the value written back,
+* reinterpreted as float.
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    uint scannedBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, asuint(src), 0, scannedBits);
+
+    return asfloat(scannedBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : float2
+*
+* Same encoding as the scalar float overload; one exchange is issued per component, in x, y order.
+***********************************************************************************************************************
+*/
+float2 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float2 src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    const uint2 srcBits = asuint(src);
+    uint2 scannedBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.x, 0, scannedBits.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.y, 0, scannedBits.y);
+
+    return asfloat(scannedBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : float3
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float3 src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    const uint3 srcBits = asuint(src);
+    uint3 scannedBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.x, 0, scannedBits.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.y, 0, scannedBits.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.z, 0, scannedBits.z);
+
+    return asfloat(scannedBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : float4
+*
+* Issues one phase-0 WaveScan exchange per component (x, y, z, w) on the raw bits of src and returns the four
+* values written back, reinterpreted as float4.
+***********************************************************************************************************************
+*/
+float4 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, float4 src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    const uint4 srcBits = asuint(src);
+    uint4 scannedBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.x, 0, scannedBits.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.y, 0, scannedBits.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.z, 0, scannedBits.z);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, srcBits.w, 0, scannedBits.w);
+
+    return asfloat(scannedBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : int
+*
+* Integer variant: src is handed to the exchange directly and the value written back is returned as int.
+***********************************************************************************************************************
+*/
+int AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    int scanned;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src, 0, scanned);
+
+    return scanned;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : int2
+*
+* Integer variant: one exchange per component in x, y order.
+***********************************************************************************************************************
+*/
+int2 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int2 src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    int2 scanned;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.x, 0, scanned.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.y, 0, scanned.y);
+
+    return scanned;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : int3
+***********************************************************************************************************************
+*/
+int3 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int3 src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    int3 scanned;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.x, 0, scanned.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.y, 0, scanned.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.z, 0, scanned.z);
+
+    return scanned;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveScan : int4
+*
+* Integer variant: one exchange per component in x, y, z, w order.
+***********************************************************************************************************************
+*/
+int4 AmdExtD3DShaderIntrinsics_WaveScan(uint waveOp, uint flags, int4 src)
+{
+    const uint opField   = waveOp << AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift;
+    const uint flagField = flags << AmdExtD3DShaderIntrinsicsWaveOp_FlagShift;
+    const uint opToken   = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_WaveScan,
+                                                              AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                              opField | flagField);
+
+    int4 scanned;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.x, 0, scanned.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.y, 0, scanned.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.z, 0, scanned.z);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src.w, 0, scanned.w);
+
+    return scanned;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_LoadDwordAtAddr
+*
+* Loads a DWORD from GPU memory from a given 64-bit GPU VA and 32-bit offset.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_LoadDwordAtAddr
+*
+* Two-phase sequence: phase 0 carries the low and high halves of the address, phase 1 carries the offset.
+* The value written back by the phase-1 exchange is the one returned.
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(uint gpuVaLoBits, uint gpuVaHiBits, uint offset)
+{
+    uint loadedDword;
+
+    const uint addressToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_LoadDwAtAddr,
+                                                                 AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                                 0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(addressToken, gpuVaLoBits, gpuVaHiBits, loadedDword);
+
+    const uint offsetToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_LoadDwAtAddr,
+                                                                AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                                0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(offsetToken, offset, 0, loadedDword);
+
+    return loadedDword;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx2
+*
+* Two scalar loads, at offset and offset + 4, returned in x and y.
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx2(uint gpuVaLoBits, uint gpuVaHiBits, uint offset)
+{
+    uint2 dwords;
+
+    dwords.x = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset);
+    dwords.y = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 4);
+
+    return dwords;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx4
+*
+* Four scalar loads, at offset, offset + 4, offset + 8 and offset + 12, returned in x, y, z and w.
+***********************************************************************************************************************
+*/
+uint4 AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx4(uint gpuVaLoBits, uint gpuVaHiBits, uint offset)
+{
+    uint4 dwords;
+
+    dwords.x = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset);
+    dwords.y = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 4);
+    dwords.z = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 8);
+    dwords.w = AmdExtD3DShaderIntrinsics_LoadDwordAtAddr(gpuVaLoBits, gpuVaHiBits, offset + 12);
+
+    return dwords;
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_GetDrawIndex
+*
+* Returns the 0-based draw index in an indirect draw. Always returns 0 for direct draws.
+*
+* Available in vertex shader stage only.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_GetDrawIndex()
+{
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_DrawIndex,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            0);
+
+    uint drawIndex;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, 0, 0, drawIndex);
+
+    return drawIndex;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_GetBaseInstance
+*
+* Returns the StartInstanceLocation parameter passed to direct or indirect drawing commands.
+*
+* Available in vertex shader stage only.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_GetBaseInstance()
+{
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaseInstance,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            0);
+
+    uint baseInstance;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, 0, 0, baseInstance);
+
+    return baseInstance;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_GetBaseVertex
+*
+* For non-indexed draw commands, returns the StartVertexLocation parameter. For indexed draw commands, returns the
+* BaseVertexLocation parameter.
+*
+* Available in vertex shader stage only.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_GetBaseVertex()
+{
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_BaseVertex,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            0);
+
+    uint baseVertex;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, 0, 0, baseVertex);
+
+    return baseVertex;
+}
+
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ReadlaneAt : uint
+*
+* Returns the value of the source for the given lane index within the specified wave. The lane index
+* can be non-uniform across the wave.
+*
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_ReadlaneAt(uint src, uint laneId)
+{
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            0);
+
+    uint laneValue;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, src, laneId, laneValue);
+
+    return laneValue;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ReadlaneAt : int
+*
+* Passes the raw bits of src with laneId and reinterprets the value written back as int.
+***********************************************************************************************************************
+*/
+int AmdExtD3DShaderIntrinsics_ReadlaneAt(int src, uint laneId)
+{
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            0);
+
+    uint laneBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, asuint(src), laneId, laneBits);
+
+    return asint(laneBits);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ReadlaneAt : float
+*
+* Passes the raw bits of src with laneId and reinterprets the value written back as float.
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_ReadlaneAt(float src, uint laneId)
+{
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            0);
+
+    uint laneBits;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, asuint(src), laneId, laneBits);
+
+    return asfloat(laneBits);
+}
+
+/**
+***********************************************************************************************************************
+*
AmdExtD3DShaderIntrinsics_ConvertF32toF16
+*
+* Converts 32bit floating point numbers into 16bit floating point number using a specified rounding mode
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ConvertF32toF16 - helper to convert f32 to f16 number
+*
+* Issues one phase-0 FloatConversion exchange per component of val (x, y, z) with convOp as the immediate and
+* returns the three values written back without reinterpretation.
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16(in uint convOp, in float3 val)
+{
+    const uint opToken = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_FloatConversion,
+                                                            AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                            convOp);
+
+    const uint3 valBits = asuint(val);
+    uint3 converted;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, valBits.x, 0, converted.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, valBits.y, 0, converted.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opToken, valBits.z, 0, converted.z);
+
+    return converted;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ConvertF32toF16Near - convert f32 to f16 number using nearest rounding mode
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16Near(in float3 inVec)
+{
+    const uint roundingOp = AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near;
+    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(roundingOp, inVec);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf - convert f32 to f16 number using -inf rounding mode
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf(in float3 inVec)
+{
+    const uint roundingOp = AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf;
+    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(roundingOp, inVec);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf - convert f32 to f16 number using +inf rounding mode
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf(in float3 inVec)
+{
+    const uint roundingOp = AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf;
+    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(roundingOp, inVec);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ShaderClock
+*
+* Returns the current value of the timestamp clock. The value monotonically increments and will wrap after it
+* exceeds the maximum representable value. The units are not defined and need not be constant, and the value
+* is not guaranteed to be dynamically uniform across a single draw or dispatch.
+*
+* The function serves as a code motion barrier. Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ShaderClock
+*
+* Issues the ShaderClock opcode twice, phase 0 then phase 1, with zero operands. The phase-0 result is
+* returned in x and the phase-1 result in y.
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_ShaderClock()
+{
+    uint2 clockValue;
+
+    const uint phase0Token = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderClock,
+                                                                AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                                0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0Token, 0, 0, clockValue.x);
+
+    const uint phase1Token = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderClock,
+                                                                AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                                0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1Token, 0, 0, clockValue.y);
+
+    return clockValue;
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ShaderRealtimeClock
+*
+* Returns a value representing the real-time clock that is globally coherent by all invocations on the GPU.
+* The units are not defined and the value will wrap after exceeding the maximum representable value.
+*
+* The function serves as a code motion barrier. Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_ShaderRealtimeClock
+*
+* Issues the ShaderRealtimeClock opcode twice, phase 0 then phase 1, with zero operands. The phase-0 result
+* is returned in x and the phase-1 result in y.
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_ShaderRealtimeClock()
+{
+    uint2 clockValue;
+
+    const uint phase0Token = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderRealtimeClock,
+                                                                AmdExtD3DShaderIntrinsicsOpcodePhase_0,
+                                                                0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase0Token, 0, 0, clockValue.x);
+
+    const uint phase1Token = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ShaderRealtimeClock,
+                                                                AmdExtD3DShaderIntrinsicsOpcodePhase_1,
+                                                                0);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phase1Token, 0, 0, clockValue.y);
+
+    return clockValue;
+}
+
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_MakeAtomicInstructions
+*
+* Creates uint4 with x/y/z/w components containing phase 0/1/2/3 for atomic instructions.
+* NOTE: This is an internal function and should not be called by the source HLSL shader directly.
+*
+***********************************************************************************************************************
+*/
+uint4 AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(uint op)
+{
+    const uint phase0Token = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_0, op);
+    const uint phase1Token = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_1, op);
+    const uint phase2Token = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_2, op);
+    const uint phase3Token = MakeAmdShaderIntrinsicsInstruction(
+        AmdExtD3DShaderIntrinsicsOpcode_AtomicU64, AmdExtD3DShaderIntrinsicsOpcodePhase_3, op);
+
+    return uint4(phase0Token, phase1Token, phase2Token, phase3Token);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicOp
+*
+* Creates intrinsic instructions for the specified atomic op.
+* NOTE: These are internal functions and should not be called by the source HLSL shader directly.
+*
+* Sequence shared by every overload below: the phase-0 exchange carries address.x / address.y, the phase-1
+* exchange carries address.z / value.x, the two values written back are then used for a write to uav, and the
+* phase-2 exchange carries value.y. The compare-exchange overloads pass compare_value.x with phase 2 and add a
+* phase-3 exchange carrying compare_value.y. The statement order is kept exactly as in the original.
+*
+* NOTE(review): the RWTexture1D/2D/3D parameters are declared without an element type here; the 64-bit payload
+* suggests <uint2> was intended - confirm against the upstream header before relying on these overloads.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWByteAddressBuffer uav, uint3 address, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav.Store(original.x, original.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, original.y, original.y);
+
+    return original;
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWTexture1D uav, uint3 address, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav[original.x] = original.y;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, original.y, original.y);
+
+    return original;
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWTexture2D uav, uint3 address, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav[uint2(original.x, original.x)] = original.y;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, original.y, original.y);
+
+    return original;
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(RWTexture3D uav, uint3 address, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav[uint3(original.x, original.x, original.x)] = original.y;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, original.y, original.y);
+
+    return original;
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(
+    RWByteAddressBuffer uav, uint3 address, uint2 compare_value, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav.Store(original.x, original.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, compare_value.x, original.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.w, compare_value.y, original.y, original.y);
+
+    return original;
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(
+    RWTexture1D uav, uint3 address, uint2 compare_value, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav[original.x] = original.y;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, compare_value.x, original.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.w, compare_value.y, original.y, original.y);
+
+    return original;
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(
+    RWTexture2D uav, uint3 address, uint2 compare_value, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav[uint2(original.x, original.x)] = original.y;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, compare_value.x, original.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.w, compare_value.y, original.y, original.y);
+
+    return original;
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOp(
+    RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value, uint op)
+{
+    const uint4 phaseTokens = AmdExtD3DShaderIntrinsics_MakeAtomicInstructions(op);
+
+    uint2 original;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.x, address.x, address.y, original.x);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.y, address.z, value.x, original.y);
+    uav[uint3(original.x, original.x, original.x)] = original.y;
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.z, value.y, compare_value.x, original.y);
+    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(phaseTokens.w, compare_value.y, original.y, original.y);
+
+    return original;
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicMinU64
+*
+* Performs 64-bit atomic minimum of value with the UAV at address, returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWTexture1D uav, uint address, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicMinU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_MinU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicMaxU64
+*
+* Performs 64-bit atomic maximum of value with the UAV at address, returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWTexture1D uav, uint address, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicMaxU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicAndU64
+*
+* Performs 64-bit atomic AND of value with the UAV at address, returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWTexture1D uav, uint address, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicAndU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_AndU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicOrU64
+*
+* Performs 64-bit atomic OR of value with the UAV at address, returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWTexture1D uav, uint address, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicOrU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_OrU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicXorU64
+*
+* Performs 64-bit atomic XOR of value with the UAV at address, returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWTexture1D uav, uint address, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicXorU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_XorU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicAddU64
+*
+* Performs 64-bit atomic add of value with the UAV at address, returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWTexture1D uav, uint address, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicAddU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_AddU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicXchgU64
+*
+* Performs 64-bit atomic exchange of value with the UAV at address, returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWByteAddressBuffer uav, uint address, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWTexture1D uav, uint address, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWTexture2D uav, uint2 address, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicXchgU64(RWTexture3D uav, uint3 address, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, value, opcode);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64
+*
+* Performs 64-bit atomic compare of comparison value with UAV at address, stores value if values match,
+* returns the original value.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64(
+    RWByteAddressBuffer uav, uint address, uint2 compare_value, uint2 value)
+{
+    // Buffer overload: the single offset goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, compare_value, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64(
+    RWTexture1D uav, uint address, uint2 compare_value, uint2 value)
+{
+    // 1D texture overload: the coordinate goes in x; y and z are passed as zero.
+    const uint3 location = uint3(address, 0, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, compare_value, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64(
+    RWTexture2D uav, uint2 address, uint2 compare_value, uint2 value)
+{
+    // 2D texture overload: the coordinate pair is widened with a zero z component.
+    const uint3 location = uint3(address, 0);
+    const uint  opcode   = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, location, compare_value, value, opcode);
+}
+
+uint2 AmdExtD3DShaderIntrinsics_AtomicCmpXchgU64(
+    RWTexture3D uav, uint3 address, uint2 compare_value, uint2 value)
+{
+    // 3D texture overload: the coordinate is already a uint3 and is forwarded unchanged.
+    const uint opcode = AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64;
+    return AmdExtD3DShaderIntrinsics_AtomicOp(uav, address, compare_value, value, opcode);
+}
+
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum
+*
+* Performs reduction operation across a wave and returns the result of the reduction (sum of all threads in a wave)
+* to all participating lanes.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_WaveActiveSum(float src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (float2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddF.
+* NOTE(review): presumably reduced per component -- WaveReduce is defined outside this chunk; confirm.
+***********************************************************************************************************************
+*/
+float2 AmdExtD3DShaderIntrinsics_WaveActiveSum(float2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (float3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddF.
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_WaveActiveSum(float3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (float4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddF.
+***********************************************************************************************************************
+*/
+float4 AmdExtD3DShaderIntrinsics_WaveActiveSum(float4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (int overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddI.
+***********************************************************************************************************************
+*/
+int AmdExtD3DShaderIntrinsics_WaveActiveSum(int src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (int2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddI.
+***********************************************************************************************************************
+*/
+int2 AmdExtD3DShaderIntrinsics_WaveActiveSum(int2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (int3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddI.
+***********************************************************************************************************************
+*/
+int3 AmdExtD3DShaderIntrinsics_WaveActiveSum(int3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (int4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddI.
+***********************************************************************************************************************
+*/
+int4 AmdExtD3DShaderIntrinsics_WaveActiveSum(int4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (uint overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddU.
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_WaveActiveSum(uint src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (uint2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddU.
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_WaveActiveSum(uint2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (uint3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddU.
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_WaveActiveSum(uint3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveSum (uint4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_AddU.
+***********************************************************************************************************************
+*/
+uint4 AmdExtD3DShaderIntrinsics_WaveActiveSum(uint4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_AddU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct
+*
+* Performs reduction operation across a wave and returns the result of the reduction (product of all threads in a
+* wave) to all participating lanes.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_WaveActiveProduct(float src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (float2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulF.
+* NOTE(review): presumably reduced per component -- WaveReduce is defined outside this chunk; confirm.
+***********************************************************************************************************************
+*/
+float2 AmdExtD3DShaderIntrinsics_WaveActiveProduct(float2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (float3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulF.
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_WaveActiveProduct(float3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (float4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulF.
+***********************************************************************************************************************
+*/
+float4 AmdExtD3DShaderIntrinsics_WaveActiveProduct(float4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (int overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulI.
+***********************************************************************************************************************
+*/
+int AmdExtD3DShaderIntrinsics_WaveActiveProduct(int src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (int2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulI.
+***********************************************************************************************************************
+*/
+int2 AmdExtD3DShaderIntrinsics_WaveActiveProduct(int2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (int3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulI.
+***********************************************************************************************************************
+*/
+int3 AmdExtD3DShaderIntrinsics_WaveActiveProduct(int3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (int4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulI.
+***********************************************************************************************************************
+*/
+int4 AmdExtD3DShaderIntrinsics_WaveActiveProduct(int4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (uint overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulU.
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (uint2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulU.
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (uint3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulU.
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveProduct (uint4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MulU.
+***********************************************************************************************************************
+*/
+uint4 AmdExtD3DShaderIntrinsics_WaveActiveProduct(uint4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MulU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin
+*
+* Performs reduction operation across a wave and returns the result of the reduction (minimum of all threads in a
+* wave) to all participating lanes.
+*
+* Available in all shader stages.
+*
+***********************************************************************************************************************
+*/
+float AmdExtD3DShaderIntrinsics_WaveActiveMin(float src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (float2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinF.
+* NOTE(review): presumably reduced per component -- WaveReduce is defined outside this chunk; confirm.
+***********************************************************************************************************************
+*/
+float2 AmdExtD3DShaderIntrinsics_WaveActiveMin(float2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (float3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinF.
+***********************************************************************************************************************
+*/
+float3 AmdExtD3DShaderIntrinsics_WaveActiveMin(float3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (float4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinF.
+***********************************************************************************************************************
+*/
+float4 AmdExtD3DShaderIntrinsics_WaveActiveMin(float4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinF, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (int overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinI.
+***********************************************************************************************************************
+*/
+int AmdExtD3DShaderIntrinsics_WaveActiveMin(int src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (int2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinI.
+***********************************************************************************************************************
+*/
+int2 AmdExtD3DShaderIntrinsics_WaveActiveMin(int2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (int3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinI.
+***********************************************************************************************************************
+*/
+int3 AmdExtD3DShaderIntrinsics_WaveActiveMin(int3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (int4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinI.
+***********************************************************************************************************************
+*/
+int4 AmdExtD3DShaderIntrinsics_WaveActiveMin(int4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinI, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (uint overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinU.
+***********************************************************************************************************************
+*/
+uint AmdExtD3DShaderIntrinsics_WaveActiveMin(uint src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (uint2 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinU.
+***********************************************************************************************************************
+*/
+uint2 AmdExtD3DShaderIntrinsics_WaveActiveMin(uint2 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (uint3 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinU.
+***********************************************************************************************************************
+*/
+uint3 AmdExtD3DShaderIntrinsics_WaveActiveMin(uint3 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMin (uint4 overload)
+*
+* Forwards src to AmdExtD3DShaderIntrinsics_WaveReduce with AmdExtD3DShaderIntrinsicsWaveOp_MinU.
+***********************************************************************************************************************
+*/
+uint4 AmdExtD3DShaderIntrinsics_WaveActiveMin(uint4 src)
+{
+    return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MinU, src);
+}
+
+/**
+***********************************************************************************************************************
+* AmdExtD3DShaderIntrinsics_WaveActiveMax
+*
+* Performs reduction operation across a wave and returns the result of the reduction (maximum of all threads in a
+* wave) to all participating lanes.
+*
+* Available in all shader stages.
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WaveActiveMax(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WaveActiveMax(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WaveActiveMax(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WaveActiveMax(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveMax(int src) +{ + return 
AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveMax(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveMax(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveMax(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveMax(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveMax(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveMax(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveMax +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveMax(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +* +* Performs a reduction operation across a wave and returns the result of the reduction (bitwise AND of all threads in +* a wave) to all participating lanes. +* +* Available in all shader stages.
+* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitAnd +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveBitAnd(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_And, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +* +* Performs 
a reduction operation across a wave and returns the result of the reduction (bitwise OR of all threads in +* a wave) to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +int4
AmdExtD3DShaderIntrinsics_WaveActiveBitOr(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitOr +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WaveActiveBitOr(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Or, src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +* +* Performs a reduction operation across a wave and returns the result of the reduction (bitwise XOR of all threads in +* a wave) to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +*
AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WaveActiveBitXor +*********************************************************************************************************************** +*/ +uint4 
AmdExtD3DShaderIntrinsics_WaveActiveBitXor(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveReduce(AmdExtD3DShaderIntrinsicsWaveOp_Xor, src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +* +* Performs a prefix (exclusive) scan operation across a wave and returns the resulting sum to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePrefixSum(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePrefixSum(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePrefixSum(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePrefixSum(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePrefixSum(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePrefixSum(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePrefixSum(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePrefixSum(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePrefixSum(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePrefixSum(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePrefixSum(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixSum 
+*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePrefixSum(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +* +* Performs a prefix (exclusive) scan across a wave and returns the resulting product to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePrefixProduct(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePrefixProduct(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePrefixProduct(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/**
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePrefixProduct(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePrefixProduct(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePrefixProduct(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePrefixProduct(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} 
+ +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePrefixProduct(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + 
src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixProduct +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePrefixProduct(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +* +* Performs a prefix (exclusive) scan across a wave and returns the resulting minimum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePrefixMin(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePrefixMin(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +float3
AmdExtD3DShaderIntrinsics_WavePrefixMin(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePrefixMin(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePrefixMin(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePrefixMin(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int3 
AmdExtD3DShaderIntrinsics_WavePrefixMin(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePrefixMin(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePrefixMin(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePrefixMin(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint3 
AmdExtD3DShaderIntrinsics_WavePrefixMin(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMin +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePrefixMin(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +* +* Performs a prefix (exclusive) scan across a wave and returns the resulting maximum value to all participating lanes. +* +* Available in all shader stages.
+* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePrefixMax(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePrefixMax(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePrefixMax(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePrefixMax(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax 
+*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePrefixMax(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePrefixMax(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePrefixMax(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePrefixMax(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax 
+*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePrefixMax(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePrefixMax(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePrefixMax(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePrefixMax +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePrefixMax(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Exclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +* +* Performs a 
Postfix (Inclusive) scan operation across a wave and returns the resulting sum to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePostfixSum(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePostfixSum(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePostfixSum(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePostfixSum(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePostfixSum(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePostfixSum(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePostfixSum(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePostfixSum(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePostfixSum(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePostfixSum(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePostfixSum(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixSum +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePostfixSum(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_AddU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** 
+*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +* +* Performs a Postfix scan operation across a wave and returns the resulting product to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePostfixProduct(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePostfixProduct(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePostfixProduct(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +float4 
AmdExtD3DShaderIntrinsics_WavePostfixProduct(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePostfixProduct(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePostfixProduct(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePostfixProduct(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct 
+*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePostfixProduct(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* 
AmdExtD3DShaderIntrinsics_WavePostfixProduct +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePostfixProduct(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MulU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +* +* Performs a Postfix scan operation across a wave and returns the resulting minimum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePostfixMin(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePostfixMin(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePostfixMin(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + 
AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePostfixMin(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePostfixMin(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePostfixMin(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePostfixMin(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + 
AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePostfixMin(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePostfixMin(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePostfixMin(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePostfixMin(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + 
AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMin +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePostfixMin(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MinU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +* +* Performs a Postfix scan operation across a wave and returns the resulting maximum value to all participating lanes. +* +* Available in all shader stages. +* +*********************************************************************************************************************** +*/ +float AmdExtD3DShaderIntrinsics_WavePostfixMax(float src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +float2 AmdExtD3DShaderIntrinsics_WavePostfixMax(float2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +float3 AmdExtD3DShaderIntrinsics_WavePostfixMax(float3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +float4 AmdExtD3DShaderIntrinsics_WavePostfixMax(float4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxF, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int AmdExtD3DShaderIntrinsics_WavePostfixMax(int src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int2 AmdExtD3DShaderIntrinsics_WavePostfixMax(int2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +int3 AmdExtD3DShaderIntrinsics_WavePostfixMax(int3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +int4 AmdExtD3DShaderIntrinsics_WavePostfixMax(int4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxI, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint AmdExtD3DShaderIntrinsics_WavePostfixMax(uint src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint2 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint2 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax 
+*********************************************************************************************************************** +*/ +uint3 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint3 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +/** +*********************************************************************************************************************** +* AmdExtD3DShaderIntrinsics_WavePostfixMax +*********************************************************************************************************************** +*/ +uint4 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint4 src) +{ + return AmdExtD3DShaderIntrinsics_WaveScan(AmdExtD3DShaderIntrinsicsWaveOp_MaxU, + AmdExtD3DShaderIntrinsicsWaveOp_Inclusive, + src); +} + +#if defined (AGS_RAY_HIT_TOKEN) + +//===================================================================================================================== +struct AmdExtRtHitToken +{ + uint dword[2]; +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsRT structure when included in a Ray Tracing payload will indicate to the driver +* that the dwords are already supplied in AmdExtRtHitTokenIn and only requires a call to intersect +* ray, bypassing the traversal of the acceleration structure. +*********************************************************************************************************************** +*/ +struct AmdExtRtHitTokenIn : AmdExtRtHitToken { }; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsRT structure when included in a Ray Tracing payload will indicate to the driver +* that the dwords must be patched into the payload after traversal. 
The application can store this +* data in a buffer which can then be used for hit group sorting so shading divergence can be avoided. +*********************************************************************************************************************** +*/ +struct AmdExtRtHitTokenOut : AmdExtRtHitToken { }; + +/** +*********************************************************************************************************************** +* @brief +* Group shared memory reserved for temporary storage of hit tokens. Not intended to be touched by the app shader. +* Application shader must only use the extension functions defined below to access the hit tokens. +* +*********************************************************************************************************************** +*/ +groupshared AmdExtRtHitToken AmdHitToken; + +/** +*********************************************************************************************************************** +* @brief +* Accessor function to obtain the hit tokens from the last call to TraceRay(). The data returned by this +* function only guarantees valid values for the last call to TraceRay() prior to calling this function. +* +*********************************************************************************************************************** +*/ +uint2 AmdGetLastHitToken() +{ + return uint2(AmdHitToken.dword[0], AmdHitToken.dword[1]); +} + +/** +*********************************************************************************************************************** +* @brief +* This function initialises hit tokens for the subsequent TraceRay() call. Note, any TraceRay() that intends to use +* these hit tokens must include this function call in the same basic block. Applications can use a convenience macro +* defined below to enforce that.
+* +*********************************************************************************************************************** +*/ +void AmdSetHitToken(uint2 token) +{ + AmdHitToken.dword[0] = token.x; + AmdHitToken.dword[1] = token.y; +} + +/** +*********************************************************************************************************************** +* @brief +* Convenience macro for calling TraceRays that uses the hit token +* +*********************************************************************************************************************** +*/ +#define AmdTraceRay(accelStruct, \ + rayFlags, \ + instanceInclusionMask, \ + rayContributionToHitGroupIndex, \ + geometryMultiplier, \ + missShaderIndex, \ + ray, \ + payload, \ + token) \ +AmdSetHitToken(token); \ +TraceRay(accelStruct, \ + rayFlags, \ + instanceInclusionMask, \ + rayContributionToHitGroupIndex, \ + geometryMultiplier, \ + missShaderIndex, \ + ray, \ + payload); \ + +#endif // AGS_RAY_HIT_TOKEN + +#endif // _AMDEXTD3DSHADERINTRINICS_HLSL diff --git a/Source/ThirdParty/AGS/amd_ags.h b/Source/ThirdParty/AGS/amd_ags.h new file mode 100644 index 000000000..ad0f5c4bc --- /dev/null +++ b/Source/ThirdParty/AGS/amd_ags.h @@ -0,0 +1,1394 @@ +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +/// \file +/// \mainpage +/// AGS Library Overview +/// -------------------- +/// This document provides an overview of the AGS (AMD GPU Services) library. The AGS library provides software developers with the ability to query +/// AMD GPU software and hardware state information that is not normally available through standard operating systems or graphic APIs. +/// +/// The latest version of the API is publicly hosted here: https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK/. +/// It is also worth checking http://gpuopen.com/gaming-product/amd-gpu-services-ags-library/ for any updates and articles on AGS. +/// \internal +/// Online documentation is publicly hosted here: http://gpuopen-librariesandsdks.github.io/ags/ +/// \endinternal +/// +/// --------------------------------------- +/// What's new in AGS 6.3 since version 6.2 +/// --------------------------------------- +/// AGS 6.3 includes the following updates: +/// * RDNA4 detection +/// * More robust driver version and GPU specifications (eg. numCUs, teraFlops) detection. Prior AGS versions may return empty values for these fields in certain cases like after dirty driver install. +/// * agsGetGPUInfo function to fill in a \ref AGSGPUInfo structure at any point after calling \ref agsInitialize. +/// * \ref agsSetDisplayMode is marked as deprecated. 
Please use DXGI for HDR10 and Freesync HDR +/// +/// --------------------------------------- +/// What's new in AGS 6.2 since version 6.1 +/// --------------------------------------- +/// AGS 6.2 includes the following updates: +/// * Shader clock intrinsics +/// * Minor improvements and fixes +/// +/// --------------------------------------- +/// What's new in AGS 6.1 since version 6.0 +/// --------------------------------------- +/// AGS 6.1 includes the following updates: +/// * RDNA3 detection +/// +/// --------------------------------------- +/// What's new in AGS 6.0 since version 5.4.2 +/// --------------------------------------- +/// AGS 6.0 includes the following updates: +/// * DX12 ray tracing hit token for RDNA2 hardware. +/// * Shader intrinsic that exposes ReadLaneAt in DX12. +/// * Shader intrinsics that expose explicit float conversions in DX12. +/// * Refactored and revised API to minimize user error. +/// * Added agsGetVersionNumber. +/// * Detection for external GPUs. +/// * Detection of RDNA2 architecture. +/// * Grouped the more established intrinsics together into per year support. +/// * Function pointer typedefs for the API +/// +/// --------------------------------------- +/// What's new in AGS 5.4.2 since version 5.4.1 +/// --------------------------------------- +/// AGS 5.4.2 includes the following updates: +/// * sharedMemoryInBytes has been reinstated. +/// * Clock speed returned for APUs. +/// +/// --------------------------------------- +/// What's new in AGS 5.4.1 since version 5.4.0 +/// --------------------------------------- +/// AGS 5.4.1 includes the following updates: +/// * AsicFamily_Count to help with code maintenance. +/// * Visual Studio 2019 support. +/// * x86 support +/// * BaseInstance and BaseVertex intrinsics along with corresponding caps bits. +/// * GetWaveSize intrinsic along with corresponding caps bits. 
+/// +/// --------------------------------------- +/// What's new in AGS 5.4 since version 5.3 +/// --------------------------------------- +/// AGS 5.4 includes the following updates: +/// * A more detailed description of the GPU architecture, now including RDNA GPUs. +/// * Radeon 7 core and memory speeds returned. +/// * Draw index and Atomic U64 intrinsics for both DX11 and DX12. +/// +/// --------------------------------------- +/// What's new in AGS 5.3 since version 5.2 +/// --------------------------------------- +/// AGS 5.3 includes the following updates: +/// * DX11 deferred context support for Multi Draw Indirect and UAV Overlap extensions. +/// * A Radeon Software Version helper to determine whether the installed driver meets your game's minimum driver version requirements. +/// * Freesync HDR Gamma 2.2 mode which uses a 1010102 swapchain and can be considered as an alternative to using the 64 bit swapchain required for Freesync HDR scRGB. +/// +/// Using the AGS library +/// --------------------- +/// It is recommended to take a look at the source code for the samples that come with the AGS SDK: +/// * AGSSample +/// * CrossfireSample +/// * EyefinitySample +/// The AGSSample application is the simplest of the three examples and demonstrates the code required to initialize AGS and use it to query the GPU and Eyefinity state. +/// The CrossfireSample application demonstrates the use of the new API to transfer resources on GPUs in Crossfire mode. Lastly, the EyefinitySample application provides a more +/// extensive example of Eyefinity setup than the basic example provided in AGSSample. +/// There are other samples on Github that demonstrate the DirectX shader extensions, such as the Barycentrics11 and Barycentrics12 samples. +/// +/// To add AGS support to an existing project, follow these steps: +/// * Link your project against the correct import library. Choose from either the 32 bit or 64 bit version. 
+/// * Copy the AGS dll into the same directory as your game executable. +/// * Include the amd_ags.h header file from your source code. +/// * Include the AGS hlsl files if you are using the shader intrinsics. +/// * Declare a pointer to an AGSContext and make this available for all subsequent calls to AGS. +/// * On game initialization, call \ref agsInitialize passing in the address of the context. On success, this function will return a valid context pointer. +/// +/// Don't forget to clean up AGS by calling \ref agsDeInitialize when the app exits, after the device has been destroyed. + +#ifndef AMD_AGS_H +#define AMD_AGS_H + +#define AMD_AGS_VERSION_MAJOR 6 ///< AGS major version +#define AMD_AGS_VERSION_MINOR 3 ///< AGS minor version +#define AMD_AGS_VERSION_PATCH 0 ///< AGS patch version + +#ifdef __cplusplus +extern "C" { +#endif + +/// \defgroup Defines AGS defines +/// @{ +#if defined (AGS_GCC) +#define AMD_AGS_API +#else +#define AMD_AGS_API __declspec(dllexport) ///< AGS exported functions +#endif + +#define AGS_MAKE_VERSION( major, minor, patch ) ( ( major << 22 ) | ( minor << 12 ) | patch ) ///< Macro to create the app and engine versions for the fields in \ref AGSDX12ExtensionParams and \ref AGSDX11ExtensionParams and the Radeon Software Version. The arguments are expanded without parentheses, so pass plain constants or identifiers rather than compound expressions +#define AGS_UNSPECIFIED_VERSION 0xFFFFAD00 ///< Use this to specify no version +#define AGS_CURRENT_VERSION AGS_MAKE_VERSION( AMD_AGS_VERSION_MAJOR, AMD_AGS_VERSION_MINOR, AMD_AGS_VERSION_PATCH ) ///< Macro to return the current AGS version as defined by the AGS header file +/// @} + +#if !defined (AGS_EXCLUDE_DIRECTX_TYPES) +// Forward declaration of D3D and DXGI types +typedef struct IDXGIAdapter IDXGIAdapter; +typedef struct IDXGISwapChain IDXGISwapChain; +typedef struct DXGI_SWAP_CHAIN_DESC DXGI_SWAP_CHAIN_DESC; +enum D3D_DRIVER_TYPE; +enum D3D_FEATURE_LEVEL; +enum D3D_PRIMITIVE_TOPOLOGY; + +#if !defined (AGS_EXCLUDE_DIRECTX_11) +// Forward declaration of D3D11 types +typedef struct ID3D11Device
ID3D11Device; +typedef struct ID3D11DeviceContext ID3D11DeviceContext; +typedef struct ID3D11Resource ID3D11Resource; +typedef struct ID3D11Buffer ID3D11Buffer; +typedef struct ID3D11Texture1D ID3D11Texture1D; +typedef struct ID3D11Texture2D ID3D11Texture2D; +typedef struct ID3D11Texture3D ID3D11Texture3D; +typedef struct D3D11_BUFFER_DESC D3D11_BUFFER_DESC; +typedef struct D3D11_TEXTURE1D_DESC D3D11_TEXTURE1D_DESC; +typedef struct D3D11_TEXTURE2D_DESC D3D11_TEXTURE2D_DESC; +typedef struct D3D11_TEXTURE3D_DESC D3D11_TEXTURE3D_DESC; +typedef struct D3D11_SUBRESOURCE_DATA D3D11_SUBRESOURCE_DATA; +typedef struct tagRECT tagRECT; +typedef struct tagRECT D3D11_RECT; ///< typedef this ourselves so we don't have to drag d3d11.h in +#endif +#if !defined (AGS_EXCLUDE_DIRECTX_12) +// Forward declaration of D3D12 types +typedef struct ID3D12Device ID3D12Device; +typedef struct ID3D12GraphicsCommandList ID3D12GraphicsCommandList; +#endif +#endif + +/// \defgroup enums General enumerations +/// @{ + +/// The return codes +typedef enum AGSReturnCode +{ + AGS_SUCCESS, ///< Successful function call + AGS_FAILURE, ///< Failed to complete call for some unspecified reason + AGS_INVALID_ARGS, ///< Invalid arguments into the function + AGS_OUT_OF_MEMORY, ///< Out of memory when allocating space internally + AGS_MISSING_D3D_DLL, ///< Returned when a D3D dll fails to load + AGS_LEGACY_DRIVER, ///< Returned if a feature is not present in the installed driver + AGS_NO_AMD_DRIVER_INSTALLED, ///< Returned if the AMD GPU driver does not appear to be installed + AGS_EXTENSION_NOT_SUPPORTED, ///< Returned if the driver does not support the requested driver extension + AGS_ADL_FAILURE, ///< Failure in ADL (the AMD Display Library) + AGS_DX_FAILURE, ///< Failure from DirectX runtime + AGS_D3DDEVICE_NOT_CREATED ///< Failure due to not creating the D3D device successfully via AGS. 
+} AGSReturnCode; + +/// @} + +typedef struct AGSContext AGSContext; ///< All function calls in AGS require a pointer to a context. This is generated via \ref agsInitialize + +/// The rectangle struct used by AGS. +typedef struct AGSRect +{ + int offsetX; ///< Offset on X axis + int offsetY; ///< Offset on Y axis + int width; ///< Width of rectangle + int height; ///< Height of rectangle +} AGSRect; + +/// The display info struct used to describe a display enumerated by AGS +typedef struct AGSDisplayInfo +{ + char name[ 256 ]; ///< The name of the display + char displayDeviceName[ 32 ]; ///< The display device name, i.e. DISPLAY_DEVICE::DeviceName + + unsigned int isPrimaryDisplay : 1; ///< Whether this display is marked as the primary display + unsigned int HDR10 : 1; ///< HDR10 is supported on this display + unsigned int dolbyVision : 1; ///< Dolby Vision is supported on this display + unsigned int freesync : 1; ///< Freesync is supported on this display + unsigned int freesyncHDR : 1; ///< Freesync HDR is supported on this display + unsigned int eyefinityInGroup : 1; ///< The display is part of the Eyefinity group + unsigned int eyefinityPreferredDisplay : 1; ///< The display is the preferred display in the Eyefinity group for displaying the UI + unsigned int eyefinityInPortraitMode : 1; ///< The display is in the Eyefinity group but in portrait mode + unsigned int reservedPadding : 24; ///< Reserved for future use + + int maxResolutionX; ///< The maximum supported resolution of the unrotated display + int maxResolutionY; ///< The maximum supported resolution of the unrotated display + float maxRefreshRate; ///< The maximum supported refresh rate of the display + + AGSRect currentResolution; ///< The current resolution and position in the desktop, ignoring Eyefinity bezel compensation + AGSRect visibleResolution; ///< The visible resolution and position. 
When Eyefinity bezel compensation is enabled this will + ///< be the sub region in the Eyefinity single large surface (SLS) + float currentRefreshRate; ///< The current refresh rate + + int eyefinityGridCoordX; ///< The X coordinate in the Eyefinity grid. -1 if not in an Eyefinity group + int eyefinityGridCoordY; ///< The Y coordinate in the Eyefinity grid. -1 if not in an Eyefinity group + + double chromaticityRedX; ///< Red display primary X coord + double chromaticityRedY; ///< Red display primary Y coord + + double chromaticityGreenX; ///< Green display primary X coord + double chromaticityGreenY; ///< Green display primary Y coord + + double chromaticityBlueX; ///< Blue display primary X coord + double chromaticityBlueY; ///< Blue display primary Y coord + + double chromaticityWhitePointX; ///< White point X coord + double chromaticityWhitePointY; ///< White point Y coord + + double screenDiffuseReflectance; ///< Percentage expressed between 0 - 1 + double screenSpecularReflectance; ///< Percentage expressed between 0 - 1 + + double minLuminance; ///< The minimum luminance of the display in nits + double maxLuminance; ///< The maximum luminance of the display in nits + double avgLuminance; ///< The average luminance of the display in nits + + int logicalDisplayIndex; ///< The internally used index of this display + int adlAdapterIndex; ///< The internally used ADL adapter index + int reserved; ///< reserved field +} AGSDisplayInfo; + +/// The ASIC family +typedef enum AGSAsicFamily +{ + AGSAsicFamily_Unknown, ///< Unknown architecture, potentially from another IHV. Check \ref AGSDeviceInfo::vendorId + AGSAsicFamily_PreGCN, ///< Pre GCN architecture. + AGSAsicFamily_GCN1, ///< AMD GCN 1 architecture: Oland, Cape Verde, Pitcairn & Tahiti. + AGSAsicFamily_GCN2, ///< AMD GCN 2 architecture: Hawaii & Bonaire. This also includes APUs Kaveri and Carrizo. + AGSAsicFamily_GCN3, ///< AMD GCN 3 architecture: Tonga & Fiji. 
+ AGSAsicFamily_GCN4, ///< AMD GCN 4 architecture: Polaris. + AGSAsicFamily_Vega, ///< AMD Vega architecture, including Raven Ridge (ie AMD Ryzen CPU + AMD Vega GPU). + AGSAsicFamily_RDNA, ///< AMD RDNA architecture + AGSAsicFamily_RDNA2, ///< AMD RDNA2 architecture + AGSAsicFamily_RDNA3, ///< AMD RDNA3 architecture + AGSAsicFamily_RDNA4, ///< AMD RDNA4 architecture + + AGSAsicFamily_Count ///< Number of enumerated ASIC families +} AGSAsicFamily; + +/// The device info struct used to describe a physical GPU enumerated by AGS +typedef struct AGSDeviceInfo +{ + const char* adapterString; ///< The adapter name string + AGSAsicFamily asicFamily; ///< Set to Unknown if not AMD hardware + unsigned int isAPU : 1; ///< Whether this device is an APU + unsigned int isPrimaryDevice : 1; ///< Whether this device is marked as the primary device + unsigned int isExternal :1; ///< Whether this device is a detachable, external device + unsigned int reservedPadding : 29; ///< Reserved for future use + + int vendorId; ///< The vendor id + int deviceId; ///< The device id + int revisionId; ///< The revision id + + int numCUs; ///< Number of compute units + int numWGPs; ///< Number of RDNA Work Group Processors. Only valid if ASIC is RDNA onwards. + + int numROPs; ///< Number of ROPs + int coreClock; ///< Core clock speed at 100% power in MHz + int memoryClock; ///< Memory clock speed at 100% power in MHz + int memoryBandwidth; ///< Memory bandwidth in MB/s + float teraFlops; ///< Teraflops of GPU. Zero if not GCN onwards. Calculated from iCoreClock * iNumCUs * 64 Pixels/clk * 2 instructions/MAD + + unsigned long long localMemoryInBytes; ///< The size of local memory in bytes. 0 for non AMD hardware. + unsigned long long sharedMemoryInBytes; ///< The size of system memory available to the GPU in bytes. It is important to factor this into your VRAM budget for APUs + ///< as the reported local memory will only be a small fraction of the total memory available to the GPU. 
+ + int numDisplays; ///< The number of active displays found to be attached to this adapter. + AGSDisplayInfo* displays; ///< List of displays allocated by AGS to be numDisplays in length. + + int eyefinityEnabled; ///< Indicates if Eyefinity is active + int eyefinityGridWidth; ///< Contains width of the multi-monitor grid that makes up the Eyefinity Single Large Surface. + int eyefinityGridHeight; ///< Contains height of the multi-monitor grid that makes up the Eyefinity Single Large Surface. + int eyefinityResolutionX; ///< Contains width in pixels of the multi-monitor Single Large Surface. + int eyefinityResolutionY; ///< Contains height in pixels of the multi-monitor Single Large Surface. + int eyefinityBezelCompensated; ///< Indicates if bezel compensation is used for the current SLS display area. 1 if enabled, and 0 if disabled. + + int adlAdapterIndex; ///< Internally used index into the ADL list of adapters + int reserved; ///< reserved field +} AGSDeviceInfo; + +/// \defgroup general General API functions +/// API for initialization, cleanup and HDR display modes. +/// @{ + +typedef void* (__stdcall *AGS_ALLOC_CALLBACK)( size_t allocationSize ); ///< AGS user defined allocation prototype +typedef void (__stdcall *AGS_FREE_CALLBACK)( void* allocationPtr ); ///< AGS user defined free prototype + +/// The configuration options that can be passed in to \ref agsInitialize +typedef struct AGSConfiguration +{ + AGS_ALLOC_CALLBACK allocCallback; ///< Optional memory allocation callback. If not supplied, malloc() is used + AGS_FREE_CALLBACK freeCallback; ///< Optional memory freeing callback. 
If not supplied, free() is used +} AGSConfiguration; + +/// The top level GPU information returned from \ref agsInitialize +typedef struct AGSGPUInfo +{ + const char* driverVersion; ///< The AMD internal driver version + const char* radeonSoftwareVersion; ///< The Radeon Software Version + + int numDevices; ///< Number of GPUs in the system + AGSDeviceInfo* devices; ///< List of GPUs in the system +} AGSGPUInfo; + +/// The display mode +typedef enum AGSDisplayMode +{ + AGSDisplayMode_SDR, ///< SDR mode + AGSDisplayMode_HDR10_PQ, ///< HDR10 PQ encoding, requiring a 1010102 UNORM swapchain and PQ encoding in the output shader. + AGSDisplayMode_HDR10_scRGB, ///< HDR10 scRGB, requiring an FP16 swapchain. Values of 1.0 == 80 nits, 125.0 == 10000 nits. + AGSDisplayMode_FreesyncHDR_scRGB, ///< Freesync HDR scRGB, requiring an FP16 swapchain. A value of 1.0 == 80 nits. + AGSDisplayMode_FreesyncHDR_Gamma22, ///< Freesync HDR Gamma 2.2, requiring a 1010102 UNORM swapchain. The output needs to be encoded to gamma 2.2. + AGSDisplayMode_DolbyVision, ///< Dolby Vision, requiring an 8888 UNORM swapchain + + Mode_Count ///< Number of enumerated display modes +} AGSDisplayMode; + +/// The struct to specify the display settings to the driver. 
+typedef struct AGSDisplaySettings +{ + AGSDisplayMode mode; ///< The display mode to set the display into + + double chromaticityRedX; ///< Red display primary X coord + double chromaticityRedY; ///< Red display primary Y coord + + double chromaticityGreenX; ///< Green display primary X coord + double chromaticityGreenY; ///< Green display primary Y coord + + double chromaticityBlueX; ///< Blue display primary X coord + double chromaticityBlueY; ///< Blue display primary Y coord + + double chromaticityWhitePointX; ///< White point X coord + double chromaticityWhitePointY; ///< White point Y coord + + double minLuminance; ///< The minimum scene luminance in nits + double maxLuminance; ///< The maximum scene luminance in nits + + double maxContentLightLevel; ///< The maximum content light level in nits (MaxCLL) + double maxFrameAverageLightLevel; ///< The maximum frame average light level in nits (MaxFALL) + + unsigned int disableLocalDimming : 1; ///< Disables local dimming if possible + unsigned int reservedPadding : 31; ///< Reserved +} AGSDisplaySettings; + + +/// The result returned from \ref agsCheckDriverVersion +typedef enum AGSDriverVersionResult +{ + AGS_SOFTWAREVERSIONCHECK_OK, ///< The reported Radeon Software Version is newer or the same as the required version + AGS_SOFTWAREVERSIONCHECK_OLDER, ///< The reported Radeon Software Version is older than the required version + AGS_SOFTWAREVERSIONCHECK_UNDEFINED ///< The check could not determine a result. This could be because it is a private or custom driver or just invalid arguments. +} AGSDriverVersionResult; + +/// +/// Helper function to check the installed software version against the required software version. +/// +/// \param [in] radeonSoftwareVersionReported The Radeon Software Version returned from \ref AGSGPUInfo::radeonSoftwareVersion. +/// \param [in] radeonSoftwareVersionRequired The Radeon Software Version to check against. This is specified using \ref AGS_MAKE_VERSION.
+/// \return The result of the check. +/// +AMD_AGS_API AGSDriverVersionResult agsCheckDriverVersion( const char* radeonSoftwareVersionReported, unsigned int radeonSoftwareVersionRequired ); + +/// +/// Function to return the AGS version number. +/// +/// \return The version number made using AGS_MAKE_VERSION( AMD_AGS_VERSION_MAJOR, AMD_AGS_VERSION_MINOR, AMD_AGS_VERSION_PATCH ). +/// +AMD_AGS_API int agsGetVersionNumber(); + +/// +/// Function used to initialize the AGS library. +/// agsVersion must be specified as AGS_CURRENT_VERSION or the call will return \ref AGS_INVALID_ARGS. +/// Must be called prior to any of the subsequent AGS API calls. +/// Must be called prior to ID3D11Device or ID3D12Device creation. +/// \note The caller of this function should handle the possibility of the call failing in the cases below. One option is to do a vendor id check and only call \ref agsInitialize if there is an AMD GPU present. +/// \note This function will fail with \ref AGS_NO_AMD_DRIVER_INSTALLED if there is no AMD driver found on the system. +/// \note This function will fail with \ref AGS_LEGACY_DRIVER in Catalyst versions before 12.20. +/// +/// \param [in] agsVersion The API version specified using the \ref AGS_CURRENT_VERSION macro. If this does not match the version in the binary this initialization call will fail. +/// \param [in] config Optional pointer to a AGSConfiguration struct to override the default library configuration. +/// \param [out] context Address of a pointer to a context. This function allocates a context on the heap which is then required for all subsequent API calls. +/// \param [out] gpuInfo Optional pointer to a AGSGPUInfo struct which will get filled in for all the GPUs in the system. +/// +AMD_AGS_API AGSReturnCode agsInitialize( int agsVersion, const AGSConfiguration* config, AGSContext** context, AGSGPUInfo* gpuInfo ); + +/// +/// Function used to clean up the AGS library. +/// +/// \param [in] context Pointer to a context. 
This function will deallocate the context from the heap. +/// +AMD_AGS_API AGSReturnCode agsDeInitialize( AGSContext* context ); + +/// +/// Function used to fill out a \ref AGSGPUInfo structure. +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [out] gpuInfo Pointer to a \ref AGSGPUInfo struct which will get filled in for all the GPUs in the system. +/// +AMD_AGS_API AGSReturnCode agsGetGPUInfo( AGSContext* context, AGSGPUInfo* gpuInfo ); + +/// +/// Function used to set a specific display into HDR mode +/// **DEPRECATED FUNCTION - Please use DXGI for HDR10 and Freesync HDR** +/// \note Setting all of the values apart from color space and transfer function to zero will cause the display to use defaults. +/// \note Call this function after each mode change (switch to fullscreen, any change in swapchain etc). +/// \note HDR10 PQ mode requires a 1010102 swapchain. +/// \note HDR10 scRGB mode requires an FP16 swapchain. +/// \note Freesync HDR scRGB mode requires an FP16 swapchain. +/// \note Freesync HDR Gamma 2.2 mode requires a 1010102 swapchain. +/// \note Dolby Vision requires a 8888 UNORM swapchain. +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [in] deviceIndex The index of the device listed in \ref AGSGPUInfo::devices. +/// \param [in] displayIndex The index of the display listed in \ref AGSDeviceInfo::displays. +/// \param [in] settings Pointer to the display settings to use. +/// +AMD_AGS_API AGSReturnCode agsSetDisplayMode( AGSContext* context, int deviceIndex, int displayIndex, const AGSDisplaySettings* settings ); + +/// @} + +/// \defgroup dxappreg App Registration +/// @{ +/// This extension allows an application to voluntarily register itself with the driver, providing a more robust app detection solution and avoid the issue of the driver relying on exe names to match the app to a driver profile. 
+/// It is available when creating the device for both DirectX11 and DirectX12 via \ref agsDriverExtensionsDX11_CreateDevice and \ref agsDriverExtensionsDX12_CreateDevice respectively. +/// This feature is supported in Radeon Software Version 17.9.2 onwards. +/// Rules: +/// * AppName or EngineName must be set, but both are not required. Engine profiles will be used only if app specific profiles do not exist. +/// * In an engine, the EngineName should be set, so a default profile can be built. If an app modifies the engine, the AppName should be set, to allow a profile for the specific app. +/// * Version number is not mandatory, but recommended. The use of which can prevent the use of profiles for incompatible versions (for instance engine versions that introduce or change features), and can help prevent older profiles from being used (and introducing new bugs) before the profile is tested with new app builds. +/// * If Version numbers are used and a new version is introduced, a new profile will not be enabled until an AMD engineer has been able to update a previous profile, or make a new one. +/// +/// The cases for profile selection are as follows: +/// +/// |Case|Profile Applied| +/// |----|---------------| +/// | App or Engine Version has profile | The profile is used. | +/// | App or Engine Version num < profile version num | The closest profile > the version number is used. | +/// | App or Engine Version num > profile version num | No profile selected/The previous method is used. | +/// | App and Engine Version have profile | The App's profile is used. | +/// | App and Engine Version num < profile version | The closest App profile > the version number is used. | +/// | App and Engine Version, no App profile found | The Engine profile will be used. | +/// | App/Engine name but no Version, has profile | The latest profile is used. | +/// | No name or version, or no profile | The previous app detection method is used. 
| +/// +/// As shown above, if an App name is given, and a profile is found for that app, that will be prioritized. The Engine name and profile will be used only if no app name is given, or no viable profile is found for the app name. +/// In the case that neither App nor Engine has a profile, the previous app detection methods will be used. If given a version number that is larger than any profile version number, no profile will be selected. +/// This is specifically to prevent cases where an update to an engine or app will cause catastrophic breaks in the profile, allowing an engineer to test the profile before clearing it for public use with the new engine/app update. +/// +/// @} + +#if !defined (AGS_EXCLUDE_DIRECTX_12) + +/// \defgroup dx12 DirectX12 Extensions +/// DirectX12 driver extensions +/// @{ + +/// \defgroup dx12init Device and device object creation and cleanup +/// It is now mandatory to call \ref agsDriverExtensionsDX12_CreateDevice when creating a device if the user wants to access any future DX12 AMD extensions. +/// The corresponding \ref agsDriverExtensionsDX12_DestroyDevice call must be called to release the device and free up the internal resources allocated by the create call. +/// @{ + +/// The struct to specify the DX12 device creation parameters +typedef struct AGSDX12DeviceCreationParams +{ + IDXGIAdapter* pAdapter; ///< Pointer to the adapter to use when creating the device. This may be null. + IID iid; ///< The interface ID for the type of device to be created. + D3D_FEATURE_LEVEL FeatureLevel; ///< The minimum feature level to create the device with.
+} AGSDX12DeviceCreationParams; + +/// The struct to specify DX12 additional device creation parameters +typedef struct AGSDX12ExtensionParams +{ + const WCHAR* pAppName; ///< Application name + const WCHAR* pEngineName; ///< Engine name + unsigned int appVersion; ///< Application version + unsigned int engineVersion; ///< Engine version + unsigned int uavSlot; ///< The UAV slot reserved for intrinsic support. Refer to the \ref agsDriverExtensionsDX12_CreateDevice documentation for more details. +} AGSDX12ExtensionParams; + +/// Extensions for DX12 +typedef struct AGSDX12ExtensionsSupported +{ + unsigned int intrinsics16 : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. ReadFirstLane, ReadLane, LaneID, Swizzle, Ballot, MBCount, Med3, Barycentrics + unsigned int intrinsics17 : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. WaveReduce, WaveScan + unsigned int userMarkers : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. + unsigned int appRegistration : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. + unsigned int UAVBindSlot : 1; ///< Supported in Radeon Software Version 19.5.1 onwards. + unsigned int intrinsics19 : 1; ///< Supported in Radeon Software Version 19.12.2 onwards. DrawIndex, AtomicU64 + unsigned int baseVertex : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int baseInstance : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int getWaveSize : 1; ///< Supported in Radeon Software Version 20.5.1 onwards. + unsigned int floatConversion : 1; ///< Supported in Radeon Software Version 20.5.1 onwards. + unsigned int readLaneAt : 1; ///< Supported in Radeon Software Version 20.11.2 onwards. + unsigned int rayHitToken : 1; ///< Supported in Radeon Software Version 20.11.2 onwards. + unsigned int shaderClock : 1; ///< Supported in Radeon Software Version 23.1.1 onwards. 
+ unsigned int padding : 19; ///< Reserved +} AGSDX12ExtensionsSupported; + +/// The struct to hold all the returned parameters from the device creation call +typedef struct AGSDX12ReturnedParams +{ + ID3D12Device* pDevice; ///< The newly created device + AGSDX12ExtensionsSupported extensionsSupported; ///< List of supported extensions +} AGSDX12ReturnedParams; + +/// The space id for DirectX12 intrinsic support +const unsigned int AGS_DX12_SHADER_INTRINSICS_SPACE_ID = 0x7FFF0ADE; // 2147420894 + +/// +/// Function used to create a D3D12 device with additional AMD-specific initialization parameters. +/// +/// When using the HLSL shader extensions please note: +/// * The shader compiler should not use the D3DCOMPILE_SKIP_OPTIMIZATION (/Od) option or /O0, otherwise it will not work. +/// * The shader compiler needs D3DCOMPILE_ENABLE_STRICTNESS (/Ges) enabled. +/// * The intrinsic instructions require a 5.1 shader model. +/// * The Root Signature will need to reserve an extra UAV resource slot. This is not a real resource that requires allocating, it is just used to encode the intrinsic instructions. +/// +/// The easiest way to set up the reserved UAV slot is to specify it at u0. The register space id will automatically be assumed to be \ref AGS_DX12_SHADER_INTRINSICS_SPACE_ID. +/// The HLSL expects this as default and the set up code would look similar to this: +/// \code{.cpp} +/// CD3DX12_DESCRIPTOR_RANGE range[]; +/// ... +/// range[ 0 ].Init( D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, AGS_DX12_SHADER_INTRINSICS_SPACE_ID ); // u0 at driver-reserved space id +/// \endcode +/// +/// Newer drivers also support a user-specified slot in which case the register space id is assumed to be 0. It is important that the \ref AGSDX12ExtensionsSupported::UAVBindSlot bit is set +/// to ensure the driver can support this. If not, then u0 and \ref AGS_DX12_SHADER_INTRINSICS_SPACE_ID must be used.
+/// If the driver does support this feature and a non zero slot is required, then the HLSL must also define AMD_EXT_SHADER_INTRINSIC_UAV_OVERRIDE as the matching slot value. +/// +/// The AGS context pointer is added to the user data of the D3D device using the SetPrivateData API. The GUID used is {d5a2a91b-7003-4f12-89de-209beb51fb94}: +/// \code{.cpp} +/// static const GUID IID_AGSContextData = {0xd5a2a91b, 0x7003, 0x4f12, {0x89, 0xde, 0x20, 0x9b, 0xeb, 0x51, 0xfb, 0x94}}; +/// \endcode +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [in] creationParams Pointer to the struct to specify the existing DX12 device creation parameters. +/// \param [in] extensionParams Optional pointer to the struct to specify DX12 additional device creation parameters. +/// \param [out] returnedParams Pointer to struct to hold all the returned parameters from the call. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_CreateDevice( AGSContext* context, const AGSDX12DeviceCreationParams* creationParams, const AGSDX12ExtensionParams* extensionParams, AGSDX12ReturnedParams* returnedParams ); + +/// +/// Function to destroy the D3D12 device. +/// This call will also clean up any AMD-specific driver extensions for D3D12. +/// +/// \param [in] context Pointer to a context. +/// \param [in] device Pointer to the D3D12 device. +/// \param [out] deviceReferences Optional pointer to an unsigned int that will be set to the value returned from device->Release(). +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_DestroyDevice( AGSContext* context, ID3D12Device* device, unsigned int* deviceReferences ); + +/// @} + +/// \defgroup dx12usermarkers User Markers +/// @{ + +/// +/// Function used to push an AMD user marker onto the command list. +/// This only has an effect if \ref AGSDX12ExtensionsSupported::userMarkers is present. +/// Supported in Radeon Software Version 17.9.1 onwards.
+/// +/// \param [in] context Pointer to a context. +/// \param [in] commandList Pointer to the command list. +/// \param [in] data The UTF-8 marker string. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_PushMarker( AGSContext* context, ID3D12GraphicsCommandList* commandList, const char* data ); + +/// +/// Function used to pop an AMD user marker on the command list. +/// Supported in Radeon Software Version 17.9.1 onwards. +/// +/// \param [in] context Pointer to a context. +/// \param [in] commandList Pointer to the command list. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_PopMarker( AGSContext* context, ID3D12GraphicsCommandList* commandList ); + +/// +/// Function used to insert a single-event AMD user marker onto the command list. +/// Supported in Radeon Software Version 17.9.1 onwards. +/// +/// \param [in] context Pointer to a context. +/// \param [in] commandList Pointer to the command list. +/// \param [in] data The UTF-8 marker string. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX12_SetMarker( AGSContext* context, ID3D12GraphicsCommandList* commandList, const char* data ); + +/// @} +/// @} + +#endif // AGS_EXCLUDE_DIRECTX_12 + +#if !defined (AGS_EXCLUDE_DIRECTX_11) + +/// \defgroup dx11 DirectX11 Extensions +/// DirectX11 driver extensions +/// @{ + +/// \defgroup dx11init Device creation and cleanup +/// It is now mandatory to call \ref agsDriverExtensionsDX11_CreateDevice when creating a device if the user wants to access any DX11 AMD extensions. +/// The corresponding \ref agsDriverExtensionsDX11_DestroyDevice function must be called to release the device and free up the internal resources allocated by the create call. +/// @{ + +/// The different modes to control Crossfire behavior. +typedef enum AGSCrossfireMode +{ + AGS_CROSSFIRE_MODE_DRIVER_AFR = 0, ///< Use the default driver-based AFR rendering.
If this mode is specified, do NOT use the agsDriverExtensionsDX11_Create*() APIs to create resources + AGS_CROSSFIRE_MODE_EXPLICIT_AFR, ///< Use the AGS Crossfire API functions to perform explicit AFR rendering without requiring a CF driver profile + AGS_CROSSFIRE_MODE_DISABLE ///< Completely disable AFR rendering +} AGSCrossfireMode; + +/// The struct to specify the existing DX11 device creation parameters +typedef struct AGSDX11DeviceCreationParams +{ + IDXGIAdapter* pAdapter; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + D3D_DRIVER_TYPE DriverType; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + HMODULE Software; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + UINT Flags; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + const D3D_FEATURE_LEVEL* pFeatureLevels; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + UINT FeatureLevels; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + UINT SDKVersion; ///< Consult the DX documentation on D3D11CreateDevice for this parameter + const DXGI_SWAP_CHAIN_DESC* pSwapChainDesc; ///< Optional swapchain description. Specify this to invoke D3D11CreateDeviceAndSwapChain instead of D3D11CreateDevice. +} AGSDX11DeviceCreationParams; + +/// The struct to specify DX11 additional device creation parameters +typedef struct AGSDX11ExtensionParams +{ + const WCHAR* pAppName; ///< Application name + const WCHAR* pEngineName; ///< Engine name + unsigned int appVersion; ///< Application version + unsigned int engineVersion; ///< Engine version + unsigned int numBreadcrumbMarkers; ///< The number of breadcrumb markers to allocate. Each marker is a uint64 (ie 8 bytes). If 0, the system is disabled. + unsigned int uavSlot; ///< The UAV slot reserved for intrinsic support. This must match the slot defined in the HLSL, i.e. "#define AmdDxExtShaderIntrinsicsUAVSlot". 
+ /// The default slot is 7, but the caller is free to use an alternative slot. + /// If 0 is specified, then the default of 7 will be used. + AGSCrossfireMode crossfireMode; ///< Desired Crossfire mode +} AGSDX11ExtensionParams; + +/// Extensions for DX11 +typedef struct AGSDX11ExtensionsSupported +{ + unsigned int quadList : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int screenRectList : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int uavOverlap : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int depthBoundsTest : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int multiDrawIndirect : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int multiDrawIndirectCountIndirect : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int crossfireAPI : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int createShaderControls : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. + unsigned int intrinsics16 : 1; ///< Supported in Radeon Software Version 16.9.2 onwards. ReadFirstLane, ReadLane, LaneID, Swizzle, Ballot, MBCount, Med3, Barycentrics + unsigned int multiView : 1; ///< Supported in Radeon Software Version 16.12.1 onwards. + unsigned int intrinsics17 : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. WaveReduce, WaveScan + unsigned int appRegistration : 1; ///< Supported in Radeon Software Version 17.9.1 onwards. + unsigned int breadcrumbMarkers : 1; ///< Supported in Radeon Software Version 17.11.1 onwards. + unsigned int MDIDeferredContexts : 1; ///< Supported in Radeon Software Version 18.8.1 onwards. + unsigned int UAVOverlapDeferredContexts : 1; ///< Supported in Radeon Software Version 18.8.1 onwards. + unsigned int depthBoundsDeferredContexts : 1; ///< Supported in Radeon Software Version 18.8.1 onwards. 
+ unsigned int intrinsics19 : 1; ///< Supported in Radeon Software Version 19.12.2 onwards. DrawIndex, AtomicU64 + unsigned int getWaveSize : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int baseVertex : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int baseInstance : 1; ///< Supported in Radeon Software Version 20.2.1 onwards. + unsigned int padding : 12; ///< Reserved +} AGSDX11ExtensionsSupported; + +/// The struct to hold all the returned parameters from the device creation call +typedef struct AGSDX11ReturnedParams +{ + ID3D11Device* pDevice; ///< The newly created device + ID3D11DeviceContext* pImmediateContext; ///< The newly created immediate device context + IDXGISwapChain* pSwapChain; ///< The newly created swap chain. This is only created if a valid pSwapChainDesc is supplied in AGSDX11DeviceCreationParams. + D3D_FEATURE_LEVEL featureLevel; ///< The feature level supported by the newly created device + AGSDX11ExtensionsSupported extensionsSupported; ///< List of supported extensions + unsigned int crossfireGPUCount; ///< The number of GPUs that are active for this app + void* breadcrumbBuffer; ///< The CPU buffer returned if the initialization of the breadcrumb was successful +} AGSDX11ReturnedParams; + +/// +/// Function used to create a D3D11 device with additional AMD-specific initialization parameters. +/// +/// When using the HLSL shader extensions please note: +/// * The shader compiler should not use the D3DCOMPILE_SKIP_OPTIMIZATION (/Od) option, otherwise it will not work. +/// * The shader compiler needs D3DCOMPILE_ENABLE_STRICTNESS (/Ges) enabled. +/// +/// \param [in] context Pointer to a context. This is generated by \ref agsInitialize +/// \param [in] creationParams Pointer to the struct to specify the existing DX11 device creation parameters. +/// \param [in] extensionParams Optional pointer to the struct to specify DX11 additional device creation parameters. 
+/// \param [out] returnedParams Pointer to struct to hold all the returned parameters from the call. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateDevice( AGSContext* context, const AGSDX11DeviceCreationParams* creationParams, const AGSDX11ExtensionParams* extensionParams, AGSDX11ReturnedParams* returnedParams ); + +/// +/// Function to destroy the D3D11 device and its immediate context. +/// This call will also cleanup any AMD-specific driver extensions for D3D11. +/// +/// \param [in] context Pointer to a context. +/// \param [in] device Pointer to the D3D11 device. +/// \param [out] deviceReferences Optional pointer to an unsigned int that will be set to the value returned from device->Release(). +/// \param [in] immediateContext Pointer to the D3D11 immediate device context. +/// \param [out] immediateContextReferences Optional pointer to an unsigned int that will be set to the value returned from immediateContext->Release(). +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_DestroyDevice( AGSContext* context, ID3D11Device* device, unsigned int* deviceReferences, ID3D11DeviceContext* immediateContext, unsigned int* immediateContextReferences ); + +/// @} + +/// \defgroup breadcrumbs Breadcrumb API +/// API for writing top-of-pipe and bottom-of-pipe markers to help track down GPU hangs. +/// +/// The API is available if the \ref AGSDX11ExtensionsSupported::breadcrumbMarkers is present. +/// +/// To use the API, a non zero value needs to be specified in \ref AGSDX11ExtensionParams::numBreadcrumbMarkers. This enables the API (if available) and allocates a system memory buffer +/// which is returned to the user in \ref AGSDX11ReturnedParams::breadcrumbBuffer. +/// +/// The user can now write markers before and after draw calls using \ref agsDriverExtensionsDX11_WriteBreadcrumb. +/// +/// \section background Background +/// +/// A top-of-pipe (TOP) command is scheduled for execution as soon as the command processor (CP) reaches the command. 
+/// A bottom-of-pipe (BOP) command is scheduled for execution once the previous rendering commands (draw and dispatch) finish execution. +/// TOP and BOP commands do not block CP. i.e. the CP schedules the command for execution then proceeds to the next command without waiting. +/// To effectively use TOP and BOP commands, it is important to understand how they interact with rendering commands: +/// +/// When the CP encounters a rendering command it queues it for execution and moves to the next command. The queued rendering commands are issued in order. +/// There can be multiple rendering commands running in parallel. When a rendering command is issued we say it is at the top of the pipe. When a rendering command +/// finishes execution we say it has reached the bottom of the pipe. +/// +/// A BOP command remains in a waiting queue and is executed once prior rendering commands finish. The queue of BOP commands is limited to 64 entries in GCN generation 1, 2, 3, 4 and 5. +/// If the 64 limit is reached the CP will stop queueing BOP commands and also rendering commands. Developers should limit the number of BOP commands that write markers to avoid contention. +/// In general, developers should limit both TOP and BOP commands to avoid stalling the CP. 
+/// +/// \subsection eg1 Example 1: +/// +/// \code{.cpp} +/// // Start of a command buffer +/// WriteMarker(TopOfPipe, 1) +/// WriteMarker(BottomOfPipe, 2) +/// WriteMarker(BottomOfPipe, 3) +/// DrawX +/// WriteMarker(BottomOfPipe, 4) +/// WriteMarker(BottomOfPipe, 5) +/// WriteMarker(TopOfPipe, 6) +/// // End of command buffer +/// \endcode +/// +/// In the above example, the CP writes markers 1, 2 and 3 without waiting: +/// Marker 1 is TOP so it's independent from other commands +/// There's no wait for marker 2 and 3 because there are no draws preceding the BOP commands +/// Marker 4 is only written once DrawX finishes execution +/// Marker 5 doesn't wait for additional draws so it is written right after marker 4 +/// Marker 6 can be written as soon as the CP reaches the command. For instance, it is very possible that CP writes marker 6 while DrawX +/// is running and therefore marker 6 gets written before markers 4 and 5 +/// +/// \subsection eg2 Example 2: +/// +/// \code{.cpp} +/// WriteMarker(TopOfPipe, 1) +/// DrawX +/// WriteMarker(BottomOfPipe, 2) +/// WriteMarker(TopOfPipe, 3) +/// DrawY +/// WriteMarker(BottomOfPipe, 4) +/// \endcode +/// +/// In this example marker 1 is written before the start of DrawX +/// Marker 2 is written once DrawX finishes execution +/// Similarly marker 3 is written before the start of DrawY +/// Marker 4 is written once DrawY finishes execution +/// In case of a GPU hang, if markers 1 and 3 are written but markers 2 and 4 are missing we can conclude that: +/// The CP has reached both DrawX and DrawY commands since marker 1 and 3 are present +/// The fact that marker 2 and 4 are missing means that either DrawX is hanging while DrawY is at the top of the pipe or both DrawX and DrawY +/// started and both are simultaneously hanging +/// +/// \subsection eg3 Example 3: +/// +/// \code{.cpp} +/// // Start of a command buffer +/// WriteMarker(BottomOfPipe, 1) +/// DrawX +/// WriteMarker(BottomOfPipe, 2) +/// DrawY +/// 
WriteMarker(BottomOfPipe, 3) +/// DrawZ +/// WriteMarker(BottomOfPipe, 4) +/// // End of command buffer +/// \endcode +/// +/// In this example marker 1 is written before the start of DrawX +/// Marker 2 is written once DrawX finishes +/// Marker 3 is written once DrawY finishes +/// Marker 4 is written once DrawZ finishes +/// If the GPU hangs and only marker 1 is written we can conclude that the hang is happening in either DrawX, DrawY or DrawZ +/// If the GPU hangs and only marker 1 and 2 are written we can conclude that the hang is happening in DrawY or DrawZ +/// If the GPU hangs and only marker 4 is missing we can conclude that the hang is happening in DrawZ +/// +/// \subsection eg4 Example 4: +/// +/// \code{.cpp} +/// // Start of a command buffer +/// WriteMarker(TopOfPipe, 1) +/// DrawX +/// WriteMarker(TopOfPipe, 2) +/// DrawY +/// WriteMarker(TopOfPipe, 3) +/// DrawZ +/// // End of command buffer +/// \endcode +/// +/// In this example, in case the GPU hangs and only marker 1 is written we can conclude that the hang is happening in DrawX +/// In case the GPU hangs and only marker 1 and 2 are written we can conclude that the hang is happening in DrawX or DrawY +/// In case the GPU hangs and all 3 markers are written we can conclude that the hang is happening in any of DrawX, DrawY or DrawZ +/// +/// \subsection eg5 Example 5: +/// +/// \code{.cpp} +/// DrawX +/// WriteMarker(TopOfPipe, 1) +/// WriteMarker(BottomOfPipe, 2) +/// DrawY +/// WriteMarker(TopOfPipe, 3) +/// WriteMarker(BottomOfPipe, 4) +/// \endcode +/// +/// Marker 1 is written right after DrawX is queued for execution. +/// Marker 2 is only written once DrawX finishes execution. +/// Marker 3 is written right after DrawY is queued for execution. +/// Marker 4 is only written once DrawY finishes execution +/// If marker 1 is written we would know that the CP has reached the command DrawX (DrawX at the top of the pipe).
+/// If marker 2 is written we can say that DrawX has finished execution (DrawX at the bottom of the pipe). +/// In case the GPU hangs and only marker 1 and 3 are written we can conclude that the hang is happening in DrawX or DrawY +/// In case the GPU hangs and only marker 1 is written we can conclude that the hang is happening in DrawX +/// In case the GPU hangs and only marker 4 is missing we can conclude that the hang is happening in DrawY +/// +/// \section data Retrieving GPU Data +/// +/// In the event of a GPU hang, the user can inspect the system memory buffer to determine which draw has caused the hang. +/// For example: +/// \code{.cpp} +/// // Force the work to be flushed to prevent the CPU running ahead of the GPU +/// g_pImmediateContext->Flush(); +/// +/// // Present the information rendered to the back buffer to the front buffer (the screen) +/// HRESULT hr = g_pSwapChain->Present( 0, 0 ); +/// +/// // Read the marker data buffer once device loss is detected +/// if ( hr != S_OK ) +/// { +/// for (UINT i = 0; i < g_NumMarkerWritten; i++) +/// { +/// UINT64* pTempData; +/// pTempData = static_cast<UINT64*>(pMarkerBuffer); +/// +/// // Write the marker data to file +/// ofs << i << "\r\n"; +/// ofs << std::hex << *(pTempData + i * 2) << "\r\n"; +/// ofs << std::hex << *(pTempData + (i * 2 + 1)) << "\r\n"; +/// +/// WCHAR s1[256]; +/// setlocale(LC_NUMERIC, "en_US.iso88591"); +/// +/// // Output the marker data to console +/// swprintf(s1, 256, L" The Draw count is %d; The Top maker is % 016llX and the Bottom marker is % 016llX \r\n", i, *(pTempData + i * 2), *(pTempData + (i * 2 + 1))); +/// +/// OutputDebugStringW(s1); +/// } +/// } +/// \endcode +/// +/// The console output would resemble something like: +/// \code{.cpp} +/// D3D11: Removing Device. +/// D3D11 ERROR: ID3D11Device::RemoveDevice: Device removal has been triggered for the following reason (DXGI_ERROR_DEVICE_HUNG: The Device took an unreasonable amount of time to execute its commands, or the hardware crashed/hung.
As a result, the TDR (Timeout Detection and Recovery) mechanism has been triggered. The current Device Context was executing commands when the hang occurred. The application may want to respawn and fallback to less aggressive use of the display hardware). [ EXECUTION ERROR #378: DEVICE_REMOVAL_PROCESS_AT_FAULT] +/// The Draw count is 0; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 1; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 2; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 3; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 4; The Top maker is 00000000DEADCAFE and the Bottom marker is 00000000DEADBEEF +/// The Draw count is 5; The Top maker is CDCDCDCDCDCDCDCD and the Bottom marker is CDCDCDCDCDCDCDCD +/// The Draw count is 6; The Top maker is CDCDCDCDCDCDCDCD and the Bottom marker is CDCDCDCDCDCDCDCD +/// The Draw count is 7; The Top maker is CDCDCDCDCDCDCDCD and the Bottom marker is CDCDCDCDCDCDCDCD +/// \endcode +/// +/// @{ + +/// The breadcrumb marker type +typedef enum AGSBreadcrumbMarkerType +{ + AGSTopOfPipe = 0, ///< Top-of-pipe marker + AGSBottomOfPipe = 1 ///< Bottom-of-pipe marker +} AGSBreadcrumbMarkerType; + +/// The breadcrumb marker struct used by \ref agsDriverExtensionsDX11_WriteBreadcrumb +typedef struct AGSBreadcrumbMarker +{ + unsigned long long markerData; ///< The user data to write. + AGSBreadcrumbMarkerType type; ///< Whether this marker is top or bottom of pipe. + unsigned int index; ///< The index of the marker. This should be less than the value specified in \ref AGSDX11ExtensionParams::numBreadcrumbMarkers +} AGSBreadcrumbMarker; + +/// +/// Function to write a breadcrumb marker. +/// +/// This method inserts a write marker operation in the GPU command stream. 
In the case where the GPU is hanging the write +/// command will never be reached and the marker will never get written to memory. +/// +/// In order to use this function, \ref AGSDX11ExtensionParams::numBreadcrumbMarkers must be set to a non zero value. +/// +/// \param [in] context Pointer to a context. +/// \param [in] marker Pointer to a marker. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_WriteBreadcrumb( AGSContext* context, const AGSBreadcrumbMarker* marker ); + +/// @} + +/// \defgroup dx11Topology Extended Topology +/// API for primitive topologies +/// @{ + +/// Additional topologies supported via extensions +typedef enum AGSPrimitiveTopologyDX11 +{ + AGS_PRIMITIVE_TOPOLOGY_QUADLIST = 7, ///< Quad list + AGS_PRIMITIVE_TOPOLOGY_SCREENRECTLIST = 9 ///< Screen rect list +} AGSPrimitiveTopologyDX11; + +/// +/// Function used to set the primitive topology. If you are using any of the extended topology types, then this function should +/// be called to set ALL topology types. +/// +/// The Quad List extension is a convenient way to submit quads without using an index buffer. Note that this still submits two triangles at the driver level. +/// In order to use this function, AGS must already be initialized and \ref agsDriverExtensionsDX11_CreateDevice must have been called successfully. +/// +/// The Screen Rect extension, which is only available on GCN hardware, allows the user to pass in three of the four corners of a rectangle. +/// The hardware then uses the bounding box of the vertices to rasterize the rectangle primitive (i.e. as a rectangle rather than two triangles). +/// \note Note that this will not return valid interpolated values, only valid SV_Position values. +/// \note If either the Quad List or Screen Rect extension is used, then agsDriverExtensionsDX11_IASetPrimitiveTopology should be called in place of the native DirectX11 equivalent all the time. +/// +/// \param [in] context Pointer to a context.
+/// \param [in] topology The topology to set on the D3D11 device. This can be either an AGS-defined topology such as AGS_PRIMITIVE_TOPOLOGY_QUADLIST +/// or a standard D3D-defined topology such as D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP. +/// NB. the AGS-defined types will require casting to a D3D_PRIMITIVE_TOPOLOGY type. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_IASetPrimitiveTopology( AGSContext* context, D3D_PRIMITIVE_TOPOLOGY topology ); + +/// @} + +/// \defgroup dx11UAVOverlap UAV Overlap +/// API for enabling overlapping UAV writes +/// +/// The AMD DX11 driver will automatically track resource usage and insert barriers as necessary to clear read-after-write (RAW) and write-after-write (WAW) +/// hazards. The UAV overlap extension allows applications to indicate to the driver it can skip inserting barriers for UAV resources used in +/// dispatches and draws within the \ref agsDriverExtensionsDX11_BeginUAVOverlap/ \ref agsDriverExtensionsDX11_EndUAVOverlap calls. This can be useful for applications to allow +/// multiple back-to-back dispatches or draws in flight even if they are accessing the same UAV resource but the data written or read does not overlap within the resource. +/// +/// Usage would be as follows: +/// \code{.cpp} +/// m_device->Dispatch( ... ); // First call that writes to the UAV +/// +/// // Disable automatic WAW syncs +/// agsDriverExtensionsDX11_BeginUAVOverlap( m_agsContext, nullptr ); +/// +/// // Submit other dispatches that write to the same UAV concurrently +/// m_device->Dispatch( ... ); +/// m_device->Dispatch( ... ); +/// m_device->Dispatch( ... ); +/// +/// // Reenable automatic WAW syncs +/// agsDriverExtensionsDX11_EndUAVOverlap( m_agsContext, nullptr ); +/// \endcode +/// @{ + +/// +/// Function used to indicate to the driver the start of the overlap scope. +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context.
If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// Check support with the AGS_DX11_EXTENSION_DEFERRED_CONTEXTS bit. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_BeginUAVOverlap( AGSContext* context, ID3D11DeviceContext* dxContext ); + +/// +/// Function used to indicate to the driver the end of the overlap scope. +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// Check support with the AGS_DX11_EXTENSION_DEFERRED_CONTEXTS bit. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_EndUAVOverlap( AGSContext* context, ID3D11DeviceContext* dxContext ); + +/// @} + +/// \defgroup dx11DepthBoundsTest Depth Bounds Test +/// API for enabling depth bounds testing +/// @{ + +/// +/// Function used to set the depth bounds test extension +/// +/// \param [in] context Pointer to a context +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] enabled Whether to enable or disable the depth bounds testing. If disabled, the next two args are ignored. +/// \param [in] minDepth The near depth range to clip against. +/// \param [in] maxDepth The far depth range to clip against. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetDepthBounds( AGSContext* context, ID3D11DeviceContext* dxContext, bool enabled, float minDepth, float maxDepth ); + +/// @} + +/// \defgroup mdi Multi Draw Indirect (MDI) +/// API for dispatching multiple instanced draw commands. +/// The multi draw indirect extensions allow multiple sets of DrawInstancedIndirect to be submitted in one API call.
+/// The draw calls are issued on the GPU's command processor (CP), potentially saving the significant CPU overheads incurred by submitting the equivalent draw calls on the CPU. +/// +/// The extension allows the following code: +/// \code{.cpp} +/// // Submit n batches of DrawIndirect calls +/// for ( int i = 0; i < n; i++ ) +/// deviceContext->DrawIndexedInstancedIndirect( buffer, i * sizeof( cmd ) ); +/// \endcode +/// To be replaced by the following call: +/// \code{.cpp} +/// // Submit all n batches in one call +/// agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect( m_agsContext, deviceContext, n, buffer, 0, sizeof( cmd ) ); +/// \endcode +/// +/// The buffer used for the indirect args must be of the following formats: +/// \code{.cpp} +/// // Buffer layout for agsDriverExtensionsDX11_MultiDrawInstancedIndirect +/// struct DrawInstancedIndirectArgs +/// { +/// UINT VertexCountPerInstance; +/// UINT InstanceCount; +/// UINT StartVertexLocation; +/// UINT StartInstanceLocation; +/// }; +/// +/// // Buffer layout for agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect +/// struct DrawIndexedInstancedIndirectArgs +/// { +/// UINT IndexCountPerInstance; +/// UINT InstanceCount; +/// UINT StartIndexLocation; +/// UINT BaseVertexLocation; +/// UINT StartInstanceLocation; +/// }; +/// \endcode +/// +/// Example usage can be seen in AMD's GeometryFX (https://github.com/GPUOpen-Effects/GeometryFX). In particular, in this file: https://github.com/GPUOpen-Effects/GeometryFX/blob/master/amd_geometryfx/src/AMD_GeometryFX_Filtering.cpp +/// +/// @{ + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] drawCount The number of draws.
+/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. +/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawInstancedIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, unsigned int drawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] drawCount The number of draws. +/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. +/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, unsigned int drawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] pBufferForDrawCount The draw count buffer. +/// \param [in] alignedByteOffsetForDrawCount The offset into the draw count buffer. +/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. 
+/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawInstancedIndirectCountIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, ID3D11Buffer* pBufferForDrawCount, unsigned int alignedByteOffsetForDrawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// +/// Function used to submit a batch of draws via MultiDrawIndirect +/// +/// \param [in] context Pointer to a context. +/// \param [in] dxContext Pointer to the DirectX device context. If this is to work using the non-immediate context, then you need to check support. If nullptr is specified, then the immediate context is assumed. +/// \param [in] pBufferForDrawCount The draw count buffer. +/// \param [in] alignedByteOffsetForDrawCount The offset into the draw count buffer. +/// \param [in] pBufferForArgs The args buffer. +/// \param [in] alignedByteOffsetForArgs The offset into the args buffer. +/// \param [in] byteStrideForArgs The per element stride of the args buffer. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirectCountIndirect( AGSContext* context, ID3D11DeviceContext* dxContext, ID3D11Buffer* pBufferForDrawCount, unsigned int alignedByteOffsetForDrawCount, ID3D11Buffer* pBufferForArgs, unsigned int alignedByteOffsetForArgs, unsigned int byteStrideForArgs ); + +/// @} + +/// \defgroup shadercompiler Shader Compiler Controls +/// API for controlling DirectX11 shader compilation. +/// Check support for this feature using the AGS_DX11_EXTENSION_CREATE_SHADER_CONTROLS bit. +/// Supported in Radeon Software Version 16.9.2 (driver version 16.40.2311) onwards. +/// @{ + +/// +/// This method can be used to limit the maximum number of threads the driver uses for asynchronous shader compilation. 
+/// Setting it to 0 will disable asynchronous compilation completely and force the shaders to be compiled "inline" on the threads that call Create*Shader. +/// +/// This method can only be called before any shaders are created and being compiled by the driver. +/// If this method is called after shaders have been created the function will return AGS_FAILURE. +/// This function only sets an upper limit. The driver may create fewer threads than allowed by this function. +/// +/// \param [in] context Pointer to a context. +/// \param [in] numberOfThreads The maximum number of threads to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetMaxAsyncCompileThreadCount( AGSContext* context, unsigned int numberOfThreads ); + +/// +/// This method can be used to determine the total number of asynchronous shader compile jobs that are either +/// queued waiting for compilation or being compiled by the driver's asynchronous compilation threads. +/// This method can be called at any time during the lifetime of the driver. +/// +/// \param [in] context Pointer to a context. +/// \param [out] numberOfJobs Pointer to the number of jobs in flight currently. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NumPendingAsyncCompileJobs( AGSContext* context, unsigned int* numberOfJobs ); + +/// +/// This method can be used to enable or disable the disk based shader cache. +/// Enabling/disabling the disk cache is not supported if it is disabled explicitly via Radeon Settings or by an app profile. +/// Calling this method under these conditions will result in AGS_FAILURE being returned. +/// It is recommended that this method be called before any shaders are created by the application and being compiled by the driver. +/// Doing so at any other time may result in the cache being left in an inconsistent state. +/// +/// \param [in] context Pointer to a context. +/// \param [in] enable Whether to enable the disk cache. 0 to disable, 1 to enable. 
+/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetDiskShaderCacheEnabled( AGSContext* context, int enable ); + +/// @} + +/// \defgroup multiview Multiview +/// API for multiview broadcasting. +/// Check support for this feature using the AGS_DX11_EXTENSION_MULTIVIEW bit. +/// Supported in Radeon Software Version 16.12.1 (driver version 16.50.2001) onwards. +/// @{ + +/// +/// Function to control draw calls replication to multiple viewports and RT slices. +/// Setting any mask to 0 disables draw replication. +/// +/// \param [in] context Pointer to a context. +/// \param [in] vpMask Viewport control bit mask. +/// \param [in] rtSliceMask RT slice control bit mask. +/// \param [in] vpMaskPerRtSliceEnabled If 0, 16 lower bits of vpMask apply to all RT slices; if 1 each 16 bits of 64-bit mask apply to corresponding 4 RT slices. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetViewBroadcastMasks( AGSContext* context, unsigned long long vpMask, unsigned long long rtSliceMask, int vpMaskPerRtSliceEnabled ); + +/// +/// Function returns max number of supported clip rectangles. +/// +/// \param [in] context Pointer to a context. +/// \param [out] maxRectCount Returned max number of clip rectangles. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_GetMaxClipRects( AGSContext* context, unsigned int* maxRectCount ); + +/// The inclusion mode for the rect +typedef enum AGSClipRectMode +{ + AGSClipRectIncluded = 0, ///< Include the rect + AGSClipRectExcluded = 1 ///< Exclude the rect +} AGSClipRectMode; + +/// The clip rectangle struct used by \ref agsDriverExtensionsDX11_SetClipRects +typedef struct AGSClipRect +{ + AGSClipRectMode mode; ///< Include/exclude rect region + AGSRect rect; ///< The rect to include/exclude +} AGSClipRect; + +/// +/// Function sets clip rectangles. +/// +/// \param [in] context Pointer to a context. +/// \param [in] clipRectCount Number of specified clip rectangles. Use 0 to disable clip rectangles. 
+/// \param [in] clipRects Array of clip rectangles. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_SetClipRects( AGSContext* context, unsigned int clipRectCount, const AGSClipRect* clipRects ); + +/// @} + +/// \defgroup cfxapi Explicit Crossfire API +/// API for explicit control over Crossfire +/// @{ + +/// The Crossfire API transfer types +typedef enum AGSAfrTransferType +{ + AGS_AFR_TRANSFER_DEFAULT = 0, ///< Default Crossfire driver resource tracking + AGS_AFR_TRANSFER_DISABLE = 1, ///< Turn off driver resource tracking + AGS_AFR_TRANSFER_1STEP_P2P = 2, ///< App controlled GPU to next GPU transfer + AGS_AFR_TRANSFER_2STEP_NO_BROADCAST = 3, ///< App controlled GPU to next GPU transfer using intermediate system memory + AGS_AFR_TRANSFER_2STEP_WITH_BROADCAST = 4, ///< App controlled GPU to all render GPUs transfer using intermediate system memory +} AGSAfrTransferType; + +/// The Crossfire API transfer engines +typedef enum AGSAfrTransferEngine +{ + AGS_AFR_TRANSFERENGINE_DEFAULT = 0, ///< Use default engine for Crossfire API transfers + AGS_AFR_TRANSFERENGINE_3D_ENGINE = 1, ///< Use 3D engine for Crossfire API transfers + AGS_AFR_TRANSFERENGINE_COPY_ENGINE = 2, ///< Use Copy engine for Crossfire API transfers +} AGSAfrTransferEngine; + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. +/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] buffer Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. 
+/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateBuffer( AGSContext* context, const D3D11_BUFFER_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Buffer** buffer, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. +/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] texture1D Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateTexture1D( AGSContext* context, const D3D11_TEXTURE1D_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Texture1D** texture1D, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. +/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] texture2D Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateTexture2D( AGSContext* context, const D3D11_TEXTURE2D_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Texture2D** texture2D, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to create a Direct3D11 resource with the specified AFR transfer type and specified transfer engine. +/// +/// \param [in] context Pointer to a context. 
+/// \param [in] desc Pointer to the D3D11 resource description. +/// \param [in] initialData Optional pointer to the initializing data for the resource. +/// \param [out] texture3D Returned pointer to the resource. +/// \param [in] transferType The transfer behavior. +/// \param [in] transferEngine The transfer engine to use. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_CreateTexture3D( AGSContext* context, const D3D11_TEXTURE3D_DESC* desc, const D3D11_SUBRESOURCE_DATA* initialData, ID3D11Texture3D** texture3D, AGSAfrTransferType transferType, AGSAfrTransferEngine transferEngine ); + +/// +/// Function to notify the driver that we have finished writing to the resource this frame. +/// This will initiate a transfer for AGS_AFR_TRANSFER_1STEP_P2P, +/// AGS_AFR_TRANSFER_2STEP_NO_BROADCAST, and AGS_AFR_TRANSFER_2STEP_WITH_BROADCAST. +/// +/// \param [in] context Pointer to a context. +/// \param [in] resource Pointer to the resource. +/// \param [in] transferRegions An array of transfer regions (can be null to specify the whole area). +/// \param [in] subresourceArray An array of subresource indices (can be null to specify all subresources). +/// \param [in] numSubresources The number of subresources in subresourceArray OR number of transferRegions. Use 0 to specify ALL subresources and one transferRegion (which may be null if specifying the whole area). +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NotifyResourceEndWrites( AGSContext* context, ID3D11Resource* resource, const D3D11_RECT* transferRegions, const unsigned int* subresourceArray, unsigned int numSubresources ); + +/// +/// This will notify the driver that the app will begin read/write access to the resource. +/// +/// \param [in] context Pointer to a context. +/// \param [in] resource Pointer to the resource. 
+/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NotifyResourceBeginAllAccess( AGSContext* context, ID3D11Resource* resource ); + +/// +/// This is used for AGS_AFR_TRANSFER_1STEP_P2P to notify when it is safe to initiate a transfer. +/// This call in frame N-(NumGpus-1) allows a 1 step P2P in frame N to start. +/// This should be called after agsDriverExtensionsDX11_NotifyResourceEndWrites. +/// +/// \param [in] context Pointer to a context. +/// \param [in] resource Pointer to the resource. +/// +AMD_AGS_API AGSReturnCode agsDriverExtensionsDX11_NotifyResourceEndAllAccess( AGSContext* context, ID3D11Resource* resource ); + +/// @} +/// @} + +#endif // AGS_EXCLUDE_DIRECTX_11 + +/// \defgroup typedefs Function pointer typedefs +/// List of function pointer typedefs for the API +/// @{ + +typedef AMD_AGS_API AGSDriverVersionResult (*AGS_CHECKDRIVERVERSION)( const char*, unsigned int ); ///< \ref agsCheckDriverVersion +typedef AMD_AGS_API int (*AGS_GETVERSIONNUMBER)(); ///< \ref agsGetVersionNumber +typedef AMD_AGS_API AGSReturnCode (*AGS_INITIALIZE)( int, const AGSConfiguration*, AGSContext**, AGSGPUInfo* ); ///< \ref agsInitialize +typedef AMD_AGS_API AGSReturnCode (*AGS_DEINITIALIZE)( AGSContext* ); ///< \ref agsDeInitialize +typedef AMD_AGS_API AGSReturnCode (*AGS_GETGPUINFO)( AGSContext*, AGSGPUInfo* ); ///< \ref agsGetGPUInfo +typedef AMD_AGS_API AGSReturnCode (*AGS_SETDISPLAYMODE)( AGSContext*, int, int, const AGSDisplaySettings* ); ///< \ref agsSetDisplayMode +#if !defined (AGS_EXCLUDE_DIRECTX_12) +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_CREATEDEVICE)( AGSContext*, const AGSDX12DeviceCreationParams*, const AGSDX12ExtensionParams*, AGSDX12ReturnedParams* ); ///< \ref agsDriverExtensionsDX12_CreateDevice +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_DESTROYDEVICE)( AGSContext*, ID3D12Device*, unsigned int* ); ///< \ref agsDriverExtensionsDX12_DestroyDevice +typedef AMD_AGS_API AGSReturnCode 
(*AGS_DRIVEREXTENSIONSDX12_PUSHMARKER)( AGSContext*, ID3D12GraphicsCommandList*, const char* ); ///< \ref agsDriverExtensionsDX12_PushMarker +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_POPMARKER)( AGSContext*, ID3D12GraphicsCommandList* ); ///< \ref agsDriverExtensionsDX12_PopMarker +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX12_SETMARKER)( AGSContext*, ID3D12GraphicsCommandList*, const char* ); ///< \ref agsDriverExtensionsDX12_SetMarker +#endif +#if !defined (AGS_EXCLUDE_DIRECTX_11) +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATEDEVICE)( AGSContext*, const AGSDX11DeviceCreationParams*, const AGSDX11ExtensionParams*, AGSDX11ReturnedParams* ); ///< \ref agsDriverExtensionsDX11_CreateDevice +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_DESTROYDEVICE)( AGSContext*, ID3D11Device*, unsigned int*, ID3D11DeviceContext*, unsigned int* ); ///< \ref agsDriverExtensionsDX11_DestroyDevice +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_WRITEBREADCRUMB)( AGSContext*, const AGSBreadcrumbMarker* ); ///< \ref agsDriverExtensionsDX11_WriteBreadcrumb +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_IASETPRIMITIVETOPOLOGY)( AGSContext*, enum D3D_PRIMITIVE_TOPOLOGY ); ///< \ref agsDriverExtensionsDX11_IASetPrimitiveTopology +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_BEGINUAVOVERLAP)( AGSContext*, ID3D11DeviceContext* ); ///< \ref agsDriverExtensionsDX11_BeginUAVOverlap +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_ENDUAVOVERLAP)( AGSContext*, ID3D11DeviceContext* ); ///< \ref agsDriverExtensionsDX11_EndUAVOverlap +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETDEPTHBOUNDS)( AGSContext*, ID3D11DeviceContext*, bool, float, float ); ///< \ref agsDriverExtensionsDX11_SetDepthBounds +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINSTANCEDINDIRECT)( AGSContext*, ID3D11DeviceContext*, unsigned int, ID3D11Buffer*, 
unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawInstancedIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINDEXEDINSTANCEDINDIRECT)( AGSContext*, ID3D11DeviceContext*, unsigned int, ID3D11Buffer*, unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINSTANCEDINDIRECTCOUNTINDIRECT)( AGSContext*, ID3D11DeviceContext*, ID3D11Buffer*, unsigned int, ID3D11Buffer*, unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawInstancedIndirectCountIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINDEXEDINSTANCEDINDIRECTCOUNTINDIRECT)( AGSContext*, ID3D11DeviceContext*, ID3D11Buffer*, unsigned int, ID3D11Buffer*, unsigned int, unsigned int ); ///< \ref agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirectCountIndirect +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETMAXASYNCCOMPILETHREADCOUNT)( AGSContext*, unsigned int ); ///< \ref agsDriverExtensionsDX11_SetMaxAsyncCompileThreadCount +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NUMPENDINGASYNCOMPILEJOBS)( AGSContext*, unsigned int* ); ///< \ref agsDriverExtensionsDX11_NumPendingAsyncCompileJobs +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETDISKSHADERCACHEENABLED)( AGSContext*, int ); ///< \ref agsDriverExtensionsDX11_SetDiskShaderCacheEnabled +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETVIEWBROADCASTMASKS)( AGSContext*, unsigned long long, unsigned long long, int ); ///< \ref agsDriverExtensionsDX11_SetViewBroadcastMasks +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_GETMAXCLIPRECTS)( AGSContext*, unsigned int* ); ///< \ref agsDriverExtensionsDX11_GetMaxClipRects +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_SETCLIPRECTS)( AGSContext*, unsigned int, const AGSClipRect* ); ///< \ref 
agsDriverExtensionsDX11_SetClipRects +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATEBUFFER)( AGSContext*, const D3D11_BUFFER_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Buffer**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateBuffer +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATETEXTURE1D)( AGSContext*, const D3D11_TEXTURE1D_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Texture1D**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateTexture1D +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATETEXTURE2D)( AGSContext*, const D3D11_TEXTURE2D_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Texture2D**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateTexture2D +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_CREATETEXTURE3D)( AGSContext*, const D3D11_TEXTURE3D_DESC*, const D3D11_SUBRESOURCE_DATA*, ID3D11Texture3D**, AGSAfrTransferType, AGSAfrTransferEngine ); ///< \ref agsDriverExtensionsDX11_CreateTexture3D +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NOTIFYRESOURCEENDWRITES)( AGSContext*, ID3D11Resource*, const D3D11_RECT*, const unsigned int*, unsigned int ); ///< \ref agsDriverExtensionsDX11_NotifyResourceEndWrites +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NOTIFYRESOURCEBEGINALLACCESS)( AGSContext*, ID3D11Resource* ); ///< \ref agsDriverExtensionsDX11_NotifyResourceBeginAllAccess +typedef AMD_AGS_API AGSReturnCode (*AGS_DRIVEREXTENSIONSDX11_NOTIFYRESOURCEENDALLACCESS)( AGSContext*, ID3D11Resource* ); ///< \ref agsDriverExtensionsDX11_NotifyResourceEndAllAccess +#endif +/// @} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // AMD_AGS_H diff --git a/Source/ThirdParty/nvapi/LICENSE.txt b/Source/ThirdParty/nvapi/License.txt similarity index 100% rename from Source/ThirdParty/nvapi/LICENSE.txt rename to Source/ThirdParty/nvapi/License.txt diff --git 
a/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs b/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs new file mode 100644 index 000000000..60be17f0b --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs @@ -0,0 +1,46 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using Flax.Build; +using System.IO; + +namespace Flax.Deps.Dependencies +{ + /// <summary> + /// AMD GPU Services (AGS) library + /// https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK + /// </summary> + /// <seealso cref="Dependency" /> + class AGS : Dependency + { + /// <inheritdoc /> + public override TargetPlatform[] Platforms + { + get => new[] { TargetPlatform.Windows }; + } + + /// <inheritdoc /> + public override void Build(BuildOptions options) + { + var root = options.IntermediateFolder; + var moduleFolder = Path.Combine(options.ThirdPartyFolder, "AGS"); + + // Get the SDK (cloned into the intermediate folder) + CloneGitRepoFast(root, "https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK.git"); + + // Copy the x64 import library and DLL straight from the cloned SDK repo (no compile step is run here) + foreach (var platform in options.Platforms) + { + BuildStarted(platform); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + Utilities.FileCopy(Path.Combine(root, "ags_lib/lib/amd_ags_x64.lib"), Path.Combine(depsFolder, "amd_ags_x64.lib")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/lib/amd_ags_x64.dll"), Path.Combine(depsFolder, "amd_ags_x64.dll")); + } + + // Copy license and header files + Utilities.FileCopy(Path.Combine(root, "LICENSE.txt"), Path.Combine(moduleFolder, "LICENSE.txt")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/inc/amd_ags.h"), Path.Combine(moduleFolder, "amd_ags.h")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/hlsl/ags_shader_intrinsics_dx11.hlsl"), Path.Combine(moduleFolder, "ags_shader_intrinsics_dx11.hlsl")); + Utilities.FileCopy(Path.Combine(root, "ags_lib/hlsl/ags_shader_intrinsics_dx12.hlsl"), Path.Combine(moduleFolder, "ags_shader_intrinsics_dx12.hlsl")); + } + } +} diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs index 
68ef2eaf7..d1d94b4c1 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs @@ -36,7 +36,7 @@ namespace Flax.Deps.Dependencies } // Copy license and header files - Utilities.FileCopy(Path.Combine(root, "License.txt"), Path.Combine(moduleFolder, "LICENSE.txt")); + Utilities.FileCopy(Path.Combine(root, "License.txt"), Path.Combine(moduleFolder, "License.txt")); var files = new[] { "nvHLSLExtns.h",